Handbook
Glossary
jaro-similarity ( a b -- n )
Vocabulary
math.similarity
Inputs
a
an
object
b
an
object
Outputs
n
an
object
Definition
USING:
bit-arrays
combinators.short-circuit
kernel
math
math.order
sequences
sequences.extras
sequences.private
;
IN:
math.similarity
! Similarity score for two sequences, computed with the Jaro formula.
!
! Inputs:  a b - two sequences (anything supporting length and nth).
! Output:  n   - the integer 0 when no elements match, otherwise the float
!                ( m/|longer| + m/|shorter| + (m - t/2)/m ) / 3
!                where m is the number of matched elements and t counts
!                matched elements that sit in a different order in the two
!                sequences (t/2 is an integer halving via 2/).
:: jaro-similarity ( a b -- n )
    ! Scan the longer sequence and search inside the shorter one;
    ! on equal lengths a stays first.
    a length b length < [ b a ] [ a b ] if :> ( longer shorter )
    longer length :> long-len
    shorter length :> short-len

    ! Half the larger length minus one, clamped at zero: how far apart
    ! two equal elements may be and still count as a match.
    long-len short-len max 2/ 1 [-] :> window

    ! One bit per element of the shorter sequence, set once it is claimed.
    short-len <bit-array> :> used

    ! Keep every element of the longer sequence that finds an unclaimed,
    ! equal partner inside its window of the shorter sequence.
    longer [| elt i |
        i window [-] integer>fixnum-strict :> lo
        i 1 + window + short-len min integer>fixnum-strict :> hi
        lo hi [| j |
            j used nth-unsafe [ f ] [
                elt j shorter nth-unsafe =
                ! Claim the partner so it cannot be matched twice.
                dup [ t j used set-nth-unsafe ] when
            ] if
        ] find-integer-from
    ] filter-index :> matched

    matched empty? [ 0 ] [
        matched length :> #hits
        ! k walks through matched in step with the claimed elements
        ! of the shorter sequence.
        0 :> k!
        shorter used [| c hit |
            hit [ c k matched nth-unsafe = not k 1 + k! ] [ f ] if
        ] 2count :> #swapped

        #hits long-len /f #hits short-len /f +
        #hits #swapped 2/ - #hits /f +
        3 /
    ] if ;