Handbook
Glossary
split-sentences ( str -- seq )
Vocabulary
text-analysis
.
private
Inputs
str
an
object
Outputs
seq
an
object
Definition
USING:
kernel
regexp
sequences
sequences.extras
splitting
;
IN:
text-analysis.private
! Split a string into a sequence of sentence strings.
!
! The string is rewritten in several regexp passes that use the
! character "\x01" as an end-of-sentence marker, and is finally split
! on that marker; the pieces are then passed through `trimmed`.
!
! str: the text to split
! seq: the pieces produced by splitting on the marker, after `trimmed`
: split-sentences ( str -- seq )

    ! Stage 1: after a terminator (one of . ? ! or a run of line
    ! breaks), optionally followed by a closing quote/bracket, and
    ! before the whitespace that follows, insert the "\x01" marker.
    ! cut-when splits the match at its first character that is not in
    ! the terminator set; glue puts the marker between the two halves.
    R/ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/
    [ [ ".?!\r\n\"')]}" member? not ] cut-when "\x01" glue ]
    re-replace-with

    ! Stage 2: for every run of two or more dots, drop the last
    ! element of the match.
    ! NOTE(review): this pattern does not mention the "\x01" marker,
    ! so it shortens the ellipsis itself. Presumably the pattern was
    ! meant to end with the marker -- confirm against upstream.
    R/ (\.\.\.*)/
    [ but-last-slice ]
    re-replace-with

    ! Stage 3: dotted initialisms (e.g. "a.b." or "a.b") followed by
    ! whitespace and a lowercase letter or digit.
    ! NOTE(review): cutting at the first element equal to 1 and then
    ! appending the halves rebuilds the same sequence, so this stage
    ! looks like a no-op as written -- confirm intended behaviour.
    R/ (?:\s(?:(?:(?:\w\.){2,}\w?)|(?:\w\.\w)))(\s+[a-z0-9])/
    [ [ 1 = ] cut-when append ]
    re-replace-with

    ! Stage 4: common abbreviations followed by a period
    ! (case-insensitive). The replacement keeps only the part of the
    ! match before its first "." (46 is the code point of ".").
    ! The alternation previously contained the stray text  col','gen
    ! which matched neither "col." nor "gen."; it is now  col|gen .
    R/ (jr|mr|mrs|ms|dr|prof|sr|sen|rep|rev|gov|atty|supt|det|rev|col|gen|lt|cmdr|adm|capt|sgt|cpl|maj|dept|univ|uni|assn|bros|inc|ltd|co|corp|plc|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|sept|mon|tue|wed|thu|fri|sat|sun|vs|etc|no|esp|cf|ave|bld|blvd|cl|ct|cres|dr|rd|st)\./i
    [ 46 over index head ]
    re-replace-with

    ! Final stage: split on the marker and clean up the pieces.
    "\x01" split trimmed ;