PageRenderTime 27ms CodeModel.GetById 11ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/extra/boyer-moore/boyer-moore.factor

http://github.com/abeaumont/factor
Unknown | 79 lines | 58 code | 21 blank | 0 comment | 0 complexity | daa4fb458671f8dbb219bb79856a651b MD5 | raw file
 1! Copyright (C) 2010 Dmitry Shubin.
 2! See http://factorcode.org/license.txt for BSD license.
 3USING: accessors arrays assocs kernel locals math math.order
 4math.ranges sequences sequences.private z-algorithm ;
 5FROM: sequences.private => change-nth-unsafe ;
 6IN: boyer-moore
 7
 8<PRIVATE
 9
10:: (normal-suffixes) ( i zs ss -- )
11    i zs nth-unsafe ss
12    [ [ i ] unless* ] change-nth-unsafe ; inline
13
14: normal-suffixes ( zs -- ss )
15    [ length [ f <array> ] [ [1,b) ] bi ] keep pick
16    [ (normal-suffixes) ] 2curry each ; inline
17
18:: (partial-suffixes) ( len old elt i -- len old/new old )
19    len elt i 1 + = [ len elt - ] [ old ] if old ; inline
20
21: partial-suffixes ( zs -- ss )
22    [ length dup ] [ <reversed> ] bi
23    [ (partial-suffixes) ] map-index 2nip ; inline
24
25: <gs-table> ( seq -- table )
26    z-values [ partial-suffixes ] [ normal-suffixes ] bi
27    [ [ nip ] when* ] 2map reverse! ; inline
28
29: insert-bc-shift ( table elt len i -- table )
30    1 + swap - swap pick 2dup key?
31    [ 3drop ] [ set-at ] if ; inline
32
33: <bc-table> ( seq -- table )
34    H{ } clone swap [ length ] keep
35    [ insert-bc-shift ] with each-index ; inline
36
37TUPLE: boyer-moore pattern bc-table gs-table ;
38
39: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline
40
41: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline
42
43: do-shift ( pos i c bm -- newpos )
44    [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline
45
46: match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline
47
48:: mismatch? ( s1 s2 pos len -- i/f )
49    len 1 - [ [ pos + s1 ] keep s2 match? not ]
50    find-last-integer ; inline
51
52:: (search-from) ( seq from bm -- i/f )
53    bm pattern>>      :> pat
54    pat length        :> plen
55    seq length plen - :> lim
56    from
57    [
58        dup lim <=
59        [
60            seq pat pick plen mismatch?
61            [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if*
62        ] [ drop f f ] if
63    ] loop ; inline
64
65PRIVATE>
66
67: <boyer-moore> ( pat -- bm )
68    dup <reversed> [ <bc-table> ] [ <gs-table> ] bi
69    boyer-moore boa ;
70
71GENERIC: search-from ( seq from obj -- i/f )
72
73M: sequence search-from
74    dup length zero?
75    [ 3drop 0 ] [ <boyer-moore> (search-from) ] if ;
76
77M: boyer-moore search-from (search-from) ;
78
79: search ( seq obj -- i/f ) [ 0 ] dip search-from ;