/extra/boyer-moore/boyer-moore.factor

http://github.com/abeaumont/factor · Factor · 79 lines · 56 code · 21 blank · 2 comment · 13 complexity · daa4fb458671f8dbb219bb79856a651b MD5 · raw file

  1. ! Copyright (C) 2010 Dmitry Shubin.
  2. ! See http://factorcode.org/license.txt for BSD license.
  3. USING: accessors arrays assocs kernel locals math math.order
  4. math.ranges sequences sequences.private z-algorithm ;
  5. FROM: sequences.private => change-nth-unsafe ;
  6. IN: boyer-moore
  7. <PRIVATE
  8. :: (normal-suffixes) ( i zs ss -- )
  9. i zs nth-unsafe ss
  10. [ [ i ] unless* ] change-nth-unsafe ; inline
  11. : normal-suffixes ( zs -- ss )
  12. [ length [ f <array> ] [ [1,b) ] bi ] keep pick
  13. [ (normal-suffixes) ] 2curry each ; inline
  14. :: (partial-suffixes) ( len old elt i -- len old/new old )
  15. len elt i 1 + = [ len elt - ] [ old ] if old ; inline
  16. : partial-suffixes ( zs -- ss )
  17. [ length dup ] [ <reversed> ] bi
  18. [ (partial-suffixes) ] map-index 2nip ; inline
  19. : <gs-table> ( seq -- table )
  20. z-values [ partial-suffixes ] [ normal-suffixes ] bi
  21. [ [ nip ] when* ] 2map reverse! ; inline
  22. : insert-bc-shift ( table elt len i -- table )
  23. 1 + swap - swap pick 2dup key?
  24. [ 3drop ] [ set-at ] if ; inline
  25. : <bc-table> ( seq -- table )
  26. H{ } clone swap [ length ] keep
  27. [ insert-bc-shift ] with each-index ; inline
  28. TUPLE: boyer-moore pattern bc-table gs-table ;
  29. : gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline
  30. : bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline
  31. : do-shift ( pos i c bm -- newpos )
  32. [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline
  33. : match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline
  34. :: mismatch? ( s1 s2 pos len -- i/f )
  35. len 1 - [ [ pos + s1 ] keep s2 match? not ]
  36. find-last-integer ; inline
  37. :: (search-from) ( seq from bm -- i/f )
  38. bm pattern>> :> pat
  39. pat length :> plen
  40. seq length plen - :> lim
  41. from
  42. [
  43. dup lim <=
  44. [
  45. seq pat pick plen mismatch?
  46. [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if*
  47. ] [ drop f f ] if
  48. ] loop ; inline
  49. PRIVATE>
  50. : <boyer-moore> ( pat -- bm )
  51. dup <reversed> [ <bc-table> ] [ <gs-table> ] bi
  52. boyer-moore boa ;
  53. GENERIC: search-from ( seq from obj -- i/f )
  54. M: sequence search-from
  55. dup length zero?
  56. [ 3drop 0 ] [ <boyer-moore> (search-from) ] if ;
  57. M: boyer-moore search-from (search-from) ;
  58. : search ( seq obj -- i/f ) [ 0 ] dip search-from ;