/spice-notes/manuals/OpenSpice Manual Renderer/spice-dox/src/token.p

https://github.com/Spicery/spice · Prolog · 305 lines · 267 code · 38 blank · 0 comment · 39 complexity · f3384af09048927278788f7f456ced57 MD5 · raw file

  1. ;;; -- Syntactic Properties -----------------------------------
  2. ;;;
  3. ;;; The SynProps record defines the syntactic properties
  4. ;;; associated with tokens.
  5. ;;; strength block-level flag "strong", "weak", undef
  6. ;;; child strength permitted children general predicate
  7. ;;; style fixity "x", "F", "Fx", "xF", "xFx", "FxendF" or "endF"
  8. ;;; opening bracket } pair of words that act as brackets for outfix syntax
  9. ;;; closing bracket }
  10. ;;; rank precedence level
  11. ;;; action element name
  12. ;;;
  13. defclass SynProps {
  14. synPropsStyle,
  15. synPropsOpeningBracketAux,
  16. synPropsClosingBracketAux,
  17. synPropsRank,
  18. synPropsKernel
  19. };
  20. define newAnySynProps( style, opener, closer, rank, kernel );
  21. lvars is_brackets = ( style == "FxendF" or style == "endF" );
  22. if opener then
  23. unless is_brackets then
  24. mishap( 'Invalid opener', [ ^opener ] )
  25. endunless
  26. endif;
  27. if closer then
  28. unless is_brackets then
  29. mishap( 'Invalid closer', [ ^closer ] )
  30. endunless
  31. endif;
  32. unless rank.isinteger do
  33. mishap( 'Invalid rank', [ ^rank ] )
  34. endunless;
  35. unless lmember( style, [ F x Fx xFx $ FxendF endF ] ) do
  36. mishap( 'Invalid style', [ ^style ] )
  37. endunless;
  38. unless kernel.isKernel do
  39. mishap( 'Invalid kernel', [ ^kernel ] )
  40. endunless;
  41. consSynProps( style, opener, closer, rank, kernel )
  42. enddefine;
  43. define newOpSynProps( style, rank, kernel );
  44. newAnySynProps( style, false, false, rank, kernel )
  45. enddefine;
  46. define newSingleSynProps( style, kernel );
  47. newAnySynProps( style, false, false, 0, kernel )
  48. enddefine;
  49. define newPairedSynProps( opener, closer, kernel );
  50. newAnySynProps( "FxendF", opener, closer, 0, kernel );
  51. newAnySynProps( "endF", opener, closer, 0, ClosingKeyword );
  52. enddefine;
  53. define synPropsClosingBracket( s );
  54. lvars b = s.synPropsClosingBracketAux;
  55. unless b.isword do
  56. mishap( 'Trying to use undefined closer', [% s %] )
  57. endunless;
  58. b
  59. enddefine;
  60. define synPropsOpeningBracket( s );
  61. lvars b = s.synPropsOpeningBracketAux;
  62. unless b.isword do
  63. mishap( 'Trying to use undefined closer', [ ^s ] )
  64. endunless;
  65. b
  66. enddefine;
  67. ;;; -- Token --------------------------------------------------
  68. ;;;
  69. ;;; The output of the tokenizer is a Token which is effectively
  70. ;;; a pair ( item x syntactic-properties ). The item is a word,
  71. ;;; a string, or termin.
  72. ;;;
  73. defclass Token {
  74. tokenValue,
  75. tokenSynProps
  76. };
  77. define tokenKernel =
  78. tokenSynProps <> synPropsKernel
  79. enddefine;
  80. define newToken( item, synprops );
  81. unless item.isword or item.isstring or item == termin do
  82. mishap( 'Invalid item for Token', [ ^item ] )
  83. endunless;
  84. unless synprops.isSynProps do
  85. mishap( 'Invalid SynProps for Token', [ ^synprops ] )
  86. endunless;
  87. consToken( item, synprops )
  88. enddefine;
  89. define formString( s, endsWithNewline );
  90. lconstant StrongStringProps = newSingleSynProps( "x", ParaString );
  91. lconstant WeakStringProps = newSingleSynProps( "x", String );
  92. newToken(
  93. s,
  94. if endsWithNewline then
  95. StrongStringProps
  96. else
  97. WeakStringProps
  98. endif
  99. )
  100. enddefine;
  101. define formTermin();
  102. newToken(
  103. termin,
  104. newSingleSynProps( "endF", ClosingKeyword )
  105. )
  106. enddefine;
  107. define formWord( w );
  108. newToken(
  109. w,
  110. newSingleSynProps( "x", Word )
  111. )
  112. enddefine;
  113. ;;; -- Tokenization -------------------------------------------
  114. ;;;
  115. ;;; Mode is a positive integer that counts how deeply
  116. ;;; nested we are in a quasi-quoted content. I use the phrase
  117. ;;; quasi-quoted with some trepidation, I doubt it is correct!
  118. ;;;
  119. defclass Tokenizer {
  120. tokenizerSource,
  121. tokenizerMode : pint,
  122. tokenizerDump
  123. };
  124. define newTokenizer( procedure r );
  125. consTokenizer(
  126. r, ;;; The character repeater.
  127. 0, ;;; In the normal context.
  128. [] ;;; The nested modes.
  129. )
  130. enddefine;
  131. define saveTokenizerMode( t );
  132. conspair( t.tokenizerMode, t.tokenizerDump ) -> t.tokenizerDump
  133. enddefine;
  134. define restoreTokenizerMode( t );
  135. t.tokenizerDump.dest -> t.tokenizerDump -> t.tokenizerMode;
  136. enddefine;
  137. define enterQuasiQuotingMode( t );
  138. t.tokenizerMode + 1 -> t.tokenizerMode;
  139. enddefine;
  140. constant CHAR_plain = 0;
  141. constant CHAR_digit = 1;
  142. constant CHAR_letter = 2;
  143. constant CHAR_simple = 3;
  144. constant CHAR_sign = 4;
  145. constant CHAR_termin = 5;
  146. constant CHAR_slash = 6;
  147. constant CHAR_layout = 7;
  148. constant CHAR_dollar = 8;
  149. lconstant answer = consstring(#| repeat 256 times CHAR_plain endrepeat |#);
  150. constant charTable = (
  151. lblock
  152. define lconstant setType( charType, chars, table );
  153. lvars i;
  154. for i from 1 to chars.datalength do
  155. charType -> subscrs( subscrs( i, chars ), table )
  156. endfor
  157. enddefine;
  158. setType( CHAR_digit, '0123456789', answer );
  159. setType( CHAR_simple, '(){}[];', answer );
  160. setType( CHAR_layout, '\n\t\s', answer );
  161. setType( CHAR_slash, '\\', answer );
  162. setType( CHAR_dollar, '$', answer );
  163. setType( CHAR_sign, '!@#~%^&*+-=|:<>?/', answer );
  164. setType( CHAR_letter, 'abcdefghijklmnopqrstuvwxyz', answer );
  165. setType( CHAR_letter, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', answer );
  166. answer
  167. endlblock
  168. );
  169. define peekCh( procedure r );
  170. r() ->> r()
  171. enddefine;
  172. vars procedure ( lookupWord );
  173. define nextToken( procedure r, mode );
  174. define lconstant cantExtend( ch, chType );
  175. lvars newType = subscrs( ch, charTable );
  176. not(
  177. newType == chType or
  178. ( chType == CHAR_letter and newType == CHAR_digit )
  179. )
  180. enddefine;
  181. define lconstant eatString( procedure r, ch );
  182. formString(
  183. consstring(#|
  184. ch;
  185. repeat
  186. lvars newCh = r();
  187. lvars nextCh = newCh == termin and termin or r.peekCh;
  188. quitif(
  189. newCh == termin or
  190. ( newCh == `\\` and nextCh /== `\\` ) or
  191. ( newCh == `\n` and nextCh == `\n` )
  192. );
  193. if newCh == `\\` and nextCh == `\\` then
  194. `\\`, r() -> _
  195. else
  196. newCh
  197. endif;
  198. endrepeat;
  199. newCh -> r();
  200. |#),
  201. newCh == `\n`
  202. )
  203. enddefine;
  204. ;;; Dispose of leading whitespace.
  205. repeat
  206. lvars ch = r();
  207. quitunless( ch == ` ` or ch == `\n` or ch == `\t` );
  208. endrepeat;
  209. if ch == termin then
  210. formTermin()
  211. elseif ch == `\\` then
  212. lvars nextCh = r();
  213. if `a` <= nextCh and nextCh <= `z` then
  214. consword(#|
  215. nextCh;
  216. repeat
  217. lvars newCh = r();
  218. if `a` <= newCh and newCh <= `z` do
  219. newCh
  220. else
  221. newCh -> r();
  222. quitloop
  223. endif
  224. endrepeat
  225. |#).lookupWord
  226. elseif nextCh == `\\` then
  227. eatString( r, nextCh )
  228. else
  229. consword( nextCh, 1 ).lookupWord
  230. endif;
  231. elseif mode == 0 then ;;; starts as 0, incremented by \$ operator.
  232. eatString( r, ch )
  233. elseif ch == `\"` then
  234. formString(
  235. consstring(#|
  236. ch,
  237. repeat
  238. lvars newCh = r();
  239. quitif( newCh == termin or newCh == ch or newCh == `\n` );
  240. newCh
  241. endrepeat,
  242. ch
  243. |#),
  244. false ;;; weak = not block-level
  245. )
  246. else
  247. lvars chType = subscrs( ch, charTable );
  248. lvars word = (
  249. if chType == CHAR_simple then
  250. consword( ch, 1 )
  251. else
  252. consword(#|
  253. ch;
  254. repeat
  255. lvars newCh = r();
  256. quitif( newCh == termin or cantExtend( newCh, chType ) );
  257. newCh
  258. endrepeat
  259. |#), newCh -> r()
  260. endif
  261. );
  262. word.lookupWord
  263. endif
  264. enddefine;
  265. define readToken( tokenizer );
  266. nextToken( tokenizer.tokenizerSource, tokenizer.tokenizerMode )
  267. enddefine;