PageRenderTime 5193ms CodeModel.GetById 43ms RepoModel.GetById 64ms app.codeStats 0ms

/source/otp_src_R14B02/lib/edoc/src/edoc_scanner.erl

https://github.com/akiernan/omnibus
Erlang | 366 lines | 252 code | 40 blank | 74 comment | 1 complexity | 3289d4ef0ef93d3552c73a71e960aca5 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause
  1. %% ``The contents of this file are subject to the Erlang Public License,
  2. %% Version 1.1, (the "License"); you may not use this file except in
  3. %% compliance with the License. You should have received a copy of the
  4. %% Erlang Public License along with this software. If not, it can be
  5. %% retrieved via the world wide web at http://www.erlang.org/.
  6. %%
  7. %% Software distributed under the License is distributed on an "AS IS"
  8. %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
  9. %% the License for the specific language governing rights and
  10. %% limitations under the License.
  11. %%
  12. %% The Initial Developer of the Original Code is Ericsson Utvecklings
  13. %% AB. Portions created by Ericsson are Copyright 1999, Ericsson
  14. %% Utvecklings AB. All Rights Reserved.''
  15. %%
  16. %% $Id: $
  17. %%
  18. %% @private
  19. %% @copyright Richard Carlsson 2001-2003. Portions created by Ericsson
  20. %% are Copyright 1999, Ericsson Utvecklings AB. All Rights Reserved.
  21. %% @author Richard Carlsson <richardc@it.uu.se>
  22. %% @see edoc
  23. %% @end
  24. %% @doc Tokeniser for EDoc. Based on the Erlang standard library module
  25. %% {@link //stdlib/erl_scan}.
  26. -module(edoc_scanner).
  27. %% NOTE: the interface to this module is ancient and should be updated.
  28. %% Please do not regard these exported functions as stable. Their
  29. %% behaviour is described in the documentation of the module `erl_scan'.
  30. %%
  31. %% Since there are no `full stop' tokens in EDoc specifications, the
  32. %% `tokens' function *always* returns `{more, Continuation}' unless an
  33. %% error occurs.
  34. -export([string/1,string/2,format_error/1]).
  35. -import(lists, [reverse/1]).
  36. string(Cs) -> string(Cs, 1).
  37. string(Cs, StartPos) ->
  38. case scan(Cs, StartPos) of
  39. {ok,Toks} -> {ok,Toks,StartPos};
  40. {error,E} -> {error,E,StartPos}
  41. end.
  42. %% format_error(Error)
  43. %% Return a string describing the error.
  44. format_error({string,Quote,Head}) ->
  45. ["unterminated string starting with " ++ io_lib:write_string(Head,Quote)];
  46. format_error({illegal,Type}) -> io_lib:fwrite("illegal ~w", [Type]);
  47. format_error(char) -> "unterminated character";
  48. format_error(scan) -> "premature end";
  49. format_error({base,Base}) -> io_lib:fwrite("illegal base '~w'", [Base]);
  50. format_error(float) -> "bad float";
  51. format_error(Other) -> io_lib:write(Other).
  52. %% Reserved words, not atoms:
  53. reserved('where') -> true;
  54. reserved(_) -> false.
  55. %% scan(CharList, StartPos)
  56. %% This takes a list of characters and tries to tokenise them.
  57. %%
  58. %% The token list is built in reverse order (in a stack) to save appending
  59. %% and then reversed when all the tokens have been collected. Most tokens
  60. %% are built in the same way.
  61. %%
  62. %% Returns:
  63. %% {ok,[Tok]}
  64. %% {error,{ErrorPos,edoc_scanner,What}}
  65. scan(Cs, Pos) ->
  66. scan1(Cs, [], Pos).
  67. %% scan1(Characters, TokenStack, Position)
  68. %% Scan a list of characters into tokens.
  69. scan1([$\n|Cs], Toks, Pos) -> % Newline
  70. scan1(Cs, Toks, Pos+1);
  71. scan1([C|Cs], Toks, Pos) when C >= 0, C =< $ -> % Skip blanks
  72. scan1(Cs, Toks, Pos);
  73. scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z -> % Unquoted atom
  74. scan_atom(C, Cs, Toks, Pos);
  75. scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 -> % Numbers
  76. scan_number(C, Cs, Toks, Pos);
  77. scan1([$-,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers
  78. scan_signed_number($-, C, Cs, Toks, Pos);
  79. scan1([$+,C| Cs], Toks, Pos) when C >= $0, C =< $9 -> % Signed numbers
  80. scan_signed_number($+, C, Cs, Toks, Pos);
  81. scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z -> % Variables
  82. scan_variable(C, Cs, Toks, Pos);
  83. scan1([$_|Cs], Toks, Pos) -> % Variables
  84. scan_variable($_, Cs, Toks, Pos);
  85. scan1([$$|Cs], Toks, Pos) -> % Character constant
  86. case scan_char_const(Cs, Toks, Pos) of
  87. {ok, Result} ->
  88. {ok, Result};
  89. {error, truncated_char} ->
  90. scan_error(char, Pos);
  91. {error, illegal_character} ->
  92. scan_error({illegal, char}, Pos)
  93. end;
  94. scan1([$'|Cs0], Toks, Pos) -> % Quoted atom
  95. case scan_string(Cs0, $', Pos) of
  96. {S,Cs1,Pos1} ->
  97. case catch list_to_atom(S) of
  98. A when is_atom(A) ->
  99. scan1(Cs1, [{atom,Pos,A}|Toks], Pos1);
  100. _Error -> scan_error({illegal,atom}, Pos)
  101. end;
  102. {error, premature_end} ->
  103. scan_error({string,$',Cs0}, Pos);
  104. {error, truncated_char} ->
  105. scan_error(char, Pos);
  106. {error, illegal_character} ->
  107. scan_error({illegal, atom}, Pos)
  108. end;
  109. scan1([$"|Cs0], Toks, Pos) -> % String
  110. case scan_string(Cs0, $", Pos) of
  111. {S,Cs1,Pos1} ->
  112. case Toks of
  113. [{string, Pos0, S0} | Toks1] ->
  114. scan1(Cs1, [{string, Pos0, S0 ++ S} | Toks1],
  115. Pos1);
  116. _ ->
  117. scan1(Cs1, [{string,Pos,S}|Toks], Pos1)
  118. end;
  119. {error, premature_end} ->
  120. scan_error({string,$",Cs0}, Pos);
  121. {error, truncated_char} ->
  122. scan_error(char, Pos);
  123. {error, illegal_character} ->
  124. scan_error({illegal, string}, Pos)
  125. end;
  126. %% Punctuation characters and operators, first recognise multiples.
  127. scan1([$<,$<|Cs], Toks, Pos) ->
  128. scan1(Cs, [{'<<',Pos}|Toks], Pos);
  129. scan1([$>,$>|Cs], Toks, Pos) ->
  130. scan1(Cs, [{'>>',Pos}|Toks], Pos);
  131. scan1([$-,$>|Cs], Toks, Pos) ->
  132. scan1(Cs, [{'->',Pos}|Toks], Pos);
  133. scan1([$:,$:|Cs], Toks, Pos) ->
  134. scan1(Cs, [{'::',Pos}|Toks], Pos);
  135. scan1([$/,$/|Cs], Toks, Pos) ->
  136. scan1(Cs, [{'//',Pos}|Toks], Pos);
  137. scan1([$.,$.,$.|Cs], Toks, Pos) ->
  138. scan1(Cs, [{'...',Pos}|Toks], Pos);
  139. scan1([$.,$.|Cs], Toks, Pos) ->
  140. scan1(Cs, [{'..',Pos}|Toks], Pos);
  141. scan1([C|Cs], Toks, Pos) -> % Punctuation character
  142. P = list_to_atom([C]),
  143. scan1(Cs, [{P,Pos}|Toks], Pos);
  144. scan1([], Toks0, _Pos) ->
  145. Toks = reverse(Toks0),
  146. {ok,Toks}.
  147. %% Note that `_' is not accepted as a variable token.
  148. scan_variable(C, Cs, Toks, Pos) ->
  149. {Wcs,Cs1} = scan_name(Cs, []),
  150. W = [C|reverse(Wcs)],
  151. case W of
  152. "_" ->
  153. scan1(Cs1, [{an_var,Pos,'_'}|Toks], Pos);
  154. _ ->
  155. case catch list_to_atom(W) of
  156. A when is_atom(A) ->
  157. scan1(Cs1, [{var,Pos,A}|Toks], Pos);
  158. _ ->
  159. scan_error({illegal,variable}, Pos)
  160. end
  161. end.
  162. scan_atom(C, Cs, Toks, Pos) ->
  163. {Wcs,Cs1} = scan_name(Cs, []),
  164. W = [C|reverse(Wcs)],
  165. case catch list_to_atom(W) of
  166. A when is_atom(A) ->
  167. case reserved(A) of
  168. true ->
  169. scan1(Cs1, [{A,Pos}|Toks], Pos);
  170. false ->
  171. scan1(Cs1, [{atom,Pos,A}|Toks], Pos)
  172. end;
  173. _ ->
  174. scan_error({illegal,token}, Pos)
  175. end.
  176. %% scan_name(Cs) -> lists:splitwith(fun (C) -> name_char(C) end, Cs).
  177. scan_name([C|Cs], Ncs) ->
  178. case name_char(C) of
  179. true ->
  180. scan_name(Cs, [C|Ncs]);
  181. false ->
  182. {Ncs,[C|Cs]} % Must rebuild here, sigh!
  183. end;
  184. scan_name([], Ncs) ->
  185. {Ncs,[]}.
  186. name_char(C) when C >= $a, C =< $z -> true;
  187. name_char(C) when C >= $\337, C =< $\377, C /= $\367 -> true;
  188. name_char(C) when C >= $A, C =< $Z -> true;
  189. name_char(C) when C >= $\300, C =< $\336, C /= $\327 -> true;
  190. name_char(C) when C >= $0, C =< $9 -> true;
  191. name_char($_) -> true;
  192. name_char($@) -> true;
  193. name_char(_) -> false.
  194. %% scan_string(CharList, QuoteChar, Pos) ->
  195. %% {StringChars,RestChars, NewPos}
  196. scan_string(Cs, Quote, Pos) ->
  197. scan_string(Cs, [], Quote, Pos).
  198. scan_string([Quote|Cs], Scs, Quote, Pos) ->
  199. {reverse(Scs),Cs,Pos};
  200. scan_string([], _Scs, _Quote, _Pos) ->
  201. {error, premature_end};
  202. scan_string(Cs0, Scs, Quote, Pos) ->
  203. case scan_char(Cs0, Pos) of
  204. {C,Cs,Pos1} ->
  205. %% Only build the string here
  206. scan_string(Cs, [C|Scs], Quote, Pos1);
  207. Error ->
  208. Error
  209. end.
  210. %% Note that space characters are not allowed
  211. scan_char_const([$\040 | _Cs0], _Toks, _Pos) ->
  212. {error, illegal_character};
  213. scan_char_const(Cs0, Toks, Pos) ->
  214. case scan_char(Cs0, Pos) of
  215. {C,Cs,Pos1} ->
  216. scan1(Cs, [{char,Pos,C}|Toks], Pos1);
  217. Error ->
  218. Error
  219. end.
  220. %% {Character,RestChars,NewPos} = scan_char(Chars, Pos)
  221. %% Read a single character from a string or character constant. The
  222. %% pre-scan phase has checked for errors here.
  223. %% Note that control characters are not allowed.
  224. scan_char([$\\|Cs], Pos) ->
  225. scan_escape(Cs, Pos);
  226. scan_char([C | _Cs], _Pos) when C =< 16#1f ->
  227. {error, illegal_character};
  228. scan_char([C|Cs], Pos) ->
  229. {C,Cs,Pos};
  230. scan_char([], _Pos) ->
  231. {error, truncated_char}.
  232. %% The following conforms to Standard Erlang escape sequences.
  233. scan_escape([O1, O2, O3 | Cs], Pos) when % \<1-3> octal digits
  234. O1 >= $0, O1 =< $3, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
  235. Val = (O1*8 + O2)*8 + O3 - 73*$0,
  236. {Val,Cs,Pos};
  237. scan_escape([O1, O2 | Cs], Pos) when
  238. O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7 ->
  239. Val = (O1*8 + O2) - 9*$0,
  240. {Val,Cs,Pos};
  241. scan_escape([O1 | Cs], Pos) when
  242. O1 >= $0, O1 =< $7 ->
  243. {O1 - $0,Cs,Pos};
  244. scan_escape([$^, C | Cs], Pos) -> % \^X -> CTL-X
  245. if C >= $\100, C =< $\137 ->
  246. {C - $\100,Cs,Pos};
  247. true -> {error, illegal_control_character}
  248. end;
  249. scan_escape([C | Cs], Pos) ->
  250. case escape_char(C) of
  251. C1 when C1 > $\000 -> {C1,Cs,Pos};
  252. _ -> {error, undefined_escape_sequence}
  253. end;
  254. scan_escape([], _Pos) ->
  255. {error, truncated_char}.
  256. %% Note that we return $\000 for undefined escapes.
  257. escape_char($b) -> $\010; % \b = BS
  258. escape_char($d) -> $\177; % \d = DEL
  259. escape_char($e) -> $\033; % \e = ESC
  260. escape_char($f) -> $\014; % \f = FF
  261. escape_char($n) -> $\012; % \n = LF
  262. escape_char($r) -> $\015; % \r = CR
  263. escape_char($s) -> $\040; % \s = SPC
  264. escape_char($t) -> $\011; % \t = HT
  265. escape_char($v) -> $\013; % \v = VT
  266. escape_char($\\) -> $\134; % \\ = \
  267. escape_char($') -> $\047; % \' = '
  268. escape_char($") -> $\042; % \" = "
  269. escape_char(_C) -> $\000.
  270. %% scan_number(Char, CharList, TokenStack, Pos)
  271. %% We handle sign and radix notation:
  272. %% [+-]<digits> - the digits in base [+-]10
  273. %% [+-]<digits>.<digits>
  274. %% [+-]<digits>.<digits>E+-<digits>
  275. %% [+-]<digits>#<digits> - the digits read in base [+-]B
  276. %%
  277. %% Except for explicitly based integers we build a list of all the
  278. %% characters and then use list_to_integer/1 or list_to_float/1 to
  279. %% generate the value.
  280. %% SPos == Start position
  281. %% CPos == Current position
  282. scan_number(C, Cs0, Toks, Pos) ->
  283. {Ncs,Cs,Pos1} = scan_integer(Cs0, [C], Pos),
  284. scan_after_int(Cs, Ncs, Toks, Pos, Pos1).
  285. scan_signed_number(S, C, Cs0, Toks, Pos) ->
  286. {Ncs,Cs,Pos1} = scan_integer(Cs0, [C, S], Pos),
  287. scan_after_int(Cs, Ncs, Toks, Pos, Pos1).
  288. scan_integer([C|Cs], Stack, Pos) when C >= $0, C =< $9 ->
  289. scan_integer(Cs, [C|Stack], Pos);
  290. scan_integer(Cs, Stack, Pos) ->
  291. {Stack,Cs,Pos}.
  292. scan_after_int([$.,C|Cs0], Ncs0, Toks, SPos, CPos) when C >= $0, C =< $9 ->
  293. {Ncs,Cs,CPos1} = scan_integer(Cs0, [C,$.|Ncs0], CPos),
  294. scan_after_fraction(Cs, Ncs, Toks, SPos, CPos1);
  295. scan_after_int(Cs, Ncs, Toks, SPos, CPos) ->
  296. N = list_to_integer(reverse(Ncs)),
  297. scan1(Cs, [{integer,SPos,N}|Toks], CPos).
  298. scan_after_fraction([$E|Cs], Ncs, Toks, SPos, CPos) ->
  299. scan_exponent(Cs, [$E|Ncs], Toks, SPos, CPos);
  300. scan_after_fraction([$e|Cs], Ncs, Toks, SPos, CPos) ->
  301. scan_exponent(Cs, [$e|Ncs], Toks, SPos, CPos);
  302. scan_after_fraction(Cs, Ncs, Toks, SPos, CPos) ->
  303. case catch list_to_float(reverse(Ncs)) of
  304. N when is_float(N) ->
  305. scan1(Cs, [{float,SPos,N}|Toks], CPos);
  306. _Error -> scan_error({illegal,float}, SPos)
  307. end.
  308. %% scan_exponent(CharList, NumberCharStack, TokenStack, StartPos, CurPos)
  309. %% Generate an error here if E{+|-} not followed by any digits.
  310. scan_exponent([$+|Cs], Ncs, Toks, SPos, CPos) ->
  311. scan_exponent1(Cs, [$+|Ncs], Toks, SPos, CPos);
  312. scan_exponent([$-|Cs], Ncs, Toks, SPos, CPos) ->
  313. scan_exponent1(Cs, [$-|Ncs], Toks, SPos, CPos);
  314. scan_exponent(Cs, Ncs, Toks, SPos, CPos) ->
  315. scan_exponent1(Cs, Ncs, Toks, SPos, CPos).
  316. scan_exponent1([C|Cs0], Ncs0, Toks, SPos, CPos) when C >= $0, C =< $9 ->
  317. {Ncs,Cs,CPos1} = scan_integer(Cs0, [C|Ncs0], CPos),
  318. case catch list_to_float(reverse(Ncs)) of
  319. N when is_float(N) ->
  320. scan1(Cs, [{float,SPos,N}|Toks], CPos1);
  321. _Error -> scan_error({illegal,float}, SPos)
  322. end;
  323. scan_exponent1(_, _, _, _, CPos) ->
  324. scan_error(float, CPos).
  325. scan_error(In, Pos) ->
  326. {error,{Pos,edoc_scanner,In}}.