/source/otp_src_R14B02/lib/edoc/src/edoc_scanner.erl
Erlang | 366 lines | 252 code | 40 blank | 74 comment | 1 complexity | 3289d4ef0ef93d3552c73a71e960aca5 MD5 | raw file
Possible License(s): Apache-2.0, BSD-3-Clause
- %% ``The contents of this file are subject to the Erlang Public License,
- %% Version 1.1, (the "License"); you may not use this file except in
- %% compliance with the License. You should have received a copy of the
- %% Erlang Public License along with this software. If not, it can be
- %% retrieved via the world wide web at http://www.erlang.org/.
- %%
- %% Software distributed under the License is distributed on an "AS IS"
- %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- %% the License for the specific language governing rights and
- %% limitations under the License.
- %%
- %% The Initial Developer of the Original Code is Ericsson Utvecklings
- %% AB. Portions created by Ericsson are Copyright 1999, Ericsson
- %% Utvecklings AB. All Rights Reserved.''
- %%
- %% $Id: $
- %%
- %% @private
- %% @copyright Richard Carlsson 2001-2003. Portions created by Ericsson
- %% are Copyright 1999, Ericsson Utvecklings AB. All Rights Reserved.
- %% @author Richard Carlsson <richardc@it.uu.se>
- %% @see edoc
- %% @end
- %% @doc Tokeniser for EDoc. Based on the Erlang standard library module
- %% {@link //stdlib/erl_scan}.
- -module(edoc_scanner).
- %% NOTE: the interface to this module is ancient and should be updated.
- %% Please do not regard these exported functions as stable. Their
- %% behaviour is described in the documentation of the module `erl_scan'.
- %%
- %% Since there are no `full stop' tokens in EDoc specifications, the
- %% `tokens' function *always* returns `{more, Continuation}' unless an
- %% error occurs.
- -export([string/1,string/2,format_error/1]).
- -import(lists, [reverse/1]).
%% string(Cs) -> string(Cs, 1)
%% string(Cs, StartPos) -> {ok, Tokens, StartPos} | {error, ErrorInfo, StartPos}
%%  Tokenise the character list Cs, starting at position StartPos
%%  (1 when omitted). Note that the third element of the result is the
%%  start position that was passed in, not the position at which
%%  scanning stopped.
string(Cs) ->
    DefaultStart = 1,
    string(Cs, DefaultStart).

string(Cs, StartPos) ->
    Scanned = scan(Cs, StartPos),
    case Scanned of
        {error, Reason} ->
            {error, Reason, StartPos};
        {ok, Tokens} ->
            {ok, Tokens, StartPos}
    end.
%% format_error(ErrorDescriptor) -> Chars
%%  Turn an error descriptor into a human-readable message. The result
%%  is character data and is not necessarily a flat string: some clauses
%%  return the (possibly deep) output of io_lib functions. Descriptors
%%  that are not recognised are simply printed as terms.
format_error(char) ->
    "unterminated character";
format_error(scan) ->
    "premature end";
format_error(float) ->
    "bad float";
format_error({illegal, What}) ->
    io_lib:fwrite("illegal ~w", [What]);
format_error({base, N}) ->
    io_lib:fwrite("illegal base '~w'", [N]);
format_error({string, Q, Start}) ->
    %% Start is printed as a string literal delimited by the quote char Q.
    Quoted = io_lib:write_string(Start, Q),
    ["unterminated string starting with " ++ Quoted];
format_error(Unknown) ->
    io_lib:write(Unknown).
%% reserved(Word) -> boolean()
%%  Reserved words are scanned as keyword tokens rather than atoms.
%%  `where' is the only reserved word.
reserved(Word) ->
    Word =:= 'where'.
%% scan(CharList, StartPos)
%%  Entry point of the tokeniser: turn a list of characters into a list
%%  of tokens.
%%
%%  Tokens are pushed onto a stack while scanning (cheaper than
%%  appending) and the stack is reversed once the input is exhausted.
%%  Scanning starts here with an empty stack.
%%
%%  Returns:
%%	{ok,[Tok]}
%%	{error,{ErrorPos,edoc_scanner,What}}
scan(Chars, StartPos) ->
    EmptyStack = [],
    scan1(Chars, EmptyStack, StartPos).
%% scan1(Characters, TokenStack, Position)
%%  Scan a list of characters into tokens.
%%
%%  Characters is the remaining input, TokenStack holds the tokens found
%%  so far in reverse order, and Position is incremented by one for every
%%  newline consumed here. The clause order matters: specific and
%%  multi-character forms must be tried before the final catch-all
%%  punctuation clause.
%%
%%  Returns {ok, Tokens} when the input is exhausted, or the error term
%%  produced for the first token that could not be scanned.
scan1([$\n|Cs], Toks, Pos) ->                           % Newline
    scan1(Cs, Toks, Pos+1);
scan1([C|Cs], Toks, Pos) when C >= 0, C =< $\s ->       % Skip blanks
    scan1(Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $a, C =< $z ->       % Unquoted atom
    scan_atom(C, Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $0, C =< $9 ->       % Numbers
    scan_number(C, Cs, Toks, Pos);
scan1([$-,C|Cs], Toks, Pos) when C >= $0, C =< $9 ->    % Signed numbers
    scan_signed_number($-, C, Cs, Toks, Pos);
scan1([$+,C|Cs], Toks, Pos) when C >= $0, C =< $9 ->    % Signed numbers
    scan_signed_number($+, C, Cs, Toks, Pos);
scan1([C|Cs], Toks, Pos) when C >= $A, C =< $Z ->       % Variables
    scan_variable(C, Cs, Toks, Pos);
scan1([$_|Cs], Toks, Pos) ->                            % Variables
    scan_variable($_, Cs, Toks, Pos);
scan1([$$|Cs], Toks, Pos) ->                            % Character constant
    case scan_char_const(Cs, Toks, Pos) of
        {ok, Result} ->
            {ok, Result};
        {error, truncated_char} ->
            scan_error(char, Pos);
        {error, illegal_character} ->
            scan_error({illegal, char}, Pos);
        {error, {_ErrPos, _Module, _What}} = Error ->
            %% scan_char_const/3 goes on scanning the rest of the input
            %% after the character, so a failure in a *later* token comes
            %% back through here already in its final form. Pass it on
            %% unchanged instead of crashing with case_clause.
            Error
    end;
scan1([$'|Cs0], Toks, Pos) ->                           % Quoted atom
    case scan_string(Cs0, $', Pos) of
        {S,Cs1,Pos1} ->
            %% Only the conversion itself is protected; the recursive
            %% call in the `of' branch stays outside the try.
            try list_to_atom(S) of
                A ->
                    scan1(Cs1, [{atom,Pos,A}|Toks], Pos1)
            catch
                error:_ ->
                    scan_error({illegal,atom}, Pos)
            end;
        {error, premature_end} ->
            scan_error({string,$',Cs0}, Pos);
        {error, truncated_char} ->
            scan_error(char, Pos);
        {error, illegal_character} ->
            scan_error({illegal, atom}, Pos)
    end;
scan1([$"|Cs0], Toks, Pos) ->                           % String
    case scan_string(Cs0, $", Pos) of
        {S,Cs1,Pos1} ->
            case Toks of
                [{string, Pos0, S0} | Toks1] ->
                    %% The previous token is also a string: merge the two
                    %% literals into one token, keeping the first position.
                    scan1(Cs1, [{string, Pos0, S0 ++ S} | Toks1],
                          Pos1);
                _ ->
                    scan1(Cs1, [{string,Pos,S}|Toks], Pos1)
            end;
        {error, premature_end} ->
            scan_error({string,$",Cs0}, Pos);
        {error, truncated_char} ->
            scan_error(char, Pos);
        {error, illegal_character} ->
            scan_error({illegal, string}, Pos)
    end;
%% Punctuation characters and operators, first recognise multiples.
scan1([$<,$<|Cs], Toks, Pos) ->
    scan1(Cs, [{'<<',Pos}|Toks], Pos);
scan1([$>,$>|Cs], Toks, Pos) ->
    scan1(Cs, [{'>>',Pos}|Toks], Pos);
scan1([$-,$>|Cs], Toks, Pos) ->
    scan1(Cs, [{'->',Pos}|Toks], Pos);
scan1([$:,$:|Cs], Toks, Pos) ->
    scan1(Cs, [{'::',Pos}|Toks], Pos);
scan1([$/,$/|Cs], Toks, Pos) ->
    scan1(Cs, [{'//',Pos}|Toks], Pos);
scan1([$.,$.,$.|Cs], Toks, Pos) ->
    scan1(Cs, [{'...',Pos}|Toks], Pos);
scan1([$.,$.|Cs], Toks, Pos) ->
    scan1(Cs, [{'..',Pos}|Toks], Pos);
scan1([C|Cs], Toks, Pos) ->                             % Punctuation character
    %% Any other single character becomes a token named after itself.
    P = list_to_atom([C]),
    scan1(Cs, [{P,Pos}|Toks], Pos);
scan1([], Toks0, _Pos) ->
    %% End of input: restore the original token order.
    Toks = lists:reverse(Toks0),
    {ok,Toks}.
%% scan_variable(FirstChar, RestChars, TokenStack, Pos)
%%  Scan a name that starts with FirstChar and push it as a variable
%%  token, then continue scanning the remaining input.
%%
%%  Note that `_' is not accepted as a variable token: a lone underscore
%%  yields {an_var,Pos,'_'} instead of a {var,Pos,Name} token.
%%  A name that cannot be converted to an atom is reported as
%%  {illegal,variable}.
scan_variable(C, Cs, Toks, Pos) ->
    {RevTail, Rest} = scan_name(Cs, []),
    case [C | lists:reverse(RevTail)] of
        "_" ->
            scan1(Rest, [{an_var,Pos,'_'}|Toks], Pos);
        Name ->
            %% Only the atom conversion is protected; scanning continues
            %% from the unprotected `of' branch.
            try list_to_atom(Name) of
                A ->
                    scan1(Rest, [{var,Pos,A}|Toks], Pos)
            catch
                error:_ ->
                    scan_error({illegal,variable}, Pos)
            end
    end.
%% scan_atom(FirstChar, RestChars, TokenStack, Pos)
%%  Scan an unquoted name that starts with FirstChar and continue
%%  scanning the remaining input. Reserved words are pushed as
%%  {Word,Pos}; every other name is pushed as {atom,Pos,Name}.
%%  A name that cannot be converted to an atom is reported as
%%  {illegal,token}.
scan_atom(C, Cs, Toks, Pos) ->
    {RevTail, Rest} = scan_name(Cs, []),
    Word = [C | lists:reverse(RevTail)],
    %% Only the atom conversion is protected; the rest of the work is
    %% done in the unprotected `of' branch.
    try list_to_atom(Word) of
        A ->
            Tok = case reserved(A) of
                      true -> {A,Pos};
                      false -> {atom,Pos,A}
                  end,
            scan1(Rest, [Tok|Toks], Pos)
    catch
        error:_ ->
            scan_error({illegal,token}, Pos)
    end.
%% scan_name(Chars, Acc) -> {RevNameChars, RestChars}
%%  Consume the longest prefix of Chars that consists of name characters
%%  (see name_char/1). The consumed characters are pushed onto Acc, so
%%  they come back in reverse order; RestChars is the unconsumed input.
scan_name([], Acc) ->
    {Acc, []};
scan_name([C | Rest] = All, Acc) ->
    case name_char(C) of
        false ->
            %% C ends the name; hand back the input starting at C.
            {Acc, All};
        true ->
            scan_name(Rest, [C | Acc])
    end.
%% name_char(Char) -> boolean()
%%  True if Char may occur inside a name: ASCII letters and digits, `_',
%%  `@', and the character codes 16#C0..16#FF except 16#D7 and 16#F7.
name_char($_) -> true;
name_char($@) -> true;
name_char(C) when C >= $0, C =< $9;
                  C >= $A, C =< $Z;
                  C >= $a, C =< $z;
                  C >= 16#C0, C =< 16#DE, C /= 16#D7;
                  C >= 16#DF, C =< 16#FF, C /= 16#F7 ->
    true;
name_char(_) ->
    false.
%% scan_string(CharList, QuoteChar, Pos) ->
%%	{StringChars,RestChars,NewPos} | {error,Reason}
%%  Collect characters up to (but not including) the closing QuoteChar.
%%  The opening quote must already have been consumed by the caller.
%%  Returns {error,premature_end} if the input ends before the closing
%%  quote; any other result of scan_char/2 that is not a
%%  {Char,Rest,Pos} triple is returned unchanged.
scan_string(Cs, Quote, Pos) ->
    scan_string(Cs, [], Quote, Pos).

%% Worker: Acc holds the characters read so far, in reverse order.
scan_string([], _Acc, _Quote, _Pos) ->
    {error, premature_end};
scan_string([Quote | Rest], Acc, Quote, Pos) ->
    %% Closing quote found: the string is only assembled at this point.
    {lists:reverse(Acc), Rest, Pos};
scan_string(Chars, Acc, Quote, Pos) ->
    case scan_char(Chars, Pos) of
        {Char, Rest, NextPos} ->
            scan_string(Rest, [Char | Acc], Quote, NextPos);
        Failure ->
            Failure
    end.
%% scan_char_const(CharsAfterDollar, TokenStack, Pos)
%%  Read the character that follows a `$', push a {char,Pos,Char} token
%%  and continue scanning, so on success the result is whatever scan1/3
%%  returns for the rest of the input. A space directly after the `$'
%%  is not allowed and gives {error,illegal_character}; any other
%%  failure result from scan_char/2 is returned unchanged.
scan_char_const([$\s | _Rest], _Toks, _Pos) ->
    {error, illegal_character};
scan_char_const(Chars, Toks, Pos) ->
    case scan_char(Chars, Pos) of
        {Char, Rest, NextPos} ->
            scan1(Rest, [{char,Pos,Char} | Toks], NextPos);
        Failure ->
            Failure
    end.
- %% {Character,RestChars,NewPos} = scan_char(Chars, Pos)
- %% Read a single character from a string or character constant. The
- %% pre-scan phase has checked for errors here.