PageRenderTime 182ms CodeModel.GetById 2ms app.highlight 165ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/stdlib/test/erl_scan_SUITE.erl

https://github.com/bsmr-erlang/otp
Erlang | 1442 lines | 1166 code | 191 blank | 85 comment | 30 complexity | 56e5f28ad59df1cb626c9edb0e5f9c30 MD5 | raw file
   1%%
   2%% %CopyrightBegin%
   3%%
   4%% Copyright Ericsson AB 1998-2017. All Rights Reserved.
   5%%
   6%% Licensed under the Apache License, Version 2.0 (the "License");
   7%% you may not use this file except in compliance with the License.
   8%% You may obtain a copy of the License at
   9%%
  10%%     http://www.apache.org/licenses/LICENSE-2.0
  11%%
  12%% Unless required by applicable law or agreed to in writing, software
  13%% distributed under the License is distributed on an "AS IS" BASIS,
  14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15%% See the License for the specific language governing permissions and
  16%% limitations under the License.
  17%%
  18%% %CopyrightEnd%
  19
  20-module(erl_scan_SUITE).
  21-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
  22	 init_per_testcase/2, end_per_testcase/2,
  23	 init_per_group/2,end_per_group/2]).
  24
  25-export([error_1/1, error_2/1, iso88591/1, otp_7810/1, otp_10302/1,
  26	 otp_10990/1, otp_10992/1, otp_11807/1]).
  27
  28-import(lists, [nth/2,flatten/1]).
  29-import(io_lib, [print/1]).
  30
  31%%
  32%% Define to run outside of test server
  33%%
  34%%-define(STANDALONE,1).
  35
  36-ifdef(STANDALONE).
  37-compile(export_all).
  38-define(line, put(line, ?LINE), ).
  39-define(config(A,B),config(A,B)).
  40-define(t, test_server).
  41%% config(priv_dir, _) ->
  42%%     ".";
  43%% config(data_dir, _) ->
  44%%     ".".
  45-else.
  46-include_lib("common_test/include/ct.hrl").
  47-endif.
  48
  49init_per_testcase(_Case, Config) ->
  50    Config.
  51
  52end_per_testcase(_Case, _Config) ->
  53    ok.
  54
  55suite() ->
  56    [{ct_hooks,[ts_install_cth]},
  57     {timetrap,{minutes,20}}].
  58
  59all() -> 
  60    [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992,
  61     otp_11807].
  62
  63groups() -> 
  64    [{error, [], [error_1, error_2]}].
  65
  66init_per_suite(Config) ->
  67    Config.
  68
  69end_per_suite(_Config) ->
  70    ok.
  71
  72init_per_group(_GroupName, Config) ->
  73    Config.
  74
  75end_per_group(_GroupName, Config) ->
  76    Config.
  77
  78
  79
  80%% (OTP-2347)
  81error_1(Config) when is_list(Config) ->
  82    {error, _, _} = erl_scan:string("'a"),
  83    ok.
  84
  85%% Checks that format_error works on the error cases.
  86error_2(Config) when is_list(Config) ->
  87    lists:foreach(fun check/1, error_cases()),
  88    ok.
  89
  90error_cases() ->
  91    ["'a",
  92     "\"a",
  93     "'\\",
  94     "\"\\",
  95     "$",
  96     "$\\",
  97     "2.3e",
  98     "2.3e-",
  99     "91#9"
 100    ].
 101
 102assert_type(N, integer) when is_integer(N) ->
 103    ok;
 104assert_type(N, atom) when is_atom(N) ->
 105    ok.
 106
 107check(String) ->
 108    Error = erl_scan:string(String),
 109    check_error(Error, erl_scan).
 110
 111%%% (This should be useful for all format_error functions.)
 112check_error({error, Info, EndLine}, Module0) ->
 113    {ErrorLine, Module, Desc} = Info,
 114    true = (Module == Module0),
 115    assert_type(EndLine, integer),
 116    assert_type(ErrorLine, integer),
 117    true = (ErrorLine =< EndLine),
 118    String = lists:flatten(Module0:format_error(Desc)),
 119    true = io_lib:printable_list(String).
 120
 121%% Tests the support for ISO-8859-1 i.e Latin-1.
 122iso88591(Config) when is_list(Config) ->
 123    ok =
 124	case catch begin
 125		       %% Some atom and variable names
 126		       V1s = [,,,],
 127		       V2s = [$N,,$r],
 128		       A1s = [$h,,$r],
 129		       A2s = [,$r,$e],
 130		       %% Test parsing atom and variable characters.
 131		       {ok,Ts1,_} = erl_scan_string(V1s ++ " " ++ V2s ++
 132							"\327" ++
 133							A1s ++ " " ++ A2s),
 134		       V1s = atom_to_list(element(3, nth(1, Ts1))),
 135		       V2s = atom_to_list(element(3, nth(2, Ts1))),
 136		       A1s = atom_to_list(element(3, nth(4, Ts1))),
 137		       A2s = atom_to_list(element(3, nth(5, Ts1))),
 138		       %% Test printing atoms
 139		       A1s = flatten(print(element(3, nth(4, Ts1)))),
 140		       A2s = flatten(print(element(3, nth(5, Ts1)))),
 141		       %% Test parsing and printing strings.
 142		       S1 = V1s ++ "\327" ++ A1s ++ "\250" ++ A2s,
 143		       S1s = "\"" ++ S1 ++ "\"",
 144		       {ok,Ts2,_} = erl_scan_string(S1s),
 145		       S1 = element(3, nth(1, Ts2)),
 146		       S1s = flatten(print(element(3, nth(1, Ts2)))),
 147		       ok				%It all worked
 148		   end of
 149	    {'EXIT',R} ->				%Something went wrong!
 150		{error,R};
 151	    ok -> ok				%Aok
 152	end.
 153
 154%% OTP-7810. White spaces, comments, and more...
 155otp_7810(Config) when is_list(Config) ->
 156    ok = reserved_words(),
 157    ok = atoms(),
 158    ok = punctuations(),
 159    ok = comments(),
 160    ok = errors(),
 161    ok = integers(),
 162    ok = base_integers(),
 163    ok = floats(),
 164    ok = dots(),
 165    ok = chars(),
 166    ok = variables(),
 167    ok = eof(),
 168    ok = illegal(),
 169    ok = crashes(),
 170
 171    ok = options(),
 172    ok = token_info(),
 173    ok = column_errors(),
 174    ok = white_spaces(),
 175
 176    ok = unicode(),
 177
 178    ok = more_chars(),
 179    ok = more_options(),
 180    ok = anno_info(),
 181
 182    ok.
 183
 184reserved_words() ->
 185    L = ['after', 'begin', 'case', 'try', 'cond', 'catch',
 186         'andalso', 'orelse', 'end', 'fun', 'if', 'let', 'of',
 187         'receive', 'when', 'bnot', 'not', 'div',
 188         'rem', 'band', 'and', 'bor', 'bxor', 'bsl', 'bsr',
 189         'or', 'xor'],
 190    [begin
 191         {RW, true} = {RW, erl_scan:reserved_word(RW)},
 192         S = atom_to_list(RW),
 193         Ts = [{RW,{1,1}}],
 194         test_string(S, Ts)
 195     end || RW <- L],
 196    ok.
 197
 198
 199atoms() ->
 200    test_string("a
 201                 b", [{atom,{1,1},a},{atom,{2,18},b}]),
 202    test_string("'a b'", [{atom,{1,1},'a b'}]),
 203		test_string("a", [{atom,{1,1},a}]),
 204		test_string("a@2", [{atom,{1,1},a@2}]),
 205		test_string([39,65,200,39], [{atom,{1,1},'AÈ'}]),
 206		test_string("ärlig östen", [{atom,{1,1},ärlig},{atom,{1,7},östen}]),
 207		{ok,[{atom,_,'$a'}],{1,6}} =
 208		    erl_scan_string("'$\\a'", {1,1}),
 209		test("'$\\a'"),
 210		ok.
 211
 212punctuations() ->
 213    L = ["<<", "<-", "<=", "<", ">>", ">=", ">", "->", "--",
 214         "-", "++", "+", "=:=", "=/=", "=<", "=>", "==", "=", "/=",
 215         "/", "||", "|", ":=", "::", ":"],
 216    %% One token at a time:
 217    [begin
 218         W = list_to_atom(S),
 219         Ts = [{W,{1,1}}],
 220         test_string(S, Ts)
 221     end || S <- L],
 222    Three = ["/=:=", "<=:=", "==:=", ">=:="], % three tokens...
 223    No = Three ++ L,
 224    SL0 = [{S1++S2,{-length(S1),S1,S2}} ||
 225              S1 <- L,
 226              S2 <- L,
 227              not lists:member(S1++S2, No)],
 228    SL = family_list(SL0),
 229    %% Two tokens. When there are several answers, the one with
 230    %% the longest first token is chosen:
 231    %% [the special case "=<<" is among the tested ones]
 232    [begin
 233         W1 = list_to_atom(S1),
 234         W2 = list_to_atom(S2),
 235         Ts = [{W1,{1,1}},{W2,{1,-L2+1}}],
 236         test_string(S, Ts)
 237     end || {S,[{L2,S1,S2}|_]}  <- SL],
 238
 239    PTs1 = [{'!',{1,1}},{'(',{1,2}},{')',{1,3}},{',',{1,4}},{';',{1,5}},
 240            {'=',{1,6}},{'[',{1,7}},{']',{1,8}},{'{',{1,9}},{'|',{1,10}},
 241            {'}',{1,11}}],
 242    test_string("!(),;=[]{|}", PTs1),
 243
 244    PTs2 = [{'#',{1,1}},{'&',{1,2}},{'*',{1,3}},{'+',{1,4}},{'/',{1,5}},
 245            {':',{1,6}},{'<',{1,7}},{'>',{1,8}},{'?',{1,9}},{'@',{1,10}},
 246            {'\\',{1,11}},{'^',{1,12}},{'`',{1,13}},{'~',{1,14}}],
 247    test_string("#&*+/:<>?@\\^`~", PTs2),
 248
 249    test_string(".. ", [{'..',{1,1}}]),
 250    test_string("1 .. 2",
 251                [{integer,{1,1},1},{'..',{1,3}},{integer,{1,6},2}]),
 252    test_string("...", [{'...',{1,1}}]),
 253    ok.
 254
 255comments() ->
 256    test("a %%\n b"),
 257    {ok,[],1} = erl_scan_string("%"),
 258    test("a %%\n b"),
 259    {ok,[{atom,{1,1},a},{atom,{2,2},b}],{2,3}} =
 260        erl_scan_string("a %%\n b", {1,1}),
 261    {ok,[{atom,{1,1},a},{comment,{1,3},"%%"},{atom,{2,2},b}],{2,3}} =
 262        erl_scan_string("a %%\n b",{1,1}, [return_comments]),
 263    {ok,[{atom,{1,1},a},
 264         {white_space,{1,2}," "},
 265         {white_space,{1,5},"\n "},
 266         {atom,{2,2},b}],
 267     {2,3}} =
 268        erl_scan_string("a %%\n b",{1,1},[return_white_spaces]),
 269    {ok,[{atom,{1,1},a},
 270         {white_space,{1,2}," "},
 271         {comment,{1,3},"%%"},
 272         {white_space,{1,5},"\n "},
 273         {atom,{2,2},b}],
 274     {2,3}} = erl_scan_string("a %%\n b",{1,1},[return]),
 275    ok.
 276
 277errors() ->
 278    {error,{1,erl_scan,{string,$',"qa"}},1} = erl_scan:string("'qa"), %'
 279    {error,{{1,1},erl_scan,{string,$',"qa"}},{1,4}} = %'
 280        erl_scan:string("'qa", {1,1}, []), %'
 281    {error,{1,erl_scan,{string,$","str"}},1} = %"
 282        erl_scan:string("\"str"), %"
 283    {error,{{1,1},erl_scan,{string,$","str"}},{1,5}} = %"
 284        erl_scan:string("\"str", {1,1}, []), %"
 285    {error,{1,erl_scan,char},1} = erl_scan:string("$"),
 286    {error,{{1,1},erl_scan,char},{1,2}} = erl_scan:string("$", {1,1}, []),
 287    test_string([34,65,200,34], [{string,{1,1},""}]),
 288    test_string("\\", [{'\\',{1,1}}]),
 289    {'EXIT',_} =
 290        (catch {foo, erl_scan:string('$\\a', {1,1})}), % type error
 291    {'EXIT',_} =
 292        (catch {foo, erl_scan:tokens([], '$\\a', {1,1})}), % type error
 293
 294    "{a,tuple}" = erl_scan:format_error({a,tuple}),
 295    ok.
 296
 297integers() ->
 298    [begin
 299         I = list_to_integer(S),
 300         Ts = [{integer,{1,1},I}],
 301         test_string(S, Ts)
 302     end || S <- [[N] || N <- lists:seq($0, $9)] ++ ["2323","000"] ],
 303    ok.
 304
 305base_integers() ->
 306    [begin
 307         B = list_to_integer(BS),
 308         I = erlang:list_to_integer(S, B),
 309         Ts = [{integer,{1,1},I}],
 310         test_string(BS++"#"++S, Ts)
 311     end || {BS,S} <- [{"2","11"}, {"5","23234"}, {"12","05a"},
 312                       {"16","abcdef"}, {"16","ABCDEF"}] ],
 313
 314    {error,{1,erl_scan,{base,1}},1} = erl_scan:string("1#000"),
 315    {error,{{1,1},erl_scan,{base,1}},{1,2}} =
 316        erl_scan:string("1#000", {1,1}, []),
 317
 318    test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]),
 319
 320    [begin
 321         Str = BS ++ "#" ++ S,
 322         {error,{1,erl_scan,{illegal,integer}},1} =
 323             erl_scan:string(Str)
 324     end || {BS,S} <- [{"3","3"},{"15","f"}, {"12","c"}] ],
 325
 326    {ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"),
 327    {ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} =
 328        erl_scan_string("16#ef@", {1,1}, []),
 329    {ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} =
 330        erl_scan_string("16#eg@", {1,1}, []),
 331
 332    ok.
 333
 334floats() ->
 335    [begin
 336         F = list_to_float(FS),
 337         Ts = [{float,{1,1},F}],
 338         test_string(FS, Ts)
 339     end || FS <- ["1.0","001.17","3.31200","1.0e0","1.0E17",
 340                   "34.21E-18", "17.0E+14"]],
 341    test_string("1.e2", [{integer,{1,1},1},{'.',{1,2}},{atom,{1,3},e2}]),
 342
 343    {error,{1,erl_scan,{illegal,float}},1} =
 344        erl_scan:string("1.0e400"),
 345    {error,{{1,1},erl_scan,{illegal,float}},{1,8}} =
 346        erl_scan:string("1.0e400", {1,1}, []),
 347    [begin
 348         {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S),
 349         {error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
 350             erl_scan:string(S, {1,1}, [])
 351     end || S <- ["1.14Ea"]],
 352
 353    ok.
 354
 355dots() ->
 356    Dot = [{".",    {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,2}}},
 357           {". ",   {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
 358           {".\n",  {ok,[{dot,1}],2}, {ok,[{dot,{1,1}}],{2,1}}},
 359           {".%",   {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
 360           {".\210",{ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
 361           {".% öh",{ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,6}}},
 362           {".%\n", {ok,[{dot,1}],2}, {ok,[{dot,{1,1}}],{2,1}}},
 363           {".$",   {error,{1,erl_scan,char},1},
 364	    {error,{{1,2},erl_scan,char},{1,3}}},
 365           {".$\\", {error,{1,erl_scan,char},1},
 366                    {error,{{1,2},erl_scan,char},{1,4}}},
 367           {".a",   {ok,[{'.',1},{atom,1,a}],1},
 368	    {ok,[{'.',{1,1}},{atom,{1,2},a}],{1,3}}}
 369          ],
 370    [begin
 371         R = erl_scan_string(S),
 372         R2 = erl_scan_string(S, {1,1}, [])
 373     end || {S, R, R2} <- Dot],
 374
 375    {ok,[{dot,_}=T1],{1,2}} = erl_scan:string(".", {1,1}, text),
 376    [1, 1, "."] = token_info(T1),
 377    {ok,[{dot,_}=T2],{1,3}} = erl_scan:string(".%", {1,1}, text),
 378    [1, 1, "."] = token_info(T2),
 379    {ok,[{dot,_}=T3],{1,6}} =
 380        erl_scan:string(".% öh", {1,1}, text),
 381    [1, 1, "."] = token_info(T3),
 382    {error,{{1,2},erl_scan,char},{1,3}} = erl_scan:string(".$", {1,1}),
 383    {error,{{1,2},erl_scan,char},{1,4}} = erl_scan:string(".$\\", {1,1}),
 384
 385    test_string(". ", [{dot,{1,1}}]),
 386    test_string(".  ", [{dot,{1,1}}]),
 387    test_string(".\n", [{dot,{1,1}}]),
 388    test_string(".\n\n", [{dot,{1,1}}]),
 389    test_string(".\n\r", [{dot,{1,1}}]),
 390    test_string(".\n\n\n", [{dot,{1,1}}]),
 391    test_string(".\210", [{dot,{1,1}}]),
 392    test_string(".%\n", [{dot,{1,1}}]),
 393    test_string(".a", [{'.',{1,1}},{atom,{1,2},a}]),
 394
 395    test_string("%. \n. ", [{dot,{2,1}}]),
 396    {more,C} = erl_scan:tokens([], "%. ",{1,1}, return),
 397    {done,{ok,[{comment,{1,1},"%. "},
 398               {white_space,{1,4},"\n"},
 399               {dot,{2,1}}],
 400           {2,3}}, ""} =
 401        erl_scan_tokens(C, "\n. ", {1,1}, return), % any loc, any options
 402
 403    [test_string(S, R) ||
 404        {S, R} <- [{".$\n",   [{'.',{1,1}},{char,{1,2},$\n}]},
 405                   {"$\\\n",  [{char,{1,1},$\n}]},
 406                   {"'\\\n'", [{atom,{1,1},'\n'}]},
 407                   {"$\n",    [{char,{1,1},$\n}]}] ],
 408    ok.
 409
 410chars() ->
 411    [begin
 412         L = lists:flatten(io_lib:format("$\\~.8b", [C])),
 413         Ts = [{char,{1,1},C}],
 414         test_string(L, Ts)
 415     end || C <- lists:seq(0, 255)],
 416
 417    %% Leading zeroes...
 418    [begin
 419         L = lists:flatten(io_lib:format("$\\~3.8.0b", [C])),
 420         Ts = [{char,{1,1},C}],
 421         test_string(L, Ts)
 422     end || C <- lists:seq(0, 255)],
 423
 424    %% $\^\n now increments the line...
 425    [begin
 426         L = "$\\^" ++ [C],
 427         Ts = [{char,{1,1},C band 2#11111}],
 428         test_string(L, Ts)
 429     end || C <- lists:seq(0, 255)],
 430
 431    [begin
 432         L = "$\\" ++ [C],
 433         Ts = [{char,{1,1},V}],
 434         test_string(L, Ts)
 435     end || {C,V} <- [{$n,$\n}, {$r,$\r}, {$t,$\t}, {$v,$\v},
 436                      {$b,$\b}, {$f,$\f}, {$e,$\e}, {$s,$\s},
 437                      {$d,$\d}]],
 438
 439    EC = [$\n,$\r,$\t,$\v,$\b,$\f,$\e,$\s,$\d],
 440    Ds = lists:seq($0, $9),
 441    X = [$^,$n,$r,$t,$v,$b,$f,$e,$s,$d],
 442    New = [${,$x],
 443    No = EC ++ Ds ++ X ++ New,
 444    [begin
 445         L = "$\\" ++ [C],
 446         Ts = [{char,{1,1},C}],
 447         test_string(L, Ts)
 448     end || C <- lists:seq(0, 255) -- No],
 449
 450    [begin
 451         L = "'$\\" ++ [C] ++ "'",
 452         Ts = [{atom,{1,1},list_to_atom("$"++[C])}],
 453         test_string(L, Ts)
 454     end || C <- lists:seq(0, 255) -- No],
 455
 456    test_string("\"\\013a\\\n\"", [{string,{1,1},"\va\n"}]),
 457
 458    test_string("'\n'", [{atom,{1,1},'\n'}]),
 459    test_string("\"\n\a\"", [{string,{1,1},"\na"}]),
 460
 461    %% No escape
 462    [begin
 463         L = "$" ++ [C],
 464         Ts = [{char,{1,1},C}],
 465         test_string(L, Ts)
 466     end || C <- lists:seq(0, 255) -- (No ++ [$\\])],
 467    test_string("$\n", [{char,{1,1},$\n}]),
 468
 469    {error,{{1,1},erl_scan,char},{1,4}} =
 470        erl_scan:string("$\\^",{1,1}),
 471    test_string("$\\\n", [{char,{1,1},$\n}]),
 472    %% Robert's scanner returns line 1:
 473    test_string("$\\\n", [{char,{1,1},$\n}]),
 474    test_string("$\n\n", [{char,{1,1},$\n}]),
 475    test("$\n\n"),
 476    ok.
 477
 478
 479variables() ->
 480    test_string("     \237_Aouåeiyäö", [{var,{1,7},'_Aouåeiyäö'}]),
 481    test_string("A_b_c@", [{var,{1,1},'A_b_c@'}]),
 482    test_string("V@2", [{var,{1,1},'V@2'}]),
 483    test_string("ABDÀ", [{var,{1,1},'ABDÀ'}]),
 484    test_string("Ärlig Östen", [{var,{1,1},'Ärlig'},{var,{1,7},'Östen'}]),
 485    ok.
 486
 487eof() ->
 488    {done,{eof,1},eof} = erl_scan:tokens([], eof, 1),
 489    {more, C1} = erl_scan:tokens([],"    \n", 1),
 490    {done,{eof,2},eof} = erl_scan:tokens(C1, eof, 1),
 491    {more, C2} = erl_scan:tokens([], "abra", 1),
 492    %% An error before R13A.
 493    %% {done,Err={error,{1,erl_scan,scan},1},eof} =
 494    {done,{ok,[{atom,1,abra}],1},eof} =
 495        erl_scan_tokens(C2, eof, 1),
 496
 497    %% With column.
 498    {more, C3} = erl_scan:tokens([],"    \n",{1,1}),
 499    {done,{eof,{2,1}},eof} = erl_scan:tokens(C3, eof, 1),
 500    {more, C4} = erl_scan:tokens([], "abra", {1,1}),
 501    %% An error before R13A.
 502    %% {done,{error,{{1,1},erl_scan,scan},{1,5}},eof} =
 503    {done,{ok,[{atom,_,abra}],{1,5}},eof} =
 504        erl_scan_tokens(C4, eof, 1),
 505
 506    %% Robert's scanner returns "" as LeftoverChars;
 507    %% the R12B scanner returns eof as LeftoverChars: (eof is correct)
 508    {more, C5} = erl_scan:tokens([], "a", 1),
 509    %% An error before R13A.
 510    %% {done,{error,{1,erl_scan,scan},1},eof} =
 511    {done,{ok,[{atom,1,a}],1},eof} =
 512        erl_scan_tokens(C5,eof,1),
 513
 514    %% With column.
 515    {more, C6} = erl_scan:tokens([], "a", {1,1}),
 516    %% An error before R13A.
 517    %% {done,{error,{1,erl_scan,scan},1},eof} =
 518    {done,{ok,[{atom,{1,1},a}],{1,2}},eof} =
 519        erl_scan_tokens(C6,eof,1),
 520
 521    %% A dot followed by eof is special:
 522    {more, C} = erl_scan:tokens([], "a.", 1),
 523    {done,{ok,[{atom,1,a},{dot,1}],1},eof} = erl_scan_tokens(C,eof,1),
 524    {ok,[{atom,1,foo},{dot,1}],1} = erl_scan_string("foo."),
 525
 526    %% With column.
 527    {more, CCol} = erl_scan:tokens([], "a.", {1,1}),
 528    {done,{ok,[{atom,{1,1},a},{dot,{1,2}}],{1,3}},eof} =
 529        erl_scan_tokens(CCol,eof,1),
 530    {ok,[{atom,{1,1},foo},{dot,{1,4}}],{1,5}} =
 531        erl_scan_string("foo.", {1,1}, []),
 532
 533    ok.
 534
 535illegal() ->
 536    Atom = lists:duplicate(1000, $a),
 537    {error,{1,erl_scan,{illegal,atom}},1} = erl_scan:string(Atom),
 538    {done,{error,{1,erl_scan,{illegal,atom}},1},". "} =
 539        erl_scan:tokens([], Atom++". ", 1),
 540    QAtom = "'" ++ Atom ++ "'",
 541    {error,{1,erl_scan,{illegal,atom}},1} = erl_scan:string(QAtom),
 542    {done,{error,{1,erl_scan,{illegal,atom}},1},". "} =
 543        erl_scan:tokens([], QAtom++". ", 1),
 544    Var = lists:duplicate(1000, $A),
 545    {error,{1,erl_scan,{illegal,var}},1} = erl_scan:string(Var),
 546    {done,{error,{1,erl_scan,{illegal,var}},1},". "} =
 547        erl_scan:tokens([], Var++". ", 1),
 548    Float = "1" ++ lists:duplicate(400, $0) ++ ".0",
 549    {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(Float),
 550    {done,{error,{1,erl_scan,{illegal,float}},1},". "} =
 551        erl_scan:tokens([], Float++". ", 1),
 552    String = "\"43\\x{aaaaaa}34\"",
 553    {error,{1,erl_scan,{illegal,character}},1} = erl_scan:string(String),
 554    {done,{error,{1,erl_scan,{illegal,character}},1},"34\". "} =
 555        %% Would be nice if `34\"' were skipped...
 556        %% Maybe, but then the LeftOverChars would not be the characters
 557        %% immediately following the end location of the error.
 558        erl_scan:tokens([], String++". ", 1),
 559
 560    {error,{{1,1},erl_scan,{illegal,atom}},{1,1001}} =
 561        erl_scan:string(Atom, {1,1}),
 562    {done,{error,{{1,5},erl_scan,{illegal,atom}},{1,1005}},". "} =
 563        erl_scan:tokens([], "foo "++Atom++". ", {1,1}),
 564    {error,{{1,1},erl_scan,{illegal,atom}},{1,1003}} =
 565        erl_scan:string(QAtom, {1,1}),
 566    {done,{error,{{1,5},erl_scan,{illegal,atom}},{1,1007}},". "} =
 567        erl_scan:tokens([], "foo "++QAtom++". ", {1,1}),
 568    {error,{{1,1},erl_scan,{illegal,var}},{1,1001}} =
 569        erl_scan:string(Var, {1,1}),
 570    {done,{error,{{1,5},erl_scan,{illegal,var}},{1,1005}},". "} =
 571        erl_scan:tokens([], "foo "++Var++". ", {1,1}),
 572    {error,{{1,1},erl_scan,{illegal,float}},{1,404}} =
 573        erl_scan:string(Float, {1,1}),
 574    {done,{error,{{1,5},erl_scan,{illegal,float}},{1,408}},". "} =
 575        erl_scan:tokens([], "foo "++Float++". ", {1,1}),
 576    {error,{{1,4},erl_scan,{illegal,character}},{1,14}} =
 577        erl_scan:string(String, {1,1}),
 578    {done,{error,{{1,4},erl_scan,{illegal,character}},{1,14}},"34\". "} =
 579        erl_scan:tokens([], String++". ", {1,1}),
 580    ok.
 581
 582crashes() ->
 583    {'EXIT',_} = (catch {foo, erl_scan:string([-1])}), % type error
 584    {'EXIT',_} = (catch {foo, erl_scan:string("$"++[-1])}),
 585    {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[-1])}),
 586    {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[-1])}),
 587    {'EXIT',_} = (catch {foo, erl_scan:string([$",-1,$"],{1,1})}),
 588    {'EXIT',_} = (catch {foo, erl_scan:string("\"\\v"++[-1,$"])}), %$"
 589    {'EXIT',_} = (catch {foo, erl_scan:string([$",-1,$"])}),
 590    {'EXIT',_} = (catch {foo, erl_scan:string("% foo"++[-1])}),
 591    {'EXIT',_} =
 592         (catch {foo, erl_scan:string("% foo"++[-1],{1,1})}),
 593
 594    {'EXIT',_} = (catch {foo, erl_scan:string([a])}), % type error
 595    {'EXIT',_} = (catch {foo, erl_scan:string("$"++[a])}),
 596    {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[a])}),
 597    {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[a])}),
 598    {'EXIT',_} = (catch {foo, erl_scan:string([$",a,$"],{1,1})}),
 599    {'EXIT',_} = (catch {foo, erl_scan:string("\"\\v"++[a,$"])}), %$"
 600    {'EXIT',_} = (catch {foo, erl_scan:string([$",a,$"])}),
 601    {'EXIT',_} = (catch {foo, erl_scan:string("% foo"++[a])}),
 602    {'EXIT',_} =
 603         (catch {foo, erl_scan:string("% foo"++[a],{1,1})}),
 604
 605    {'EXIT',_} = (catch {foo, erl_scan:string([3.0])}), % type error
 606
 607    ok.
 608
 609options() ->
 610    %% line and column are not options, but tested here
 611    {ok,[{atom,1,foo},{white_space,1," "},{comment,1,"% bar"}], 1} =
 612        erl_scan_string("foo % bar", 1, return),
 613    {ok,[{atom,1,foo},{white_space,1," "}],1} =
 614        erl_scan_string("foo % bar", 1, return_white_spaces),
 615    {ok,[{atom,1,foo},{comment,1,"% bar"}],1} =
 616        erl_scan_string("foo % bar", 1, return_comments),
 617    {ok,[{atom,17,foo}],17} =
 618        erl_scan_string("foo % bar", 17),
 619    {'EXIT',{function_clause,_}} =
 620        (catch {foo,
 621                erl_scan:string("foo % bar", {a,1}, [])}), % type error
 622    {ok,[{atom,_,foo}],{17,18}} =
 623        erl_scan_string("foo % bar", {17,9}, []),
 624    {'EXIT',{function_clause,_}} =
 625        (catch {foo,
 626                erl_scan:string("foo % bar", {1,0}, [])}), % type error
 627    {ok,[{foo,1}],1} =
 628        erl_scan_string("foo % bar",1, [{reserved_word_fun,
 629                                         fun(W) -> W =:= foo end}]),
 630    {'EXIT',{badarg,_}} =
 631        (catch {foo,
 632                erl_scan:string("foo % bar",1, % type error
 633                                [{reserved_word_fun,
 634                                  fun(W,_) -> W =:= foo end}])}),
 635    ok.
 636
 637more_options() ->
 638    {ok,[{atom,_,foo}=T1],{19,20}} =
 639        erl_scan:string("foo", {19,17},[]),
 640    {19,17} = erl_scan:location(T1),
 641    {done,{ok,[{atom,_,foo}=T2,{dot,_}],{19,22}},[]} =
 642        erl_scan:tokens([], "foo. ", {19,17}, [bad_opt]), % type error
 643    {19,17} = erl_scan:location(T2),
 644    {ok,[{atom,_,foo}=T3],{19,20}} =
 645        erl_scan:string("foo", {19,17},[text]),
 646    {19,17} = erl_scan:location(T3),
 647    "foo" = erl_scan:text(T3),
 648
 649    {ok,[{atom,_,foo}=T4],1} = erl_scan:string("foo", 1, [text]),
 650    1 = erl_scan:line(T4),
 651    1 = erl_scan:location(T4),
 652    "foo" = erl_scan:text(T4),
 653
 654    ok.
 655
 656token_info() ->
 657    {ok,[T1],_} = erl_scan:string("foo", {1,18}, [text]),
 658    {'EXIT',{badarg,_}} =
 659        (catch {foo, erl_scan:category(foo)}), % type error
 660    {'EXIT',{badarg,_}} =
 661        (catch {foo, erl_scan:symbol(foo)}), % type error
 662    atom = erl_scan:category(T1),
 663    foo = erl_scan:symbol(T1),
 664
 665    {ok,[T2],_} = erl_scan:string("foo", 1, []),
 666    1 = erl_scan:line(T2),
 667    undefined = erl_scan:column(T2),
 668    undefined = erl_scan:text(T2),
 669    1 = erl_scan:location(T2),
 670
 671    {ok,[T3],_} = erl_scan:string("=", 1, []),
 672    '=' = erl_scan:category(T3),
 673    '=' = erl_scan:symbol(T3),
 674    ok.
 675
 676anno_info() ->
 677    {'EXIT',_} =
 678        (catch {foo,erl_scan:line(foo)}), % type error
 679    {ok,[{atom,_,foo}=T0],_} = erl_scan:string("foo", 19, [text]),
 680    19 = erl_scan:location(T0),
 681    19 = erl_scan:end_location(T0),
 682
 683    {ok,[{atom,_,foo}=T3],_} = erl_scan:string("foo", {1,3}, [text]),
 684    1 = erl_scan:line(T3),
 685    3 = erl_scan:column(T3),
 686    {1,3} = erl_scan:location(T3),
 687    {1,6} = erl_scan:end_location(T3),
 688    "foo" = erl_scan:text(T3),
 689
 690    {ok,[{atom,_,foo}=T4],_} = erl_scan:string("foo", 2, [text]),
 691    2 = erl_scan:line(T4),
 692    undefined = erl_scan:column(T4),
 693    2 = erl_scan:location(T4),
 694    "foo" = erl_scan:text(T4),
 695
 696    {ok,[{atom,_,foo}=T5],_} = erl_scan:string("foo", {1,3}, []),
 697    1 = erl_scan:line(T5),
 698    3 = erl_scan:column(T5),
 699    {1,3} = erl_scan:location(T5),
 700    undefined = erl_scan:text(T5),
 701
 702    ok.
 703
 704column_errors() ->
 705    {error,{{1,1},erl_scan,{string,$',""}},{1,3}} = % $'
 706        erl_scan:string("'\\",{1,1}),
 707    {error,{{1,1},erl_scan,{string,$",""}},{1,3}} = % $"
 708        erl_scan:string("\"\\",{1,1}),
 709
 710    {error,{{1,1},erl_scan,{string,$',""}},{1,2}} =  % $'
 711        erl_scan:string("'",{1,1}),
 712    {error,{{1,1},erl_scan,{string,$",""}},{1,2}} =  % $"
 713        erl_scan:string("\"",{1,1}),
 714
 715    {error,{{1,1},erl_scan,char},{1,2}} =
 716        erl_scan:string("$",{1,1}),
 717
 718    {error,{{1,2},erl_scan,{string,$',"1234567890123456"}},{1,20}} = %'
 719        erl_scan:string(" '12345678901234567", {1,1}),
 720    {error,{{1,2},erl_scan,{string,$',"123456789012345 "}}, {1,20}} = %'
 721        erl_scan:string(" '123456789012345\\s", {1,1}),
 722    {error,{{1,2},erl_scan,{string,$","1234567890123456"}},{1,20}} = %"
 723        erl_scan:string(" \"12345678901234567", {1,1}),
 724    {error,{{1,2},erl_scan,{string,$","123456789012345 "}}, {1,20}} = %"
 725        erl_scan:string(" \"123456789012345\\s", {1,1}),
 726    {error,{{1,2},erl_scan,{string,$',"1234567890123456"}},{2,1}} = %'
 727        erl_scan:string(" '12345678901234567\n", {1,1}),
 728    ok.
 729
 730white_spaces() ->
 731    {ok,[{white_space,_,"\r"},
 732               {white_space,_,"   "},
 733               {atom,_,a},
 734               {white_space,_,"\n"}],
 735           _} = erl_scan_string("\r   a\n", {1,1}, return),
 736    test("\r   a\n"),
 737    L = "{\"a\nb\", \"a\\nb\",\nabc\r,def}.\n\n",
 738    {ok,[{'{',_},
 739               {string,_,"a\nb"},
 740               {',',_},
 741               {white_space,_," "},
 742               {string,_,"a\nb"},
 743               {',',_},
 744               {white_space,_,"\n"},
 745               {atom,_,abc},
 746               {white_space,_,"\r"},
 747               {',',_},
 748               {atom,_,def},
 749               {'}',_},
 750               {dot,_},
 751               {white_space,_,"\n"}],
 752           _} = erl_scan_string(L, {1,1}, return),
 753    test(L),
 754    test("\"\n\"\n"),
 755    test("\n\r\n"),
 756    test("\n\r"),
 757    test("\r\n"),
 758    test("\n\f"),
 759    [test(lists:duplicate(N, $\t)) || N <- lists:seq(1, 20)],
 760    [test([$\n|lists:duplicate(N, $\t)]) || N <- lists:seq(1, 20)],
 761    [test(lists:duplicate(N, $\s)) || N <- lists:seq(1, 20)],
 762    [test([$\n|lists:duplicate(N, $\s)]) || N <- lists:seq(1, 20)],
 763    test("\v\f\n\v "),
 764    test("\n\e\n\b\f\n\da\n"),
 765    ok.
 766
 767unicode() ->
 768    {ok,[{char,1,83},{integer,1,45}],1} =
 769        erl_scan_string("$\\12345"), % not unicode
 770
 771    {error,{1,erl_scan,{illegal,character}},1} =
 772        erl_scan:string([1089]),
 773    {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
 774        erl_scan:string([1089], {1,1}),
 775    {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
 776        erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}),
 777
 778    test("\"a"++[1089]++"b\""),
 779    {ok,[{char,1,1}],1} =
 780        erl_scan_string([$$,$\\,$^,1089], 1),
 781
 782    {error,{1,erl_scan,Error},1} =
 783        erl_scan:string("\"qa\x{aaa}", 1),
 784    "unterminated string starting with \"qa"++[2730]++"\"" =
 785        erl_scan:format_error(Error),
 786    {error,{{1,1},erl_scan,_},{1,11}} =
 787        erl_scan:string("\"qa\\x{aaa}",{1,1}),
 788    {error,{{1,1},erl_scan,_},{1,11}} =
 789        erl_scan:string("'qa\\x{aaa}",{1,1}),
 790
 791    {ok,[{char,1,1089}],1} =
 792        erl_scan_string([$$,1089], 1),
 793    {ok,[{char,1,1089}],1} =
 794        erl_scan_string([$$,$\\,1089], 1),
 795
 796    Qs = "$\\x{aaa}",
 797    {ok,[{char,1,$\x{aaa}}],1} =
 798        erl_scan_string(Qs, 1),
 799    {ok,[Q2],{1,9}} =
 800        erl_scan:string("$\\x{aaa}", {1,1}, [text]),
 801    [{category,char},{column,1},{line,1},{symbol,16#aaa},{text,Qs}] =
 802        token_info_long(Q2),
 803
 804    U1 = "\"\\x{aaa}\"",
 805    {ok,[{string,_,[2730]}=T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]),
 806    {1,1} = erl_scan:location(T1),
 807    "\"\\x{aaa}\"" = erl_scan:text(T1),
 808    {ok,[{string,1,[2730]}],1} = erl_scan_string(U1, 1),
 809
 810    U2 = "\"\\x41\\x{fff}\\x42\"",
 811    {ok,[{string,1,[$\x41,$\x{fff},$\x42]}],1} = erl_scan_string(U2, 1),
 812
 813    U3 = "\"a\n\\x{fff}\n\"",
 814    {ok,[{string,1,[$a,$\n,$\x{fff},$\n]}],3} = erl_scan_string(U3, 1),
 815
 816    U4 = "\"\\^\n\\x{aaa}\\^\n\"",
 817    {ok,[{string,1,[$\n,$\x{aaa},$\n]}],3} = erl_scan_string(U4, 1),
 818
 819    %% Keep these tests:
 820    test(Qs),
 821    test(U1),
 822    test(U2),
 823    test(U3),
 824    test(U4),
 825
 826    Str1 = "\"ab" ++ [1089] ++ "cd\"",
 827    {ok,[{string,1,[$a,$b,1089,$c,$d]}],1} = erl_scan_string(Str1, 1),
 828    {ok,[{string,{1,1},[$a,$b,1089,$c,$d]}],{1,8}} =
 829        erl_scan_string(Str1, {1,1}),
 830    test(Str1),
 831    Comment = "%% "++[1089],
 832    {ok,[{comment,1,[$%,$%,$\s,1089]}],1} =
 833        erl_scan_string(Comment, 1, [return]),
 834    {ok,[{comment,{1,1},[$%,$%,$\s,1089]}],{1,5}} =
 835        erl_scan_string(Comment, {1,1}, [return]),
 836    ok.
 837
 838more_chars() ->
 839    %% Due to unicode, the syntax has been incompatibly augmented:
 840    %% $\x{...}, $\xHH
 841
 842    %% All kinds of tests...
 843    {ok,[{char,_,123}],{1,4}} =
 844        erl_scan_string("$\\{",{1,1}),
 845    {more, C1} = erl_scan:tokens([], "$\\{", {1,1}),
 846    {done,{ok,[{char,_,123}],{1,4}},eof} =
 847        erl_scan_tokens(C1, eof, 1),
 848    {ok,[{char,1,123},{atom,1,a},{'}',1}],1} =
 849        erl_scan_string("$\\{a}"),
 850
 851    {error,{{1,1},erl_scan,char},{1,4}} =
 852        erl_scan:string("$\\x", {1,1}),
 853    {error,{{1,1},erl_scan,char},{1,5}} =
 854        erl_scan:string("$\\x{",{1,1}),
 855    {more, C3} = erl_scan:tokens([], "$\\x", {1,1}),
 856    {done,{error,{{1,1},erl_scan,char},{1,4}},eof} =
 857        erl_scan:tokens(C3, eof, 1),
 858    {error,{{1,1},erl_scan,char},{1,5}} =
 859        erl_scan:string("$\\x{",{1,1}),
 860    {more, C2} = erl_scan:tokens([], "$\\x{", {1,1}),
 861    {done,{error,{{1,1},erl_scan,char},{1,5}},eof} =
 862        erl_scan:tokens(C2, eof, 1),
 863    {error,{1,erl_scan,{illegal,character}},1} =
 864        erl_scan:string("$\\x{g}"),
 865    {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
 866        erl_scan:string("$\\x{g}", {1,1}),
 867    {error,{{1,1},erl_scan,{illegal,character}},{1,6}} =
 868        erl_scan:string("$\\x{}",{1,1}),
 869
 870    test("\"\\{0}\""),
 871    test("\"\\x{0}\""),
 872    test("\'\\{0}\'"),
 873    test("\'\\x{0}\'"),
 874
 875    {error,{{2,3},erl_scan,{illegal,character}},{2,6}} =
 876        erl_scan:string("\"ab \n $\\x{g}\"",{1,1}),
 877    {error,{{2,3},erl_scan,{illegal,character}},{2,6}} =
 878        erl_scan:string("\'ab \n $\\x{g}\'",{1,1}),
 879
 880    test("$\\{34}"),
 881    test("$\\x{34}"),
 882    test("$\\{377}"),
 883    test("$\\x{FF}"),
 884    test("$\\{400}"),
 885    test("$\\x{100}"),
 886    test("$\\x{10FFFF}"),
 887    test("$\\x{10ffff}"),
 888    test("\"$\n \\{1}\""),
 889    {error,{1,erl_scan,{illegal,character}},1} =
 890        erl_scan:string("$\\x{110000}"),
 891    {error,{{1,1},erl_scan,{illegal,character}},{1,12}} =
 892        erl_scan:string("$\\x{110000}", {1,1}),
 893
 894    {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
 895        erl_scan:string("$\\xfg", {1,1}),
 896
 897    test("$\\xffg"),
 898
 899    {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
 900        erl_scan:string("$\\xg", {1,1}),
 901    ok.
 902
 903%% OTP-10302. Unicode characters scanner/parser.
 904otp_10302(Config) when is_list(Config) ->
 905    %% From unicode():
 906    {ok,[{atom,1,'aсb'}],1} =
 907        erl_scan_string("'a"++[1089]++"b'", 1),
 908    {ok,[{atom,{1,1},'qaપ'}],{1,12}} =
 909        erl_scan_string("'qa\\x{aaa}'",{1,1}),
 910
 911    {ok,[{char,1,1089}],1} = erl_scan_string([$$,1089], 1),
 912    {ok,[{char,1,1089}],1} = erl_scan_string([$$,$\\,1089],1),
 913
 914    Qs = "$\\x{aaa}",
 915    {ok,[{char,1,2730}],1} = erl_scan_string(Qs, 1),
 916    {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[text]),
 917    [{category,char},{column,1},{line,1},{symbol,16#aaa},{text,Qs}] =
 918        token_info_long(Q2),
 919
 920    U1 = "\"\\x{aaa}\"",
 921    {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]),
 922    [{category,string},{column,1},{line,1},{symbol,[16#aaa]},{text,U1}] =
 923        token_info_long(T1),
 924
 925    U2 = "\"\\x41\\x{fff}\\x42\"",
 926    {ok,[{string,1,[65,4095,66]}],1} = erl_scan_string(U2, 1),
 927
 928    U3 = "\"a\n\\x{fff}\n\"",
 929    {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan_string(U3, 1),
 930
 931    U4 = "\"\\^\n\\x{aaa}\\^\n\"",
 932    {ok,[{string,1,[10,2730,10]}],3} = erl_scan_string(U4, 1,[]),
 933
 934    Str1 = "\"ab" ++ [1089] ++ "cd\"",
 935    {ok,[{string,1,[97,98,1089,99,100]}],1} =
 936        erl_scan_string(Str1,1),
 937    {ok,[{string,{1,1},[97,98,1089,99,100]}],{1,8}} =
 938        erl_scan_string(Str1, {1,1}),
 939
 940    OK1 = 16#D800-1,
 941    OK2 = 16#DFFF+1,
 942    OK3 = 16#FFFE-1,
 943    OK4 = 16#FFFF+1,
 944    OKL = [OK1,OK2,OK3,OK4],
 945
 946    Illegal1 = 16#D800,
 947    Illegal2 = 16#DFFF,
 948    Illegal3 = 16#FFFE,
 949    Illegal4 = 16#FFFF,
 950    IllegalL = [Illegal1,Illegal2,Illegal3,Illegal4],
 951
 952    [{ok,[{comment,1,[$%,$%,$\s,OK]}],1} =
 953         erl_scan_string("%% "++[OK], 1, [return]) ||
 954        OK <- OKL],
 955    {ok,[{comment,_,[$%,$%,$\s,OK1]}],{1,5}} =
 956        erl_scan_string("%% "++[OK1], {1,1}, [return]),
 957    [{error,{1,erl_scan,{illegal,character}},1} =
 958         erl_scan:string("%% "++[Illegal], 1, [return]) ||
 959        Illegal <- IllegalL],
 960    {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
 961        erl_scan:string("%% "++[Illegal1], {1,1}, [return]),
 962
 963    [{ok,[],1} = erl_scan_string("%% "++[OK], 1, []) ||
 964        OK <- OKL],
 965    {ok,[],{1,5}} = erl_scan_string("%% "++[OK1], {1,1}, []),
 966    [{error,{1,erl_scan,{illegal,character}},1} =
 967         erl_scan:string("%% "++[Illegal], 1, []) ||
 968        Illegal <- IllegalL],
 969    {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
 970        erl_scan:string("%% "++[Illegal1], {1,1}, []),
 971
 972    [{ok,[{string,{1,1},[OK]}],{1,4}} =
 973        erl_scan_string("\""++[OK]++"\"",{1,1}) ||
 974        OK <- OKL],
 975    [{error,{{1,2},erl_scan,{illegal,character}},{1,3}} =
 976         erl_scan:string("\""++[OK]++"\"",{1,1}) ||
 977        OK <- IllegalL],
 978
 979    [{error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
 980        erl_scan:string([Illegal],{1,1}) ||
 981        Illegal <- IllegalL],
 982
 983    {ok,[{char,{1,1},OK1}],{1,3}} =
 984        erl_scan_string([$$,OK1],{1,1}),
 985    {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
 986        erl_scan:string([$$,Illegal1],{1,1}),
 987
 988    {ok,[{char,{1,1},OK1}],{1,4}} =
 989        erl_scan_string([$$,$\\,OK1],{1,1}),
 990    {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
 991        erl_scan:string([$$,$\\,Illegal1],{1,1}),
 992
 993    {ok,[{string,{1,1},[55295]}],{1,5}} =
 994        erl_scan_string("\"\\"++[OK1]++"\"",{1,1}),
 995    {error,{{1,2},erl_scan,{illegal,character}},{1,4}} =
 996        erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1}),
 997
 998    {ok,[{char,{1,1},OK1}],{1,10}} =
 999        erl_scan_string("$\\x{D7FF}",{1,1}),
1000    {error,{{1,1},erl_scan,{illegal,character}},{1,10}} =
1001        erl_scan:string("$\\x{D800}",{1,1}),
1002
1003    %% Not erl_scan, but erl_parse.
1004    {integer,0,1} = erl_parse_abstract(1),
1005    Float = 3.14, {float,0,Float} = erl_parse_abstract(Float),
1006    {nil,0} = erl_parse_abstract([]),
1007    {bin,0,
1008     [{bin_element,0,{integer,0,1},default,default},
1009      {bin_element,0,{integer,0,2},default,default}]} =
1010        erl_parse_abstract(<<1,2>>),
1011    {cons,0,{tuple,0,[{atom,0,a}]},{atom,0,b}} =
1012        erl_parse_abstract([{a} | b]),
1013    {string,0,"str"} = erl_parse_abstract("str"),
1014    {cons,0,
1015     {integer,0,$a},
1016     {cons,0,{integer,0,55296},{string,0,"c"}}} =
1017        erl_parse_abstract("a"++[55296]++"c"),
1018
1019    Line = 17,
1020    {integer,Line,1} = erl_parse_abstract(1, Line),
1021    Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Line),
1022    {nil,Line} = erl_parse_abstract([], Line),
1023    {bin,Line,
1024     [{bin_element,Line,{integer,Line,1},default,default},
1025      {bin_element,Line,{integer,Line,2},default,default}]} =
1026        erl_parse_abstract(<<1,2>>, Line),
1027    {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
1028        erl_parse_abstract([{a} | b], Line),
1029    {string,Line,"str"} = erl_parse_abstract("str", Line),
1030    {cons,Line,
1031     {integer,Line,$a},
1032     {cons,Line,{integer,Line,55296},{string,Line,"c"}}} =
1033        erl_parse_abstract("a"++[55296]++"c", Line),
1034
1035    Opts1 = [{line,17}],
1036    {integer,Line,1} = erl_parse_abstract(1, Opts1),
1037    Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Opts1),
1038    {nil,Line} = erl_parse_abstract([], Opts1),
1039    {bin,Line,
1040     [{bin_element,Line,{integer,Line,1},default,default},
1041      {bin_element,Line,{integer,Line,2},default,default}]} =
1042        erl_parse_abstract(<<1,2>>, Opts1),
1043    {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
1044        erl_parse_abstract([{a} | b], Opts1),
1045    {string,Line,"str"} = erl_parse_abstract("str", Opts1),
1046    {cons,Line,
1047     {integer,Line,$a},
1048     {cons,Line,{integer,Line,55296},{string,Line,"c"}}} =
1049        erl_parse_abstract("a"++[55296]++"c", Opts1),
1050
1051    [begin
1052         {integer,Line,1} = erl_parse_abstract(1, Opts2),
1053         Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Opts2),
1054         {nil,Line} = erl_parse_abstract([], Opts2),
1055         {bin,Line,
1056          [{bin_element,Line,{integer,Line,1},default,default},
1057           {bin_element,Line,{integer,Line,2},default,default}]} =
1058             erl_parse_abstract(<<1,2>>, Opts2),
1059         {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
1060             erl_parse_abstract([{a} | b], Opts2),
1061         {string,Line,"str"} = erl_parse_abstract("str", Opts2),
1062         {string,Line,[97,1024,99]} =
1063             erl_parse_abstract("a"++[1024]++"c", Opts2)
1064     end || Opts2 <- [[{encoding,unicode},{line,Line}],
1065                      [{encoding,utf8},{line,Line}]]],
1066
1067    {cons,0,
1068     {integer,0,97},
1069     {cons,0,{integer,0,1024},{string,0,"c"}}} =
1070        erl_parse_abstract("a"++[1024]++"c", [{encoding,latin1}]),
1071    ok.
1072
%% OTP-10990. Floating point number in input string.
%% The scanner is expected to raise an exception for such input.
otp_10990(Config) when is_list(Config) ->
    BadInput = [$",42.0,$"],
    Outcome = (catch {foo, erl_scan:string(BadInput, 1)}),
    {'EXIT',_} = Outcome,
    ok.
1077
%% OTP-10992. List of floats to abstract format.
%% A list that contains a float is expected to come out as cons cells,
%% with both Unicode encoding options.
otp_10992(Config) when is_list(Config) ->
    Cases = [{[42.0],
              {cons,0,{float,0,42.0},{nil,0}}},
             {[$A,42.0],
              {cons,0,{integer,0,65},{cons,0,{float,0,42.0},{nil,0}}}}],
    [Expected = erl_parse_abstract(Term, [{encoding,Encoding}]) ||
        {Term,Expected} <- Cases,
        Encoding <- [unicode,utf8]],
    ok.
1089
%% OTP-11807. Generalize erl_parse:abstract/2.
%% Exercises the 'encoding' option with 'none', 'latin1', a predicate
%% fun and an invalid value.
otp_11807(Config) when is_list(Config) ->
    Abstract = fun(Term, Encoding) ->
                       erl_parse_abstract(Term, [{encoding,Encoding}])
               end,
    {cons,0,{integer,0,97},{cons,0,{integer,0,98},{nil,0}}} =
        Abstract("ab", none),
    {cons,0,{integer,0,-1},{nil,0}} = Abstract([-1], latin1),

    %% The encoding may be a fun that decides which integers are
    %% characters.
    ASCII = fun(Code) -> Code >= 0 andalso Code < 128 end,
    {string,0,"xyz"} = Abstract("xyz", ASCII),
    {cons,0,{integer,0,228},{nil,0}} = Abstract([228], ASCII),

    %% Improper list.
    {cons,0,{integer,0,97},{atom,0,a}} = Abstract([$a|a], latin1),

    %% minor backward incompatibility
    BadEncoding = (catch erl_parse:abstract("string", [{encoding,bad}])),
    {'EXIT', {{badarg,bad},_}} = BadEncoding,
    ok.
1105
%% Scans String from {1,1} and from line 1 and checks the tokens against
%% ExpectedWithCol (tokens located by {Line,Column}), then runs test/1
%% on the same string.
test_string(String, ExpectedWithCol) ->
    {ok, ExpectedWithCol, _} = erl_scan_string(String, {1, 1}, []),
    %% The line-only expectation is derived by dropping the columns.
    DropColumn = fun(Token) ->
                         {Line, _Column} = element(2, Token),
                         setelement(2, Token, Line)
                 end,
    LineOnly = lists:map(DropColumn, ExpectedWithCol),
    {ok, LineOnly, _} = erl_scan_string(String),
    test(String).
1115
%% Wrappers around erl_scan:string/3.  On success the annotation of
%% every token is converted by unopaque_tokens/1; any other result is
%% returned unchanged.  The start location defaults to 1 and the
%% options to [].
erl_scan_string(String) ->
    erl_scan_string(String, 1).

erl_scan_string(String, StartLocation) ->
    erl_scan_string(String, StartLocation, []).

erl_scan_string(String, StartLocation, Options) ->
    Result = erl_scan:string(String, StartLocation, Options),
    case Result of
        {ok, Tokens, EndLocation} ->
            {ok, unopaque_tokens(Tokens), EndLocation};
        _ ->
            Result
    end.
1129
%% Wrappers around erl_scan:tokens/4.  When scanning is done and
%% successful, the annotation of every token is converted by
%% unopaque_tokens/1; any other result is returned unchanged.
erl_scan_tokens(Continuation, Chars, Location) ->
    erl_scan_tokens(Continuation, Chars, Location, []).

erl_scan_tokens(Continuation, Chars, Location, Options) ->
    Result = erl_scan:tokens(Continuation, Chars, Location, Options),
    case Result of
        {done, {ok, Tokens, End}, Rest} ->
            {done, {ok, unopaque_tokens(Tokens), End}, Rest};
        _ ->
            Result
    end.
1140
%% Replaces the annotation (the second element) of every token with the
%% term returned by erl_anno:to_term/1.
unopaque_tokens(Tokens) ->
    [setelement(2, Token, erl_anno:to_term(element(2, Token))) ||
        Token <- Tokens].
1148
%% Wrappers around erl_parse:abstract/2 that pass the result through
%% unopaque_abstract/1.  Options default to [].
erl_parse_abstract(Term) ->
    erl_parse_abstract(Term, []).

erl_parse_abstract(Term, Options) ->
    unopaque_abstract(erl_parse:abstract(Term, Options)).
1155
%% Converts the annotations of an abstract form to plain terms using
%% erl_parse:anno_to_term/1.
unopaque_abstract(AbstractForm) ->
    erl_parse:anno_to_term(AbstractForm).
1158
1159%% test_string(String, Expected, StartLocation, Options) ->
1160%%     {ok, Expected, _End} = erl_scan:string(String, StartLocation, Options),
1161%%     test(String).
1162
%% Scans String with every combination of the "return" options and
%% cross-checks the results against each other and against String.
%% There are no checks of the tags...
%% Returns ok; any inconsistency shows up as a badmatch whose value is
%% labelled with the name of the failed check.
test(String) ->
    OptionSets = [[],
                  [return_white_spaces],
                  [return_comments],
                  [return]],

    %% [return] is scanned once more; the second result is only used
    %% by the white space compaction test.
    [{Plain, PlainEnd},
     {WithWhite, WhiteEnd},
     {WithComments, CommentsEnd},
     {Full, FullEnd},
     {FullAgain, _}] =
        [scan_string_with_column(String, Opts) ||
            Opts <- OptionSets ++ [[return]]],

    %% All variants must end at the same location.
    {end1,PlainEnd,WhiteEnd} = {end1,WhiteEnd,PlainEnd},
    {end2,WhiteEnd,CommentsEnd} = {end2,CommentsEnd,WhiteEnd},
    {end3,CommentsEnd,FullEnd} = {end3,FullEnd,CommentsEnd},

    %% Test that the tokens that are common to two token lists are
    %% identical.
    {none,Plain} = {none,filter_tokens(Full, [white_space,comment])},
    {comments,WithComments} =
        {comments,filter_tokens(Full, [white_space])},
    {white_spaces,WithWhite} =
        {white_spaces,filter_tokens(Full, [comment])},

    %% Use token attributes to extract parts from the original string,
    %% and check that the parts are identical to the token strings.
    {Line,Column} = test_decorated_tokens(String, Full),
    {deco,{Line,Column},PlainEnd} = {deco,PlainEnd,{Line,Column}},

    %% Almost the same again: concat texts to get the original:
    Text = get_text(Full),
    {text,Text,String} = {text,String,Text},

    %% Test that white spaces occupy less heap than the worst case.
    ok = test_white_space_compaction(Full, FullAgain),

    %% Test that white newlines are always first in text:
    ok = newlines_first(select_tokens(Full, [white_space])),

    %% Line attribute only:
    LineOnly = [element(2, erl_scan:string(String, 1, Opts)) ||
                   Opts <- OptionSets],
    [LinePlain,LineWhite,LineComments,LineFull] = LineOnly,
    {consistent,true} = {consistent,consistent_attributes(LineOnly)},
    {simple_wc,LineFull} = {simple_wc,simplify(Full)},
    {simple,LinePlain} =
        {simple,filter_tokens(LineFull, [white_space,comment])},
    {simple_c,LineComments} =
        {simple_c,filter_tokens(LineFull, [white_space])},
    {simple_w,LineWhite} =
        {simple_w,filter_tokens(LineFull, [comment])},

    %% Line attribute only, with text:
    LineText = [element(2, erl_scan:string(String, 1, [text|Opts])) ||
                   Opts <- OptionSets],
    [TextPlain,TextWhite,TextComments,TextFull] = LineText,
    TextTxt = get_text(TextFull),
    {text_txt,TextTxt,String} = {text_txt,String,TextTxt},
    {consistent_txt,true} =
        {consistent_txt,consistent_attributes(LineText)},
    {simple_txt,TextPlain} =
        {simple_txt,filter_tokens(TextFull, [white_space,comment])},
    {simple_c_txt,TextComments} =
        {simple_c_txt,filter_tokens(TextFull, [white_space])},
    {simple_w_txt,TextWhite} =
        {simple_w_txt,filter_tokens(TextFull, [comment])},

    ok.
1237
%% Runs test_wsc/2 on the white space tokens of two equal token lists.
%% The lists are kept apart on purpose: test_wsc/2 measures the
%% combined size of texts taken from both of them.
test_white_space_compaction(Tokens, Tokens2) when Tokens =:= Tokens2 ->
    Whites = select_tokens(Tokens, [white_space]),
    Whites2 = select_tokens(Tokens2, [white_space]),
    test_wsc(Whites, Whites2).
1241
%% Walks two lists of white space tokens in parallel.  For each pair
%% the texts count as compacted when erts_debug:size/1 of {Text, Text2}
%% is less than that of two separate copies plus the tuple itself.
%% Returns ok when this agrees with is_compacted/1 for every pair,
%% otherwise {compaction_error, Token} for the first offending token.
test_wsc([], []) ->
    ok;
test_wsc([Token|Tokens], [Token2|Tokens2]) ->
    Text = erl_scan:text(Token),
    Text2 = erl_scan:text(Token2),
    OneCopy = erts_debug:size(Text),
    Pair = erts_debug:size({Text, Text2}),
    IsCompacted = Pair < 2*OneCopy+erts_debug:size({a,a}),
    case is_compacted(Text) of
        IsCompacted ->
            test_wsc(Tokens, Tokens2);
        _ ->
            {compaction_error, Token}
    end.
1257
%% Tells whether a white space text is one that test_wsc/2 expects to
%% be compacted: "\r", "\n\r", "\n\f", or an optional newline followed
%% by nothing but spaces or nothing but tabs.
is_compacted(Text) when Text =:= "\r"; Text =:= "\n\r"; Text =:= "\n\f" ->
    true;
is_compacted(Text) ->
    Blanks = case Text of
                 [$\n|AfterNewline] -> AfterNewline;
                 _ -> Text
             end,
    all_spaces(Blanks) orelse all_tabs(Blanks).
1272
%% True if every element of Chars is a space (also for the empty list).
all_spaces(Chars) ->
    all_same(Chars, $\s).
1275
%% True if every element of Chars is a tab (also for the empty list).
all_tabs(Chars) ->
    all_same(Chars, $\t).
1278
%% True if every element of L is exactly equal to Char.
all_same(L, Char) ->
    IsChar = fun(C) when C =:= Char -> true;
                (_) -> false
             end,
    lists:all(IsChar, L).
1281
%% Checks that a newline only occurs as the first character of the text
%% of each token: a text starting with a newline must contain exactly
%% one, any other text none.  Returns ok, or false at the first token
%% that breaks the rule.
newlines_first([]) ->
    ok;
newlines_first([Token|Rest]) ->
    Text = erl_scan:text(Token),
    Newlines = length([C || C <- Text, C =:= $\n]),
    Allowed = case Text of
                  [$\n|_] -> 1;
                  _ -> 0
              end,
    case Newlines =:= Allowed of
        true -> newlines_first(Rest);
        false -> false
    end.
1297
%% Removes the tokens whose tag (first element) is a member of Tags.
filter_tokens(Tokens, Tags) ->
    [Token || Token <- Tokens,
              not lists:member(element(1, Token), Tags)].
1300
%% Keeps only the tokens whose tag (first element) is a member of Tags.
select_tokens(Tokens, Tags) ->
    [Token || Token <- Tokens,
              lists:member(element(1, Token), Tags)].
1303
%% Replaces the annotation of every token with a new annotation that is
%% built from the line of the token only.
simplify(Tokens) ->
    lists:map(fun(Token) ->
                      Line = erl_scan:line(Token),
                      setelement(2, Token, erl_anno:new(Line))
              end, Tokens).
1309
%% Concatenates the texts of the tokens, in order, into one flat list.
get_text(Tokens) ->
    Texts = lists:map(fun erl_scan:text/1, Tokens),
    lists:flatten([Text || Text <- Texts, Text =/= []]).
1315
%% Checks, starting at line 1 and column 1, that the text of every token
%% is found in String at the location given by the token.  The result
%% is that of test_strings/4.
test_decorated_tokens(String, Tokens) ->
    test_strings(token_attrs(Tokens), String, 1, 1).
1319
%% Returns {Line, Column, TextLength, Text} for every token.
token_attrs(Tokens) ->
    lists:map(fun(Token) ->
                      [Column, Line, Text] = token_info(Token),
                      {Line, Column, length(Text), Text}
              end, Tokens).
1324
%% Returns [Column, Line, Text] of a token, in that order.
token_info(Token) ->
    [erl_scan:column(Token),
     erl_scan:line(Token),
     erl_scan:text(Token)].
1330
%% Returns the category, column, line, symbol and text of a token as a
%% list of {Tag, Value} pairs ordered by tag.
token_info_long(Token) ->
    [{category, erl_scan:category(Token)},
     {column, erl_scan:column(Token)},
     {line, erl_scan:line(Token)},
     {symbol, erl_scan:symbol(Token)},
     {text, erl_scan:text(Token)}].
1339
%% Walks through the string guided by the {Line, Column, Length, Text}
%% tuples: skips to the location of each token and compares the
%% characters found there with the token text.  Returns the location
%% after the last token, or {token_error, Attr, Found} at the first
%% mismatch.
test_strings([], _String, Line, Column) ->
    {Line,Column};
test_strings([{TokLine,TokColumn,Len,Text}=Attr|Attrs], String0, Line0, Column0) ->
    {OnLine, ColumnNow} = skip_newlines(String0, TokLine, Line0, Column0),
    AtToken = skip_chars(OnLine, TokColumn-ColumnNow),
    case lists:split(Len, AtToken) of
        {Text, Remaining} ->
            {NextLine, NextColumn} =
                string_newlines(Text, TokLine, TokColumn),
            test_strings(Attrs, Remaining, NextLine, NextColumn);
        {Found, _} ->
            {token_error, Attr, Found}
    end.
1353
%% Drops characters from the string until the current line equals
%% TargetLine.  Every newline increments the line and resets the column
%% to 1; any other character increments the column.
%% Returns {RestOfString, Column}.
skip_newlines(String, TargetLine, Line, Column) when TargetLine =:= Line ->
    {String, Column};
skip_newlines([Char|Rest], TargetLine, Line, Column) ->
    case Char of
        $\n -> skip_newlines(Rest, TargetLine, Line+1, 1);
        _ -> skip_newlines(Rest, TargetLine, Line, Column+1)
    end.
1360
%% Drops the first N characters of the string.
skip_chars(String, N) when N =:= 0 ->
    String;
skip_chars([_Skipped|Rest], N) ->
    skip_chars(Rest, N-1).
1365
%% Returns the {Line, Column} reached after the string, starting from
%% the given location: a newline increments the line and resets the
%% column to 1, any other character increments the column.
string_newlines(String, Line, Column) ->
    Advance = fun($\n, {L, _C}) -> {L+1, 1};
                 (_, {L, C}) -> {L, C+1}
              end,
    lists:foldl(Advance, {Line, Column}, String).
1372
%% Scans String from {1,1} with the 'text' option added to Options0 and
%% returns {Tokens, EndLocation}.  On the way the result is compared
%% with two scans of String ++ ". " made with erl_scan:tokens/4: one
%% given all characters at once (scan_tokens/4) and one given a single
%% character at a time (scan_tokens_1/5).
scan_string_with_column(String, Options0) ->
    Options = [text | Options0],
    Start = {1, 1},
    Terminated = String ++ ". ",
    {ok, Whole, WholeEnd} = erl_scan:string(String, Start, Options),
    {ok,Forms,FormsEnd} = scan_tokens(Terminated, Options, [], Start),
    {ok, Fed, FedEnd} =
        scan_tokens_1({more, []}, Terminated, Options, [], Start),

    %% The two tokens/4 scans agree, and differ from the string/3 scan
    %% only by the final token and the two extra columns of ". ".
    {end_2,FormsEnd,FedEnd} = {end_2,FedEnd,FormsEnd},
    {WholeLine,WholeColumn} = WholeEnd,
    FormsEnd = {WholeLine,WholeColumn+2},
    {ts_1,Forms,Fed} = {ts_1,Fed,Forms},
    Forms = Whole ++ [lists:last(Forms)],

    %% Scan once more with the same start location and options; the
    %% results must agree with the first pass.
    {ok, Whole2, WholeEnd2} = erl_scan:string(String, {1,1}, Options),
    {ok, Forms2, FormsEnd2} = scan_tokens(Terminated, Options, [], {1,1}),
    {end1, WholeEnd} = {end1, WholeEnd2},
    {end2, FormsEnd} = {end2, FormsEnd2},
    Forms2 = Whole2 ++ [lists:last(Forms2)],
    {cons,true} =
        {cons,consistent_attributes([Whole,Forms,Fed,Whole2,Forms2])},

    {Whole, WholeEnd}.
1396
%% Scans the string with erl_scan:tokens/4, one call per 'done' result,
%% until no characters remain.  Rs holds the token lists of the earlier
%% calls in reverse order.  Returns {ok, AllTokens, EndLocation}.
scan_tokens(String, Options, Rs, Location) ->
    case erl_scan:tokens([], String, Location, Options) of
        {done, {ok,Tokens,End}, []} ->
            {ok, lists:append(lists:reverse(Rs, [Tokens])), End};
        {done, {ok,Tokens,End}, Remaining} ->
            scan_tokens(Remaining, Options, [Tokens|Rs], End)
    end.
1404
%% Like scan_tokens/4, but the scanner is given one character at a time.
%% The first argument is the previous result of erl_scan:tokens/4 (or
%% {more, []} to begin with), the second the characters not yet given.
%% Rs holds the token lists scanned so far, in reverse order.
scan_tokens_1({done, {ok,Tokens,End}, ""}, "", _Options, Rs, _Location) ->
    {ok,lists:append(lists:reverse(Rs, [Tokens])),End};
scan_tokens_1({done, {ok,Tokens,End}, Leftover}, Pending, Options, Rs, _Location) ->
    %% Characters that the scanner did not consume are scanned again,
    %% starting from the end location of this result.
    scan_tokens_1({more,[]}, Leftover++Pending, Options, [Tokens|Rs], End);
scan_tokens_1({more, Cont}, [Char | Pending], Options, Rs, Loc) ->
    scan_tokens_1(erl_scan:tokens(Cont, [Char], Loc, Options),
                  Pending, Options, Rs, Loc).
1412
%% Checks every token list in the given list.  A token list passes if
%% all its annotations are integers, or if none is an integer and all
%% tokens have the same set of defined items in token_info_long/1.
%% Returns true, or the first token list that does not pass.
consistent_attributes([]) ->
    true;
consistent_attributes([Ts | TsL]) ->
    IntegerAnno = [T || T <- Ts, is_integer(element(2, T))],
    IsConsistent =
        if
            IntegerAnno =:= [] ->
                Tags = [[Tag || {Tag,_} <- defined(token_info_long(T))] ||
                           T <- Ts],
                case lists:usort(Tags) of
                    [_] -> true;
                    [] -> Ts =:= [];
                    _ -> false
                end;
            true ->
                %% Either all annotations are integers or it is a mix.
                IntegerAnno =:= Ts
        end,
    case IsConsistent of
        true -> consistent_attributes(TsL);
        false -> Ts
    end.
1434
%% Keeps the {Tag, Value} pairs whose value is not 'undefined'.
%% Elements that are not pairs are dropped.
defined(L) ->
    lists:filter(fun({_Tag, Value}) -> Value =/= undefined;
                    (_) -> false
                 end, L).
1437
%% Returns the family built by family/1 as an ordinary Erlang term.
family_list(Pairs) ->
    Family = family(Pairs),
    sofs:to_external(Family).
1440
%% Turns a list of pairs into a sofs family via sofs:relation/1 and
%% sofs:relation_to_family/1.
family(Pairs) ->
    Relation = sofs:relation(Pairs),
    sofs:relation_to_family(Relation).