PageRenderTime 63ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/stdlib/test/erl_scan_SUITE.erl

https://github.com/bsmr-erlang/otp
Erlang | 1442 lines | 1166 code | 191 blank | 85 comment | 30 complexity | 56e5f28ad59df1cb626c9edb0e5f9c30 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, MPL-2.0-no-copyleft-exception, Apache-2.0
  1. %%
  2. %% %CopyrightBegin%
  3. %%
  4. %% Copyright Ericsson AB 1998-2017. All Rights Reserved.
  5. %%
  6. %% Licensed under the Apache License, Version 2.0 (the "License");
  7. %% you may not use this file except in compliance with the License.
  8. %% You may obtain a copy of the License at
  9. %%
  10. %% http://www.apache.org/licenses/LICENSE-2.0
  11. %%
  12. %% Unless required by applicable law or agreed to in writing, software
  13. %% distributed under the License is distributed on an "AS IS" BASIS,
  14. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. %% See the License for the specific language governing permissions and
  16. %% limitations under the License.
  17. %%
  18. %% %CopyrightEnd%
  19. -module(erl_scan_SUITE).
  20. -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
  21. init_per_testcase/2, end_per_testcase/2,
  22. init_per_group/2,end_per_group/2]).
  23. -export([error_1/1, error_2/1, iso88591/1, otp_7810/1, otp_10302/1,
  24. otp_10990/1, otp_10992/1, otp_11807/1]).
  25. -import(lists, [nth/2,flatten/1]).
  26. -import(io_lib, [print/1]).
  27. %%
  28. %% Define to run outside of test server
  29. %%
  30. %%-define(STANDALONE,1).
  31. -ifdef(STANDALONE).
  32. -compile(export_all).
  33. -define(line, put(line, ?LINE), ).
  34. -define(config(A,B),config(A,B)).
  35. -define(t, test_server).
  36. %% config(priv_dir, _) ->
  37. %% ".";
  38. %% config(data_dir, _) ->
  39. %% ".".
  40. -else.
  41. -include_lib("common_test/include/ct.hrl").
  42. -endif.
  43. init_per_testcase(_Case, Config) ->
  44. Config.
  45. end_per_testcase(_Case, _Config) ->
  46. ok.
  47. suite() ->
  48. [{ct_hooks,[ts_install_cth]},
  49. {timetrap,{minutes,20}}].
  50. all() ->
  51. [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992,
  52. otp_11807].
  53. groups() ->
  54. [{error, [], [error_1, error_2]}].
  55. init_per_suite(Config) ->
  56. Config.
  57. end_per_suite(_Config) ->
  58. ok.
  59. init_per_group(_GroupName, Config) ->
  60. Config.
  61. end_per_group(_GroupName, Config) ->
  62. Config.
  63. %% (OTP-2347)
  64. error_1(Config) when is_list(Config) ->
  65. {error, _, _} = erl_scan:string("'a"),
  66. ok.
  67. %% Checks that format_error works on the error cases.
  68. error_2(Config) when is_list(Config) ->
  69. lists:foreach(fun check/1, error_cases()),
  70. ok.
  71. error_cases() ->
  72. ["'a",
  73. "\"a",
  74. "'\\",
  75. "\"\\",
  76. "$",
  77. "$\\",
  78. "2.3e",
  79. "2.3e-",
  80. "91#9"
  81. ].
  82. assert_type(N, integer) when is_integer(N) ->
  83. ok;
  84. assert_type(N, atom) when is_atom(N) ->
  85. ok.
  86. check(String) ->
  87. Error = erl_scan:string(String),
  88. check_error(Error, erl_scan).
  89. %%% (This should be useful for all format_error functions.)
  90. check_error({error, Info, EndLine}, Module0) ->
  91. {ErrorLine, Module, Desc} = Info,
  92. true = (Module == Module0),
  93. assert_type(EndLine, integer),
  94. assert_type(ErrorLine, integer),
  95. true = (ErrorLine =< EndLine),
  96. String = lists:flatten(Module0:format_error(Desc)),
  97. true = io_lib:printable_list(String).
  98. %% Tests the support for ISO-8859-1 i.e Latin-1.
  99. iso88591(Config) when is_list(Config) ->
  100. ok =
  101. case catch begin
  102. %% Some atom and variable names
  103. V1s = [$Á,$á,$é,$ë],
  104. V2s = [$N,$ä,$r],
  105. A1s = [$h,$ä,$r],
  106. A2s = [$ö,$r,$e],
  107. %% Test parsing atom and variable characters.
  108. {ok,Ts1,_} = erl_scan_string(V1s ++ " " ++ V2s ++
  109. "\327" ++
  110. A1s ++ " " ++ A2s),
  111. V1s = atom_to_list(element(3, nth(1, Ts1))),
  112. V2s = atom_to_list(element(3, nth(2, Ts1))),
  113. A1s = atom_to_list(element(3, nth(4, Ts1))),
  114. A2s = atom_to_list(element(3, nth(5, Ts1))),
  115. %% Test printing atoms
  116. A1s = flatten(print(element(3, nth(4, Ts1)))),
  117. A2s = flatten(print(element(3, nth(5, Ts1)))),
  118. %% Test parsing and printing strings.
  119. S1 = V1s ++ "\327" ++ A1s ++ "\250" ++ A2s,
  120. S1s = "\"" ++ S1 ++ "\"",
  121. {ok,Ts2,_} = erl_scan_string(S1s),
  122. S1 = element(3, nth(1, Ts2)),
  123. S1s = flatten(print(element(3, nth(1, Ts2)))),
  124. ok %It all worked
  125. end of
  126. {'EXIT',R} -> %Something went wrong!
  127. {error,R};
  128. ok -> ok %Aok
  129. end.
  130. %% OTP-7810. White spaces, comments, and more...
  131. otp_7810(Config) when is_list(Config) ->
  132. ok = reserved_words(),
  133. ok = atoms(),
  134. ok = punctuations(),
  135. ok = comments(),
  136. ok = errors(),
  137. ok = integers(),
  138. ok = base_integers(),
  139. ok = floats(),
  140. ok = dots(),
  141. ok = chars(),
  142. ok = variables(),
  143. ok = eof(),
  144. ok = illegal(),
  145. ok = crashes(),
  146. ok = options(),
  147. ok = token_info(),
  148. ok = column_errors(),
  149. ok = white_spaces(),
  150. ok = unicode(),
  151. ok = more_chars(),
  152. ok = more_options(),
  153. ok = anno_info(),
  154. ok.
  155. reserved_words() ->
  156. L = ['after', 'begin', 'case', 'try', 'cond', 'catch',
  157. 'andalso', 'orelse', 'end', 'fun', 'if', 'let', 'of',
  158. 'receive', 'when', 'bnot', 'not', 'div',
  159. 'rem', 'band', 'and', 'bor', 'bxor', 'bsl', 'bsr',
  160. 'or', 'xor'],
  161. [begin
  162. {RW, true} = {RW, erl_scan:reserved_word(RW)},
  163. S = atom_to_list(RW),
  164. Ts = [{RW,{1,1}}],
  165. test_string(S, Ts)
  166. end || RW <- L],
  167. ok.
  168. atoms() ->
  169. test_string("a
  170. b", [{atom,{1,1},a},{atom,{2,18},b}]),
  171. test_string("'a b'", [{atom,{1,1},'a b'}]),
  172. test_string("a", [{atom,{1,1},a}]),
  173. test_string("a@2", [{atom,{1,1},a@2}]),
  174. test_string([39,65,200,39], [{atom,{1,1},'AÈ'}]),
  175. test_string("ärlig östen", [{atom,{1,1},ärlig},{atom,{1,7},östen}]),
  176. {ok,[{atom,_,'$a'}],{1,6}} =
  177. erl_scan_string("'$\\a'", {1,1}),
  178. test("'$\\a'"),
  179. ok.
  180. punctuations() ->
  181. L = ["<<", "<-", "<=", "<", ">>", ">=", ">", "->", "--",
  182. "-", "++", "+", "=:=", "=/=", "=<", "=>", "==", "=", "/=",
  183. "/", "||", "|", ":=", "::", ":"],
  184. %% One token at a time:
  185. [begin
  186. W = list_to_atom(S),
  187. Ts = [{W,{1,1}}],
  188. test_string(S, Ts)
  189. end || S <- L],
  190. Three = ["/=:=", "<=:=", "==:=", ">=:="], % three tokens...
  191. No = Three ++ L,
  192. SL0 = [{S1++S2,{-length(S1),S1,S2}} ||
  193. S1 <- L,
  194. S2 <- L,
  195. not lists:member(S1++S2, No)],
  196. SL = family_list(SL0),
  197. %% Two tokens. When there are several answers, the one with
  198. %% the longest first token is chosen:
  199. %% [the special case "=<<" is among the tested ones]
  200. [begin
  201. W1 = list_to_atom(S1),
  202. W2 = list_to_atom(S2),
  203. Ts = [{W1,{1,1}},{W2,{1,-L2+1}}],
  204. test_string(S, Ts)
  205. end || {S,[{L2,S1,S2}|_]} <- SL],
  206. PTs1 = [{'!',{1,1}},{'(',{1,2}},{')',{1,3}},{',',{1,4}},{';',{1,5}},
  207. {'=',{1,6}},{'[',{1,7}},{']',{1,8}},{'{',{1,9}},{'|',{1,10}},
  208. {'}',{1,11}}],
  209. test_string("!(),;=[]{|}", PTs1),
  210. PTs2 = [{'#',{1,1}},{'&',{1,2}},{'*',{1,3}},{'+',{1,4}},{'/',{1,5}},
  211. {':',{1,6}},{'<',{1,7}},{'>',{1,8}},{'?',{1,9}},{'@',{1,10}},
  212. {'\\',{1,11}},{'^',{1,12}},{'`',{1,13}},{'~',{1,14}}],
  213. test_string("#&*+/:<>?@\\^`~", PTs2),
  214. test_string(".. ", [{'..',{1,1}}]),
  215. test_string("1 .. 2",
  216. [{integer,{1,1},1},{'..',{1,3}},{integer,{1,6},2}]),
  217. test_string("...", [{'...',{1,1}}]),
  218. ok.
  219. comments() ->
  220. test("a %%\n b"),
  221. {ok,[],1} = erl_scan_string("%"),
  222. test("a %%\n b"),
  223. {ok,[{atom,{1,1},a},{atom,{2,2},b}],{2,3}} =
  224. erl_scan_string("a %%\n b", {1,1}),
  225. {ok,[{atom,{1,1},a},{comment,{1,3},"%%"},{atom,{2,2},b}],{2,3}} =
  226. erl_scan_string("a %%\n b",{1,1}, [return_comments]),
  227. {ok,[{atom,{1,1},a},
  228. {white_space,{1,2}," "},
  229. {white_space,{1,5},"\n "},
  230. {atom,{2,2},b}],
  231. {2,3}} =
  232. erl_scan_string("a %%\n b",{1,1},[return_white_spaces]),
  233. {ok,[{atom,{1,1},a},
  234. {white_space,{1,2}," "},
  235. {comment,{1,3},"%%"},
  236. {white_space,{1,5},"\n "},
  237. {atom,{2,2},b}],
  238. {2,3}} = erl_scan_string("a %%\n b",{1,1},[return]),
  239. ok.
  240. errors() ->
  241. {error,{1,erl_scan,{string,$',"qa"}},1} = erl_scan:string("'qa"), %'
  242. {error,{{1,1},erl_scan,{string,$',"qa"}},{1,4}} = %'
  243. erl_scan:string("'qa", {1,1}, []), %'
  244. {error,{1,erl_scan,{string,$","str"}},1} = %"
  245. erl_scan:string("\"str"), %"
  246. {error,{{1,1},erl_scan,{string,$","str"}},{1,5}} = %"
  247. erl_scan:string("\"str", {1,1}, []), %"
  248. {error,{1,erl_scan,char},1} = erl_scan:string("$"),
  249. {error,{{1,1},erl_scan,char},{1,2}} = erl_scan:string("$", {1,1}, []),
  250. test_string([34,65,200,34], [{string,{1,1},"AÈ"}]),
  251. test_string("\\", [{'\\',{1,1}}]),
  252. {'EXIT',_} =
  253. (catch {foo, erl_scan:string('$\\a', {1,1})}), % type error
  254. {'EXIT',_} =
  255. (catch {foo, erl_scan:tokens([], '$\\a', {1,1})}), % type error
  256. "{a,tuple}" = erl_scan:format_error({a,tuple}),
  257. ok.
  258. integers() ->
  259. [begin
  260. I = list_to_integer(S),
  261. Ts = [{integer,{1,1},I}],
  262. test_string(S, Ts)
  263. end || S <- [[N] || N <- lists:seq($0, $9)] ++ ["2323","000"] ],
  264. ok.
  265. base_integers() ->
  266. [begin
  267. B = list_to_integer(BS),
  268. I = erlang:list_to_integer(S, B),
  269. Ts = [{integer,{1,1},I}],
  270. test_string(BS++"#"++S, Ts)
  271. end || {BS,S} <- [{"2","11"}, {"5","23234"}, {"12","05a"},
  272. {"16","abcdef"}, {"16","ABCDEF"}] ],
  273. {error,{1,erl_scan,{base,1}},1} = erl_scan:string("1#000"),
  274. {error,{{1,1},erl_scan,{base,1}},{1,2}} =
  275. erl_scan:string("1#000", {1,1}, []),
  276. test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]),
  277. [begin
  278. Str = BS ++ "#" ++ S,
  279. {error,{1,erl_scan,{illegal,integer}},1} =
  280. erl_scan:string(Str)
  281. end || {BS,S} <- [{"3","3"},{"15","f"}, {"12","c"}] ],
  282. {ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"),
  283. {ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} =
  284. erl_scan_string("16#ef@", {1,1}, []),
  285. {ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} =
  286. erl_scan_string("16#eg@", {1,1}, []),
  287. ok.
  288. floats() ->
  289. [begin
  290. F = list_to_float(FS),
  291. Ts = [{float,{1,1},F}],
  292. test_string(FS, Ts)
  293. end || FS <- ["1.0","001.17","3.31200","1.0e0","1.0E17",
  294. "34.21E-18", "17.0E+14"]],
  295. test_string("1.e2", [{integer,{1,1},1},{'.',{1,2}},{atom,{1,3},e2}]),
  296. {error,{1,erl_scan,{illegal,float}},1} =
  297. erl_scan:string("1.0e400"),
  298. {error,{{1,1},erl_scan,{illegal,float}},{1,8}} =
  299. erl_scan:string("1.0e400", {1,1}, []),
  300. [begin
  301. {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(S),
  302. {error,{{1,1},erl_scan,{illegal,float}},{1,_}} =
  303. erl_scan:string(S, {1,1}, [])
  304. end || S <- ["1.14Ea"]],
  305. ok.
  306. dots() ->
  307. Dot = [{".", {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,2}}},
  308. {". ", {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
  309. {".\n", {ok,[{dot,1}],2}, {ok,[{dot,{1,1}}],{2,1}}},
  310. {".%", {ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
  311. {".\210",{ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,3}}},
  312. {".% öh",{ok,[{dot,1}],1}, {ok,[{dot,{1,1}}],{1,6}}},
  313. {".%\n", {ok,[{dot,1}],2}, {ok,[{dot,{1,1}}],{2,1}}},
  314. {".$", {error,{1,erl_scan,char},1},
  315. {error,{{1,2},erl_scan,char},{1,3}}},
  316. {".$\\", {error,{1,erl_scan,char},1},
  317. {error,{{1,2},erl_scan,char},{1,4}}},
  318. {".a", {ok,[{'.',1},{atom,1,a}],1},
  319. {ok,[{'.',{1,1}},{atom,{1,2},a}],{1,3}}}
  320. ],
  321. [begin
  322. R = erl_scan_string(S),
  323. R2 = erl_scan_string(S, {1,1}, [])
  324. end || {S, R, R2} <- Dot],
  325. {ok,[{dot,_}=T1],{1,2}} = erl_scan:string(".", {1,1}, text),
  326. [1, 1, "."] = token_info(T1),
  327. {ok,[{dot,_}=T2],{1,3}} = erl_scan:string(".%", {1,1}, text),
  328. [1, 1, "."] = token_info(T2),
  329. {ok,[{dot,_}=T3],{1,6}} =
  330. erl_scan:string(".% öh", {1,1}, text),
  331. [1, 1, "."] = token_info(T3),
  332. {error,{{1,2},erl_scan,char},{1,3}} = erl_scan:string(".$", {1,1}),
  333. {error,{{1,2},erl_scan,char},{1,4}} = erl_scan:string(".$\\", {1,1}),
  334. test_string(". ", [{dot,{1,1}}]),
  335. test_string(". ", [{dot,{1,1}}]),
  336. test_string(".\n", [{dot,{1,1}}]),
  337. test_string(".\n\n", [{dot,{1,1}}]),
  338. test_string(".\n\r", [{dot,{1,1}}]),
  339. test_string(".\n\n\n", [{dot,{1,1}}]),
  340. test_string(".\210", [{dot,{1,1}}]),
  341. test_string(".%\n", [{dot,{1,1}}]),
  342. test_string(".a", [{'.',{1,1}},{atom,{1,2},a}]),
  343. test_string("%. \n. ", [{dot,{2,1}}]),
  344. {more,C} = erl_scan:tokens([], "%. ",{1,1}, return),
  345. {done,{ok,[{comment,{1,1},"%. "},
  346. {white_space,{1,4},"\n"},
  347. {dot,{2,1}}],
  348. {2,3}}, ""} =
  349. erl_scan_tokens(C, "\n. ", {1,1}, return), % any loc, any options
  350. [test_string(S, R) ||
  351. {S, R} <- [{".$\n", [{'.',{1,1}},{char,{1,2},$\n}]},
  352. {"$\\\n", [{char,{1,1},$\n}]},
  353. {"'\\\n'", [{atom,{1,1},'\n'}]},
  354. {"$\n", [{char,{1,1},$\n}]}] ],
  355. ok.
  356. chars() ->
  357. [begin
  358. L = lists:flatten(io_lib:format("$\\~.8b", [C])),
  359. Ts = [{char,{1,1},C}],
  360. test_string(L, Ts)
  361. end || C <- lists:seq(0, 255)],
  362. %% Leading zeroes...
  363. [begin
  364. L = lists:flatten(io_lib:format("$\\~3.8.0b", [C])),
  365. Ts = [{char,{1,1},C}],
  366. test_string(L, Ts)
  367. end || C <- lists:seq(0, 255)],
  368. %% $\^\n now increments the line...
  369. [begin
  370. L = "$\\^" ++ [C],
  371. Ts = [{char,{1,1},C band 2#11111}],
  372. test_string(L, Ts)
  373. end || C <- lists:seq(0, 255)],
  374. [begin
  375. L = "$\\" ++ [C],
  376. Ts = [{char,{1,1},V}],
  377. test_string(L, Ts)
  378. end || {C,V} <- [{$n,$\n}, {$r,$\r}, {$t,$\t}, {$v,$\v},
  379. {$b,$\b}, {$f,$\f}, {$e,$\e}, {$s,$\s},
  380. {$d,$\d}]],
  381. EC = [$\n,$\r,$\t,$\v,$\b,$\f,$\e,$\s,$\d],
  382. Ds = lists:seq($0, $9),
  383. X = [$^,$n,$r,$t,$v,$b,$f,$e,$s,$d],
  384. New = [${,$x],
  385. No = EC ++ Ds ++ X ++ New,
  386. [begin
  387. L = "$\\" ++ [C],
  388. Ts = [{char,{1,1},C}],
  389. test_string(L, Ts)
  390. end || C <- lists:seq(0, 255) -- No],
  391. [begin
  392. L = "'$\\" ++ [C] ++ "'",
  393. Ts = [{atom,{1,1},list_to_atom("$"++[C])}],
  394. test_string(L, Ts)
  395. end || C <- lists:seq(0, 255) -- No],
  396. test_string("\"\\013a\\\n\"", [{string,{1,1},"\va\n"}]),
  397. test_string("'\n'", [{atom,{1,1},'\n'}]),
  398. test_string("\"\n\a\"", [{string,{1,1},"\na"}]),
  399. %% No escape
  400. [begin
  401. L = "$" ++ [C],
  402. Ts = [{char,{1,1},C}],
  403. test_string(L, Ts)
  404. end || C <- lists:seq(0, 255) -- (No ++ [$\\])],
  405. test_string("$\n", [{char,{1,1},$\n}]),
  406. {error,{{1,1},erl_scan,char},{1,4}} =
  407. erl_scan:string("$\\^",{1,1}),
  408. test_string("$\\\n", [{char,{1,1},$\n}]),
  409. %% Robert's scanner returns line 1:
  410. test_string("$\\\n", [{char,{1,1},$\n}]),
  411. test_string("$\n\n", [{char,{1,1},$\n}]),
  412. test("$\n\n"),
  413. ok.
  414. variables() ->
  415. test_string(" \237_Aouåeiyäö", [{var,{1,7},'_Aouåeiyäö'}]),
  416. test_string("A_b_c@", [{var,{1,1},'A_b_c@'}]),
  417. test_string("V@2", [{var,{1,1},'V@2'}]),
  418. test_string("ABDÀ", [{var,{1,1},'ABDÀ'}]),
  419. test_string("Ärlig Östen", [{var,{1,1},'Ärlig'},{var,{1,7},'Östen'}]),
  420. ok.
  421. eof() ->
  422. {done,{eof,1},eof} = erl_scan:tokens([], eof, 1),
  423. {more, C1} = erl_scan:tokens([]," \n", 1),
  424. {done,{eof,2},eof} = erl_scan:tokens(C1, eof, 1),
  425. {more, C2} = erl_scan:tokens([], "abra", 1),
  426. %% An error before R13A.
  427. %% {done,Err={error,{1,erl_scan,scan},1},eof} =
  428. {done,{ok,[{atom,1,abra}],1},eof} =
  429. erl_scan_tokens(C2, eof, 1),
  430. %% With column.
  431. {more, C3} = erl_scan:tokens([]," \n",{1,1}),
  432. {done,{eof,{2,1}},eof} = erl_scan:tokens(C3, eof, 1),
  433. {more, C4} = erl_scan:tokens([], "abra", {1,1}),
  434. %% An error before R13A.
  435. %% {done,{error,{{1,1},erl_scan,scan},{1,5}},eof} =
  436. {done,{ok,[{atom,_,abra}],{1,5}},eof} =
  437. erl_scan_tokens(C4, eof, 1),
  438. %% Robert's scanner returns "" as LeftoverChars;
  439. %% the R12B scanner returns eof as LeftoverChars: (eof is correct)
  440. {more, C5} = erl_scan:tokens([], "a", 1),
  441. %% An error before R13A.
  442. %% {done,{error,{1,erl_scan,scan},1},eof} =
  443. {done,{ok,[{atom,1,a}],1},eof} =
  444. erl_scan_tokens(C5,eof,1),
  445. %% With column.
  446. {more, C6} = erl_scan:tokens([], "a", {1,1}),
  447. %% An error before R13A.
  448. %% {done,{error,{1,erl_scan,scan},1},eof} =
  449. {done,{ok,[{atom,{1,1},a}],{1,2}},eof} =
  450. erl_scan_tokens(C6,eof,1),
  451. %% A dot followed by eof is special:
  452. {more, C} = erl_scan:tokens([], "a.", 1),
  453. {done,{ok,[{atom,1,a},{dot,1}],1},eof} = erl_scan_tokens(C,eof,1),
  454. {ok,[{atom,1,foo},{dot,1}],1} = erl_scan_string("foo."),
  455. %% With column.
  456. {more, CCol} = erl_scan:tokens([], "a.", {1,1}),
  457. {done,{ok,[{atom,{1,1},a},{dot,{1,2}}],{1,3}},eof} =
  458. erl_scan_tokens(CCol,eof,1),
  459. {ok,[{atom,{1,1},foo},{dot,{1,4}}],{1,5}} =
  460. erl_scan_string("foo.", {1,1}, []),
  461. ok.
  462. illegal() ->
  463. Atom = lists:duplicate(1000, $a),
  464. {error,{1,erl_scan,{illegal,atom}},1} = erl_scan:string(Atom),
  465. {done,{error,{1,erl_scan,{illegal,atom}},1},". "} =
  466. erl_scan:tokens([], Atom++". ", 1),
  467. QAtom = "'" ++ Atom ++ "'",
  468. {error,{1,erl_scan,{illegal,atom}},1} = erl_scan:string(QAtom),
  469. {done,{error,{1,erl_scan,{illegal,atom}},1},". "} =
  470. erl_scan:tokens([], QAtom++". ", 1),
  471. Var = lists:duplicate(1000, $A),
  472. {error,{1,erl_scan,{illegal,var}},1} = erl_scan:string(Var),
  473. {done,{error,{1,erl_scan,{illegal,var}},1},". "} =
  474. erl_scan:tokens([], Var++". ", 1),
  475. Float = "1" ++ lists:duplicate(400, $0) ++ ".0",
  476. {error,{1,erl_scan,{illegal,float}},1} = erl_scan:string(Float),
  477. {done,{error,{1,erl_scan,{illegal,float}},1},". "} =
  478. erl_scan:tokens([], Float++". ", 1),
  479. String = "\"43\\x{aaaaaa}34\"",
  480. {error,{1,erl_scan,{illegal,character}},1} = erl_scan:string(String),
  481. {done,{error,{1,erl_scan,{illegal,character}},1},"34\". "} =
  482. %% Would be nice if `34\"' were skipped...
  483. %% Maybe, but then the LeftOverChars would not be the characters
  484. %% immediately following the end location of the error.
  485. erl_scan:tokens([], String++". ", 1),
  486. {error,{{1,1},erl_scan,{illegal,atom}},{1,1001}} =
  487. erl_scan:string(Atom, {1,1}),
  488. {done,{error,{{1,5},erl_scan,{illegal,atom}},{1,1005}},". "} =
  489. erl_scan:tokens([], "foo "++Atom++". ", {1,1}),
  490. {error,{{1,1},erl_scan,{illegal,atom}},{1,1003}} =
  491. erl_scan:string(QAtom, {1,1}),
  492. {done,{error,{{1,5},erl_scan,{illegal,atom}},{1,1007}},". "} =
  493. erl_scan:tokens([], "foo "++QAtom++". ", {1,1}),
  494. {error,{{1,1},erl_scan,{illegal,var}},{1,1001}} =
  495. erl_scan:string(Var, {1,1}),
  496. {done,{error,{{1,5},erl_scan,{illegal,var}},{1,1005}},". "} =
  497. erl_scan:tokens([], "foo "++Var++". ", {1,1}),
  498. {error,{{1,1},erl_scan,{illegal,float}},{1,404}} =
  499. erl_scan:string(Float, {1,1}),
  500. {done,{error,{{1,5},erl_scan,{illegal,float}},{1,408}},". "} =
  501. erl_scan:tokens([], "foo "++Float++". ", {1,1}),
  502. {error,{{1,4},erl_scan,{illegal,character}},{1,14}} =
  503. erl_scan:string(String, {1,1}),
  504. {done,{error,{{1,4},erl_scan,{illegal,character}},{1,14}},"34\". "} =
  505. erl_scan:tokens([], String++". ", {1,1}),
  506. ok.
  507. crashes() ->
  508. {'EXIT',_} = (catch {foo, erl_scan:string([-1])}), % type error
  509. {'EXIT',_} = (catch {foo, erl_scan:string("$"++[-1])}),
  510. {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[-1])}),
  511. {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[-1])}),
  512. {'EXIT',_} = (catch {foo, erl_scan:string([$",-1,$"],{1,1})}),
  513. {'EXIT',_} = (catch {foo, erl_scan:string("\"\\v"++[-1,$"])}), %$"
  514. {'EXIT',_} = (catch {foo, erl_scan:string([$",-1,$"])}),
  515. {'EXIT',_} = (catch {foo, erl_scan:string("% foo"++[-1])}),
  516. {'EXIT',_} =
  517. (catch {foo, erl_scan:string("% foo"++[-1],{1,1})}),
  518. {'EXIT',_} = (catch {foo, erl_scan:string([a])}), % type error
  519. {'EXIT',_} = (catch {foo, erl_scan:string("$"++[a])}),
  520. {'EXIT',_} = (catch {foo, erl_scan:string("$\\"++[a])}),
  521. {'EXIT',_} = (catch {foo, erl_scan:string("$\\^"++[a])}),
  522. {'EXIT',_} = (catch {foo, erl_scan:string([$",a,$"],{1,1})}),
  523. {'EXIT',_} = (catch {foo, erl_scan:string("\"\\v"++[a,$"])}), %$"
  524. {'EXIT',_} = (catch {foo, erl_scan:string([$",a,$"])}),
  525. {'EXIT',_} = (catch {foo, erl_scan:string("% foo"++[a])}),
  526. {'EXIT',_} =
  527. (catch {foo, erl_scan:string("% foo"++[a],{1,1})}),
  528. {'EXIT',_} = (catch {foo, erl_scan:string([3.0])}), % type error
  529. ok.
  530. options() ->
  531. %% line and column are not options, but tested here
  532. {ok,[{atom,1,foo},{white_space,1," "},{comment,1,"% bar"}], 1} =
  533. erl_scan_string("foo % bar", 1, return),
  534. {ok,[{atom,1,foo},{white_space,1," "}],1} =
  535. erl_scan_string("foo % bar", 1, return_white_spaces),
  536. {ok,[{atom,1,foo},{comment,1,"% bar"}],1} =
  537. erl_scan_string("foo % bar", 1, return_comments),
  538. {ok,[{atom,17,foo}],17} =
  539. erl_scan_string("foo % bar", 17),
  540. {'EXIT',{function_clause,_}} =
  541. (catch {foo,
  542. erl_scan:string("foo % bar", {a,1}, [])}), % type error
  543. {ok,[{atom,_,foo}],{17,18}} =
  544. erl_scan_string("foo % bar", {17,9}, []),
  545. {'EXIT',{function_clause,_}} =
  546. (catch {foo,
  547. erl_scan:string("foo % bar", {1,0}, [])}), % type error
  548. {ok,[{foo,1}],1} =
  549. erl_scan_string("foo % bar",1, [{reserved_word_fun,
  550. fun(W) -> W =:= foo end}]),
  551. {'EXIT',{badarg,_}} =
  552. (catch {foo,
  553. erl_scan:string("foo % bar",1, % type error
  554. [{reserved_word_fun,
  555. fun(W,_) -> W =:= foo end}])}),
  556. ok.
  557. more_options() ->
  558. {ok,[{atom,_,foo}=T1],{19,20}} =
  559. erl_scan:string("foo", {19,17},[]),
  560. {19,17} = erl_scan:location(T1),
  561. {done,{ok,[{atom,_,foo}=T2,{dot,_}],{19,22}},[]} =
  562. erl_scan:tokens([], "foo. ", {19,17}, [bad_opt]), % type error
  563. {19,17} = erl_scan:location(T2),
  564. {ok,[{atom,_,foo}=T3],{19,20}} =
  565. erl_scan:string("foo", {19,17},[text]),
  566. {19,17} = erl_scan:location(T3),
  567. "foo" = erl_scan:text(T3),
  568. {ok,[{atom,_,foo}=T4],1} = erl_scan:string("foo", 1, [text]),
  569. 1 = erl_scan:line(T4),
  570. 1 = erl_scan:location(T4),
  571. "foo" = erl_scan:text(T4),
  572. ok.
  573. token_info() ->
  574. {ok,[T1],_} = erl_scan:string("foo", {1,18}, [text]),
  575. {'EXIT',{badarg,_}} =
  576. (catch {foo, erl_scan:category(foo)}), % type error
  577. {'EXIT',{badarg,_}} =
  578. (catch {foo, erl_scan:symbol(foo)}), % type error
  579. atom = erl_scan:category(T1),
  580. foo = erl_scan:symbol(T1),
  581. {ok,[T2],_} = erl_scan:string("foo", 1, []),
  582. 1 = erl_scan:line(T2),
  583. undefined = erl_scan:column(T2),
  584. undefined = erl_scan:text(T2),
  585. 1 = erl_scan:location(T2),
  586. {ok,[T3],_} = erl_scan:string("=", 1, []),
  587. '=' = erl_scan:category(T3),
  588. '=' = erl_scan:symbol(T3),
  589. ok.
  590. anno_info() ->
  591. {'EXIT',_} =
  592. (catch {foo,erl_scan:line(foo)}), % type error
  593. {ok,[{atom,_,foo}=T0],_} = erl_scan:string("foo", 19, [text]),
  594. 19 = erl_scan:location(T0),
  595. 19 = erl_scan:end_location(T0),
  596. {ok,[{atom,_,foo}=T3],_} = erl_scan:string("foo", {1,3}, [text]),
  597. 1 = erl_scan:line(T3),
  598. 3 = erl_scan:column(T3),
  599. {1,3} = erl_scan:location(T3),
  600. {1,6} = erl_scan:end_location(T3),
  601. "foo" = erl_scan:text(T3),
  602. {ok,[{atom,_,foo}=T4],_} = erl_scan:string("foo", 2, [text]),
  603. 2 = erl_scan:line(T4),
  604. undefined = erl_scan:column(T4),
  605. 2 = erl_scan:location(T4),
  606. "foo" = erl_scan:text(T4),
  607. {ok,[{atom,_,foo}=T5],_} = erl_scan:string("foo", {1,3}, []),
  608. 1 = erl_scan:line(T5),
  609. 3 = erl_scan:column(T5),
  610. {1,3} = erl_scan:location(T5),
  611. undefined = erl_scan:text(T5),
  612. ok.
  613. column_errors() ->
  614. {error,{{1,1},erl_scan,{string,$',""}},{1,3}} = % $'
  615. erl_scan:string("'\\",{1,1}),
  616. {error,{{1,1},erl_scan,{string,$",""}},{1,3}} = % $"
  617. erl_scan:string("\"\\",{1,1}),
  618. {error,{{1,1},erl_scan,{string,$',""}},{1,2}} = % $'
  619. erl_scan:string("'",{1,1}),
  620. {error,{{1,1},erl_scan,{string,$",""}},{1,2}} = % $"
  621. erl_scan:string("\"",{1,1}),
  622. {error,{{1,1},erl_scan,char},{1,2}} =
  623. erl_scan:string("$",{1,1}),
  624. {error,{{1,2},erl_scan,{string,$',"1234567890123456"}},{1,20}} = %'
  625. erl_scan:string(" '12345678901234567", {1,1}),
  626. {error,{{1,2},erl_scan,{string,$',"123456789012345 "}}, {1,20}} = %'
  627. erl_scan:string(" '123456789012345\\s", {1,1}),
  628. {error,{{1,2},erl_scan,{string,$","1234567890123456"}},{1,20}} = %"
  629. erl_scan:string(" \"12345678901234567", {1,1}),
  630. {error,{{1,2},erl_scan,{string,$","123456789012345 "}}, {1,20}} = %"
  631. erl_scan:string(" \"123456789012345\\s", {1,1}),
  632. {error,{{1,2},erl_scan,{string,$',"1234567890123456"}},{2,1}} = %'
  633. erl_scan:string(" '12345678901234567\n", {1,1}),
  634. ok.
  635. white_spaces() ->
  636. {ok,[{white_space,_,"\r"},
  637. {white_space,_," "},
  638. {atom,_,a},
  639. {white_space,_,"\n"}],
  640. _} = erl_scan_string("\r a\n", {1,1}, return),
  641. test("\r a\n"),
  642. L = "{\"a\nb\", \"a\\nb\",\nabc\r,def}.\n\n",
  643. {ok,[{'{',_},
  644. {string,_,"a\nb"},
  645. {',',_},
  646. {white_space,_," "},
  647. {string,_,"a\nb"},
  648. {',',_},
  649. {white_space,_,"\n"},
  650. {atom,_,abc},
  651. {white_space,_,"\r"},
  652. {',',_},
  653. {atom,_,def},
  654. {'}',_},
  655. {dot,_},
  656. {white_space,_,"\n"}],
  657. _} = erl_scan_string(L, {1,1}, return),
  658. test(L),
  659. test("\"\n\"\n"),
  660. test("\n\r\n"),
  661. test("\n\r"),
  662. test("\r\n"),
  663. test("\n\f"),
  664. [test(lists:duplicate(N, $\t)) || N <- lists:seq(1, 20)],
  665. [test([$\n|lists:duplicate(N, $\t)]) || N <- lists:seq(1, 20)],
  666. [test(lists:duplicate(N, $\s)) || N <- lists:seq(1, 20)],
  667. [test([$\n|lists:duplicate(N, $\s)]) || N <- lists:seq(1, 20)],
  668. test("\v\f\n\v "),
  669. test("\n\e\n\b\f\n\da\n"),
  670. ok.
  671. unicode() ->
  672. {ok,[{char,1,83},{integer,1,45}],1} =
  673. erl_scan_string("$\\12345"), % not unicode
  674. {error,{1,erl_scan,{illegal,character}},1} =
  675. erl_scan:string([1089]),
  676. {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
  677. erl_scan:string([1089], {1,1}),
  678. {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
  679. erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}),
  680. test("\"a"++[1089]++"b\""),
  681. {ok,[{char,1,1}],1} =
  682. erl_scan_string([$$,$\\,$^,1089], 1),
  683. {error,{1,erl_scan,Error},1} =
  684. erl_scan:string("\"qa\x{aaa}", 1),
  685. "unterminated string starting with \"qa"++[2730]++"\"" =
  686. erl_scan:format_error(Error),
  687. {error,{{1,1},erl_scan,_},{1,11}} =
  688. erl_scan:string("\"qa\\x{aaa}",{1,1}),
  689. {error,{{1,1},erl_scan,_},{1,11}} =
  690. erl_scan:string("'qa\\x{aaa}",{1,1}),
  691. {ok,[{char,1,1089}],1} =
  692. erl_scan_string([$$,1089], 1),
  693. {ok,[{char,1,1089}],1} =
  694. erl_scan_string([$$,$\\,1089], 1),
  695. Qs = "$\\x{aaa}",
  696. {ok,[{char,1,$\x{aaa}}],1} =
  697. erl_scan_string(Qs, 1),
  698. {ok,[Q2],{1,9}} =
  699. erl_scan:string("$\\x{aaa}", {1,1}, [text]),
  700. [{category,char},{column,1},{line,1},{symbol,16#aaa},{text,Qs}] =
  701. token_info_long(Q2),
  702. U1 = "\"\\x{aaa}\"",
  703. {ok,[{string,_,[2730]}=T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]),
  704. {1,1} = erl_scan:location(T1),
  705. "\"\\x{aaa}\"" = erl_scan:text(T1),
  706. {ok,[{string,1,[2730]}],1} = erl_scan_string(U1, 1),
  707. U2 = "\"\\x41\\x{fff}\\x42\"",
  708. {ok,[{string,1,[$\x41,$\x{fff},$\x42]}],1} = erl_scan_string(U2, 1),
  709. U3 = "\"a\n\\x{fff}\n\"",
  710. {ok,[{string,1,[$a,$\n,$\x{fff},$\n]}],3} = erl_scan_string(U3, 1),
  711. U4 = "\"\\^\n\\x{aaa}\\^\n\"",
  712. {ok,[{string,1,[$\n,$\x{aaa},$\n]}],3} = erl_scan_string(U4, 1),
  713. %% Keep these tests:
  714. test(Qs),
  715. test(U1),
  716. test(U2),
  717. test(U3),
  718. test(U4),
  719. Str1 = "\"ab" ++ [1089] ++ "cd\"",
  720. {ok,[{string,1,[$a,$b,1089,$c,$d]}],1} = erl_scan_string(Str1, 1),
  721. {ok,[{string,{1,1},[$a,$b,1089,$c,$d]}],{1,8}} =
  722. erl_scan_string(Str1, {1,1}),
  723. test(Str1),
  724. Comment = "%% "++[1089],
  725. {ok,[{comment,1,[$%,$%,$\s,1089]}],1} =
  726. erl_scan_string(Comment, 1, [return]),
  727. {ok,[{comment,{1,1},[$%,$%,$\s,1089]}],{1,5}} =
  728. erl_scan_string(Comment, {1,1}, [return]),
  729. ok.
  730. more_chars() ->
  731. %% Due to unicode, the syntax has been incompatibly augmented:
  732. %% $\x{...}, $\xHH
  733. %% All kinds of tests...
  734. {ok,[{char,_,123}],{1,4}} =
  735. erl_scan_string("$\\{",{1,1}),
  736. {more, C1} = erl_scan:tokens([], "$\\{", {1,1}),
  737. {done,{ok,[{char,_,123}],{1,4}},eof} =
  738. erl_scan_tokens(C1, eof, 1),
  739. {ok,[{char,1,123},{atom,1,a},{'}',1}],1} =
  740. erl_scan_string("$\\{a}"),
  741. {error,{{1,1},erl_scan,char},{1,4}} =
  742. erl_scan:string("$\\x", {1,1}),
  743. {error,{{1,1},erl_scan,char},{1,5}} =
  744. erl_scan:string("$\\x{",{1,1}),
  745. {more, C3} = erl_scan:tokens([], "$\\x", {1,1}),
  746. {done,{error,{{1,1},erl_scan,char},{1,4}},eof} =
  747. erl_scan:tokens(C3, eof, 1),
  748. {error,{{1,1},erl_scan,char},{1,5}} =
  749. erl_scan:string("$\\x{",{1,1}),
  750. {more, C2} = erl_scan:tokens([], "$\\x{", {1,1}),
  751. {done,{error,{{1,1},erl_scan,char},{1,5}},eof} =
  752. erl_scan:tokens(C2, eof, 1),
  753. {error,{1,erl_scan,{illegal,character}},1} =
  754. erl_scan:string("$\\x{g}"),
  755. {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
  756. erl_scan:string("$\\x{g}", {1,1}),
  757. {error,{{1,1},erl_scan,{illegal,character}},{1,6}} =
  758. erl_scan:string("$\\x{}",{1,1}),
  759. test("\"\\{0}\""),
  760. test("\"\\x{0}\""),
  761. test("\'\\{0}\'"),
  762. test("\'\\x{0}\'"),
  763. {error,{{2,3},erl_scan,{illegal,character}},{2,6}} =
  764. erl_scan:string("\"ab \n $\\x{g}\"",{1,1}),
  765. {error,{{2,3},erl_scan,{illegal,character}},{2,6}} =
  766. erl_scan:string("\'ab \n $\\x{g}\'",{1,1}),
  767. test("$\\{34}"),
  768. test("$\\x{34}"),
  769. test("$\\{377}"),
  770. test("$\\x{FF}"),
  771. test("$\\{400}"),
  772. test("$\\x{100}"),
  773. test("$\\x{10FFFF}"),
  774. test("$\\x{10ffff}"),
  775. test("\"$\n \\{1}\""),
  776. {error,{1,erl_scan,{illegal,character}},1} =
  777. erl_scan:string("$\\x{110000}"),
  778. {error,{{1,1},erl_scan,{illegal,character}},{1,12}} =
  779. erl_scan:string("$\\x{110000}", {1,1}),
  780. {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
  781. erl_scan:string("$\\xfg", {1,1}),
  782. test("$\\xffg"),
  783. {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
  784. erl_scan:string("$\\xg", {1,1}),
  785. ok.
  786. %% OTP-10302. Unicode characters scanner/parser.
  787. otp_10302(Config) when is_list(Config) ->
  788. %% From unicode():
  789. {ok,[{atom,1,'aсb'}],1} =
  790. erl_scan_string("'a"++[1089]++"b'", 1),
  791. {ok,[{atom,{1,1},'qaપ'}],{1,12}} =
  792. erl_scan_string("'qa\\x{aaa}'",{1,1}),
  793. {ok,[{char,1,1089}],1} = erl_scan_string([$$,1089], 1),
  794. {ok,[{char,1,1089}],1} = erl_scan_string([$$,$\\,1089],1),
  795. Qs = "$\\x{aaa}",
  796. {ok,[{char,1,2730}],1} = erl_scan_string(Qs, 1),
  797. {ok,[Q2],{1,9}} = erl_scan:string(Qs,{1,1},[text]),
  798. [{category,char},{column,1},{line,1},{symbol,16#aaa},{text,Qs}] =
  799. token_info_long(Q2),
  800. U1 = "\"\\x{aaa}\"",
  801. {ok,[T1],{1,10}} = erl_scan:string(U1, {1,1}, [text]),
  802. [{category,string},{column,1},{line,1},{symbol,[16#aaa]},{text,U1}] =
  803. token_info_long(T1),
  804. U2 = "\"\\x41\\x{fff}\\x42\"",
  805. {ok,[{string,1,[65,4095,66]}],1} = erl_scan_string(U2, 1),
  806. U3 = "\"a\n\\x{fff}\n\"",
  807. {ok,[{string,1,[97,10,4095,10]}],3} = erl_scan_string(U3, 1),
  808. U4 = "\"\\^\n\\x{aaa}\\^\n\"",
  809. {ok,[{string,1,[10,2730,10]}],3} = erl_scan_string(U4, 1,[]),
  810. Str1 = "\"ab" ++ [1089] ++ "cd\"",
  811. {ok,[{string,1,[97,98,1089,99,100]}],1} =
  812. erl_scan_string(Str1,1),
  813. {ok,[{string,{1,1},[97,98,1089,99,100]}],{1,8}} =
  814. erl_scan_string(Str1, {1,1}),
  815. OK1 = 16#D800-1,
  816. OK2 = 16#DFFF+1,
  817. OK3 = 16#FFFE-1,
  818. OK4 = 16#FFFF+1,
  819. OKL = [OK1,OK2,OK3,OK4],
  820. Illegal1 = 16#D800,
  821. Illegal2 = 16#DFFF,
  822. Illegal3 = 16#FFFE,
  823. Illegal4 = 16#FFFF,
  824. IllegalL = [Illegal1,Illegal2,Illegal3,Illegal4],
  825. [{ok,[{comment,1,[$%,$%,$\s,OK]}],1} =
  826. erl_scan_string("%% "++[OK], 1, [return]) ||
  827. OK <- OKL],
  828. {ok,[{comment,_,[$%,$%,$\s,OK1]}],{1,5}} =
  829. erl_scan_string("%% "++[OK1], {1,1}, [return]),
  830. [{error,{1,erl_scan,{illegal,character}},1} =
  831. erl_scan:string("%% "++[Illegal], 1, [return]) ||
  832. Illegal <- IllegalL],
  833. {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
  834. erl_scan:string("%% "++[Illegal1], {1,1}, [return]),
  835. [{ok,[],1} = erl_scan_string("%% "++[OK], 1, []) ||
  836. OK <- OKL],
  837. {ok,[],{1,5}} = erl_scan_string("%% "++[OK1], {1,1}, []),
  838. [{error,{1,erl_scan,{illegal,character}},1} =
  839. erl_scan:string("%% "++[Illegal], 1, []) ||
  840. Illegal <- IllegalL],
  841. {error,{{1,1},erl_scan,{illegal,character}},{1,5}} =
  842. erl_scan:string("%% "++[Illegal1], {1,1}, []),
  843. [{ok,[{string,{1,1},[OK]}],{1,4}} =
  844. erl_scan_string("\""++[OK]++"\"",{1,1}) ||
  845. OK <- OKL],
  846. [{error,{{1,2},erl_scan,{illegal,character}},{1,3}} =
  847. erl_scan:string("\""++[OK]++"\"",{1,1}) ||
  848. OK <- IllegalL],
  849. [{error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
  850. erl_scan:string([Illegal],{1,1}) ||
  851. Illegal <- IllegalL],
  852. {ok,[{char,{1,1},OK1}],{1,3}} =
  853. erl_scan_string([$$,OK1],{1,1}),
  854. {error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
  855. erl_scan:string([$$,Illegal1],{1,1}),
  856. {ok,[{char,{1,1},OK1}],{1,4}} =
  857. erl_scan_string([$$,$\\,OK1],{1,1}),
  858. {error,{{1,1},erl_scan,{illegal,character}},{1,4}} =
  859. erl_scan:string([$$,$\\,Illegal1],{1,1}),
  860. {ok,[{string,{1,1},[55295]}],{1,5}} =
  861. erl_scan_string("\"\\"++[OK1]++"\"",{1,1}),
  862. {error,{{1,2},erl_scan,{illegal,character}},{1,4}} =
  863. erl_scan:string("\"\\"++[Illegal1]++"\"",{1,1}),
  864. {ok,[{char,{1,1},OK1}],{1,10}} =
  865. erl_scan_string("$\\x{D7FF}",{1,1}),
  866. {error,{{1,1},erl_scan,{illegal,character}},{1,10}} =
  867. erl_scan:string("$\\x{D800}",{1,1}),
  868. %% Not erl_scan, but erl_parse.
  869. {integer,0,1} = erl_parse_abstract(1),
  870. Float = 3.14, {float,0,Float} = erl_parse_abstract(Float),
  871. {nil,0} = erl_parse_abstract([]),
  872. {bin,0,
  873. [{bin_element,0,{integer,0,1},default,default},
  874. {bin_element,0,{integer,0,2},default,default}]} =
  875. erl_parse_abstract(<<1,2>>),
  876. {cons,0,{tuple,0,[{atom,0,a}]},{atom,0,b}} =
  877. erl_parse_abstract([{a} | b]),
  878. {string,0,"str"} = erl_parse_abstract("str"),
  879. {cons,0,
  880. {integer,0,$a},
  881. {cons,0,{integer,0,55296},{string,0,"c"}}} =
  882. erl_parse_abstract("a"++[55296]++"c"),
  883. Line = 17,
  884. {integer,Line,1} = erl_parse_abstract(1, Line),
  885. Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Line),
  886. {nil,Line} = erl_parse_abstract([], Line),
  887. {bin,Line,
  888. [{bin_element,Line,{integer,Line,1},default,default},
  889. {bin_element,Line,{integer,Line,2},default,default}]} =
  890. erl_parse_abstract(<<1,2>>, Line),
  891. {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
  892. erl_parse_abstract([{a} | b], Line),
  893. {string,Line,"str"} = erl_parse_abstract("str", Line),
  894. {cons,Line,
  895. {integer,Line,$a},
  896. {cons,Line,{integer,Line,55296},{string,Line,"c"}}} =
  897. erl_parse_abstract("a"++[55296]++"c", Line),
  898. Opts1 = [{line,17}],
  899. {integer,Line,1} = erl_parse_abstract(1, Opts1),
  900. Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Opts1),
  901. {nil,Line} = erl_parse_abstract([], Opts1),
  902. {bin,Line,
  903. [{bin_element,Line,{integer,Line,1},default,default},
  904. {bin_element,Line,{integer,Line,2},default,default}]} =
  905. erl_parse_abstract(<<1,2>>, Opts1),
  906. {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
  907. erl_parse_abstract([{a} | b], Opts1),
  908. {string,Line,"str"} = erl_parse_abstract("str", Opts1),
  909. {cons,Line,
  910. {integer,Line,$a},
  911. {cons,Line,{integer,Line,55296},{string,Line,"c"}}} =
  912. erl_parse_abstract("a"++[55296]++"c", Opts1),
  913. [begin
  914. {integer,Line,1} = erl_parse_abstract(1, Opts2),
  915. Float = 3.14, {float,Line,Float} = erl_parse_abstract(Float, Opts2),
  916. {nil,Line} = erl_parse_abstract([], Opts2),
  917. {bin,Line,
  918. [{bin_element,Line,{integer,Line,1},default,default},
  919. {bin_element,Line,{integer,Line,2},default,default}]} =
  920. erl_parse_abstract(<<1,2>>, Opts2),
  921. {cons,Line,{tuple,Line,[{atom,Line,a}]},{atom,Line,b}} =
  922. erl_parse_abstract([{a} | b], Opts2),
  923. {string,Line,"str"} = erl_parse_abstract("str", Opts2),
  924. {string,Line,[97,1024,99]} =
  925. erl_parse_abstract("a"++[1024]++"c", Opts2)
  926. end || Opts2 <- [[{encoding,unicode},{line,Line}],
  927. [{encoding,utf8},{line,Line}]]],
  928. {cons,0,
  929. {integer,0,97},
  930. {cons,0,{integer,0,1024},{string,0,"c"}}} =
  931. erl_parse_abstract("a"++[1024]++"c", [{encoding,latin1}]),
  932. ok.
  933. %% OTP-10990. Floating point number in input string.
  934. otp_10990(Config) when is_list(Config) ->
  935. {'EXIT',_} = (catch {foo, erl_scan:string([$",42.0,$"],1)}),
  936. ok.
  937. %% OTP-10992. List of floats to abstract format.
  938. otp_10992(Config) when is_list(Config) ->
  939. {cons,0,{float,0,42.0},{nil,0}} =
  940. erl_parse_abstract([42.0], [{encoding,unicode}]),
  941. {cons,0,{float,0,42.0},{nil,0}} =
  942. erl_parse_abstract([42.0], [{encoding,utf8}]),
  943. {cons,0,{integer,0,65},{cons,0,{float,0,42.0},{nil,0}}} =
  944. erl_parse_abstract([$A,42.0], [{encoding,unicode}]),
  945. {cons,0,{integer,0,65},{cons,0,{float,0,42.0},{nil,0}}} =
  946. erl_parse_abstract([$A,42.0], [{encoding,utf8}]),
  947. ok.
  948. %% OTP-11807. Generalize erl_parse:abstract/2.
  949. otp_11807(Config) when is_list(Config) ->
  950. {cons,0,{integer,0,97},{cons,0,{integer,0,98},{nil,0}}} =
  951. erl_parse_abstract("ab", [{encoding,none}]),
  952. {cons,0,{integer,0,-1},{nil,0}} =
  953. erl_parse_abstract([-1], [{encoding,latin1}]),
  954. ASCII = fun(I) -> I >= 0 andalso I < 128 end,
  955. {string,0,"xyz"} = erl_parse_abstract("xyz", [{encoding,ASCII}]),
  956. {cons,0,{integer,0,228},{nil,0}} =
  957. erl_parse_abstract([228], [{encoding,ASCII}]),
  958. {cons,0,{integer,0,97},{atom,0,a}} =
  959. erl_parse_abstract("a"++a, [{encoding,latin1}]),
  960. {'EXIT', {{badarg,bad},_}} = % minor backward incompatibility
  961. (catch erl_parse:abstract("string", [{encoding,bad}])),
  962. ok.
  963. test_string(String, ExpectedWithCol) ->
  964. {ok, ExpectedWithCol, _EndWithCol} = erl_scan_string(String, {1, 1}, []),
  965. Expected = [ begin
  966. {L,_C} = element(2, T),
  967. setelement(2, T, L)
  968. end
  969. || T <- ExpectedWithCol ],
  970. {ok, Expected, _End} = erl_scan_string(String),
  971. test(String).
  972. erl_scan_string(String) ->
  973. erl_scan_string(String, 1, []).
  974. erl_scan_string(String, StartLocation) ->
  975. erl_scan_string(String, StartLocation, []).
  976. erl_scan_string(String, StartLocation, Options) ->
  977. case erl_scan:string(String, StartLocation, Options) of
  978. {ok, Tokens, EndLocation} ->
  979. {ok, unopaque_tokens(Tokens), EndLocation};
  980. Else ->
  981. Else
  982. end.
  983. erl_scan_tokens(C, S, L) ->
  984. erl_scan_tokens(C, S, L, []).
  985. erl_scan_tokens(C, S, L, O) ->
  986. case erl_scan:tokens(C, S, L, O) of
  987. {done, {ok, Ts, End}, R} ->
  988. {done, {ok, unopaque_tokens(Ts), End}, R};
  989. Else ->
  990. Else
  991. end.
  992. unopaque_tokens([]) ->
  993. [];
  994. unopaque_tokens([Token|Tokens]) ->
  995. Attrs = element(2, Token),
  996. Term = erl_anno:to_term(Attrs),
  997. T = setelement(2, Token, Term),
  998. [T | unopaque_tokens(Tokens)].
  999. erl_parse_abstract(Term) ->
  1000. erl_parse_abstract(Term, []).
  1001. erl_parse_abstract(Term, Options) ->
  1002. Abstr = erl_parse:abstract(Term, Options),
  1003. unopaque_abstract(Abstr).
  1004. unopaque_abstract(Abstr) ->
  1005. erl_parse:anno_to_term(Abstr).
  1006. %% test_string(String, Expected, StartLocation, Options) ->
  1007. %% {ok, Expected, _End} = erl_scan:string(String, StartLocation, Options),
  1008. %% test(String).
  1009. %% There are no checks of the tags...
  1010. test(String) ->
  1011. %% io:format("Testing `~ts'~n", [String]),
  1012. [{Tokens, End},
  1013. {Wtokens, Wend},
  1014. {Ctokens, Cend},
  1015. {CWtokens, CWend},
  1016. {CWtokens2, _}] =
  1017. [scan_string_with_column(String, X) ||
  1018. X <- [[],
  1019. [return_white_spaces],
  1020. [return_comments],
  1021. [return],
  1022. [return]]], % for white space compaction test
  1023. {end1,End,Wend} = {end1,Wend,End},
  1024. {end2,Wend,Cend} = {end2,Cend,Wend},
  1025. {end3,Cend,CWend} = {end3,CWend,Cend},
  1026. %% Test that the tokens that are common to two token lists are identical.
  1027. {none,Tokens} = {none, filter_tokens(CWtokens, [white_space,comment])},
  1028. {comments,Ctokens} =
  1029. {comments,filter_tokens(CWtokens, [white_space])},
  1030. {white_spaces,Wtokens} =
  1031. {white_spaces,filter_tokens(CWtokens, [comment])},
  1032. %% Use token attributes to extract parts from the original string,
  1033. %% and check that the parts are identical to the token strings.
  1034. {Line,Column} = test_decorated_tokens(String, CWtokens),
  1035. {deco,{Line,Column},End} = {deco,End,{Line,Column}},
  1036. %% Almost the same again: concat texts to get the original:
  1037. Text = get_text(CWtokens),
  1038. {text,Text,String} = {text,String,Text},
  1039. %% Test that white spaces occupy less heap than the worst case.
  1040. ok = test_white_space_compaction(CWtokens, CWtokens2),
  1041. %% Test that white newlines are always first in text:
  1042. WhiteTokens = select_tokens(CWtokens, [white_space]),
  1043. ok = newlines_first(WhiteTokens),
  1044. %% Line attribute only:
  1045. [Simple,Wsimple,Csimple,WCsimple] = Simples =
  1046. [element(2, erl_scan:string(String, 1, Opts)) ||
  1047. Opts <- [[],
  1048. [return_white_spaces],
  1049. [return_comments],
  1050. [return]]],
  1051. {consistent,true} = {consistent,consistent_attributes(Simples)},
  1052. {simple_wc,WCsimple} = {simple_wc,simplify(CWtokens)},
  1053. {simple,Simple} = {simple,filter_tokens(WCsimple, [white_space,comment])},
  1054. {simple_c,Csimple} = {simple_c,filter_tokens(WCsimple, [white_space])},
  1055. {simple_w,Wsimple} = {simple_w,filter_tokens(WCsimple, [comment])},
  1056. %% Line attribute only, with text:
  1057. [SimpleTxt,WsimpleTxt,CsimpleTxt,WCsimpleTxt] = SimplesTxt =
  1058. [element(2, erl_scan:string(String, 1, [text|Opts])) ||
  1059. Opts <- [[],
  1060. [return_white_spaces],
  1061. [return_comments],
  1062. [return]]],
  1063. TextTxt = get_text(WCsimpleTxt),
  1064. {text_txt,TextTxt,String} = {text_txt,String,TextTxt},
  1065. {consistent_txt,true} =
  1066. {consistent_txt,consistent_attributes(SimplesTxt)},
  1067. {simple_txt,SimpleTxt} =
  1068. {simple_txt,filter_tokens(WCsimpleTxt, [white_space,comment])},
  1069. {simple_c_txt,CsimpleTxt} =
  1070. {simple_c_txt,filter_tokens(WCsimpleTxt, [white_space])},
  1071. {simple_w_txt,WsimpleTxt} =
  1072. {simple_w_txt,filter_tokens(WCsimpleTxt, [comment])},
  1073. ok.
  1074. test_white_space_compaction(Tokens, Tokens2) when Tokens =:= Tokens2 ->
  1075. [WS, WS2] = [select_tokens(Ts, [white_space]) || Ts <- [Tokens, Tokens2]],
  1076. test_wsc(WS, WS2).
  1077. test_wsc([], []) ->
  1078. ok;
  1079. test_wsc([Token|Tokens], [Token2|Tokens2]) ->
  1080. [Text, Text2] = [Text ||
  1081. Text <- [erl_scan:text(T) || T <- [Token, Token2]]],
  1082. Sz = erts_debug:size(Text),
  1083. Sz2 = erts_debug:size({Text, Text2}),
  1084. IsCompacted = Sz2 < 2*Sz+erts_debug:size({a,a}),
  1085. ToBeCompacted = is_compacted(Text),
  1086. if
  1087. IsCompacted =:= ToBeCompacted ->
  1088. test_wsc(Tokens, Tokens2);
  1089. true ->
  1090. {compaction_error, Token}
  1091. end.
  1092. is_compacted("\r") ->
  1093. true;
  1094. is_compacted("\n\r") ->
  1095. true;
  1096. is_compacted("\n\f") ->
  1097. true;
  1098. is_compacted([$\n|String]) ->
  1099. all_spaces(String)
  1100. orelse
  1101. all_tabs(String);
  1102. is_compacted(String) ->
  1103. all_spaces(String)
  1104. orelse
  1105. all_tabs(String).
  1106. all_spaces(L) ->
  1107. all_same(L, $\s).
  1108. all_tabs(L) ->
  1109. all_same(L, $\t).
  1110. all_same(L, Char) ->
  1111. lists:all(fun(C) -> C =:= Char end, L).
  1112. newlines_first([]) ->
  1113. ok;
  1114. newlines_first([Token|Tokens]) ->
  1115. Text = erl_scan:text(Token),
  1116. Nnls = length([C || C <- Text, C =:= $\n]),
  1117. OK = case Text of
  1118. [$\n|_] ->
  1119. Nnls =:= 1;
  1120. _ ->
  1121. Nnls =:= 0
  1122. end,
  1123. if
  1124. OK -> newlines_first(Tokens);
  1125. true -> OK
  1126. end.
  1127. filter_tokens(Tokens, Tags) ->
  1128. lists:filter(fun(T) -> not lists:member(element(1, T), Tags) end, Tokens).
  1129. select_tokens(Tokens, Tags) ->
  1130. lists:filter(fun(T) -> lists:member(element(1, T), Tags) end, Tokens).
  1131. simplify([Token|Tokens]) ->
  1132. Line = erl_scan:line(Token),
  1133. [setelement(2, Token, erl_anno:new(Line)) | simplify(Tokens)];
  1134. simplify([]) ->
  1135. [].
  1136. get_text(Tokens) ->
  1137. lists:flatten(
  1138. [T ||
  1139. Token <- Tokens,
  1140. (T = erl_scan:text(Token)) =/= []]).
  1141. test_decorated_tokens(String, Tokens) ->
  1142. ToksAttrs = token_attrs(Tokens),
  1143. test_strings(ToksAttrs, String, 1, 1).
  1144. token_attrs(Tokens) ->
  1145. [{L,C,length(T),T} ||
  1146. Token <- Tokens,
  1147. ([C,L,T] = token_info(Token)) =/= []].
  1148. token_info(T) ->
  1149. Column = erl_scan:column(T),
  1150. Line = erl_scan:line(T),
  1151. Text = erl_scan:text(T),
  1152. [Column, Line, Text].
  1153. token_info_long(T) ->
  1154. Column = erl_scan:column(T),
  1155. Line = erl_scan:line(T),
  1156. Text = erl_scan:text(T),
  1157. Category = erl_scan:category(T),
  1158. Symbol = erl_scan:symbol(T),
  1159. [{category,Category},{column,Column},{line,Line},
  1160. {symbol,Symbol},{text,Text}].
  1161. test_strings([], _S, Line, Column) ->
  1162. {Line,Column};
  1163. test_strings([{L,C,Len,T}=Attr|Attrs], String0, Line0, Column0) ->
  1164. {String1, Column1} = skip_newlines(String0, L, Line0, Column0),
  1165. String = skip_chars(String1, C-Column1),
  1166. {Str,Rest} = lists:split(Len, String),
  1167. if
  1168. Str =:= T ->
  1169. {Line,Column} = string_newlines(T, L, C),
  1170. test_strings(Attrs, Rest, Line, Column);
  1171. true ->
  1172. {token_error, Attr, Str}
  1173. end.
  1174. skip_newlines(String, Line, Line, Column) ->
  1175. {String, Column};
  1176. skip_newlines([$\n|String], L, Line, _Column) ->
  1177. skip_newlines(String, L, Line+1, 1);
  1178. skip_newlines([_|String], L, Line, Column) ->
  1179. skip_newlines(String, L, Line, Column+1).
  1180. skip_chars(String, 0) ->
  1181. String;
  1182. skip_chars([_|String], N) ->
  1183. skip_chars(String, N-1).
  1184. string_newlines([$\n|String], Line, _Column) ->
  1185. string_newlines(String, Line+1, 1);
  1186. string_newlines([], Line, Column) ->
  1187. {Line, Column};
  1188. string_newlines([_|String], Line, Column) ->
  1189. string_newlines(String, Line, Column+1).
  1190. scan_string_with_column(String, Options0) ->
  1191. Options = [text | Options0],
  1192. StartLoc = {1, 1},
  1193. {ok, Ts1, End1} = erl_scan:string(String, StartLoc, Options),
  1194. TString = String ++ ". ",
  1195. {ok,Ts2,End2} = scan_tokens(TString, Options, [], StartLoc),
  1196. {ok, Ts3, End3} =
  1197. scan_tokens_1({more, []}, TString, Options, [], StartLoc),
  1198. {end_2,End2,End3} = {end_2,End3,End2},
  1199. {EndLine1,EndColumn1} = End1,
  1200. End2 = {EndLine1,EndColumn1+2},
  1201. {ts_1,Ts2,Ts3} = {ts_1,Ts3,Ts2},
  1202. Ts2 = Ts1 ++ [lists:last(Ts2)],
  1203. %% Attributes are keylists, but have no text.
  1204. {ok, Ts7, End7} = erl_scan:string(String, {1,1}, Options),
  1205. {ok, Ts8, End8} = scan_tokens(TString, Options, [], {1,1}),
  1206. {end1, End1} = {end1, End7},
  1207. {end2, End2} = {end2, End8},
  1208. Ts8 = Ts7 ++ [lists:last(Ts8)],
  1209. {cons,true} = {cons,consistent_attributes([Ts1,Ts2,Ts3,Ts7,Ts8])},
  1210. {Ts1, End1}.
  1211. scan_tokens(String, Options, Rs, Location) ->
  1212. case erl_scan:tokens([], String, Location, Options) of
  1213. {done, {ok,Ts,End}, ""} ->
  1214. {ok, lists:append(lists:reverse([Ts|Rs])), End};
  1215. {done, {ok,Ts,End}, Rest} ->
  1216. scan_tokens(Rest, Options, [Ts|Rs], End)
  1217. end.
  1218. scan_tokens_1({done, {ok,Ts,End}, ""}, "", _Options, Rs, _Location) ->
  1219. {ok,lists:append(lists:reverse([Ts|Rs])),End};
  1220. scan_tokens_1({done, {ok,Ts,End}, Rest}, Cs, Options, Rs, _Location) ->
  1221. scan_tokens_1({more,[]}, Rest++Cs, Options, [Ts|Rs], End);
  1222. scan_tokens_1({more, Cont}, [C | Cs], Options, Rs, Loc) ->
  1223. R = erl_scan:tokens(Cont, [C], Loc, Options),
  1224. scan_tokens_1(R, Cs, Options, Rs, Loc).
  1225. consistent_attributes([]) ->
  1226. true;
  1227. consistent_attributes([Ts | TsL]) ->
  1228. L = [T || T <- Ts, is_integer(element(2, T))],
  1229. case L of
  1230. [] ->
  1231. TagsL = [[Tag || {Tag,_} <- defined(token_info_long(T))] ||
  1232. T <- Ts],
  1233. case lists:usort(TagsL) of
  1234. [_] ->
  1235. consistent_attributes(TsL);
  1236. [] when Ts =:= [] ->
  1237. consistent_attributes(TsL);
  1238. _ ->
  1239. Ts
  1240. end;
  1241. Ts ->
  1242. consistent_attributes(TsL);
  1243. _ ->
  1244. Ts
  1245. end.
  1246. defined(L) ->
  1247. [{T,V} || {T,V} <- L, V =/= undefined].
  1248. family_list(L) ->
  1249. sofs:to_external(family(L)).
  1250. family(L) ->
  1251. sofs:relation_to_family(sofs:relation(L)).