PageRenderTime 71ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/erl_cgi/src/html_esp.erl

https://github.com/babo/jungerl
Erlang | 984 lines | 626 code | 134 blank | 224 comment | 35 complexity | cf4661452c55cf959ddbcd499e6c3bcb MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, AGPL-1.0
  1. %%% File : html_esp.erl
  2. %%% Author : Tony Rogvall <tony@localhost.localdomain>
  3. %%% Description : ESP generator from HTML documents
  4. %%% Created : 5 May 2002 by Tony Rogvall <tony@localhost.localdomain>
  5. -module(html_esp).
  6. -export([parse/1,parse/2,file/1,tokenise/2]).
  7. -import(lists, [map/2, reverse/1, member/2]).
  8. -compile(export_all).
  9. %% -define(debug,true).
  10. -ifdef(debug).
  11. -define(dbg(Fmt,Args), io:format(Fmt, Args)).
  12. -else.
  13. -define(dbg(Fmt,Args), ok).
  14. -endif.
  15. -record(state,
  16. {
  17. %% default formatting function is just a dummy
  18. fmt = fun(Tag,As,Cs) -> {Tag,As,Cs} end,
  19. %% default environment is empty [{Var,Value}]
  20. env = [],
  21. path = "",
  22. file = "*stdin*",
  23. line = 1,
  24. extensions = [%% {ssi,"include"},
  25. %% {ssi,"echo"},
  26. %% {ssi,"exec"},
  27. %% erl,
  28. nonstrict,
  29. repair
  30. ]
  31. }).
  32. -define(PCDATA(Ln,St,Data), {pcdata,{(Ln),(St)#state.file},Data}).
  33. -define(TAG(Tag,Ln,St,Args), {tag,(Tag),{(Ln),(St)#state.file},(Args)}).
  34. -define(TAG_END(Tag,Ln,St), {tag_end,(Tag),{(Ln),(St)#state.file}}).
  35. file(File) ->
  36. case file:read_file(File) of
  37. {ok,Bin} ->
  38. parse(Bin,File);
  39. Error ->
  40. Error
  41. end.
  42. file_html(File) ->
  43. case file(File) of
  44. {ok,Esp} ->
  45. io:put_chars(esp_html:format(Esp));
  46. Error ->
  47. Error
  48. end.
  49. file_esp(File) ->
  50. case file(File) of
  51. {ok,Esp} ->
  52. io:format("~p\n",[Esp]);
  53. Error ->
  54. Error
  55. end.
  56. file_tokens(File) ->
  57. case file:read_file(File) of
  58. {ok,Bin} ->
  59. St = #state { fmt = fun parse_fmt/3,
  60. file = File,
  61. env = []
  62. },
  63. case tokenise(Bin,St) of
  64. {ok,St1,Ts} ->
  65. lists:foreach(fun(T) ->
  66. io:format("~p\n", [T]) end,
  67. Ts);
  68. Error ->
  69. Error
  70. end;
  71. Error -> Error
  72. end.
  73. parse(Chars) ->
  74. parse(Chars, "*stdin*").
  75. parse(Chars,File) ->
  76. St = #state { fmt = fun parse_fmt/3,
  77. env = [],
  78. file = File
  79. },
  80. case tokenise(Chars,St) of
  81. {ok, St1, Tokens} ->
  82. parse0(Tokens, St1);
  83. Error -> Error
  84. end.
  85. parse_fmt(Tag,As,Cs) ->
  86. {Tag,As,Cs}.
  87. fmt(St,Tag,As,Cs) ->
  88. case St#state.fmt of
  89. undefined -> {Tag,As,Cs};
  90. Fun -> Fun(Tag,As,Cs)
  91. end.
  92. parse0([{tag,'!doctype',Ln,Type}|Ts],St) ->
  93. parse0(Ts,St);
  94. parse0([{pcdata,Ln,Data}|Ts],St) ->
  95. [] = skip_white(Data), %% only white space allowed
  96. parse0(Ts,St);
  97. parse0([{tag,html,Ln,As}|Ts],St) ->
  98. case elem(html,As,Ts,[html], St) of
  99. {Html,[]} ->
  100. {ok,{document,[Html]}};
  101. {Html,[{pcdata,_,Data}]} ->
  102. [] = skip_white(Data),
  103. {ok,{document,[Html]}}
  104. end;
  105. parse0(Ts,St) ->
  106. {Flow,_} = repeat(fun(T) -> html:is_flow(T) end, Ts, [],St),
  107. {ok,{document,Flow}}.
  108. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  109. %% element parsing
  110. %% elem(Tag, Attributes, Tokens, Parents)
  111. %%
  112. %% return {Tokens', {Tag,Attributes,Children}}
  113. %%
  114. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  115. %%
  116. %% EXTENSION <erl>#PCDATA</erl>
  117. %%
  118. elem(erl,As,Ts,Ps,St) ->
  119. {{erl,Attr,Cs},Ts1} = el(erl,As,Ts,fun(T) -> T == pcdata end,Ps,St),
  120. String = flatten_pcdata(Cs),
  121. case member(erl, St#state.extensions) of
  122. true ->
  123. {ok,Tokens,Ln} = erl_scan:string(String, St#state.line),
  124. {ok,Exprs} = erl_parse:parse_exprs(Tokens++[{dot,'.'}]),
  125. {{erl,As,Exprs},Ts1};
  126. false ->
  127. {{pre,[],"<erl>"++String++"</erl>"},Ts1}
  128. end;
  129. %%
  130. %% <!ENTITY % html.content "HEAD, BODY">
  131. %% <!ELEMENT HTML O O (%html.content;) -- document root element -->
  132. %%
  133. elem(html,As,Ts,Ps,St) ->
  134. ?dbg("~s<~p>\n", [indent(Ps),html]),
  135. {E1,Ts1} = require(head, Ts,Ps,St),
  136. case ent(fun(T) -> (T == body) or (T == frameset) end, Ts1,Ps,St) of
  137. {[E2],Ts2} ->
  138. {fmt(St,html,attrlist(html,As),[E1,E2]), elem_end(html,Ts2,Ps,St)};
  139. {[],Ts2} ->
  140. exit({bad_tag,html,Ps})
  141. end;
  142. %%
  143. %% <!ELEMENT HEAD O O (%head.content;) +(%head.misc;) -- document head -->
  144. %%
  145. %% FIXME: head.content = TITLE & BASE?
  146. %% i.e one TITLE and optionally one BASE in any order
  147. %%
  148. elem(head,As,Ts,Ps,St) ->
  149. el(head,As,Ts,fun(T) -> html:is_head_misc(T) or
  150. html:is_head_content(T) end,Ps,St);
  151. %%
  152. %% <!ELEMENT BODY O O (%flow)+ +(INS|DEL) -- document body -->
  153. %%
  154. %% FIXME: INS and DEL is not handled
  155. %%
  156. elem(body,As,Ts,Ps,St) -> el(body,As,Ts, html:flow(),Ps,St);
  157. elem(title, As, Ts,Ps,St) ->
  158. el(title,As,Ts,fun(T) -> T == pcdata end,Ps,St);
  159. %%
  160. %% <!ELEMENT META - O EMPTY -- generic metainformation -->
  161. %%
  162. elem(meta,As,Ts,Ps,St) -> el0(meta,As,Ts,Ps,St);
  163. %%
  164. %% <!ELEMENT BASE - O EMPTY -- document base URI -->
  165. %%
  166. elem(base,As,Ts,Ps,St) -> el0(base,As,Ts,Ps,St);
  167. %%
  168. %% <!ELEMENT LINK - O EMPTY -- a media-independent link -->
  169. %%
  170. elem(link,As,Ts,Ps,St) -> el0(link,As,Ts,Ps,St);
  171. elem(noscript, As, Ts,Ps,St) -> el(noscript,As,Ts,html:block(),Ps,St);
  172. elem(object, As, Ts,Ps,St) -> el(object,As,Ts,
  173. fun(T) -> html:is_flow(T) or (T == param) end,Ps,St);
  174. elem(param, As, Ts,Ps,St) -> el0(param,As,Ts,Ps,St);
  175. elem(img, As, Ts,Ps,St) -> el0(img,As,Ts,Ps,St);
  176. elem(style,As,Ts,Ps,St) ->
  177. %% FIXME: handle commented styles
  178. el(style,As,Ts,fun(T) -> T == pcdata end,Ps,St);
  179. elem(script,As,Ts,Ps,St) ->
  180. %% FIXME: handle commented scripts
  181. el(style,As,Ts,fun(T) -> T == pcdata end,Ps,St);
  182. elem(form, As, Ts,Ps,St) -> el(form,As,Ts,
  183. fun(T) -> html:is_flow(T) and (T =/= form) end,Ps,St);
  184. %%
  185. %% <!ELEMENT TABLE - -
  186. %% (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)>
  187. %%
  188. elem(table,As,Ts0,Ps,St) ->
  189. ?dbg("~s<~p>\n", [indent(Ps),table]),
  190. {E1,Ts1} = optional(caption,Ts0,Ps,St),
  191. {E2,Ts2} = repeat(fun(Tag) -> (Tag == col) or (Tag == colgroup) end,
  192. Ts1,Ps,St),
  193. {E3,Ts3} = optional(thead, Ts2,Ps,St),
  194. {E4,Ts4} = optional(tfoot, Ts3,Ps,St),
  195. {E5,Ts5} = optional(tbody, Ts4,Ps,St),
  196. {E6,Ts6} = repeat(fun(T) -> T == tr end, Ts5,Ps,St),
  197. Ts7 = elem_end(table,Ts6,Ps,St),
  198. { fmt(St,table,attrlist(table,As),E1++E2++E3++E4++E5++E6), Ts7};
  199. %%
  200. %% <!ELEMENT TBODY O O (TR)+ -- table body -->
  201. %%
  202. elem(tbody, As, Ts,Ps,St) -> el(tbody,As,Ts,fun(T) -> T == tr end,Ps,St);
  203. %%
  204. %% <!ELEMENT THEAD - O (TR)+ -- table header -->
  205. %%
  206. elem(thead, As, Ts,Ps,St) -> el(thead,As,Ts,fun(T) -> T == tr end,Ps,St);
  207. %%
  208. %% <!ELEMENT TFOOT - O (TR)+ -- table footer -->
  209. %%
  210. elem(tfoot, As, Ts,Ps,St) -> el(tfoot,As,Ts,fun(T) -> T == tr end,Ps,St);
  211. %%
  212. %% <!ELEMENT TR - O (TH|TD)+ -- table row -->
  213. %%
  214. elem(tr, As, Ts,Ps,St) -> el(tr,As,Ts,fun(T) -> (T == th) or (T == td) end,Ps,St);
  215. %%
  216. %% <!ELEMENT (TH|TD) - O (%flow;)* -- table header cell, table data cell-->
  217. %%
  218. elem(th, As, Ts,Ps,St) -> el(th,As,Ts,html:flow(),Ps,St);
  219. elem(td, As, Ts,Ps,St) -> el(td,As,Ts,html:flow(),Ps,St);
  220. %%
  221. %% <!ELEMENT CAPTION - - (%inline;)* -- table caption -->
  222. %%
  223. elem(caption, As, Ts,Ps,St) -> el(caption,As,Ts,html:inline(),Ps,St);
  224. elem(col, As, Ts,Ps,St) -> el0(col,As,Ts,Ps,St);
  225. elem(colgroup, As, Ts,Ps,St) -> el(colgroup,As,Ts,fun(T) -> T == col end,Ps,St);
  226. %% Frames
  227. %%
  228. %% <!ELEMENT FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?) -- window subdivision-->
  229. %%
  230. elem(frameset,As,Ts,Ps,St) ->
  231. el(frameset,As,Ts,
  232. fun(T) ->
  233. (T == frameset) or (T == frame) or (T == noframes)
  234. end,Ps,St);
  235. elem(frame, As, Ts,Ps,St) -> el0(frame,As,Ts,Ps,St);
  236. elem(iframe, As, Ts,Ps,St) -> el(iframe,As,Ts, html:flow(),Ps,St);
  237. elem(noframes,As,Ts,Ps,St) -> el(noframes,As,Ts, html:flow(),Ps,St);
  238. %%
  239. %% LAYER, ILAYER and NOLAYER
  240. %%
  241. elem(layer,As,Ts,Ps,St) -> el(layer, As, Ts, html:flow(),Ps,St);
  242. elem(ilayer, As,Ts,Ps,St) -> el(ilayer, As, Ts, html:flow(),Ps,St);
  243. elem(nolayer,As,Ts,Ps,St) -> el(nolayer, As, Ts, html:flow(),Ps,St);
  244. %% Lists
  245. elem(ul, As, Ts,Ps,St) -> el(ul,As,Ts,fun(T) -> T == li end,Ps,St);
  246. elem(ol, As, Ts,Ps,St) -> el(ol,As,Ts,fun(T) -> T == li end,Ps,St);
  247. elem(li, As, Ts,Ps,St) -> el(li,As,Ts,html:flow(),Ps,St);
  248. elem(dl, As, Ts,Ps,St) -> el(dl,As,Ts,fun(T) -> (T == dt) or (T == dd) end,Ps,St);
  249. elem(dt, As, Ts,Ps,St) -> el(dt,As,Ts,html:inline(),Ps,St);
  250. elem(dd, As, Ts,Ps,St) -> el(dd,As,Ts,html:flow(),Ps,St);
  251. %%
  252. %% %fontstyle
  253. %% <!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*>
  254. %%
  255. elem(tt,As, Ts,Ps,St) -> el(tt,As,Ts,html:inline(),Ps,St);
  256. elem(i,As, Ts,Ps,St) -> el(i,As,Ts,html:inline(),Ps,St);
  257. elem(b,As, Ts,Ps,St) -> el(b,As,Ts,html:inline(),Ps,St);
  258. elem(u,As, Ts,Ps,St) -> el(u,As,Ts,html:inline(),Ps,St);
  259. elem(s,As, Ts,Ps,St) -> el(s,As,Ts,html:inline(),Ps,St);
  260. elem(strike,As, Ts,Ps,St) -> el(strike,As,Ts,html:inline(),Ps,St);
  261. elem(big,As, Ts,Ps,St) -> el(big,As,Ts,html:inline(),Ps,St);
  262. elem(small,As, Ts,Ps,St) -> el(small,As,Ts,html:inline(),Ps,St);
  263. %%
  264. %% <!ELEMENT (%heading;) - - (%inline;)* -- heading -->
  265. %%
  266. elem(h1,As,Ts,Ps,St) -> el(h1,As,Ts,html:inline(),Ps,St);
  267. elem(h2,As,Ts,Ps,St) -> el(h2,As,Ts,html:inline(),Ps,St);
  268. elem(h3,As,Ts,Ps,St) -> el(h3,As,Ts,html:inline(),Ps,St);
  269. elem(h4,As,Ts,Ps,St) -> el(h4,As,Ts,html:inline(),Ps,St);
  270. elem(h5,As,Ts,Ps,St) -> el(h5,As,Ts,html:inline(),Ps,St);
  271. elem(h6,As,Ts,Ps,St) -> el(h6,As,Ts,html:inline(),Ps,St);
  272. %%
  273. %% <!ELEMENT ADDRESS - - (%inline;)* -- information on author -->
  274. %%
  275. elem(address,As,Ts,Ps,St) -> el(address,As,Ts,html:inline(),Ps,St);
  276. %%
  277. %% %phrase
  278. %% <!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*>
  279. %%
  280. elem(em,As,Ts,Ps,St) -> el(em,As,Ts,html:inline(),Ps,St);
  281. elem(strong,As,Ts,Ps,St) -> el(strong,As,Ts,html:inline(),Ps,St);
  282. elem(dfn,As,Ts,Ps,St) -> el(dfn,As,Ts,html:inline(),Ps,St);
  283. elem(code,As,Ts,Ps,St) -> el(code,As,Ts,html:inline(),Ps,St);
  284. elem(samp,As,Ts,Ps,St) -> el(samp,As,Ts,html:inline(),Ps,St);
  285. elem(kbd,As,Ts,Ps,St) -> el(kbd,As,Ts,html:inline(),Ps,St);
  286. elem(var,As,Ts,Ps,St) -> el(var,As,Ts,html:inline(),Ps,St);
  287. elem(cite,As,Ts,Ps,St) -> el(cite,As,Ts,html:inline(),Ps,St);
  288. elem(abbr,As,Ts,Ps,St) -> el(abbr,As,Ts,html:inline(),Ps,St);
  289. elem(acronym,As,Ts,Ps,St) -> el(acronum,As,Ts,html:inline(),Ps,St);
  290. %%
  291. %% <!ELEMENT BLOCKQUOTE - - (%block;|SCRIPT)+ -- long quotation -->
  292. %%
  293. elem(blockquote,As,Ts,Ps,St) ->
  294. el(blockquote,As,Ts, fun(T) -> html:is_block(T) or (T == script) end,Ps,St);
  295. %%
  296. %% <!ELEMENT Q - - (%inline;)* -- short inline quotation -->
  297. %%
  298. elem(q,As,Ts,Ps,St) -> el(q,As,Ts,html:inline(),Ps,St);
  299. %%
  300. %% <!ELEMENT (SUB|SUP) - - (%inline;)* -- subscript, superscript -->
  301. %%
  302. elem(sub,As,Ts,Ps,St) -> el(sub,As,Ts,html:inline(),Ps,St);
  303. elem(sup,As,Ts,Ps,St) -> el(sup,As,Ts,html:inline(),Ps,St);
  304. %%
  305. %% <!ELEMENT P - O (%inline;)* -- paragraph -->
  306. %%
  307. elem(p,As,Ts,Ps,St) -> el(p,As,Ts,html:inline(),Ps,St);
  308. %%
  309. %% <!ELEMENT BR - O EMPTY -- forced line break -->
  310. %%
  311. elem(br,As,Ts,Ps,St) -> el0(br,As,Ts,Ps,St);
  312. %%
  313. %% <!ELEMENT PRE - - (%inline;)* -(%pre.exclusion;) -- preformatted text -->
  314. %%
  315. elem(pre,As,Ts,Ps,St) ->
  316. el(pre,As,Ts,fun(T) -> html:is_inline(T) and not
  317. html:is_pre_exclusion(T) end,Ps,St);
  318. %%
  319. %% <!ELEMENT DIV - - (%flow;)* -- generic language/style container -->
  320. %%
  321. elem('div',As,Ts,Ps,St) -> el('div', As, Ts, html:flow(),Ps,St);
  322. %%
  323. %% <!ELEMENT CENTER - - (%flow;)* -- shorthand for DIV align=center -->
  324. %%
  325. elem(center,As,Ts,Ps,St) -> el(center, As, Ts, html:flow(),Ps,St);
  326. %%
  327. %% <!ELEMENT SPAN - - (%inline;)* -- generic language/style container -->
  328. %%
  329. elem(span,As,Ts,Ps,St) -> el(span, As, Ts, html:inline(),Ps,St);
  330. %%
  331. %% <!ELEMENT BDO - - (%inline;)* -- I18N BiDi over-ride -->
  332. %%
  333. elem(bdo,As,Ts,Ps,St) -> el(bdo, As, Ts, html:inline(),Ps,St);
  334. %%
  335. %% <!ELEMENT BASEFONT - O EMPTY -- base font size -->
  336. %%
  337. elem(basefont,As,Ts,Ps,St) -> el0(basefont, As, Ts,Ps,St);
  338. %%
  339. %% <!ELEMENT FONT - - (%inline;)* -- local change to font -->
  340. %%
  341. elem(font,As,Ts,Ps,St) -> el(font, As, Ts, html:inline(),Ps,St);
  342. %%
  343. %% <!ELEMENT HR - O EMPTY -- horizontal rule -->
  344. %%
  345. elem(hr, As, Ts,Ps,St) -> el0(hr, As, Ts,Ps,St);
  346. %%
  347. %% <!ELEMENT A - - (%inline;)* -(A) -- anchor -->
  348. %%
  349. elem(a, As, Ts,Ps,St) ->
  350. el(a, As, Ts, fun(T) -> html:is_inline(T) and (T =/= a) end,Ps,St);
  351. %%
  352. %% <!ELEMENT MAP - - ((%block;)+ | AREA+) -- client-side image map -->
  353. %%
  354. elem(map, As,Ts,Ps,St) ->
  355. el(map, As, Ts, fun(T) -> html:is_block(T) or (T == area) end,Ps,St);
  356. %%
  357. %% <!ELEMENT AREA - O EMPTY -- client-side image map area -->
  358. %%
  359. elem(area, As,Ts,Ps,St) -> el0(area, As, Ts,Ps,St);
  360. %%
  361. %% <!ELEMENT TEXTAREA - - (#PCDATA) -- multi-line text field -->
  362. %%
  363. elem(textarea,As,Ts,Ps,St) -> el(textarea,As,Ts,fun(T) -> T == pcdata end,Ps,St);
  364. %%
  365. %% <!ELEMENT FIELDSET - - (#PCDATA,LEGEND,(%flow;)*) -- form control group -->
  366. %%
  367. elem(fieldset,As,Ts,Ps,St) ->
  368. el(fieldset,As,Ts,
  369. fun(T) -> (T == pcdata) or (T == legend) or html:is_flow(T) end,Ps,St);
  370. %%
  371. %% <!ELEMENT LEGEND - - (%inline;)* -- fieldset legend -->
  372. %%
  373. elem(legend,As,Ts,Ps,St) -> el(legend,As,Ts,html:inline(),Ps,St);
  374. %% <!ELEMENT BUTTON - -
  375. %% (%flow;)* -(A|%formctrl;|FORM|ISINDEX|FIELDSET|IFRAME)
  376. %% -- push button -->
  377. elem(button,As,Ts,Ps,St) ->
  378. el(button,As,Ts,
  379. fun(T) -> html:is_flow(T) and
  380. (T =/= a) and (not html:is_formctrl(T)) and
  381. (T =/= form) and (T =/= isindex) and
  382. (T =/= fieldset) and (T =/= iframe)
  383. end,Ps,St);
  384. %%
  385. %% <!ELEMENT LABEL - - (%inline;)* -(LABEL) -- form field label text -->
  386. %%
  387. elem(label,As,Ts,Ps,St) ->
  388. el(label,As,Ts, fun(T) -> html:is_inline(T) and (T =/= label) end,Ps,St);
  389. %%
  390. %% <!ELEMENT INPUT - O EMPTY -- form control -->
  391. %%
  392. elem(input, As, Ts,Ps,St) -> el0(input, As, Ts,Ps,St);
  393. %%
  394. %% <!ELEMENT SELECT - - (OPTGROUP|OPTION)+ -- option selector -->
  395. %%
  396. elem(select,As,Ts,Ps,St) ->
  397. el(select,As,Ts,fun(T) -> (T == optgroup) or (T == option) end,Ps,St);
  398. %%
  399. %% <!ELEMENT OPTGROUP - - (OPTION)+ -- option group -->
  400. %%
  401. elem(optgroup,As,Ts,Ps,St) ->
  402. el(optgroup,As,Ts,fun(T) -> (T == option) end,Ps,St);
  403. %%
  404. %% <!ELEMENT OPTION - O (#PCDATA) -- selectable choice -->
  405. %%
  406. elem(option,As,Ts,Ps,St) ->
  407. el(option,As,Ts,fun(T) -> T == pcdata end,Ps,St);
  408. elem(pcdata,Data,Ts,Ps,St) ->
  409. ?dbg("~s#PCDATA(~p)\n", [indent(Ps),Data]),
  410. {{pcdata,Data},Ts}.
  411. %% check for end tag or optional end tag
  412. elem_end(Tag,Ts=[{pcdata,Ln1,Data},{tag_end,Tag,Ln2}|Ts1],Ps,St) ->
  413. case skip_white(Data) of
  414. [] ->
  415. ?dbg("~s</~p>\n", [indent(Ps),Tag]),
  416. Ts1;
  417. _ ->
  418. elem_end_opt(Tag,Ts,Ps,St)
  419. end;
  420. elem_end(Tag,[{tag_end,Tag,Ln}|Ts1],Ps,St) ->
  421. ?dbg("~s</~p>\n", [indent(Ps),Tag]),
  422. Ts1;
  423. elem_end(Tag,Ts,Ps,St) ->
  424. elem_end_opt(Tag,Ts,Ps,St).
  425. elem_end_opt(Tag,Ts,Ps,St) ->
  426. case html:end_tag_optional(Tag) of
  427. true ->
  428. ?dbg("~s<+~p> FAIL=~p\n", [indent(Ps),Tag,hd(Ts)]),
  429. Ts;
  430. false ->
  431. case member(repair, St#state.extensions) of
  432. false ->
  433. ?dbg("~s<-~p> FAIL=~p\n", [indent(Ps),Tag,hd(Ts)]),
  434. exit({bad_tag,Tag,Ps});
  435. true ->
  436. elem_end_repair(Tag,Ts,Ps)
  437. end
  438. end.
  439. %%
  440. %% Non-strict fix for bad end nesting of tags
  441. %% look for the {tag_end,Tag,Ln} among the tokens
  442. %% continue while finding {tag_end,X,Ln} then extract and
  443. %% remove that tag
  444. %%
  445. elem_end_repair(Tag, Ts, Ps) ->
  446. elem_end_repair(Tag, Ts, [], Ps).
  447. elem_end_repair(Tag,[{tag_end,Tag,Ln}|Ts], Acc, Ps) ->
  448. Ts1 = reverse(Acc) ++ Ts,
  449. ?dbg("~s<#~p> FAIL=~p\n", [indent(Ps),Tag,hd(Ts1)]),
  450. Ts1;
  451. elem_end_repair(Tag,[Et={tag_end,T,Ln}|Ts], Acc, Ps) ->
  452. elem_end_repair(Tag, Ts, [Et|Acc], Ps);
  453. elem_end_repair(Tag, Ts, Acc, Ps) ->
  454. Ts1 = reverse(Acc) ++ Ts,
  455. ?dbg("~s<-~p> FAIL=~p\n", [indent(Ps),Tag,hd(Ts1)]),
  456. exit({bad_tag,Tag,Ps}).
  457. %%
  458. %% el0: just a tag without end-tag
  459. %%
  460. el0(Tag,As,Ts,Ps,St) ->
  461. ?dbg("~s<~p>\n", [indent(Ps),Tag]),
  462. { fmt(St,Tag,As,[]), Ts}.
  463. %%
  464. %% el: repeat IsEntity predicate for all entities
  465. %% until end-tag (if not optional)
  466. %%
  467. el(Tag,As,Ts,IsEntity,Ps,St) ->
  468. ?dbg("~s<~p>\n", [indent(Ps),Tag]),
  469. {Cs,Ts1} = repeat(IsEntity,Ts,Ps,St),
  470. Ts2 = elem_end(Tag,Ts1,Ps,St),
  471. { fmt(St,Tag,As,Cs),Ts2}.
  472. indent(Ps) ->
  473. lists:duplicate(length(Ps)*2,$\s).
  474. %%
  475. %% repeat element
  476. %% repeat(BoolFun, Entity, Parents)
  477. %%
  478. repeat(IsEntity,Ts,Ps,St) ->
  479. case ent(IsEntity,Ts,Ps,St) of
  480. {[],Ts1} -> {[],Ts1};
  481. {[Elem],Ts1} ->
  482. {Es,Ts2} = repeat(IsEntity,Ts1,Ps,St),
  483. {[Elem|Es],Ts2}
  484. end.
  485. repeat0(IsEntity, Ts,Ps,St) ->
  486. case ent(IsEntity,Ts,Ps,St) of
  487. {[],Ts1} -> {[],Ts1};
  488. {[Elem],Ts1} ->
  489. {Es,Ts2} = repeat0(IsEntity,Ts1,Ps,St),
  490. {[Elem|Es],Ts2}
  491. end.
  492. %% Do optional FIRST element
  493. optional(Tag,Ts,Ps,St) ->
  494. ent(fun(T) -> T == Tag end, Ts,Ps,St).
  495. %% Do require FIRST element
  496. require(Tag,Ts,Ps,St) ->
  497. case ent(fun(T) -> T == Tag end, Ts,Ps,St) of
  498. {[],Ts1} ->
  499. exit({bad_tag,Tag,Ps});
  500. {[Elem],Ts1} ->
  501. {Elem,Ts1}
  502. end.
  503. ent(IsEntity,[{pcdata,Ln,Data}|Ts],Ps,St) ->
  504. case IsEntity(pcdata) of
  505. true ->
  506. case member(pre,Ps) orelse member(erl,Ps) orelse
  507. member(textarea,Ps) of
  508. true ->
  509. {Line,File} = Ln,
  510. {Elem,Ts1} =
  511. elem(pcdata,Data,Ts,[pcdata|Ps],
  512. St#state { line=Line, file=File }),
  513. {[Elem],Ts1};
  514. false ->
  515. case pcdata(Data) of
  516. [] -> ent(IsEntity,Ts,Ps,St);
  517. Data1 ->
  518. {Line,File} = Ln,
  519. {Elem,Ts1} = elem(pcdata,Data1,Ts,[pcdata|Ps],
  520. St#state {line=Line,file=File}),
  521. {[Elem],Ts1}
  522. end
  523. end;
  524. false ->
  525. case skip_white(Data) of
  526. [] -> ent(IsEntity, Ts,Ps,St);
  527. _ -> {[],[{pcdata,Ln,Data}|Ts]}
  528. end
  529. end;
  530. ent(IsEntity,[{tag,erl,Ln,Attr}|Ts],Ps,St) ->
  531. %% <erl>...</erl> Must be ok everywhere
  532. {Line,File} = Ln,
  533. Ts1 = skip_linebreak(Ts),
  534. {Elem,Ts2} = elem(erl,Attr,Ts1,[erl|Ps],
  535. St#state { line=Line,file=File}),
  536. {[Elem],Ts2};
  537. ent(IsEntity,[{tag,'!--',Ln,_}|Ts],Ps,St) ->
  538. ent(IsEntity, Ts,Ps,St);
  539. ent(IsEntity,[{tag,Tag,Ln,Attr}|Ts],Ps,St) ->
  540. case IsEntity(Tag) of
  541. true ->
  542. Ts1 = skip_linebreak(Ts),
  543. {Line,File} = Ln,
  544. {Elem,Ts2} = elem(Tag,Attr,Ts1,[Tag|Ps],
  545. St#state{line=Line, file=File}),
  546. {[Elem],Ts2};
  547. false ->
  548. {[],[{tag,Tag,Ln,Attr}|Ts]}
  549. end;
  550. ent(IsEntity,Ts,Ps,St) ->
  551. {[],Ts}.
  552. %% remove line break after start tag
  553. skip_linebreak([{pcdata,Data}|Ts]) ->
  554. case lnbreak(Data) of
  555. {true,[]} -> Ts;
  556. {true,Data1} -> [{pcdata,Data1}|Ts];
  557. false -> [{pcdata,Data}|Ts]
  558. end;
  559. skip_linebreak(Ts) -> Ts.
  560. lnbreak([$\s|Ts]) -> lnbreak(Ts);
  561. lnbreak([$\t|Ts]) -> lnbreak(Ts);
  562. lnbreak([$\n|Ts]) -> {true,Ts};
  563. lnbreak([$\r|Ts]) -> lnbreak(Ts);
  564. lnbreak(Ts) -> false.
  565. attrlist(Tag,As) ->
  566. %% Fixme: Do a complete check of attributes for the Tag
  567. As.
  568. %%
  569. %%
  570. %%
  571. flatten_pcdata({pcdata,Data}) ->
  572. Data;
  573. flatten_pcdata([{pcdata,Data}|Cs]) ->
  574. Data ++ flatten_pcdata(Cs);
  575. flatten_pcdata([]) -> "".
  576. %% Normalize pcdata, i.e remove multiple space
  577. %% and \s\t\n\r between characters
  578. pcdata(Cs) ->
  579. case pcdata0(Cs) of
  580. %% [$\s|Cs1] -> Cs1;
  581. Cs1 -> Cs1
  582. end.
  583. pcdata0([$\s|Cs]) -> pcdata_sp(Cs);
  584. pcdata0([$\n|Cs]) -> pcdata_sp(Cs);
  585. pcdata0([$\r|Cs]) -> pcdata_sp(Cs);
  586. pcdata0([$\t|Cs]) -> pcdata_sp(Cs);
  587. pcdata0([C|Cs]) -> [C|pcdata0(Cs)];
  588. pcdata0([]) -> [].
  589. pcdata_sp([$\s|Cs]) -> pcdata_sp(Cs);
  590. pcdata_sp([$\n|Cs]) -> pcdata_sp(Cs);
  591. pcdata_sp([$\r|Cs]) -> pcdata_sp(Cs);
  592. pcdata_sp([$\t|Cs]) -> pcdata_sp(Cs);
  593. pcdata_sp([C|Cs]) -> [$\s|pcdata0([C|Cs])];
  594. pcdata_sp([]) -> [$\s].
  595. %%
  596. %% FIXME: add code to handle unicode encoding
  597. %% and do a total rewrite!!!
  598. %%
  599. tokenise(Cs,St) when binary(Cs) ->
  600. tokenise(binary_to_list(Cs),St);
  601. tokenise(Cs,St) ->
  602. tokenise(Cs,[],St, St#state.line).
  603. tokenise([$<|Cs], Acc, St, Ln0) ->
  604. case collect_names(Cs,Ln0) of
  605. {[[$/|Name]],Ln1,[$>|Cs1]} ->
  606. case cut_tag(Name, St) of
  607. true -> tokenise(Cs1,Acc,St,Ln1);
  608. false ->
  609. tokenise(Cs1,[?TAG_END(to_name(Name),Ln0,St)|Acc],St,Ln1)
  610. end;
  611. {[[$/|Name]|_],Ln1,[$>|Cs1]} ->
  612. {error, {bad_end_tag,to_name(Name)}};
  613. {["!--#"++Cmd,{comment,Data}],Ln1,[$-,$-,$>|Cs1]} ->
  614. {Args,LnA,_} = collect_args(Data,Ln0),
  615. tokenise_ssi(tolower(Cmd),Args,Cs1,Acc,St,Ln1);
  616. {["!--"++Cmd,{comment,Data}],Ln1,[$-,$-,$>|Cs1]} ->
  617. Tag = to_name("!--"),
  618. tokenise(Cs1,[?TAG(Tag,Ln0,St,[{comment,Cmd++Data}])|Acc],St,Ln1);
  619. {[Name|Args],Ln1,[$>|Cs1]} ->
  620. case cut_tag(Name, St) of
  621. true -> tokenise(Cs1,Acc,St,Ln1);
  622. false ->
  623. case to_name(Name) of
  624. p ->
  625. {Ln2,Cs2} = skip_white(Cs1,Ln1),
  626. tokenise(Cs2,[?TAG(p,Ln0,St,Args)|Acc],St,Ln2);
  627. Tag ->
  628. tokenise(Cs1, [?TAG(Tag,Ln0,St,Args)|Acc],St,Ln1)
  629. end
  630. end;
  631. {_, [$>|Cs1]} ->
  632. {error, no_tag};
  633. {_, Cs1} ->
  634. {error, end_of_tag}
  635. end;
  636. tokenise([C|Cs], Acc, St, Ln0) ->
  637. {Raw, Ln1, Cs1} = collect_raw([C|Cs], Ln0),
  638. tokenise(Cs1, [?PCDATA(Ln0,St,Raw)|Acc], St, Ln1);
  639. tokenise([], Acc, St, Ln) ->
  640. {ok, St, reverse(Acc)}.
  641. %% Check if we should remove certain invalid tags
  642. %% (such as o:p buggy outlook XML tags...)
  643. cut_tag(Name, St) ->
  644. case member($:, Name) of
  645. true ->
  646. member(nonstrict, St#state.extensions);
  647. false ->
  648. false
  649. end.
  650. ssi_path(St) ->
  651. case St#state.path of
  652. "" ->
  653. case os:getenv("PATH_TRANSLATED") of
  654. false -> case os:getenv("PATH_INFO") of
  655. false -> ".";
  656. Path -> filename:dirname(Path)
  657. end;
  658. Path -> filename:dirname(Path)
  659. end;
  660. Path ->
  661. Path
  662. end.
  663. ssi_var(Var,St) ->
  664. case lists:keysearch(Var, 1, St#state.env) of
  665. false ->
  666. %% FIXME: NOT according to spec!!!
  667. case os:getenv(Var) of
  668. false -> "";
  669. Val -> Val
  670. end;
  671. {value, {_, Val}} -> Val
  672. end.
  673. ssi_data(Cs0, Cs1, Acc, St0, St1, Ln1) ->
  674. case tokenise(Cs0,Acc,St0,St0#state.line) of
  675. {ok,St00,Ts0} -> %% Ts0 = [Initial] ++ [Included]
  676. %% Use St1 (i.e switch back to old file)
  677. {Ln11,Cs11} = skip_white(Cs1, Ln1),
  678. case tokenise(Cs11, [],St1,Ln11) of
  679. {ok,St2,Ts1} -> %% Ts1 = [Final]
  680. {ok,St2,Ts0++Ts1}; %% [Initial]++[Include]++[Final]
  681. Error ->
  682. Error
  683. end;
  684. Error ->
  685. Error
  686. end.
  687. %%
  688. %% check if ssi command is allowed then
  689. %% skip it or process it
  690. tokenise_ssi(Cmd, Args, Cs, Acc, St, Ln) ->
  691. case lists:member({ssi,Cmd}, St#state.extensions) of
  692. true ->
  693. process_ssi(Cmd, Args, Cs, Acc, St, Ln);
  694. false ->
  695. {Ln1,Cs1} = skip_white(Cs, Ln),
  696. tokenise(Cs1,Acc,St,Ln1)
  697. end.
  698. process_ssi("include", [{file,File}|_], Cs, Acc, St, Ln) ->
  699. %% FIXME: remove ../
  700. %% FIXME: add virtual
  701. AbsFile = filename:join(ssi_path(St), File),
  702. AbsPath = filename:dirname(AbsFile),
  703. case file:read_file(AbsFile) of
  704. {ok,Bin} ->
  705. ssi_data(binary_to_list(Bin), Cs, Acc,
  706. St#state { path = AbsPath,
  707. file = AbsFile,
  708. line=1 }, St, Ln);
  709. Error ->
  710. {Ln1,Cs1} = skip_white(Cs,Ln),
  711. tokenise(Cs1, Acc, St, Ln1)
  712. end;
  713. process_ssi("echo", [{var,Var}|_], Cs, Acc, St, Ln) ->
  714. Value = ssi_var(Var,St),
  715. ssi_data(Value, Cs, Acc, St, St, Ln);
  716. process_ssi("exec", [{cmd,Cmd}|_], Cs, Acc, St, Ln) ->
  717. Value = os:cmd(Cmd),
  718. ssi_data(Value, Cs, Acc, St, St, Ln);
  719. process_ssi(_, Args, Cs, Acc,St, Ln) ->
  720. {Ln1,Cs1} = skip_white(Cs),
  721. tokenise(Cs1,Acc,St,Ln1).
  722. %%
  723. %% collect names is called after we hit <
  724. %% return {[TagName|Args], End-Line, Rest}
  725. %%
  726. collect_names(Str, Ln0) ->
  727. case skip_white(Str,Ln0) of
  728. {Ln1,[$>|T]} -> {[], Ln1, [$>|T]};
  729. {Ln1,[]} -> {[], Ln1, []};
  730. {Ln1,Str1} ->
  731. case collect_name(Str1,Ln1) of
  732. {"!--"++Com, Ln2, Str2} ->
  733. %% FIXME: -- <sp> > should be allowed
  734. {Data, Str3} = collect_tos("-->", Str2),
  735. {["!--"++Com,{comment, Data}], Ln2, Str3};
  736. {Name, Ln2, Str2} ->
  737. {Args,Ln3,Str3} = collect_args(Str2,Ln2),
  738. {[Name|Args], Ln3, Str3}
  739. end
  740. end.
  741. %% Args = ( name = arg | name)* |
  742. %%
  743. %% collect tag arg list return
  744. %% {Args, End-Line, Rest}
  745. %%
  746. collect_args(Str,Ln) ->
  747. case skip_white(Str,Ln) of
  748. {Ln1,[$>|T]} -> {[], Ln1, [$>|T]};
  749. {Ln1,[]} -> {[], Ln1, []};
  750. {Ln1,Str1} ->
  751. {Name, Ln2, Str2} = collect_name(Str1, Ln1),
  752. case skip_white(Str2,Ln2) of
  753. {Ln3,[$=|Str3]} ->
  754. {Ln4,Str4} = skip_white(Str3,Ln3),
  755. {Val,Ln5,Str5} = collect_name(Str4,Ln4),
  756. {ArgT,Ln6,Str6} = collect_args(Str5,Ln5),
  757. {[{to_name(Name),to_val(Val)}|ArgT],Ln6,Str6};
  758. {Ln3,[$>|Str3]} ->
  759. {[to_name(Name)],Ln3,[$>|Str3]};
  760. {Ln3,Str3} ->
  761. {Ln4,Str4} = skip_white(Str3,Ln3),
  762. {ArgT,Ln5,Str5} = collect_args(Str4,Ln4),
  763. {[to_name(Name)|ArgT],Ln5,Str5}
  764. end
  765. end.
  766. to_name(Str) ->
  767. list_to_atom(tolower(Str)).
  768. %% hanle erl: values
  769. to_val("erl:"++ErlValue) ->
  770. {ok,Tokens,Ln} = erl_scan:string(ErlValue),
  771. {ok,Exprs} = erl_parse:parse_exprs(Tokens++[{dot,'.'}]),
  772. {erl,Exprs};
  773. to_val(Value) ->
  774. Value.
  775. tolower([H|T]) when $A =< H, H =< $Z ->
  776. [(H-$A)+$a|tolower(T)];
  777. tolower([H|T]) ->
  778. [H|tolower(T)];
  779. tolower([]) -> [].
  780. %% skip white space but do not count newlines
  781. skip_white(Cs) ->
  782. {_,Cs1} = skip_white(Cs,0),
  783. Cs1.
  784. skip_white([$\s|T],Ln) ->skip_white(T,Ln);
  785. skip_white([$\n|T],Ln) ->skip_white(T,Ln+1);
  786. skip_white([$\r|T],Ln) ->skip_white(T,Ln);
  787. skip_white([$\t|T],Ln) ->skip_white(T,Ln);
  788. skip_white(T,Ln) -> {Ln,T}.
  789. collect_tos(X, List) ->
  790. collect_tos(X, List, []).
  791. collect_tos(X, [], Acc) ->
  792. {reverse(Acc), X};
  793. collect_tos(X, List, Acc) ->
  794. case lists:prefix(X, List) of
  795. true -> {reverse(Acc), List};
  796. false -> collect_tos(X, tl(List), [hd(List)|Acc])
  797. end.
  798. collect_to(X, List) -> collect_to(X, List, []).
  799. collect_to(X, [X|T], Acc) -> {reverse(Acc), [X|T]};
  800. collect_to(X, [], Acc) -> {reverse(Acc), [X]};
  801. collect_to(X, [H|T],Acc) -> collect_to(X, T, [H|Acc]).
  802. %% collect a name or a double quoted name
  803. %% return {Name,End-Line,Rest}
  804. collect_name([$"|T],Ln) -> collect_quoted_name(T, [],Ln);
  805. collect_name(Str,Ln) -> collect_name(Str, [],Ln).
  806. collect_name([$\s|T],L,Ln) -> {reverse(L),Ln,T};
  807. collect_name([$>|T],L,Ln) -> {reverse(L),Ln,[$>|T]};
  808. collect_name([$=|T],L,Ln) -> {reverse(L),Ln,[$=|T]};
  809. collect_name([$\n|T],L,Ln) -> {reverse(L),Ln+1,T};
  810. collect_name([$\r|T],L,Ln) -> {reverse(L),Ln,T};
  811. collect_name([H|T],L,Ln) -> collect_name(T, [H|L], Ln);
  812. collect_name([], L,Ln) -> {reverse(L),Ln,[]}.
  813. %% collect a double quoted string
  814. %% return {String,End-Line,Rest}
  815. collect_quoted_name([$\\,$"|T],L,Ln) -> collect_quoted_name(T, [$"|L],Ln);
  816. collect_quoted_name([$"|T],L,Ln) -> {reverse(L),Ln,T};
  817. collect_quoted_name([$\n|T],L,Ln) -> collect_quoted_name(T, [$\n|L],Ln+1);
  818. collect_quoted_name([$\r|T],L,Ln) -> collect_quoted_name(T, [$\n|L],Ln);
  819. collect_quoted_name([H|T],L,Ln) -> collect_quoted_name(T, [H|L],Ln);
  820. collect_quoted_name([],L,Ln) -> {reverse(L),Ln,[]}.
  821. %% collect_raw(Str,StartLine) -> {Raw',StopLine,Str'}
  822. collect_raw(Str,Ln) -> collect_raw(Str,[],Ln).
  823. collect_raw([$\\,$<|T],L,Ln) -> collect_raw(T, [$<|L],Ln);
  824. collect_raw([$\n|T], L,Ln) -> collect_raw(T, [$\n|L],Ln+1);
  825. collect_raw([$\r|T], L,Ln) -> collect_raw(T, [$\r|L],Ln);
  826. collect_raw([$<|T], L,Ln) -> {reverse(L),Ln,[$<|T]};
  827. collect_raw([$&|T], L,Ln) ->
  828. {NT,Ln1,Name} = collect_amp(T,[],Ln),
  829. case translate_amp(Name) of
  830. error -> collect_raw(T, [$&|L], Ln1);
  831. Code -> collect_raw(NT, [Code | L],Ln1)
  832. end;
  833. collect_raw([H|T], L, Ln) ->
  834. collect_raw(T, [H|L],Ln);
  835. collect_raw([], L, Ln) ->
  836. {reverse(L),Ln, []}.
  837. collect_amp([$ | T], L,Ln) -> {T,Ln,reverse(L)};
  838. collect_amp([$\n | T], L,Ln) -> {T,Ln+1,reverse(L)};
  839. collect_amp([$\r | T], L,Ln) -> {T,Ln,reverse(L)};
  840. collect_amp([$; | T], L,Ln) -> {T,Ln,reverse(L)};
  841. collect_amp([H | T], L,Ln) -> collect_amp(T, [H|L],Ln);
  842. collect_amp([], L,Ln) -> {[],Ln,reverse(L)}.
  843. translate_amp([$# | Ds]) -> amp_digits(Ds, 0);
  844. translate_amp(Name) -> html:value(Name).
  845. amp_digits([X | Xs], N) when X >= $0, X =< $9 ->
  846. amp_digits(Xs, N*10 + (X-$0));
  847. amp_digits([], N) ->
  848. if N > 16#ffffffff -> error;
  849. true -> N
  850. end.