/src/xml_stream.erl

https://github.com/treacheroustalks/ejabberd-app · Erlang · 190 lines · 143 code · 17 blank · 30 comment · 2 complexity · aa395f5f8d0ea304af506f227de94f8c MD5 · raw file

  1. %%%----------------------------------------------------------------------
  2. %%% File : xml_stream.erl
  3. %%% Author : Alexey Shchepin <alexey@process-one.net>
  4. %%% Purpose : Parse XML streams
  5. %%% Created : 17 Nov 2002 by Alexey Shchepin <alexey@process-one.net>
  6. %%%
  7. %%%
  8. %%% ejabberd, Copyright (C) 2002-2011 ProcessOne
  9. %%%
  10. %%% This program is free software; you can redistribute it and/or
  11. %%% modify it under the terms of the GNU General Public License as
  12. %%% published by the Free Software Foundation; either version 2 of the
  13. %%% License, or (at your option) any later version.
  14. %%%
  15. %%% This program is distributed in the hope that it will be useful,
  16. %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. %%% General Public License for more details.
  19. %%%
  20. %%% You should have received a copy of the GNU General Public License
  21. %%% along with this program; if not, write to the Free Software
  22. %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  23. %%% 02111-1307 USA
  24. %%%
  25. %%%----------------------------------------------------------------------
  26. -module(xml_stream).
  27. -author('alexey@process-one.net').
  28. -export([new/1,
  29. new/2,
  30. parse/2,
  31. close/1,
  32. parse_element/1]).
  %% Tags of the events decoded from the parser port's reply. Each event is
  %% a {Tag, Payload} tuple: XML_START carries {Name, Attrs}, XML_END the
  %% element name, XML_CDATA the character data and XML_ERROR the error term.
  33. -define(XML_START, 0).
  34. -define(XML_END, 1).
  35. -define(XML_CDATA, 2).
  36. -define(XML_ERROR, 3).
  %% Operation codes handed to port_control/3: PARSE_COMMAND is used by
  %% parse/2 and PARSE_FINAL_COMMAND by parse_element/1.
  37. -define(PARSE_COMMAND, 0).
  38. -define(PARSE_FINAL_COMMAND, 1).
  %% Parser state created by new/1,2 and threaded through parse/2:
  %%   callback_pid - destination of the gen_fsm stream events
  %%   port         - port opened on expat_erl
  %%   stack        - elements currently open, innermost first
  %%   size         - bytes fed since the stack last held exactly one element
  %%   maxsize      - size above which a "too big" stream error is sent
  %%                  (new/1 sets it to the atom infinity)
  39. -record(xml_stream_state, {callback_pid, port, stack, size, maxsize}).
  %% @doc Apply one parser event to the stack of elements under construction.
  %%
  %% `Stack' holds the currently open elements, innermost first, each as
  %% `{xmlelement, Name, Attrs, ReversedChildren}'. `Data' is one
  %% `{Tag, Payload}' event decoded from the port reply. The updated stack is
  %% returned; stream-level notifications are sent to `CallbackPid' with
  %% `gen_fsm:send_event/2':
  %%   {xmlstreamstart, Name, Attrs} - the root element was opened
  %%   {xmlstreamelement, El}        - a direct child of the root was closed
  %%   {xmlstreamend, Name}          - the root element was closed
  %%   {xmlstreamerror, Err}         - the parser reported an error
  %%
  %% Every send is wrapped in `catch' so that a failing send does not crash
  %% the process that drives the parser.
  process_data(CallbackPid, Stack, {?XML_START, {Name, Attrs}}) ->
      %% Only the opening tag of the root element is announced.
      case Stack of
          [] ->
              catch gen_fsm:send_event(CallbackPid,
                                       {xmlstreamstart, Name, Attrs});
          _ ->
              ok
      end,
      [{xmlelement, Name, Attrs, []} | Stack];
  process_data(CallbackPid, [{xmlelement, Name, Attrs, Els} | Tail],
               {?XML_END, EndName}) ->
      %% Children were accumulated in reverse order; restore document order.
      NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)},
      case Tail of
          [] ->
              %% The root element itself was closed.
              catch gen_fsm:send_event(CallbackPid, {xmlstreamend, EndName}),
              Tail;
          [_] ->
              %% A direct child of the root is complete: deliver it.
              catch gen_fsm:send_event(CallbackPid,
                                       {xmlstreamelement, NewEl}),
              Tail;
          [{xmlelement, Name1, Attrs1, Els1} | Tail1] ->
              %% Nested element: attach it to its parent.
              [{xmlelement, Name1, Attrs1, [NewEl | Els1]} | Tail1]
      end;
  process_data(_CallbackPid, [_] = Stack, {?XML_CDATA, _CData}) ->
      %% Character data directly under the root element is dropped.
      Stack;
  process_data(_CallbackPid,
               [{xmlelement, Name, Attrs, [{xmlcdata, PreviousCData} | Els]}
                | Tail],
               {?XML_CDATA, CData}) ->
      %% Merge contiguous CDATA nodes. This does not change the semantics:
      %% the split into several CDATA nodes depends on the TCP/IP packet
      %% fragmentation.
      Merged = list_to_binary([PreviousCData, CData]),
      [{xmlelement, Name, Attrs, [{xmlcdata, Merged} | Els]} | Tail];
  process_data(_CallbackPid, [{xmlelement, Name, Attrs, Els} | Tail],
               {?XML_CDATA, CData}) ->
      %% No previous CDATA node to merge with.
      [{xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]} | Tail];
  process_data(_CallbackPid, [], {?XML_CDATA, _CData}) ->
      [];
  process_data(CallbackPid, Stack, {?XML_ERROR, Err}) ->
      catch gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err}),
      %% Keep the stack intact. Returning the value of the send here would
      %% replace the stack by the atom `ok' and break every later event.
      Stack.
  %% @doc Create a parser state without a stanza size limit.
  %% Same as `new(CallbackPid, infinity)'.
  new(CallbackPid) ->
      Unlimited = infinity,
      new(CallbackPid, Unlimited).
  %% @doc Create a parser state that reports to `CallbackPid'.
  %%
  %% Opens a fresh port on `expat_erl' in binary mode and starts with an empty
  %% element stack and a byte count of zero. `MaxSize' is stored as the size
  %% limit checked by parse/2.
  new(CallbackPid, MaxSize) ->
      #xml_stream_state{
         callback_pid = CallbackPid,
         port = open_port({spawn, expat_erl}, [binary]),
         stack = [],
         size = 0,
         maxsize = MaxSize
        }.
  %% @doc Feed one chunk of the stream to the parser and return the new state.
  %%
  %% `Str' is handed to the port with `port_control/3', so it may be any
  %% iodata (binary, byte list or nested mix of both). The events decoded from
  %% the reply are folded through process_data/3, which updates the element
  %% stack and notifies the callback process.
  %%
  %% Size accounting: the byte size of the chunk is added to the running
  %% count, and the count is reset to zero each time the stack is back to
  %% exactly one element. If the resulting count exceeds `maxsize', an
  %% `{xmlstreamerror, "XML stanza is too big"}' event is sent; the updated
  %% state is returned in either case.
  parse(#xml_stream_state{callback_pid = CallbackPid,
                          port = Port,
                          stack = Stack,
                          size = Size,
                          maxsize = MaxSize} = State, Str) ->
      %% iolist_size/1 equals length/1 on flat byte lists and byte_size/1 on
      %% binaries, and also counts nested iodata correctly.
      StrSize = iolist_size(Str),
      Res = port_control(Port, ?PARSE_COMMAND, Str),
      {NewStack, NewSize} =
          lists:foldl(
            fun(Data, {St, Sz}) ->
                    NewSt = process_data(CallbackPid, St, Data),
                    case NewSt of
                        [_] -> {NewSt, 0};
                        _ -> {NewSt, Sz}
                    end
            end, {Stack, Size + StrSize}, binary_to_term(Res)),
      %% With maxsize `infinity' this test is never true: in Erlang's term
      %% order every number sorts before every atom.
      if
          NewSize > MaxSize ->
              catch gen_fsm:send_event(CallbackPid,
                                       {xmlstreamerror, "XML stanza is too big"});
          true ->
              ok
      end,
      State#xml_stream_state{stack = NewStack, size = NewSize}.
  %% @doc Shut the parser down by closing the port stored in the state.
  %% Returns the result of `port_close/1'.
  close(#xml_stream_state{port = ParserPort}) ->
      port_close(ParserPort).
  %% @doc Parse a complete XML document in one shot.
  %%
  %% Opens a temporary port on `expat_erl', sends `Str' with the final-parse
  %% command and turns the reported events into a single element with
  %% process_element_events/1. Returns the root `{xmlelement, Name, Attrs,
  %% Children}' tuple, or `{error, Reason}' when the events do not describe
  %% exactly one well-formed element.
  parse_element(Str) ->
      Port = open_port({spawn, expat_erl}, [binary]),
      %% Close the port even when port_control/3 raises (e.g. badarg on
      %% input that is not iodata), so a caller that catches the exception
      %% does not leak a port.
      Res = try
                port_control(Port, ?PARSE_FINAL_COMMAND, Str)
            after
                port_close(Port)
            end,
      process_element_events(binary_to_term(Res)).
  %% @doc Build a single element from a full list of parser events, starting
  %% with no element open.
  process_element_events(Events) ->
      EmptyStack = [],
      process_element_events(Events, EmptyStack).
  %% @doc Consume parser events one by one while keeping a stack of the
  %% elements that are still open (innermost first, children reversed).
  %%
  %% Returns the finished root `{xmlelement, ...}' when its end tag is the
  %% very last event, `{error, parse_error}' when the events run out early or
  %% continue after the root was closed, and `{error, Err}' when the parser
  %% itself reported an error.
  process_element_events([], _Stack) ->
      {error, parse_error};
  process_element_events([Event | Rest], Stack) ->
      case Event of
          {?XML_START, {Name, Attrs}} ->
              Opened = {xmlelement, Name, Attrs, []},
              process_element_events(Rest, [Opened | Stack]);
          {?XML_END, _EndName} ->
              close_parsed_element(Rest, Stack);
          {?XML_CDATA, CData} ->
              append_parsed_cdata(CData, Rest, Stack);
          {?XML_ERROR, Err} ->
              {error, Err}
      end.

  %% Close the innermost open element. It is either the final result (no
  %% parent and no events left) or becomes a child of its parent.
  close_parsed_element(Rest, Stack) ->
      case Stack of
          [{xmlelement, Name, Attrs, Els} | Tail] ->
              NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)},
              case Tail of
                  [] when Rest == [] ->
                      NewEl;
                  [] ->
                      %% Events after the root was closed.
                      {error, parse_error};
                  [{xmlelement, Name1, Attrs1, Els1} | Tail1] ->
                      Parent = {xmlelement, Name1, Attrs1, [NewEl | Els1]},
                      process_element_events(Rest, [Parent | Tail1])
              end
      end.

  %% Add a character-data node to the innermost open element. Character data
  %% seen while no element is open is skipped.
  append_parsed_cdata(CData, Rest, Stack) ->
      case Stack of
          [{xmlelement, Name, Attrs, Els} | Tail] ->
              Current = {xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]},
              process_element_events(Rest, [Current | Tail]);
          [] ->
              process_element_events(Rest, [])
      end.