/src/editest.erl

http://github.com/ebastien/erlang-edi-parser · Erlang · 264 lines · 151 code · 54 blank · 59 comment · 0 complexity · 860a218387b1ab033426f506e473ffdc MD5 · raw file

  1. %% compile:file(editest, [native, {hipe, [o3,verbose,load]}]).
  2. -module(editest).
  3. -export([start/0, bench/2, bench2/1]).
  4. %% ----------------------------------------------------------------------------
  %% Read File, scan and parse its contents as a sequence of Erlang
  %% expressions, evaluate them with no initial bindings and return the
  %% resulting value.
  %% Every step is asserted by pattern matching, so an unreadable file or a
  %% scan/parse error crashes with badmatch.
  file_to_value(File) ->
      {ok, Binary} = file:read_file(File),
      {ok, Tokens, _EndLocation} = erl_scan:string(binary_to_list(Binary)),
      {ok, ExprList} = erl_parse:parse_exprs(Tokens),
      {value, Result, _Bindings} = erl_eval:exprs(ExprList, []),
      Result.
  %% Benchmark driver.
  %% Loads the specification from "edispec.erl", parses "AFIVRQ.edi" once,
  %% serializes the parsed message with term_to_binary/1 and then times
  %% bench2/1 on the serialized form, printing the time returned by timer:tc/3.
  start() ->
      Spec = file_to_value("edispec.erl"),
      {ok, Edi} = file:read_file("AFIVRQ.edi"),
      Serialized = term_to_binary(parse(Spec, Edi)),
      io:format("Start~n", []),
      {Elapsed, _Result} = timer:tc(?MODULE, bench2, [Serialized]),
      io:format("End ~p~n", [Elapsed]).
  22. %% ----------------------------------------------------------------------------
  23. %% Parse a stream of bytes as a list of EDIFACT segments
  24. %% ----------------------------------------------------------------------------
  25. -define(SSEP, $').
  26. -define(DSEP, $+).
  27. -define(CSEP, $:).
  28. %% Composite strings are kept in reverse order.
  29. %% It is up to the concrete message decoder to reverse them if needed.
  %% Tokenizer entry point: split String (a list of characters) into segments.
  %% Starts the segment state with no completed segments and an empty tag.
  parse_string(String) ->
      NoSegments = [],
      EmptyTag = [],
      parse_string_seg(String, NoSegments, EmptyTag).
  %% Segment-tag state of the tokenizer.
  %% Arguments: the remaining characters, the segments completed so far
  %% (newest first) and the characters of the tag being read (reversed).
  %% NOTE(review): tags are converted with list_to_atom/1; atoms are never
  %% garbage collected, so untrusted input could exhaust the atom table.
  %%
  %% End of input with no pending tag => return the segments in input order.
  parse_string_seg([], Segments, []) ->
      {ok, lists:reverse(Segments)};
  %% Segment separator right after the tag => segment without data elements.
  parse_string_seg([?SSEP | Rest], Segments, RevTag) ->
      Segment = {list_to_atom(lists:reverse(RevTag)), []},
      parse_string_seg(Rest, [Segment | Segments], []);
  %% Data element separator => the tag is complete, switch to the compound
  %% state with an empty numeric compound.
  parse_string_seg([?DSEP | Rest], Segments, RevTag) ->
      parse_string_cpd(Rest, Segments, RevTag, [], [], {num, 0, []});
  %% Any other character is part of the segment tag.
  parse_string_seg([Char | Rest], Segments, RevTag) ->
      parse_string_seg(Rest, Segments, [Char | RevTag]).
  %% Compound state of the tokenizer, entered after the first data element
  %% separator of a segment.
  %%   Elements  - completed data elements of the segment, newest first;
  %%   Compounds - completed compounds of the current element, newest first;
  %%   Compound  - {Type, Length, ReversedChars} being accumulated; Type starts
  %%               as num and becomes aln once a non-digit character is read.
  %%
  %% Segment separator => close the compound, the element and the segment,
  %% then go back to reading a segment tag.
  parse_string_cpd([?SSEP | Rest], Segments, RevTag, Elements, Compounds, Compound) ->
      Element = lists:reverse([Compound | Compounds]),
      AllElements = lists:reverse([Element | Elements]),
      Segment = {list_to_atom(lists:reverse(RevTag)), AllElements},
      parse_string_seg(Rest, [Segment | Segments], []);
  %% Data element separator => close the compound and the element.
  parse_string_cpd([?DSEP | Rest], Segments, RevTag, Elements, Compounds, Compound) ->
      Element = lists:reverse([Compound | Compounds]),
      parse_string_cpd(Rest, Segments, RevTag, [Element | Elements], [], {num, 0, []});
  %% Compound separator => close the compound only.
  parse_string_cpd([?CSEP | Rest], Segments, RevTag, Elements, Compounds, Compound) ->
      parse_string_cpd(Rest, Segments, RevTag, Elements, [Compound | Compounds], {num, 0, []});
  %% Digit appended to a compound that is still purely numeric.
  parse_string_cpd([Char | Rest], Segments, RevTag, Elements, Compounds, {num, Length, Chars})
    when Char >= $0, Char =< $9 ->
      Grown = {num, Length + 1, [Char | Chars]},
      parse_string_cpd(Rest, Segments, RevTag, Elements, Compounds, Grown);
  %% Any other character => the compound is (or becomes) alpha-numeric.
  parse_string_cpd([Char | Rest], Segments, RevTag, Elements, Compounds, {_, Length, Chars}) ->
      Grown = {aln, Length + 1, [Char | Chars]},
      parse_string_cpd(Rest, Segments, RevTag, Elements, Compounds, Grown).
  79. %% ----------------------------------------------------------------------------
  80. %% Parse a list of composites against an EDIFACT specification
  81. %% ----------------------------------------------------------------------------
  %% Parse the composites of one data element against a composite spec.
  %%
  %% Spec is a list of {Name, Status, Cardinality, Type, Min, Max} entries:
  %%   Status      - a 'cond' entry is skipped once the input is exhausted;
  %%   Cardinality - maximum number of consecutive composites the entry takes;
  %%   Type        - 'aln' accepts any composite, any other value must equal
  %%                 the composite's own type;
  %%   Min, Max    - inclusive bounds on the composite length.
  %% Composites is a list of {Type, Length, ReversedChars} tuples.
  %%
  %% Returns the {Name, Chars} records, most recent first, with the atom
  %% 'garbage' prepended when composites remain after the last spec entry.
  %% Crashes with function_clause when the first composite offered to an
  %% entry (with non-zero cardinality) is missing or does not satisfy it.
  parse_composites(Spec, Composites) ->
      parse_composites(Spec, Composites, []).

  %% Spec and input both consumed.
  parse_composites([], [], SubRecords) ->
      SubRecords;
  %% Spec consumed but composites remain.
  parse_composites([], _, SubRecords) ->
      ['garbage' | SubRecords];
  %% Skip a conditional entry when the input is exhausted.
  parse_composites([{_, 'cond', _, _, _, _} | NextSpecs], Composites = [], SubRecords) ->
      parse_composites(NextSpecs, Composites, SubRecords);
  %% Otherwise hand the entry to the cardinality loop.
  parse_composites([{Name, _, Cardinality, Type, Min, Max} | NextSpecs], Composites, SubRecords) ->
      parse_composites_cps(Cardinality, Type, Min, Max, Name, NextSpecs, Composites, SubRecords).

  %% First occurrence of an entry: it must be present and valid, unless the
  %% cardinality is zero.
  parse_composites_cps(0, _, _, _, _, NextSpecs, Composites, SubRecords) ->
      parse_composites(NextSpecs, Composites, SubRecords);
  parse_composites_cps(Cardinality, SType, Min, Max, Name, NextSpecs,
                       [{DType, Length, Data} | NextComposites], SubRecords)
    when Length >= Min, Length =< Max, (SType =:= 'aln' orelse SType =:= DType) ->
      NewSubRecords = [{Name, lists:reverse(Data)} | SubRecords],
      %% Keep SType for the repetitions: replacing it with 'cond' made the
      %% type guard unsatisfiable, so any cardinality above one crashed.
      parse_composites_rep(Cardinality - 1, SType, Min, Max, Name, NextSpecs,
                           NextComposites, NewSubRecords).

  %% Further occurrences of an entry are optional: take composites while the
  %% cardinality allows it and they satisfy the entry, then move on to the
  %% next spec entry with whatever input is left.
  parse_composites_rep(Cardinality, SType, Min, Max, Name, NextSpecs,
                       [{DType, Length, Data} | NextComposites], SubRecords)
    when Cardinality > 0, Length >= Min, Length =< Max,
         (SType =:= 'aln' orelse SType =:= DType) ->
      NewSubRecords = [{Name, lists:reverse(Data)} | SubRecords],
      parse_composites_rep(Cardinality - 1, SType, Min, Max, Name, NextSpecs,
                           NextComposites, NewSubRecords);
  parse_composites_rep(_, _, _, _, _, NextSpecs, Composites, SubRecords) ->
      parse_composites(NextSpecs, Composites, SubRecords).
  106. %% ----------------------------------------------------------------------------
  107. %% Parse a list of data elements against an EDIFACT specification
  108. %% ----------------------------------------------------------------------------
  %% Parse the data elements of one segment against an element spec.
  %% A spec entry is either a 4-tuple {Name, Status, Cardinality, CompoundSpec}
  %% (handled by parse_elements_cpd/6) or a 6-tuple composite entry that is
  %% passed to parse_composites/2 for a single data element.
  %% Returns the records most recent first, with the atom 'garbage' prepended
  %% when elements remain after the last spec entry.
  parse_elements(Spec, Elements) ->
      parse_elements(Spec, Elements, []).

  %% Spec and input both consumed.
  parse_elements([], [], Records) ->
      Records;
  %% Spec consumed but elements remain.
  parse_elements([], _Leftover, Records) ->
      ['garbage' | Records];
  %% Conditional entries of either shape are skipped once the input is
  %% exhausted.
  parse_elements([{_, 'cond', _, _} | NextSpecs], [], Records) ->
      parse_elements(NextSpecs, [], Records);
  parse_elements([{_, 'cond', _, _, _, _} | NextSpecs], [], Records) ->
      parse_elements(NextSpecs, [], Records);
  %% 4-tuple entry => run the cardinality loop over the elements.
  parse_elements([{Name, _, Cardinality, CompoundSpec} | NextSpecs], Elements, Records) ->
      parse_elements_cpd(Cardinality, Name, CompoundSpec, NextSpecs, Elements, Records);
  %% Any other entry => the element must yield exactly one record.
  parse_elements([Spec | NextSpecs], [Element | NextElements], Records) ->
      [Record] = parse_composites([Spec], Element),
      parse_elements(NextSpecs, NextElements, [Record | Records]).
  %% Cardinality loop for a 4-tuple element entry.
  %% Consumes up to Cardinality data elements, parsing each one against
  %% CompoundSpec and prepending {Name, RecordData} to Records, then resumes
  %% parse_elements/3 with the remaining spec entries.
  %% The first occurrence must be present (function_clause otherwise, unless
  %% the cardinality is zero); later occurrences are optional.
  %%
  %% Cardinality exhausted => continue with the next spec entry.
  parse_elements_cpd(0, _, _, NextSpecs, Elements, Records) ->
      parse_elements(NextSpecs, Elements, Records);
  %% Consume one data element.
  parse_elements_cpd(Cardinality, Name, CompoundSpec, NextSpecs, [Element | NextElements], Records) ->
      NewRecords = [{Name, parse_composites(CompoundSpec, Element)} | Records],
      case NextElements of
          [] ->
              %% Input exhausted before the cardinality: recursing here used
              %% to crash, so hand over to the next spec entry instead.
              parse_elements(NextSpecs, [], NewRecords);
          _ ->
              parse_elements_cpd(Cardinality - 1, Name, CompoundSpec, NextSpecs,
                                 NextElements, NewRecords)
      end.
  142. %% ----------------------------------------------------------------------------
  143. %% Parse a list of segments against an EDIFACT specification
  144. %% ----------------------------------------------------------------------------
  %% Parse a list of {Tag, Elements} segments against a message spec.
  %% Returns {ok, Message}; crashes with badmatch when segments are left over
  %% after the whole spec has been applied.
  parse_segments(Spec, Segments) ->
      {ok, Message, []} = parse_segments(Spec, Segments, []),
      {ok, Message}.

  %% Spec consumed => return the records in input order together with the
  %% segments that were not consumed.
  parse_segments([], Remaining, Acc) ->
      {ok, lists:reverse(Acc), Remaining};
  %% Segment entry: {{Tag, SegmentSpec}, Name, Type, Cardinality}.
  parse_segments([{{Tag, SegmentSpec}, Name, Type, Cardinality} | NextSpecs], Segments, Acc) ->
      parse_segments_seg(Tag, Type, Cardinality, Name, SegmentSpec, NextSpecs, Segments, Acc);
  %% Group entry: {Name, Type, Cardinality, GroupSpec}; the group is keyed by
  %% the tag of its first segment entry.
  parse_segments([{Name, Type, Cardinality, [{{Tag, _}, _, _, _} | _] = GroupSpec} | NextSpecs],
                 Segments, Acc) ->
      parse_segments_grp(Tag, Type, Cardinality, Name, GroupSpec, NextSpecs, Segments, Acc).
  %% Cardinality loop for a segment entry.
  %% Consumes consecutive segments carrying Tag, up to Cardinality of them.
  %% After the first match the entry is treated as 'cond', so further
  %% repetitions are optional. An entry whose Type is not 'cond' and whose tag
  %% does not match the current segment has no clause and crashes with
  %% function_clause.
  %%
  %% Cardinality exhausted => continue with the next spec entry.
  parse_segments_seg(_, _, 0, _, _, NextSpecs, Segments, Message) ->
      parse_segments(NextSpecs, Segments, Message);
  %% The current segment carries the expected tag => record it and loop.
  parse_segments_seg(Tag, _, Cardinality, Name, SegmentSpec, NextSpecs,
                     [{Tag, Elements} | Rest], Message) ->
      Parsed = parse_elements(SegmentSpec, Elements),
      parse_segments_seg(Tag, 'cond', Cardinality - 1, Name, SegmentSpec, NextSpecs,
                         Rest, [{Name, Parsed} | Message]);
  %% No match on a conditional entry => continue with the next spec entry.
  parse_segments_seg(_, 'cond', _, _, _, NextSpecs, Segments, Message) ->
      parse_segments(NextSpecs, Segments, Message).
  %% Cardinality loop for a group entry.
  %% While the current segment carries the tag of the group's first segment,
  %% the group spec is applied to the input and the resulting sub-message is
  %% recorded as {Name, SubMessage}. After the first match the entry is
  %% treated as 'cond', so further repetitions are optional.
  %%
  %% Cardinality exhausted => continue with the next spec entry.
  parse_segments_grp(_, _, 0, _, _, NextSpecs, Segments, Message) ->
      parse_segments(NextSpecs, Segments, Message);
  %% The current segment opens the group => parse one occurrence and loop on
  %% the segments it left unconsumed.
  parse_segments_grp(Tag, _, Cardinality, Name, GroupSpec, NextSpecs,
                     [{Tag, _} | _] = Segments, Message) ->
      {ok, SubMessage, Remaining} = parse_segments(GroupSpec, Segments, []),
      parse_segments_grp(Tag, 'cond', Cardinality - 1, Name, GroupSpec, NextSpecs,
                         Remaining, [{Name, SubMessage} | Message]);
  %% No match on a conditional group => continue with the next spec entry.
  parse_segments_grp(_, 'cond', _, _, _, NextSpecs, Segments, Message) ->
      parse_segments(NextSpecs, Segments, Message).
  187. %% ----------------------------------------------------------------------------
  %% Parse the binary Data against Spec and return the decoded message.
  %% The binary is converted to a character list, split into segments and
  %% then matched against the specification; either step failing crashes
  %% with badmatch (or the underlying error).
  parse(Spec, Data) ->
      {ok, Segments} = parse_string(binary_to_list(Data)),
      {ok, Message} = parse_segments(Spec, Segments),
      Message.
  193. %% ----------------------------------------------------------------------------
  %% Benchmark helper: run parse/2 on the same input repeatedly and return
  %% the message produced by the last run.
  bench(Spec, Data) ->
      bench(Spec, Data, 0).

  %% N counts the completed runs; the run performed at N =:= 1000 is the last
  %% one, so parse/2 is called 1001 times when starting from 0.
  bench(Spec, Data, N) ->
      Message = parse(Spec, Data),
      case N of
          1000 -> Message;
          _ -> bench(Spec, Data, N + 1)
      end.
  202. %% ----------------------------------------------------------------------------
  %% Benchmark helper: decode the external-term-format binary Data repeatedly
  %% with binary_to_term/1 and return the term produced by the last decode.
  bench2(Data) ->
      bench2(Data, 0).

  %% N counts the completed decodes; the decode performed at N =:= 1000 is
  %% the last one, so Data is decoded 1001 times when starting from 0.
  bench2(Data, N) ->
      Term = binary_to_term(Data),
      case N of
          1000 -> Term;
          _ -> bench2(Data, N + 1)
      end.