PageRenderTime 25ms CodeModel.GetById 11ms RepoModel.GetById 1ms app.codeStats 0ms

/CS/migrated/tags/R0_90_001/plugins/cslexan/lexan.cpp

#
C++ | 373 lines | 273 code | 69 blank | 31 comment | 40 complexity | 6898e297132074060968c1faee4eddaf MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, LGPL-2.0
  1. /*
  2. Copyright (C) 2001 by Christopher Nelson
  3. This library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Library General Public
  5. License as published by the Free Software Foundation; either
  6. version 2 of the License, or (at your option) any later version.
  7. This library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Library General Public License for more details.
  11. You should have received a copy of the GNU Library General Public
  12. License along with this library; if not, write to the Free
  13. Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  14. */
  15. #include "cssysdef.h"
  16. #include "cslex.h"
  17. #include "csutil/scfstr.h"
  18. #include <ctype.h>
  // SCF (Crystal Space Shared Class Facility) registration boilerplate for
  // the csLexicalAnalyzer plugin.
  // NOTE(review): the macro definitions are not visible in this file; the
  // descriptions below are read off the macro names and arguments only --
  // confirm against the SCF headers.
  19. CS_IMPLEMENT_PLUGIN
  // Interface table for csLexicalAnalyzer: lists iLexicalAnalyzer directly
  // and iComponent as an embedded interface.
  20. SCF_IMPLEMENT_IBASE (csLexicalAnalyzer)
  21. SCF_IMPLEMENTS_INTERFACE (iLexicalAnalyzer)
  22. SCF_IMPLEMENTS_EMBEDDED_INTERFACE (iComponent)
  23. SCF_IMPLEMENT_IBASE_END
  // Interface table for the embedded eiComponent object (iComponent).
  24. SCF_IMPLEMENT_EMBEDDED_IBASE (csLexicalAnalyzer::eiComponent)
  25. SCF_IMPLEMENTS_INTERFACE (iComponent)
  26. SCF_IMPLEMENT_EMBEDDED_IBASE_END
  // Factory for csLexicalAnalyzer and the class table of the "cslexan"
  // module, which exports it under the ID "crystalspace.scanner.regex".
  27. SCF_IMPLEMENT_FACTORY (csLexicalAnalyzer)
  28. SCF_EXPORT_CLASS_TABLE (cslexan)
  29. SCF_EXPORT_CLASS (csLexicalAnalyzer, "crystalspace.scanner.regex",
  30. "Crystal Space regular-expression-based scanner")
  31. SCF_EXPORT_CLASS_TABLE_END
  32. csLexicalAnalyzer::csLexicalAnalyzer(iBase* p):next_key(1)
  33. {
  34. SCF_CONSTRUCT_IBASE (p);
  35. SCF_CONSTRUCT_EMBEDDED_IBASE(scfiComponent);
  36. }
  37. csLexicalAnalyzer::~csLexicalAnalyzer()
  38. {
  39. void *item=re_list.GetFirstItem();
  40. while(item)
  41. {
  42. key_re_pair *check = (key_re_pair *)item;
  43. delete check;
  44. re_list.SetCurrentItem(0);
  45. item = re_list.GetNextItem();
  46. }
  47. }
  48. bool
  49. csLexicalAnalyzer::Initialize(iObjectRegistry *)
  50. {
  51. return true;
  52. }
  53. bool
  54. csLexicalAnalyzer::RegisterRegExp(unsigned int key, iRegExp &re)
  55. {
  56. re_list.AddItem(new key_re_pair(re, key));
  57. return true;
  58. }
  59. bool
  60. csLexicalAnalyzer::UnregisterRegExp(unsigned int key)
  61. {
  62. void *item=re_list.GetFirstItem();
  63. while(item)
  64. {
  65. key_re_pair *check = (key_re_pair *)item;
  66. if (check->key == key)
  67. {
  68. re_list.RemoveItem(item);
  69. return true;
  70. }
  71. item = re_list.GetNextItem();
  72. }
  73. exec_error = RE_EXEC_ERR_KEY_DOES_NOT_EXIST;
  74. return false;
  75. }
  76. unsigned int
  77. csLexicalAnalyzer::GetMatchedKey()
  78. {
  79. return last_matched_key;
  80. }
  81. iString *
  82. csLexicalAnalyzer::GetMatchedText()
  83. {
  84. return new scfString(last_matched_text.GetData());
  85. }
  86. bool
  87. csLexicalAnalyzer::PushStream(iDataBuffer &/*buf*/)
  88. {
  89. return false;
  90. }
  91. bool
  92. csLexicalAnalyzer::PopStream()
  93. {
  94. return false;
  95. }
  96. bool
  97. csLexicalAnalyzer::Exec(iRegExp &re)
  98. {
  99. stream_state *ss = STATIC_CAST(stream_state*,re_list.GetFirstItem());
  100. uint8 *buf = ss->buf->GetUint8();
  101. unsigned int pos = ss->pos;
  102. unsigned int i = 0,
  103. saved_i,
  104. num_matches=0;
  105. unsigned char op=0;
  106. unsigned char opts;
  107. bool matched=false;
  108. csString str;
  109. while(op!=OP_END && pos < ss->buf->GetSize())
  110. {
  111. saved_i = i;
  112. re.GetOp(i++, op);
  113. if (op == OP_END) break;
  114. bool extended_op = (op == 0);
  115. bool extended_match=false;
  116. bool op_is_nop=false;
  117. bool keep_match=false;
  118. // if the instruction is NOT an escape, then match it exactly to the buffer contents
  119. if (!extended_op)
  120. {
  121. if (op == buf[pos])
  122. {
  123. matched=true;
  124. str+=buf[pos];
  125. } // end if buffer matches instruction (direct op match)
  126. } // end if instruction isn't extended
  127. else
  128. {
  129. // get escaped op
  130. re.GetOp(i++, op);
  131. switch(op)
  132. {
  133. case OP_EXT_NOP:
  134. op_is_nop=true;
  135. break;
  136. case OP_EXT_ALPHA_TABLE: // op matches isalpha
  137. if (isalpha(buf[pos]))
  138. {
  139. matched=true;
  140. str+=buf[pos];
  141. }
  142. break;
  143. case OP_EXT_DIGIT_TABLE: // op matches isdigit
  144. if (isdigit(buf[pos]))
  145. {
  146. matched=true;
  147. str+=buf[pos];
  148. }
  149. break;
  150. case OP_EXT_ALNUM_TABLE: // op matches isalnum
  151. if (isalnum(buf[pos]))
  152. {
  153. matched=true;
  154. str+=buf[pos];
  155. }
  156. break;
  157. case OP_EXT_PUNCT_TABLE: // op matches ispunct
  158. if (ispunct(buf[pos]))
  159. {
  160. matched=true;
  161. str+=buf[pos];
  162. }
  163. break;
  164. case OP_EXT_SPACE_TABLE: // op matches isspace
  165. if (isspace(buf[pos]))
  166. {
  167. matched=true;
  168. str+=buf[pos];
  169. }
  170. break;
  171. case OP_EXT_CNTRL_TABLE: // op matches iscntrl
  172. if (iscntrl(buf[pos]))
  173. {
  174. matched=true;
  175. str+=buf[pos];
  176. }
  177. break;
  178. case OP_EXT_GRAPH_TABLE: // op matches isgraph
  179. if (isgraph(buf[pos]))
  180. {
  181. matched=true;
  182. str+=buf[pos];
  183. }
  184. break;
  185. case OP_EXT_LOWER_TABLE: // op matches islower
  186. if (islower(buf[pos]))
  187. {
  188. matched=true;
  189. str+=buf[pos];
  190. }
  191. break;
  192. case OP_EXT_UPPER_TABLE: // op matches isupper
  193. if (isupper(buf[pos]))
  194. {
  195. matched=true;
  196. str+=buf[pos];
  197. }
  198. break;
  199. case OP_EXT_PRINT_TABLE: // op matches isprint
  200. if (isprint(buf[pos]))
  201. {
  202. matched=true;
  203. str+=buf[pos];
  204. }
  205. break;
  206. case OP_EXT_XDIGIT_TABLE: // op matches isxdigit
  207. if (isxdigit(buf[pos]))
  208. {
  209. matched=true;
  210. str+=buf[pos];
  211. }
  212. break;
  213. case OP_EXT_CUSTOM_TABLE:
  214. {
  215. } // end custom table scope
  216. break;
  217. case OP_EXT_LOGICAL_OR: // turns on the OR flag in the VM
  218. default:
  219. break;
  220. } // end switch instruction
  221. } // end else instruction IS extended
  222. // examine modifiers and stuff
  223. re.GetOp(i++, opts);
  224. // if there are no modifiers and the instruction did not match, then return false
  225. if (opts == 0 && !matched) return false;
  226. // this is a non or many match (optional sequence) then keep going
  227. switch(opts & 0x3)
  228. {
  229. case OP_MATCH_NONE_OR_MORE:
  230. if (matched)
  231. {
  232. num_matches++;
  233. i=saved_i;
  234. extended_match=true;
  235. }
  236. else num_matches = 0;
  237. break;
  238. case OP_MATCH_ONE_OR_NONE:
  239. if (matched && num_matches<1)
  240. {
  241. num_matches++;
  242. i=saved_i;
  243. extended_match=true;
  244. }
  245. else num_matches = 0;
  246. break;
  247. case OP_MATCH_ONE_OR_MORE:
  248. if (!matched && num_matches==0) return false;
  249. else if (matched)
  250. {
  251. num_matches++;
  252. i=saved_i;
  253. extended_match=true;
  254. }
  255. else num_matches=0;
  256. break;
  257. } // end modifier op
  258. // special handling for NOP's with OP_MATCH
  259. if (opts & 3 !=0 && op_is_nop)
  260. {
  261. // if there was an OP_MATCH, but it failed to continue, pop the execution state stack
  262. if (!extended_match)
  263. {
  264. execution_state *es = STATIC_CAST(execution_state*,es_list.GetFirstItem());
  265. if (es)
  266. {
  267. delete es;
  268. es_list.RemoveItem();
  269. }
  270. }
  271. // We may need to reset the instruction pointer farther back, if the stack holds anything
  272. else
  273. {
  274. execution_state *es = STATIC_CAST(execution_state*,es_list.GetFirstItem());
  275. if (es)
  276. {
  277. i = es->ip;
  278. }
  279. } // end else we need to return to the last place pushed
  280. } // end if extended match and instruction is NOP
  281. // check the push flag
  282. if (opts & OP_PUSH_ADDRESS)
  283. {
  284. es_list.AddItem(new execution_state(i));
  285. }
  286. // possibly reset the matched var for next time around
  287. if (!keep_match) matched=false;
  288. // increment the buffer position
  289. ++pos;
  290. } // end while i is not at the end of instruction sequence
  291. // if we ran out of buffer before running out of instructions, return false
  292. // if (op!=OP_END) return false; // does this cause false negatives? // FIXME!
  293. // save the matched text
  294. last_matched_text = str;
  295. return true;
  296. }
  297. unsigned int
  298. csLexicalAnalyzer::Match()
  299. {
  300. return 0;
  301. }