/Src/Dependencies/Boost/boost/token_functions.hpp

http://hadesmem.googlecode.com/ · C++ Header · 665 lines · 458 code · 95 blank · 112 comment · 106 complexity · a43088576d333e7a9f37bc689c0e41c9 MD5 · raw file

  1. // Boost token_functions.hpp ------------------------------------------------//
  2. // Copyright John R. Bandela 2001.
  3. // Distributed under the Boost Software License, Version 1.0. (See
  4. // accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. // See http://www.boost.org/libs/tokenizer/ for documentation.
  7. // Revision History:
  8. // 01 Oct 2004 Joaquin M Lopez Munoz
  9. // Workaround for a problem with string::assign in msvc-stlport
  10. // 06 Apr 2004 John Bandela
  11. // Fixed a bug involving using char_delimiter with a true input iterator
  12. // 28 Nov 2003 Robert Zeh and John Bandela
  13. // Converted into "fast" functions that avoid using += when
  14. // the supplied iterator isn't an input_iterator; based on
  15. // some work done at Archelon and a version that was checked into
  16. // the boost CVS for a short period of time.
  17. // 20 Feb 2002 John Maddock
  18. // Removed using namespace std declarations and added
  19. // workaround for BOOST_NO_STDC_NAMESPACE (the library
  20. // can be safely mixed with regex).
  21. // 06 Feb 2002 Jeremy Siek
  22. // Added char_separator.
  23. // 02 Feb 2002 Jeremy Siek
  24. // Removed tabs and a little cleanup.
  25. #ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
  26. #define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
  27. #include <vector>
  28. #include <stdexcept>
  29. #include <string>
  30. #include <cctype>
  31. #include <algorithm> // for find_if
  32. #include <boost/config.hpp>
  33. #include <boost/assert.hpp>
  34. #include <boost/detail/workaround.hpp>
  35. #include <boost/mpl/if.hpp>
  36. #if !defined(BOOST_NO_CWCTYPE)
  37. #include <cwctype>
  38. #endif
  39. //
  40. // the following must not be macros if we are to prefix them
  41. // with std:: (they shouldn't be macros anyway...)
  42. //
  43. #ifdef ispunct
  44. # undef ispunct
  45. #endif
  46. #ifdef iswpunct
  47. # undef iswpunct
  48. #endif
  49. #ifdef isspace
  50. # undef isspace
  51. #endif
  52. #ifdef iswspace
  53. # undef iswspace
  54. #endif
  55. //
  56. // fix namespace problems:
  57. //
  58. #ifdef BOOST_NO_STDC_NAMESPACE
  59. namespace std{
  60. using ::ispunct;
  61. using ::isspace;
  62. #if !defined(BOOST_NO_CWCTYPE)
  63. using ::iswpunct;
  64. using ::iswspace;
  65. #endif
  66. }
  67. #endif
  68. namespace boost{
  69. //===========================================================================
  // The escaped_list_separator class, which is a model of TokenizerFunction.
  // An escaped list is a super-set of what is commonly known as a comma
  // separated value (csv) list. It is separated into fields by a comma or
  // other character. If the delimiting character is inside quotes, then it is
  // counted as a regular character. To allow for embedded quotes in a field,
  // there can be escape sequences using the \ much like C.
  // The role of the comma, the quotation mark, and the escape
  // character (backslash \), can be assigned to other characters.
  // Exception type thrown when an escaped list contains a malformed escape
  // sequence. It adds nothing to std::runtime_error except a distinct type
  // that callers can catch.
  struct escaped_list_error : public std::runtime_error {
    // message: text later reported through what().
    escaped_list_error(const std::string& message)
      : std::runtime_error(message)
    {
    }
  };
  81. // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
  82. // MSVC does not like the following typename
  83. template <class Char,
  84. class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
  85. class escaped_list_separator {
  86. private:
  87. typedef std::basic_string<Char,Traits> string_type;
  88. struct char_eq {
  89. Char e_;
  90. char_eq(Char e):e_(e) { }
  91. bool operator()(Char c) {
  92. return Traits::eq(e_,c);
  93. }
  94. };
  95. string_type escape_;
  96. string_type c_;
  97. string_type quote_;
  98. bool last_;
  99. bool is_escape(Char e) {
  100. char_eq f(e);
  101. return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
  102. }
  103. bool is_c(Char e) {
  104. char_eq f(e);
  105. return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
  106. }
  107. bool is_quote(Char e) {
  108. char_eq f(e);
  109. return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
  110. }
  111. template <typename iterator, typename Token>
  112. void do_escape(iterator& next,iterator end,Token& tok) {
  113. if (++next == end)
  114. throw escaped_list_error(std::string("cannot end with escape"));
  115. if (Traits::eq(*next,'n')) {
  116. tok+='\n';
  117. return;
  118. }
  119. else if (is_quote(*next)) {
  120. tok+=*next;
  121. return;
  122. }
  123. else if (is_c(*next)) {
  124. tok+=*next;
  125. return;
  126. }
  127. else if (is_escape(*next)) {
  128. tok+=*next;
  129. return;
  130. }
  131. else
  132. throw escaped_list_error(std::string("unknown escape sequence"));
  133. }
  134. public:
  135. explicit escaped_list_separator(Char e = '\\',
  136. Char c = ',',Char q = '\"')
  137. : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
  138. escaped_list_separator(string_type e, string_type c, string_type q)
  139. : escape_(e), c_(c), quote_(q), last_(false) { }
  140. void reset() {last_=false;}
  141. template <typename InputIterator, typename Token>
  142. bool operator()(InputIterator& next,InputIterator end,Token& tok) {
  143. bool bInQuote = false;
  144. tok = Token();
  145. if (next == end) {
  146. if (last_) {
  147. last_ = false;
  148. return true;
  149. }
  150. else
  151. return false;
  152. }
  153. last_ = false;
  154. for (;next != end;++next) {
  155. if (is_escape(*next)) {
  156. do_escape(next,end,tok);
  157. }
  158. else if (is_c(*next)) {
  159. if (!bInQuote) {
  160. // If we are not in quote, then we are done
  161. ++next;
  162. // The last character was a c, that means there is
  163. // 1 more blank field
  164. last_ = true;
  165. return true;
  166. }
  167. else tok+=*next;
  168. }
  169. else if (is_quote(*next)) {
  170. bInQuote=!bInQuote;
  171. }
  172. else {
  173. tok += *next;
  174. }
  175. }
  176. return true;
  177. }
  178. };
  179. //===========================================================================
  180. // The classes here are used by offset_separator and char_separator to implement
  181. // faster assigning of tokens using assign instead of +=
  182. namespace tokenizer_detail {
  183. //===========================================================================
  184. // Tokenizer was broken for wide character separators, at least on Windows, since
  185. // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts
  186. // if higher values are passed in. The traits extension class should take care of this.
  187. // Assuming that the conditional will always get optimized out in the function
  188. // implementations, argument types are not a problem since both forms of character classifiers
  189. // expect an int.
  190. #if !defined(BOOST_NO_CWCTYPE)
  // Character classification on top of a character traits class, selected by
  // the size N of the character type.
  //
  // Primary template: used for every character size other than 1 and
  // forwards to the wide-character classifiers.
  template<typename traits, int N>
  struct traits_extension_details : public traits {
    typedef typename traits::char_type char_type;

    // Returns true when std::iswspace reports c as whitespace.
    static bool isspace(char_type c)
    {
      return std::iswspace(c) != 0;
    }

    // Returns true when std::iswpunct reports c as punctuation.
    static bool ispunct(char_type c)
    {
      return std::iswpunct(c) != 0;
    }
  };

  // Specialization for one-byte character types: forwards to the narrow
  // classifiers from <cctype>.
  //
  // The argument is converted to unsigned char first. std::isspace and
  // std::ispunct have undefined behaviour for values that are neither EOF
  // nor representable as unsigned char, which is what a negative plain char
  // (any byte >= 0x80 where char is signed) would otherwise produce.
  template<typename traits>
  struct traits_extension_details<traits, 1> : public traits {
    typedef typename traits::char_type char_type;

    // Returns true when std::isspace reports c as whitespace.
    static bool isspace(char_type c)
    {
      return std::isspace(static_cast<unsigned char>(c)) != 0;
    }

    // Returns true when std::ispunct reports c as punctuation.
    static bool ispunct(char_type c)
    {
      return std::ispunct(static_cast<unsigned char>(c)) != 0;
    }
  };
  215. #endif
  216. // In case there is no cwctype header, we implement the checks manually.
  217. // We make use of the fact that the tested categories should fit in ASCII.
  218. template<typename traits>
  219. struct traits_extension : public traits {
  220. typedef typename traits::char_type char_type;
  221. static bool isspace(char_type c)
  222. {
  223. #if !defined(BOOST_NO_CWCTYPE)
  224. return traits_extension_details<traits, sizeof(char_type)>::isspace(c);
  225. #else
  226. return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0;
  227. #endif
  228. }
  229. static bool ispunct(char_type c)
  230. {
  231. #if !defined(BOOST_NO_CWCTYPE)
  232. return traits_extension_details<traits, sizeof(char_type)>::ispunct(c);
  233. #else
  234. return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0;
  235. #endif
  236. }
  237. };
  238. // The assign_or_plus_equal struct contains functions that implement
  239. // assign, +=, and clearing based on the iterator type. The
  240. // generic case does nothing for plus_equal and clearing, while
  241. // passing through the call for assign.
  242. //
  243. // When an input iterator is being used, the situation is reversed.
  244. // The assign method does nothing, plus_equal invokes operator +=,
  245. // and the clearing method sets the supplied token to the default
  246. // token constructor's result.
  247. //
  248. template<class IteratorTag>
  249. struct assign_or_plus_equal {
  250. template<class Iterator, class Token>
  251. static void assign(Iterator b, Iterator e, Token &t) {
  252. #if BOOST_WORKAROUND(BOOST_MSVC, < 1300) &&\
  253. BOOST_WORKAROUND(__SGI_STL_PORT, < 0x500) &&\
  254. defined(_STLP_DEBUG) &&\
  255. (defined(_STLP_USE_DYNAMIC_LIB) || defined(_DLL))
  256. // Problem with string::assign for msvc-stlport in debug mode: the
  257. // linker tries to import the templatized version of this memfun,
  258. // which is obviously not exported.
  259. // See http://www.stlport.com/dcforum/DCForumID6/1763.html for details.
  260. t = Token();
  261. while(b != e) t += *b++;
  262. #else
  263. t.assign(b, e);
  264. #endif
  265. }
  266. template<class Token, class Value>
  267. static void plus_equal(Token &, const Value &) { }
  268. // If we are doing an assign, there is no need for the
  269. // the clear.
  270. //
  271. template<class Token>
  272. static void clear(Token &) { }
  273. };
  274. template <>
  275. struct assign_or_plus_equal<std::input_iterator_tag> {
  276. template<class Iterator, class Token>
  277. static void assign(Iterator b, Iterator e, Token &t) { }
  278. template<class Token, class Value>
  279. static void plus_equal(Token &t, const Value &v) {
  280. t += v;
  281. }
  282. template<class Token>
  283. static void clear(Token &t) {
  284. t = Token();
  285. }
  286. };
  // Category holder that always reports std::random_access_iterator_tag,
  // whatever type it is instantiated with.
  template<class Pointer>
  struct pointer_iterator_category {
    typedef std::random_access_iterator_tag type;
  };
  // Category holder that reports the nested iterator_category typedef of a
  // class-type iterator.
  template<class ClassIterator>
  struct class_iterator_category {
    typedef typename ClassIterator::iterator_category type;
  };
  295. // This portably gets the iterator_tag without partial template specialization
  296. template<class Iterator>
  297. struct get_iterator_category{
  298. typedef typename mpl::if_<is_pointer<Iterator>,
  299. pointer_iterator_category<Iterator>,
  300. class_iterator_category<Iterator>
  301. >::type cat;
  302. typedef typename cat::type iterator_category;
  303. };
  304. } // namespace tokenizer_detail
  305. //===========================================================================
  306. // The offset_separator class, which is a model of TokenizerFunction.
  307. // Offset breaks a string into tokens based on a range of offsets
  308. class offset_separator {
  309. private:
  310. std::vector<int> offsets_;
  311. unsigned int current_offset_;
  312. bool wrap_offsets_;
  313. bool return_partial_last_;
  314. public:
  315. template <typename Iter>
  316. offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
  317. bool return_partial_last = true)
  318. : offsets_(begin,end), current_offset_(0),
  319. wrap_offsets_(wrap_offsets),
  320. return_partial_last_(return_partial_last) { }
  321. offset_separator()
  322. : offsets_(1,1), current_offset_(),
  323. wrap_offsets_(true), return_partial_last_(true) { }
  324. void reset() {
  325. current_offset_ = 0;
  326. }
  327. template <typename InputIterator, typename Token>
  328. bool operator()(InputIterator& next, InputIterator end, Token& tok)
  329. {
  330. typedef tokenizer_detail::assign_or_plus_equal<
  331. BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
  332. InputIterator
  333. >::iterator_category
  334. > assigner;
  335. BOOST_ASSERT(!offsets_.empty());
  336. assigner::clear(tok);
  337. InputIterator start(next);
  338. if (next == end)
  339. return false;
  340. if (current_offset_ == offsets_.size())
  341. {
  342. if (wrap_offsets_)
  343. current_offset_=0;
  344. else
  345. return false;
  346. }
  347. int c = offsets_[current_offset_];
  348. int i = 0;
  349. for (; i < c; ++i) {
  350. if (next == end)break;
  351. assigner::plus_equal(tok,*next++);
  352. }
  353. assigner::assign(start,next,tok);
  354. if (!return_partial_last_)
  355. if (i < (c-1) )
  356. return false;
  357. ++current_offset_;
  358. return true;
  359. }
  360. };
  361. //===========================================================================
  362. // The char_separator class breaks a sequence of characters into
  363. // tokens based on the character delimiters (very much like bad old
  364. // strtok). A delimiter character can either be kept or dropped. A
  365. // kept delimiter shows up as an output token, whereas a dropped
  366. // delimiter does not.
  367. // This class replaces the char_delimiters_separator class. The
  368. // constructor for the char_delimiters_separator class was too
  369. // confusing and needed to be deprecated. However, because of the
  370. // default arguments to the constructor, adding the new constructor
  371. // would cause ambiguity, so instead I deprecated the whole class.
  372. // The implementation of the class was also simplified considerably.
  373. enum empty_token_policy { drop_empty_tokens, keep_empty_tokens };
  374. // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
  375. template <typename Char,
  376. typename Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
  377. class char_separator
  378. {
  379. typedef tokenizer_detail::traits_extension<Tr> Traits;
  380. typedef std::basic_string<Char,Tr> string_type;
  381. public:
  382. explicit
  383. char_separator(const Char* dropped_delims,
  384. const Char* kept_delims = 0,
  385. empty_token_policy empty_tokens = drop_empty_tokens)
  386. : m_dropped_delims(dropped_delims),
  387. m_use_ispunct(false),
  388. m_use_isspace(false),
  389. m_empty_tokens(empty_tokens),
  390. m_output_done(false)
  391. {
  392. // Borland workaround
  393. if (kept_delims)
  394. m_kept_delims = kept_delims;
  395. }
  396. // use ispunct() for kept delimiters and isspace for dropped.
  397. explicit
  398. char_separator()
  399. : m_use_ispunct(true),
  400. m_use_isspace(true),
  401. m_empty_tokens(drop_empty_tokens) { }
  402. void reset() { }
  403. template <typename InputIterator, typename Token>
  404. bool operator()(InputIterator& next, InputIterator end, Token& tok)
  405. {
  406. typedef tokenizer_detail::assign_or_plus_equal<
  407. BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
  408. InputIterator
  409. >::iterator_category
  410. > assigner;
  411. assigner::clear(tok);
  412. // skip past all dropped_delims
  413. if (m_empty_tokens == drop_empty_tokens)
  414. for (; next != end && is_dropped(*next); ++next)
  415. { }
  416. InputIterator start(next);
  417. if (m_empty_tokens == drop_empty_tokens) {
  418. if (next == end)
  419. return false;
  420. // if we are on a kept_delims move past it and stop
  421. if (is_kept(*next)) {
  422. assigner::plus_equal(tok,*next);
  423. ++next;
  424. } else
  425. // append all the non delim characters
  426. for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
  427. assigner::plus_equal(tok,*next);
  428. }
  429. else { // m_empty_tokens == keep_empty_tokens
  430. // Handle empty token at the end
  431. if (next == end)
  432. {
  433. if (m_output_done == false)
  434. {
  435. m_output_done = true;
  436. assigner::assign(start,next,tok);
  437. return true;
  438. }
  439. else
  440. return false;
  441. }
  442. if (is_kept(*next)) {
  443. if (m_output_done == false)
  444. m_output_done = true;
  445. else {
  446. assigner::plus_equal(tok,*next);
  447. ++next;
  448. m_output_done = false;
  449. }
  450. }
  451. else if (m_output_done == false && is_dropped(*next)) {
  452. m_output_done = true;
  453. }
  454. else {
  455. if (is_dropped(*next))
  456. start=++next;
  457. for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
  458. assigner::plus_equal(tok,*next);
  459. m_output_done = true;
  460. }
  461. }
  462. assigner::assign(start,next,tok);
  463. return true;
  464. }
  465. private:
  466. string_type m_kept_delims;
  467. string_type m_dropped_delims;
  468. bool m_use_ispunct;
  469. bool m_use_isspace;
  470. empty_token_policy m_empty_tokens;
  471. bool m_output_done;
  472. bool is_kept(Char E) const
  473. {
  474. if (m_kept_delims.length())
  475. return m_kept_delims.find(E) != string_type::npos;
  476. else if (m_use_ispunct) {
  477. return Traits::ispunct(E) != 0;
  478. } else
  479. return false;
  480. }
  481. bool is_dropped(Char E) const
  482. {
  483. if (m_dropped_delims.length())
  484. return m_dropped_delims.find(E) != string_type::npos;
  485. else if (m_use_isspace) {
  486. return Traits::isspace(E) != 0;
  487. } else
  488. return false;
  489. }
  490. };
  491. //===========================================================================
  // The following class is DEPRECATED, use class char_separator instead.
  //
  // The char_delimiters_separator class, which is a model of
  // TokenizerFunction. char_delimiters_separator breaks a string
  // into tokens based on character delimiters. There are 2 types of
  // delimiters. Returnable delimiters can be returned as
  // tokens. These are often punctuation. Nonreturnable delimiters
  // cannot be returned as tokens. These are often whitespace.
  500. // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
  501. template <class Char,
  502. class Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
  503. class char_delimiters_separator {
  504. private:
  505. typedef tokenizer_detail::traits_extension<Tr> Traits;
  506. typedef std::basic_string<Char,Tr> string_type;
  507. string_type returnable_;
  508. string_type nonreturnable_;
  509. bool return_delims_;
  510. bool no_ispunct_;
  511. bool no_isspace_;
  512. bool is_ret(Char E)const
  513. {
  514. if (returnable_.length())
  515. return returnable_.find(E) != string_type::npos;
  516. else{
  517. if (no_ispunct_) {return false;}
  518. else{
  519. int r = Traits::ispunct(E);
  520. return r != 0;
  521. }
  522. }
  523. }
  524. bool is_nonret(Char E)const
  525. {
  526. if (nonreturnable_.length())
  527. return nonreturnable_.find(E) != string_type::npos;
  528. else{
  529. if (no_isspace_) {return false;}
  530. else{
  531. int r = Traits::isspace(E);
  532. return r != 0;
  533. }
  534. }
  535. }
  536. public:
  537. explicit char_delimiters_separator(bool return_delims = false,
  538. const Char* returnable = 0,
  539. const Char* nonreturnable = 0)
  540. : returnable_(returnable ? returnable : string_type().c_str()),
  541. nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()),
  542. return_delims_(return_delims), no_ispunct_(returnable!=0),
  543. no_isspace_(nonreturnable!=0) { }
  544. void reset() { }
  545. public:
  546. template <typename InputIterator, typename Token>
  547. bool operator()(InputIterator& next, InputIterator end,Token& tok) {
  548. tok = Token();
  549. // skip past all nonreturnable delims
  550. // skip past the returnable only if we are not returning delims
  551. for (;next!=end && ( is_nonret(*next) || (is_ret(*next)
  552. && !return_delims_ ) );++next) { }
  553. if (next == end) {
  554. return false;
  555. }
  556. // if we are to return delims and we are one a returnable one
  557. // move past it and stop
  558. if (is_ret(*next) && return_delims_) {
  559. tok+=*next;
  560. ++next;
  561. }
  562. else
  563. // append all the non delim characters
  564. for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next)
  565. tok+=*next;
  566. return true;
  567. }
  568. };
  569. } //namespace boost
  570. #endif