PageRenderTime 86ms CodeModel.GetById 13ms app.highlight 67ms RepoModel.GetById 1ms app.codeStats 0ms

/Src/Dependencies/Boost/boost/date_time/format_date_parser.hpp

http://hadesmem.googlecode.com/
C++ Header | 743 lines | 572 code | 66 blank | 105 comment | 114 complexity | ed02f41f8ea40ef09a1b4dabccb81f3a MD5 | raw file
  1
  2#ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__
  3#define DATE_TIME_FORMAT_DATE_PARSER_HPP__
  4
  5/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
  6 * Use, modification and distribution is subject to the 
  7 * Boost Software License, Version 1.0. (See accompanying
  8 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
  9 * Author: Jeff Garland, Bart Garst
 10 * $Date: 2009-06-04 18:24:49 +1000 (Thu, 04 Jun 2009) $
 11 */
 12
 13
 14#include "boost/lexical_cast.hpp"
 15#include "boost/date_time/string_parse_tree.hpp"
 16#include "boost/date_time/strings_from_facet.hpp"
 17#include "boost/date_time/special_values_parser.hpp"
 18#include <string>
 19#include <vector>
 20#include <sstream>
 21#include <iterator>
 22#ifndef BOOST_NO_STDC_NAMESPACE
 23#  include <cctype>
 24#else
 25#  include <ctype.h>
 26#endif
 27
 28#ifdef BOOST_NO_STDC_NAMESPACE
 29namespace std {
 30  using ::isspace;
 31  using ::isdigit;
 32}
 33#endif
 34namespace boost { namespace date_time {
 35  
 36//! Helper function for parsing fixed length strings into integers
 37/*! Will consume 'length' number of characters from stream. Consumed 
 38 * character are transfered to parse_match_result struct. 
 39 * Returns '-1' if no number can be parsed or incorrect number of 
 40 * digits in stream. */
 41template<typename int_type, typename charT>
 42inline
 43int_type
 44fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
 45                    std::istreambuf_iterator<charT>& stream_end,
 46                    parse_match_result<charT>& mr,
 47                    unsigned int length,
 48                    const charT& fill_char)
 49{
 50  //typedef std::basic_string<charT>  string_type;
 51  unsigned int j = 0;
 52  //string_type s;
 53  while (j < length && itr != stream_end && 
 54      (std::isdigit(*itr) || *itr == fill_char)) {
 55    if(*itr == fill_char) {
 56      /* Since a fill_char can be anything, we convert it to a zero. 
 57       * lexical_cast will behave predictably when zero is used as fill. */
 58      mr.cache += ('0'); 
 59    }
 60    else {
 61      mr.cache += (*itr);
 62    }
 63    itr++;
 64    j++;
 65  }
 66  int_type i = -1;
 67  // mr.cache will hold leading zeros. size() tells us when input is too short.
 68  if(mr.cache.size() < length) {
 69    return i;
 70  }
 71  try {
 72    i = boost::lexical_cast<int_type>(mr.cache);
 73  }catch(bad_lexical_cast&){
 74    // we want to return -1 if the cast fails so nothing to do here
 75  }
 76  return i;
 77}
 78
 79//! Helper function for parsing fixed length strings into integers
 80/*! Will consume 'length' number of characters from stream. Consumed 
 81 * character are transfered to parse_match_result struct. 
 82 * Returns '-1' if no number can be parsed or incorrect number of 
 83 * digits in stream. */
 84template<typename int_type, typename charT>
 85inline
 86int_type
 87fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
 88                    std::istreambuf_iterator<charT>& stream_end,
 89                    parse_match_result<charT>& mr,
 90                    unsigned int length)
 91{
 92  return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0');
 93}
 94
 95//! Helper function for parsing varied length strings into integers
 96/*! Will consume 'max_length' characters from stream only if those 
 97 * characters are digits. Returns '-1' if no number can be parsed. 
 98 * Will not parse a number preceeded by a '+' or '-'. */
 99template<typename int_type, typename charT>
100inline
101int_type
102var_string_to_int(std::istreambuf_iterator<charT>& itr,
103                  const std::istreambuf_iterator<charT>& stream_end,
104                  unsigned int max_length)
105{
106  typedef std::basic_string<charT>  string_type;
107  unsigned int j = 0;
108  string_type s;
109  while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) {
110    s += (*itr);
111    ++itr;
112    ++j;
113  }
114  int_type i = -1;
115  if(!s.empty()) {
116    i = boost::lexical_cast<int_type>(s);
117  }
118  return i;
119}
120
121
122//! Class with generic date parsing using a format string
123/*! The following is the set of recognized format specifiers
124 -  %a - Short weekday name
125 -  %A - Long weekday name
126 -  %b - Abbreviated month name
127 -  %B - Full month name
128 -  %d - Day of the month as decimal 01 to 31
129 -  %j - Day of year as decimal from 001 to 366
130 -  %m - Month name as a decimal 01 to 12
131 -  %U - Week number 00 to 53 with first Sunday as the first day of week 1?
132 -  %w - Weekday as decimal number 0 to 6 where Sunday == 0
133 -  %W - Week number 00 to 53 where Monday is first day of week 1
134 -  %x - facet default date representation
135 -  %y - Year without the century - eg: 04 for 2004
136 -  %Y - Year with century 
137
138 The weekday specifiers (%a and %A) do not add to the date construction,
139 but they provide a way to skip over the weekday names for formats that
140 provide them.
141
142 todo -- Another interesting feature that this approach could provide is
143         an option to fill in any missing fields with the current values
144         from the clock.  So if you have %m-%d the parser would detect
145         the missing year value and fill it in using the clock. 
146
147 todo -- What to do with the %x.  %x in the classic facet is just bad...
148
149 */
150template<class date_type, typename charT>
151class format_date_parser
152{
153 public:
154  typedef std::basic_string<charT>        string_type;
155  typedef std::basic_istringstream<charT>  stringstream_type;
156  typedef std::istreambuf_iterator<charT> stream_itr_type;
157  typedef typename string_type::const_iterator const_itr;
158  typedef typename date_type::year_type  year_type;
159  typedef typename date_type::month_type month_type;
160  typedef typename date_type::day_type day_type;
161  typedef typename date_type::duration_type duration_type;
162  typedef typename date_type::day_of_week_type day_of_week_type;
163  typedef typename date_type::day_of_year_type day_of_year_type;
164  typedef string_parse_tree<charT> parse_tree_type;
165  typedef typename parse_tree_type::parse_match_result_type match_results;
166  typedef std::vector<std::basic_string<charT> > input_collection_type;
167
168  // TODO sv_parser uses its default constructor - write the others
169  
170  format_date_parser(const string_type& format_str,
171                     const input_collection_type& month_short_names,
172                     const input_collection_type& month_long_names,
173                     const input_collection_type& weekday_short_names,
174                     const input_collection_type& weekday_long_names) :
175    m_format(format_str),
176    m_month_short_names(month_short_names, 1),
177    m_month_long_names(month_long_names, 1),
178    m_weekday_short_names(weekday_short_names),
179    m_weekday_long_names(weekday_long_names)
180  {}
181  
182  format_date_parser(const string_type& format_str,
183                     const std::locale& locale) :
184    m_format(format_str),
185    m_month_short_names(gather_month_strings<charT>(locale), 1),
186    m_month_long_names(gather_month_strings<charT>(locale, false), 1),
187    m_weekday_short_names(gather_weekday_strings<charT>(locale)),
188    m_weekday_long_names(gather_weekday_strings<charT>(locale, false))
189  {}
190
191  format_date_parser(const format_date_parser<date_type,charT>& fdp)
192  {
193    this->m_format = fdp.m_format;
194    this->m_month_short_names = fdp.m_month_short_names;
195    this->m_month_long_names = fdp.m_month_long_names;
196    this->m_weekday_short_names = fdp.m_weekday_short_names;
197    this->m_weekday_long_names = fdp.m_weekday_long_names;
198  }
199  
200  string_type format() const
201  {
202    return m_format;
203  }
204
205  void format(string_type format_str)
206  {
207    m_format = format_str;
208  }
209
210  void short_month_names(const input_collection_type& month_names)
211  {
212    m_month_short_names = parse_tree_type(month_names, 1);
213  }
214  void long_month_names(const input_collection_type& month_names)
215  {
216    m_month_long_names = parse_tree_type(month_names, 1);
217  }
218  void short_weekday_names(const input_collection_type& weekday_names)
219  {
220    m_weekday_short_names = parse_tree_type(weekday_names);
221  }
222  void long_weekday_names(const input_collection_type& weekday_names)
223  {
224    m_weekday_long_names = parse_tree_type(weekday_names);
225  }
226
227  date_type
228  parse_date(const string_type& value, 
229             const string_type& format_str,
230             const special_values_parser<date_type,charT>& sv_parser) const
231  {
232    stringstream_type ss(value);
233    stream_itr_type sitr(ss);
234    stream_itr_type stream_end;
235    return parse_date(sitr, stream_end, format_str, sv_parser);
236  }
237
238  date_type
239  parse_date(std::istreambuf_iterator<charT>& sitr, 
240             std::istreambuf_iterator<charT>& stream_end,
241             const special_values_parser<date_type,charT>& sv_parser) const
242  {
243    return parse_date(sitr, stream_end, m_format, sv_parser);
244  }
245
246  /*! Of all the objects that the format_date_parser can parse, only a 
247   * date can be a special value. Therefore, only parse_date checks 
248   * for special_values. */
249  date_type
250  parse_date(std::istreambuf_iterator<charT>& sitr, 
251             std::istreambuf_iterator<charT>& stream_end,
252             string_type format_str,
253             const special_values_parser<date_type,charT>& sv_parser) const
254  {
255    bool use_current_char = false;
256    
257    // skip leading whitespace
258    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
259    charT current_char = *sitr;
260
261    short year(0), month(0), day(0), day_of_year(0);// wkday(0); 
262    /* Initialized the following to their minimum values. These intermediate 
263     * objects are used so we get specific exceptions when part of the input 
264     * is unparsable. 
265     * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/
266    year_type t_year(1400);
267    month_type t_month(1);
268    day_type t_day(1);
269    day_of_week_type wkday(0);
270    
271    
272    const_itr itr(format_str.begin());
273    while (itr != format_str.end() && (sitr != stream_end)) {
274      if (*itr == '%') {
275        itr++;
276        if (*itr != '%') {
277          switch(*itr) {
278          case 'a': 
279            {
280              //this value is just throw away.  It could be used for
281              //error checking potentially, but it isn't helpful in 
282              //actually constructing the date - we just need to get it
283              //out of the stream
284              match_results mr = m_weekday_short_names.match(sitr, stream_end);
285              if(mr.current_match == match_results::PARSE_ERROR) {
286                // check special_values
287                if(sv_parser.match(sitr, stream_end, mr)) {
288                  return date_type(static_cast<special_values>(mr.current_match));
289                }
290              }
291              wkday = mr.current_match;
292              if (mr.has_remaining()) {
293                current_char = mr.last_char();
294                use_current_char = true;
295              }
296              break;
297            }
298          case 'A': 
299            {
300              //this value is just throw away.  It could be used for
301              //error checking potentially, but it isn't helpful in 
302              //actually constructing the date - we just need to get it
303              //out of the stream
304              match_results mr = m_weekday_long_names.match(sitr, stream_end);
305              if(mr.current_match == match_results::PARSE_ERROR) {
306                // check special_values
307                if(sv_parser.match(sitr, stream_end, mr)) {
308                  return date_type(static_cast<special_values>(mr.current_match));
309                }
310              }
311              wkday = mr.current_match;
312              if (mr.has_remaining()) {
313                current_char = mr.last_char();
314                use_current_char = true;
315              }
316              break;
317            }
318          case 'b': 
319            {
320              match_results mr = m_month_short_names.match(sitr, stream_end);
321              if(mr.current_match == match_results::PARSE_ERROR) {
322                // check special_values
323                if(sv_parser.match(sitr, stream_end, mr)) {
324                  return date_type(static_cast<special_values>(mr.current_match));
325                }
326              }
327              t_month = month_type(mr.current_match);
328              if (mr.has_remaining()) {
329                current_char = mr.last_char();
330                use_current_char = true;
331              }
332              break;
333            }
334          case 'B': 
335            {
336              match_results mr = m_month_long_names.match(sitr, stream_end);
337              if(mr.current_match == match_results::PARSE_ERROR) {
338                // check special_values
339                if(sv_parser.match(sitr, stream_end, mr)) {
340                  return date_type(static_cast<special_values>(mr.current_match));
341                }
342              }
343              t_month = month_type(mr.current_match);
344              if (mr.has_remaining()) {
345                current_char = mr.last_char();
346                use_current_char = true;
347              }
348              break;
349            }
350          case 'd': 
351            {
352              match_results mr;
353              day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
354              if(day == -1) {
355                if(sv_parser.match(sitr, stream_end, mr)) {
356                  return date_type(static_cast<special_values>(mr.current_match));
357                }
358              }
359              t_day = day_type(day);
360              break;
361            }
362          case 'e': 
363            {
364              match_results mr;
365              day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' ');
366              if(day == -1) {
367                if(sv_parser.match(sitr, stream_end, mr)) {
368                  return date_type(static_cast<special_values>(mr.current_match));
369                }
370              }
371              t_day = day_type(day);
372              break;
373            }
374          case 'j': 
375            {
376              match_results mr;
377              day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3);
378              if(day_of_year == -1) {
379                if(sv_parser.match(sitr, stream_end, mr)) {
380                  return date_type(static_cast<special_values>(mr.current_match));
381                }
382              }
383              // these next two lines are so we get an exception with bad input
384              day_of_year_type t_day_of_year(1);
385              t_day_of_year = day_of_year_type(day_of_year);
386              break;
387            }
388          case 'm': 
389            {
390              match_results mr;
391              month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
392              if(month == -1) {
393                if(sv_parser.match(sitr, stream_end, mr)) {
394                  return date_type(static_cast<special_values>(mr.current_match));
395                }
396              }
397              t_month = month_type(month);
398              break;
399            }
400          case 'Y': 
401            {
402              match_results mr;
403              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
404              if(year == -1) {
405                if(sv_parser.match(sitr, stream_end, mr)) {
406                  return date_type(static_cast<special_values>(mr.current_match));
407                }
408              }
409              t_year = year_type(year);
410              break;
411            }
412          case 'y': 
413            {
414              match_results mr;
415              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
416              if(year == -1) {
417                if(sv_parser.match(sitr, stream_end, mr)) {
418                  return date_type(static_cast<special_values>(mr.current_match));
419                }
420              }
421              year += 2000; //make 2 digit years in this century
422              t_year = year_type(year);
423              break;
424            }
425          default:
426            {} //ignore those we don't understand
427            
428          }//switch
429          
430        }
431        else { // itr == '%', second consecutive
432          sitr++;
433        }
434        
435        itr++; //advance past format specifier
436      }
437      else {  //skip past chars in format and in buffer
438        itr++;
439        if (use_current_char) {
440          use_current_char = false;
441          current_char = *sitr;
442        }
443        else {
444          sitr++;
445        }
446      }
447    }
448    
449    if (day_of_year > 0) {
450      date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year
451      return d + duration_type(day_of_year);
452    }
453    
454    return date_type(t_year, t_month, t_day); // exceptions were thrown earlier 
455                                        // if input was no good 
456  }
457 
458  //! Throws bad_month if unable to parse
459  month_type
460  parse_month(std::istreambuf_iterator<charT>& sitr, 
461             std::istreambuf_iterator<charT>& stream_end,
462             string_type format_str) const
463  {
464    match_results mr;
465    return parse_month(sitr, stream_end, format_str, mr);
466  }
467 
468  //! Throws bad_month if unable to parse
469  month_type
470  parse_month(std::istreambuf_iterator<charT>& sitr, 
471             std::istreambuf_iterator<charT>& stream_end,
472             string_type format_str,
473             match_results& mr) const
474  {
475    bool use_current_char = false;
476    
477    // skip leading whitespace
478    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
479    charT current_char = *sitr;
480
481    short month(0);
482    
483    const_itr itr(format_str.begin());
484    while (itr != format_str.end() && (sitr != stream_end)) {
485      if (*itr == '%') {
486        itr++;
487        if (*itr != '%') {
488          switch(*itr) {
489          case 'b': 
490            {
491              mr = m_month_short_names.match(sitr, stream_end);
492              month = mr.current_match;
493              if (mr.has_remaining()) {
494                current_char = mr.last_char();
495                use_current_char = true;
496              }
497              break;
498            }
499          case 'B': 
500            {
501              mr = m_month_long_names.match(sitr, stream_end);
502              month = mr.current_match;
503              if (mr.has_remaining()) {
504                current_char = mr.last_char();
505                use_current_char = true;
506              }
507              break;
508            }
509          case 'm': 
510            {
511              month = var_string_to_int<short, charT>(sitr, stream_end, 2);
512              // var_string_to_int returns -1 if parse failed. That will 
513              // cause a bad_month exception to be thrown so we do nothing here
514              break;
515            }
516          default:
517            {} //ignore those we don't understand
518            
519          }//switch
520          
521        }
522        else { // itr == '%', second consecutive
523          sitr++;
524        }
525        
526        itr++; //advance past format specifier
527      }
528      else {  //skip past chars in format and in buffer
529        itr++;
530        if (use_current_char) {
531          use_current_char = false;
532          current_char = *sitr;
533        }
534        else {
535          sitr++;
536        }
537      }
538    }
539    
540    return month_type(month); // throws bad_month exception when values are zero
541  }
542
543  //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse
544  day_type
545  parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr, 
546                         std::istreambuf_iterator<charT>& stream_end) const
547  {
548    // skip leading whitespace
549    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
550
551    return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
552  }
553  //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse
554  day_type
555  parse_day_of_month(std::istreambuf_iterator<charT>& sitr, 
556                     std::istreambuf_iterator<charT>& stream_end) const
557  {
558    // skip leading whitespace
559    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
560
561    //return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
562    match_results mr;
563    return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2));
564  }
565
566  day_of_week_type
567  parse_weekday(std::istreambuf_iterator<charT>& sitr, 
568             std::istreambuf_iterator<charT>& stream_end,
569             string_type format_str) const
570  {
571    match_results mr;
572    return parse_weekday(sitr, stream_end, format_str, mr);
573  }
574  day_of_week_type
575  parse_weekday(std::istreambuf_iterator<charT>& sitr, 
576             std::istreambuf_iterator<charT>& stream_end,
577             string_type format_str,
578             match_results& mr) const
579  {
580    bool use_current_char = false;
581    
582    // skip leading whitespace
583    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
584    charT current_char = *sitr;
585
586    short wkday(0);
587    
588    const_itr itr(format_str.begin());
589    while (itr != format_str.end() && (sitr != stream_end)) {
590      if (*itr == '%') {
591        itr++;
592        if (*itr != '%') {
593          switch(*itr) {
594          case 'a': 
595            {
596              //this value is just throw away.  It could be used for
597              //error checking potentially, but it isn't helpful in 
598              //actually constructing the date - we just need to get it
599              //out of the stream
600              mr = m_weekday_short_names.match(sitr, stream_end);
601              wkday = mr.current_match;
602              if (mr.has_remaining()) {
603                current_char = mr.last_char();
604                use_current_char = true;
605              }
606              break;
607            }
608          case 'A': 
609            {
610              //this value is just throw away.  It could be used for
611              //error checking potentially, but it isn't helpful in 
612              //actually constructing the date - we just need to get it
613              //out of the stream
614              mr = m_weekday_long_names.match(sitr, stream_end);
615              wkday = mr.current_match;
616              if (mr.has_remaining()) {
617                current_char = mr.last_char();
618                use_current_char = true;
619              }
620              break;
621            }
622          case 'w':
623            {
624              // weekday as number 0-6, Sunday == 0
625              wkday = var_string_to_int<short, charT>(sitr, stream_end, 2);
626              break;
627            }
628          default:
629            {} //ignore those we don't understand
630            
631          }//switch
632          
633        }
634        else { // itr == '%', second consecutive
635          sitr++;
636        }
637        
638        itr++; //advance past format specifier
639      }
640      else {  //skip past chars in format and in buffer
641        itr++;
642        if (use_current_char) {
643          use_current_char = false;
644          current_char = *sitr;
645        }
646        else {
647          sitr++;
648        }
649      }
650    }
651    
652    return day_of_week_type(wkday); // throws bad_day_of_month exception 
653                                    // when values are zero
654  }
655  
656  //! throws bad_year if unable to parse
657  year_type
658  parse_year(std::istreambuf_iterator<charT>& sitr, 
659             std::istreambuf_iterator<charT>& stream_end,
660             string_type format_str) const
661  {
662    match_results mr;
663    return parse_year(sitr, stream_end, format_str, mr);
664  }
665
666  //! throws bad_year if unable to parse
667  year_type
668  parse_year(std::istreambuf_iterator<charT>& sitr, 
669             std::istreambuf_iterator<charT>& stream_end,
670             string_type format_str,
671             match_results& mr) const
672  {
673    bool use_current_char = false;
674    
675    // skip leading whitespace
676    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } 
677    charT current_char = *sitr;
678
679    unsigned short year(0);
680    
681    const_itr itr(format_str.begin());
682    while (itr != format_str.end() && (sitr != stream_end)) {
683      if (*itr == '%') {
684        itr++;
685        if (*itr != '%') {
686          //match_results mr;
687          switch(*itr) {
688          case 'Y':
689            {
690              // year from 4 digit string
691              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
692              break;
693            }
694          case 'y':
695            {
696              // year from 2 digit string (no century)
697              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
698              year += 2000; //make 2 digit years in this century
699              break;
700            }
701          default:
702            {} //ignore those we don't understand
703            
704          }//switch
705          
706        }
707        else { // itr == '%', second consecutive
708          sitr++;
709        }
710        
711        itr++; //advance past format specifier
712      }
713      else {  //skip past chars in format and in buffer
714        itr++;
715        if (use_current_char) {
716          use_current_char = false;
717          current_char = *sitr;
718        }
719        else {
720          sitr++;
721        }
722      }
723    }
724    
725    return year_type(year); // throws bad_year exception when values are zero
726  }
727  
728  
729 private:
730  string_type m_format;
731  parse_tree_type m_month_short_names;
732  parse_tree_type m_month_long_names;
733  parse_tree_type m_weekday_short_names;
734  parse_tree_type m_weekday_long_names;
735
736};
737
738} } //namespace
739
740#endif
741
742
743