PageRenderTime 61ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/numpy/files/numpy/core/_mx_datetime_parser.py

http://sawbuck.googlecode.com/
Python | 962 lines | 858 code | 38 blank | 66 comment | 12 complexity | 4262b2b6b346dec8958ce8ae9b55e039 MD5 | raw file
Possible License(s): LGPL-2.1, CPL-1.0, Apache-2.0, CC-BY-SA-3.0, BSD-3-Clause, GPL-3.0, GPL-2.0
  1. #-*- coding: latin-1 -*-
  2. """
  3. Date/Time string parsing module.
  4. This code is a slightly modified version of Parser.py found in mx.DateTime
  5. version 3.0.0
  6. As such, it is subject to the terms of the eGenix public license version 1.1.0.
  7. FIXME: Add license.txt to NumPy
  8. """
  9. __all__ = ['date_from_string', 'datetime_from_string']
  10. import types
  11. import re
  12. import datetime as dt
  13. class RangeError(Exception): pass
# Enable to produce debugging output (extra progress and validation
# messages printed by _test()).
_debug = 0
  16. # REs for matching date and time parts in a string; These REs
  17. # parse a superset of ARPA, ISO, American and European style dates.
  18. # Timezones are supported via the Timezone submodule.
  19. _year = '(?P<year>-?\d+\d(?!:))'
  20. _fullyear = '(?P<year>-?\d+\d\d(?!:))'
  21. _year_epoch = '(?:' + _year + '(?P<epoch> *[ABCDE\.]+)?)'
  22. _fullyear_epoch = '(?:' + _fullyear + '(?P<epoch> *[ABCDE\.]+)?)'
  23. _relyear = '(?:\((?P<relyear>[-+]?\d+)\))'
  24. _month = '(?P<month>\d?\d(?!:))'
  25. _fullmonth = '(?P<month>\d\d(?!:))'
  26. _litmonth = ('(?P<litmonth>'
  27. 'jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|'
  28. 'mär|mae|mrz|mai|okt|dez|'
  29. 'fev|avr|juin|juil|aou|aoű|déc|'
  30. 'ene|abr|ago|dic|'
  31. 'out'
  32. ')[a-z,\.;]*')
  33. litmonthtable = {
  34. # English
  35. 'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6,
  36. 'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12,
  37. # German
  38. 'mär':3, 'mae':3, 'mrz':3, 'mai':5, 'okt':10, 'dez':12,
  39. # French
  40. 'fev':2, 'avr':4, 'juin':6, 'juil':7, 'aou':8, 'aoű':8,
  41. 'déc':12,
  42. # Spanish
  43. 'ene':1, 'abr':4, 'ago':8, 'dic':12,
  44. # Portuguese
  45. 'out':10,
  46. }
  47. _relmonth = '(?:\((?P<relmonth>[-+]?\d+)\))'
  48. _day = '(?P<day>\d?\d(?!:))'
  49. _usday = '(?P<day>\d?\d(?!:))(?:st|nd|rd|th|[,\.;])?'
  50. _fullday = '(?P<day>\d\d(?!:))'
  51. _litday = ('(?P<litday>'
  52. 'mon|tue|wed|thu|fri|sat|sun|'
  53. 'die|mit|don|fre|sam|son|'
  54. 'lun|mar|mer|jeu|ven|sam|dim|'
  55. 'mie|jue|vie|sab|dom|'
  56. 'pri|seg|ter|cua|qui'
  57. ')[a-z]*')
  58. litdaytable = {
  59. # English
  60. 'mon':0, 'tue':1, 'wed':2, 'thu':3, 'fri':4, 'sat':5, 'sun':6,
  61. # German
  62. 'die':1, 'mit':2, 'don':3, 'fre':4, 'sam':5, 'son':6,
  63. # French
  64. 'lun':0, 'mar':1, 'mer':2, 'jeu':3, 'ven':4, 'sam':5, 'dim':6,
  65. # Spanish
  66. 'mie':2, 'jue':3, 'vie':4, 'sab':5, 'dom':6,
  67. # Portuguese
  68. 'pri':0, 'seg':1, 'ter':2, 'cua':3, 'qui':4,
  69. }
  70. _relday = '(?:\((?P<relday>[-+]?\d+)\))'
  71. _hour = '(?P<hour>[012]?\d)'
  72. _minute = '(?P<minute>[0-6]\d)'
  73. _second = '(?P<second>[0-6]\d(?:[.,]\d+)?)'
  74. _days = '(?P<days>\d*\d(?:[.,]\d+)?)'
  75. _hours = '(?P<hours>\d*\d(?:[.,]\d+)?)'
  76. _minutes = '(?P<minutes>\d*\d(?:[.,]\d+)?)'
  77. _seconds = '(?P<seconds>\d*\d(?:[.,]\d+)?)'
  78. _reldays = '(?:\((?P<reldays>[-+]?\d+(?:[.,]\d+)?)\))'
  79. _relhours = '(?:\((?P<relhours>[-+]?\d+(?:[.,]\d+)?)\))'
  80. _relminutes = '(?:\((?P<relminutes>[-+]?\d+(?:[.,]\d+)?)\))'
  81. _relseconds = '(?:\((?P<relseconds>[-+]?\d+(?:[.,]\d+)?)\))'
  82. _sign = '(?:(?P<sign>[-+]) *)'
  83. _week = 'W(?P<week>\d?\d)'
  84. _zone = '(?P<zone>[A-Z]+|[+-]\d\d?:?(?:\d\d)?)'
  85. _ampm = '(?P<ampm>[ap][m.]+)'
  86. _time = (_hour + ':' + _minute + '(?::' + _second + '|[^:]|$) *'
  87. + _ampm + '? *' + _zone + '?')
  88. _isotime = _hour + ':?' + _minute + ':?' + _second + '? *' + _zone + '?'
  89. _yeardate = _year
  90. _weekdate = _year + '-?(?:' + _week + '-?' + _day + '?)?'
  91. _eurodate = _day + '\.' + _month + '\.' + _year_epoch + '?'
  92. _usdate = _month + '/' + _day + '(?:/' + _year_epoch + '|[^/]|$)'
  93. _altusdate = _month + '-' + _day + '-' + _fullyear_epoch
  94. _isodate = _year + '-' + _month + '-?' + _day + '?(?!:)'
  95. _altisodate = _year + _fullmonth + _fullday + '(?!:)'
  96. _usisodate = _fullyear + '/' + _fullmonth + '/' + _fullday
  97. _litdate = ('(?:'+ _litday + ',? )? *' +
  98. _usday + ' *' +
  99. '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ] *' +
  100. _year_epoch + '?')
  101. _altlitdate = ('(?:'+ _litday + ',? )? *' +
  102. _litmonth + '[ ,.a-z]+' +
  103. _usday +
  104. '(?:[ a-z]+' + _year_epoch + ')?')
  105. _eurlitdate = ('(?:'+ _litday + ',?[ a-z]+)? *' +
  106. '(?:'+ _usday + '[ a-z]+)? *' +
  107. _litmonth +
  108. '(?:[ ,.a-z]+' + _year_epoch + ')?')
  109. _relany = '[*%?a-zA-Z]+'
  110. _relisodate = ('(?:(?:' + _relany + '|' + _year + '|' + _relyear + ')-' +
  111. '(?:' + _relany + '|' + _month + '|' + _relmonth + ')-' +
  112. '(?:' + _relany + '|' + _day + '|' + _relday + '))')
  113. _asctime = ('(?:'+ _litday + ',? )? *' +
  114. _usday + ' *' +
  115. '[- ] *(?:' + _litmonth + '|'+ _month +') *[- ]' +
  116. '(?:[0-9: ]+)' +
  117. _year_epoch + '?')
  118. _relisotime = ('(?:(?:' + _relany + '|' + _hour + '|' + _relhours + '):' +
  119. '(?:' + _relany + '|' + _minute + '|' + _relminutes + ')' +
  120. '(?::(?:' + _relany + '|' + _second + '|' + _relseconds + '))?)')
  121. _isodelta1 = (_sign + '?' +
  122. _days + ':' + _hours + ':' + _minutes + ':' + _seconds)
  123. _isodelta2 = (_sign + '?' +
  124. _hours + ':' + _minutes + ':' + _seconds)
  125. _isodelta3 = (_sign + '?' +
  126. _hours + ':' + _minutes)
  127. _litdelta = (_sign + '?' +
  128. '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
  129. '(?:' + _hours + ' *h[a-z]*[,; ]*)?' +
  130. '(?:' + _minutes + ' *m[a-z]*[,; ]*)?' +
  131. '(?:' + _seconds + ' *s[a-z]*[,; ]*)?')
  132. _litdelta2 = (_sign + '?' +
  133. '(?:' + _days + ' *d[a-z]*[,; ]*)?' +
  134. _hours + ':' + _minutes + '(?::' + _seconds + ')?')
  135. _timeRE = re.compile(_time, re.I)
  136. _isotimeRE = re.compile(_isotime, re.I)
  137. _isodateRE = re.compile(_isodate, re.I)
  138. _altisodateRE = re.compile(_altisodate, re.I)
  139. _usisodateRE = re.compile(_usisodate, re.I)
  140. _yeardateRE = re.compile(_yeardate, re.I)
  141. _eurodateRE = re.compile(_eurodate, re.I)
  142. _usdateRE = re.compile(_usdate, re.I)
  143. _altusdateRE = re.compile(_altusdate, re.I)
  144. _litdateRE = re.compile(_litdate, re.I)
  145. _altlitdateRE = re.compile(_altlitdate, re.I)
  146. _eurlitdateRE = re.compile(_eurlitdate, re.I)
  147. _relisodateRE = re.compile(_relisodate, re.I)
  148. _asctimeRE = re.compile(_asctime, re.I)
  149. _isodelta1RE = re.compile(_isodelta1)
  150. _isodelta2RE = re.compile(_isodelta2)
  151. _isodelta3RE = re.compile(_isodelta3)
  152. _litdeltaRE = re.compile(_litdelta)
  153. _litdelta2RE = re.compile(_litdelta2)
  154. _relisotimeRE = re.compile(_relisotime, re.I)
  155. # Available date parsers
  156. _date_formats = ('euro',
  157. 'usiso', 'us', 'altus',
  158. 'iso', 'altiso',
  159. 'lit', 'altlit', 'eurlit',
  160. 'year', 'unknown')
  161. # Available time parsers
  162. _time_formats = ('standard',
  163. 'iso',
  164. 'unknown')
  165. _zoneoffset = ('(?:'
  166. '(?P<zonesign>[+-])?'
  167. '(?P<hours>\d\d?)'
  168. ':?'
  169. '(?P<minutes>\d\d)?'
  170. '(?P<extra>\d+)?'
  171. ')'
  172. )
  173. _zoneoffsetRE = re.compile(_zoneoffset)
  174. _zonetable = {
  175. # Timezone abbreviations
  176. # Std Summer
  177. # Standards
  178. 'UT':0,
  179. 'UTC':0,
  180. 'GMT':0,
  181. # A few common timezone abbreviations
  182. 'CET':1, 'CEST':2, 'CETDST':2, # Central European
  183. 'MET':1, 'MEST':2, 'METDST':2, # Mean European
  184. 'MEZ':1, 'MESZ':2, # Mitteleuropäische Zeit
  185. 'EET':2, 'EEST':3, 'EETDST':3, # Eastern Europe
  186. 'WET':0, 'WEST':1, 'WETDST':1, # Western Europe
  187. 'MSK':3, 'MSD':4, # Moscow
  188. 'IST':5.5, # India
  189. 'JST':9, # Japan
  190. 'KST':9, # Korea
  191. 'HKT':8, # Hong Kong
  192. # US time zones
  193. 'AST':-4, 'ADT':-3, # Atlantic
  194. 'EST':-5, 'EDT':-4, # Eastern
  195. 'CST':-6, 'CDT':-5, # Central
  196. 'MST':-7, 'MDT':-6, # Midwestern
  197. 'PST':-8, 'PDT':-7, # Pacific
  198. # Australian time zones
  199. 'CAST':9.5, 'CADT':10.5, # Central
  200. 'EAST':10, 'EADT':11, # Eastern
  201. 'WAST':8, 'WADT':9, # Western
  202. 'SAST':9.5, 'SADT':10.5, # Southern
  203. # US military time zones
  204. 'Z': 0,
  205. 'A': 1,
  206. 'B': 2,
  207. 'C': 3,
  208. 'D': 4,
  209. 'E': 5,
  210. 'F': 6,
  211. 'G': 7,
  212. 'H': 8,
  213. 'I': 9,
  214. 'K': 10,
  215. 'L': 11,
  216. 'M': 12,
  217. 'N':-1,
  218. 'O':-2,
  219. 'P':-3,
  220. 'Q':-4,
  221. 'R':-5,
  222. 'S':-6,
  223. 'T':-7,
  224. 'U':-8,
  225. 'V':-9,
  226. 'W':-10,
  227. 'X':-11,
  228. 'Y':-12
  229. }
  230. def utc_offset(zone):
  231. """ utc_offset(zonestring)
  232. Return the UTC time zone offset in minutes.
  233. zone must be string and can either be given as +-HH:MM,
  234. +-HHMM, +-HH numeric offset or as time zone
  235. abbreviation. Daylight saving time must be encoded into the
  236. zone offset.
  237. Timezone abbreviations are treated case-insensitive.
  238. """
  239. if not zone:
  240. return 0
  241. uzone = zone.upper()
  242. if uzone in _zonetable:
  243. return _zonetable[uzone]*60
  244. offset = _zoneoffsetRE.match(zone)
  245. if not offset:
  246. raise ValueError,'wrong format or unkown time zone: "%s"' % zone
  247. zonesign,hours,minutes,extra = offset.groups()
  248. if extra:
  249. raise ValueError,'illegal time zone offset: "%s"' % zone
  250. offset = int(hours or 0) * 60 + int(minutes or 0)
  251. if zonesign == '-':
  252. offset = -offset
  253. return offset
  254. def add_century(year):
  255. """ Sliding window approach to the Y2K problem: adds a suitable
  256. century to the given year and returns it as integer.
  257. The window used depends on the current year. If adding the current
  258. century to the given year gives a year within the range
  259. current_year-70...current_year+30 [both inclusive], then the
  260. current century is added. Otherwise the century (current + 1 or
  261. - 1) producing the least difference is chosen.
  262. """
  263. current_year=dt.datetime.now().year
  264. current_century=(dt.datetime.now().year / 100) * 100
  265. if year > 99:
  266. # Take it as-is
  267. return year
  268. year = year + current_century
  269. diff = year - current_year
  270. if diff >= -70 and diff <= 30:
  271. return year
  272. elif diff < -70:
  273. return year + 100
  274. else:
  275. return year - 100
  276. def _parse_date(text):
  277. """
  278. Parses the date part given in text and returns a tuple
  279. (text,day,month,year,style) with the following meanings:
  280. * text gives the original text without the date part
  281. * day,month,year give the parsed date
  282. * style gives information about which parser was successful:
  283. 'euro' - the European date parser
  284. 'us' - the US date parser
  285. 'altus' - the alternative US date parser (with '-' instead of '/')
  286. 'iso' - the ISO date parser
  287. 'altiso' - the alternative ISO date parser (without '-')
  288. 'usiso' - US style ISO date parser (yyyy/mm/dd)
  289. 'lit' - the US literal date parser
  290. 'altlit' - the alternative US literal date parser
  291. 'eurlit' - the Eurpean literal date parser
  292. 'unknown' - no date part was found, defaultdate was used
  293. Formats may be set to a tuple of style strings specifying which of the above
  294. parsers to use and in which order to try them.
  295. Default is to try all of them in the above order.
  296. ``defaultdate`` provides the defaults to use in case no date part is found.
  297. Most other parsers default to the current year January 1 if some of these
  298. date parts are missing.
  299. If ``'unknown'`` is not given in formats and the date cannot be parsed,
  300. a :exc:`ValueError` is raised.
  301. """
  302. match = None
  303. style = ''
  304. formats = _date_formats
  305. us_formats=('us', 'altus')
  306. iso_formats=('iso', 'altiso', 'usiso')
  307. now=dt.datetime.now
  308. # Apply parsers in the order given in formats
  309. for format in formats:
  310. if format == 'euro':
  311. # European style date
  312. match = _eurodateRE.search(text)
  313. if match is not None:
  314. day,month,year,epoch = match.groups()
  315. if year:
  316. if len(year) == 2:
  317. # Y2K problem:
  318. year = add_century(int(year))
  319. else:
  320. year = int(year)
  321. else:
  322. defaultdate = now()
  323. year = defaultdate.year
  324. if epoch and 'B' in epoch:
  325. year = -year + 1
  326. month = int(month)
  327. day = int(day)
  328. # Could have mistaken euro format for us style date
  329. # which uses month, day order
  330. if month > 12 or month == 0:
  331. match = None
  332. continue
  333. break
  334. elif format == 'year':
  335. # just a year specified
  336. match = _yeardateRE.match(text)
  337. if match is not None:
  338. year = match.groups()[0]
  339. if year:
  340. if len(year) == 2:
  341. # Y2K problem:
  342. year = add_century(int(year))
  343. else:
  344. year = int(year)
  345. else:
  346. defaultdate = now()
  347. year = defaultdate.year
  348. day = 1
  349. month = 1
  350. break
  351. elif format in iso_formats:
  352. # ISO style date
  353. if format == 'iso':
  354. match = _isodateRE.search(text)
  355. elif format == 'altiso':
  356. match = _altisodateRE.search(text)
  357. # Avoid mistaking ISO time parts ('Thhmmss') for dates
  358. if match is not None:
  359. left, right = match.span()
  360. if left > 0 and \
  361. text[left - 1:left] == 'T':
  362. match = None
  363. continue
  364. else:
  365. match = _usisodateRE.search(text)
  366. if match is not None:
  367. year,month,day = match.groups()
  368. if len(year) == 2:
  369. # Y2K problem:
  370. year = add_century(int(year))
  371. else:
  372. year = int(year)
  373. # Default to January 1st
  374. if not month:
  375. month = 1
  376. else:
  377. month = int(month)
  378. if not day:
  379. day = 1
  380. else:
  381. day = int(day)
  382. break
  383. elif format in us_formats:
  384. # US style date
  385. if format == 'us':
  386. match = _usdateRE.search(text)
  387. else:
  388. match = _altusdateRE.search(text)
  389. if match is not None:
  390. month,day,year,epoch = match.groups()
  391. if year:
  392. if len(year) == 2:
  393. # Y2K problem:
  394. year = add_century(int(year))
  395. else:
  396. year = int(year)
  397. else:
  398. defaultdate = now()
  399. year = defaultdate.year
  400. if epoch and 'B' in epoch:
  401. year = -year + 1
  402. # Default to 1 if no day is given
  403. if day:
  404. day = int(day)
  405. else:
  406. day = 1
  407. month = int(month)
  408. # Could have mistaken us format for euro style date
  409. # which uses day, month order
  410. if month > 12 or month == 0:
  411. match = None
  412. continue
  413. break
  414. elif format == 'lit':
  415. # US style literal date
  416. match = _litdateRE.search(text)
  417. if match is not None:
  418. litday,day,litmonth,month,year,epoch = match.groups()
  419. break
  420. elif format == 'altlit':
  421. # Alternative US style literal date
  422. match = _altlitdateRE.search(text)
  423. if match is not None:
  424. litday,litmonth,day,year,epoch = match.groups()
  425. month = '<missing>'
  426. break
  427. elif format == 'eurlit':
  428. # European style literal date
  429. match = _eurlitdateRE.search(text)
  430. if match is not None:
  431. litday,day,litmonth,year,epoch = match.groups()
  432. month = '<missing>'
  433. break
  434. elif format == 'unknown':
  435. # No date part: use defaultdate
  436. defaultdate = now()
  437. year = defaultdate.year
  438. month = defaultdate.month
  439. day = defaultdate.day
  440. style = format
  441. break
  442. # Check success
  443. if match is not None:
  444. # Remove date from text
  445. left, right = match.span()
  446. if 0 and _debug:
  447. print 'parsed date:',repr(text[left:right]),\
  448. 'giving:',year,month,day
  449. text = text[:left] + text[right:]
  450. style = format
  451. elif not style:
  452. # Not recognized: raise an error
  453. raise ValueError, 'unknown date format: "%s"' % text
  454. # Literal date post-processing
  455. if style in ('lit', 'altlit', 'eurlit'):
  456. if 0 and _debug: print match.groups()
  457. # Default to current year, January 1st
  458. if not year:
  459. defaultdate = now()
  460. year = defaultdate.year
  461. else:
  462. if len(year) == 2:
  463. # Y2K problem:
  464. year = add_century(int(year))
  465. else:
  466. year = int(year)
  467. if epoch and 'B' in epoch:
  468. year = -year + 1
  469. if litmonth:
  470. litmonth = litmonth.lower()
  471. try:
  472. month = litmonthtable[litmonth]
  473. except KeyError:
  474. raise ValueError,\
  475. 'wrong month name: "%s"' % litmonth
  476. elif month:
  477. month = int(month)
  478. else:
  479. month = 1
  480. if day:
  481. day = int(day)
  482. else:
  483. day = 1
  484. #print '_parse_date:',text,day,month,year,style
  485. return text,day,month,year,style
  486. def _parse_time(text):
  487. """ Parses a time part given in text and returns a tuple
  488. (text,hour,minute,second,offset,style) with the following
  489. meanings:
  490. * text gives the original text without the time part
  491. * hour,minute,second give the parsed time
  492. * offset gives the time zone UTC offset
  493. * style gives information about which parser was successful:
  494. 'standard' - the standard parser
  495. 'iso' - the ISO time format parser
  496. 'unknown' - no time part was found
  497. formats may be set to a tuple specifying the parsers to use:
  498. 'standard' - standard time format with ':' delimiter
  499. 'iso' - ISO time format (superset of 'standard')
  500. 'unknown' - default to 0:00:00, 0 zone offset
  501. If 'unknown' is not given in formats and the time cannot be
  502. parsed, a ValueError is raised.
  503. """
  504. match = None
  505. style = ''
  506. formats=_time_formats
  507. # Apply parsers in the order given in formats
  508. for format in formats:
  509. # Standard format
  510. if format == 'standard':
  511. match = _timeRE.search(text)
  512. if match is not None:
  513. hour,minute,second,ampm,zone = match.groups()
  514. style = 'standard'
  515. break
  516. # ISO format
  517. if format == 'iso':
  518. match = _isotimeRE.search(text)
  519. if match is not None:
  520. hour,minute,second,zone = match.groups()
  521. ampm = None
  522. style = 'iso'
  523. break
  524. # Default handling
  525. elif format == 'unknown':
  526. hour,minute,second,offset = 0,0,0.0,0
  527. style = 'unknown'
  528. break
  529. if not style:
  530. # If no default handling should be applied, raise an error
  531. raise ValueError, 'unknown time format: "%s"' % text
  532. # Post-processing
  533. if match is not None:
  534. if zone:
  535. # Convert to UTC offset
  536. offset = utc_offset(zone)
  537. else:
  538. offset = 0
  539. hour = int(hour)
  540. if ampm:
  541. if ampm[0] in ('p', 'P'):
  542. # 12pm = midday
  543. if hour < 12:
  544. hour = hour + 12
  545. else:
  546. # 12am = midnight
  547. if hour >= 12:
  548. hour = hour - 12
  549. if minute:
  550. minute = int(minute)
  551. else:
  552. minute = 0
  553. if not second:
  554. second = 0.0
  555. else:
  556. if ',' in second:
  557. second = second.replace(',', '.')
  558. second = float(second)
  559. # Remove time from text
  560. left,right = match.span()
  561. if 0 and _debug:
  562. print 'parsed time:',repr(text[left:right]),\
  563. 'giving:',hour,minute,second,offset
  564. text = text[:left] + text[right:]
  565. #print '_parse_time:',text,hour,minute,second,offset,style
  566. return text,hour,minute,second,offset,style
  567. ###
  568. def datetime_from_string(text):
  569. """ datetime_from_string(text, [formats, defaultdate])
  570. Returns a datetime instance reflecting the date and time given
  571. in text. In case a timezone is given, the returned instance
  572. will point to the corresponding UTC time value. Otherwise, the
  573. value is set as given in the string.
  574. formats may be set to a tuple of strings specifying which of
  575. the following parsers to use and in which order to try
  576. them. Default is to try all of them in the order given below:
  577. 'euro' - the European date parser
  578. 'us' - the US date parser
  579. 'altus' - the alternative US date parser (with '-' instead of '/')
  580. 'iso' - the ISO date parser
  581. 'altiso' - the alternative ISO date parser (without '-')
  582. 'usiso' - US style ISO date parser (yyyy/mm/dd)
  583. 'lit' - the US literal date parser
  584. 'altlit' - the alternative US literal date parser
  585. 'eurlit' - the Eurpean literal date parser
  586. 'unknown' - if no date part is found, use defaultdate
  587. defaultdate provides the defaults to use in case no date part
  588. is found. Most of the parsers default to the current year
  589. January 1 if some of these date parts are missing.
  590. If 'unknown' is not given in formats and the date cannot
  591. be parsed, a ValueError is raised.
  592. time_formats may be set to a tuple of strings specifying which
  593. of the following parsers to use and in which order to try
  594. them. Default is to try all of them in the order given below:
  595. 'standard' - standard time format HH:MM:SS (with ':' delimiter)
  596. 'iso' - ISO time format (superset of 'standard')
  597. 'unknown' - default to 00:00:00 in case the time format
  598. cannot be parsed
  599. Defaults to 00:00:00.00 for time parts that are not included
  600. in the textual representation.
  601. If 'unknown' is not given in time_formats and the time cannot
  602. be parsed, a ValueError is raised.
  603. """
  604. origtext = text
  605. text,hour,minute,second,offset,timestyle = _parse_time(origtext)
  606. text,day,month,year,datestyle = _parse_date(text)
  607. if 0 and _debug:
  608. print 'tried time/date on %s, date=%s, time=%s' % (origtext,
  609. datestyle,
  610. timestyle)
  611. # If this fails, try the ISO order (date, then time)
  612. if timestyle in ('iso', 'unknown'):
  613. text,day,month,year,datestyle = _parse_date(origtext)
  614. text,hour,minute,second,offset,timestyle = _parse_time(text)
  615. if 0 and _debug:
  616. print 'tried ISO on %s, date=%s, time=%s' % (origtext,
  617. datestyle,
  618. timestyle)
  619. try:
  620. microsecond = int(round(1000000 * (second % 1)))
  621. second = int(second)
  622. return dt.datetime(year,month,day,hour,minute,second, microsecond) - \
  623. dt.timedelta(minutes=offset)
  624. except ValueError, why:
  625. raise RangeError,\
  626. 'Failed to parse "%s": %s' % (origtext, why)
  627. def date_from_string(text):
  628. """ date_from_string(text, [formats, defaultdate])
  629. Returns a datetime instance reflecting the date given in
  630. text. A possibly included time part is ignored.
  631. formats and defaultdate work just like for
  632. datetime_from_string().
  633. """
  634. _text,day,month,year,datestyle = _parse_date(text)
  635. try:
  636. return dt.datetime(year,month,day)
  637. except ValueError, why:
  638. raise RangeError,\
  639. 'Failed to parse "%s": %s' % (text, why)
  640. def validateDateTimeString(text):
  641. """ validateDateTimeString(text, [formats, defaultdate])
  642. Validates the given text and returns 1/0 depending on whether
  643. text includes parseable date and time values or not.
  644. formats works just like for datetime_from_string() and defines
  645. the order of date/time parsers to apply. It defaults to the
  646. same list of parsers as for datetime_from_string().
  647. XXX Undocumented !
  648. """
  649. try:
  650. datetime_from_string(text)
  651. except ValueError, why:
  652. return 0
  653. return 1
  654. def validateDateString(text):
  655. """ validateDateString(text, [formats, defaultdate])
  656. Validates the given text and returns 1/0 depending on whether
  657. text includes a parseable date value or not.
  658. formats works just like for datetime_from_string() and defines
  659. the order of date/time parsers to apply. It defaults to the
  660. same list of parsers as for datetime_from_string().
  661. XXX Undocumented !
  662. """
  663. try:
  664. date_from_string(text)
  665. except ValueError, why:
  666. return 0
  667. return 1
  668. ### Tests
  669. def _test():
  670. import sys
  671. t = dt.datetime.now()
  672. _date = t.strftime('%Y-%m-%d')
  673. print 'Testing DateTime Parser...'
  674. l = [
  675. # Literal formats
  676. ('Sun Nov 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
  677. ('sun nov 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
  678. ('sUN NOV 6 08:49:37 1994', '1994-11-06 08:49:37.00'),
  679. ('Sunday, 06-Nov-94 08:49:37 GMT', '1994-11-06 08:49:37.00'),
  680. ('Sun, 06 Nov 1994 08:49:37 GMT', '1994-11-06 08:49:37.00'),
  681. ('06-Nov-94 08:49:37', '1994-11-06 08:49:37.00'),
  682. ('06-Nov-94', '1994-11-06 00:00:00.00'),
  683. ('06-NOV-94', '1994-11-06 00:00:00.00'),
  684. ('November 19 08:49:37', '%s-11-19 08:49:37.00' % t.year),
  685. ('Nov. 9', '%s-11-09 00:00:00.00' % t.year),
  686. ('Sonntag, der 6. November 1994, 08:49:37 GMT', '1994-11-06 08:49:37.00'),
  687. ('6. November 2001, 08:49:37', '2001-11-06 08:49:37.00'),
  688. ('sep 6', '%s-09-06 00:00:00.00' % t.year),
  689. ('sep 6 2000', '2000-09-06 00:00:00.00'),
  690. ('September 29', '%s-09-29 00:00:00.00' % t.year),
  691. ('Sep. 29', '%s-09-29 00:00:00.00' % t.year),
  692. ('6 sep', '%s-09-06 00:00:00.00' % t.year),
  693. ('29 September', '%s-09-29 00:00:00.00' % t.year),
  694. ('29 Sep.', '%s-09-29 00:00:00.00' % t.year),
  695. ('sep 6 2001', '2001-09-06 00:00:00.00'),
  696. ('Sep 6, 2001', '2001-09-06 00:00:00.00'),
  697. ('September 6, 2001', '2001-09-06 00:00:00.00'),
  698. ('sep 6 01', '2001-09-06 00:00:00.00'),
  699. ('Sep 6, 01', '2001-09-06 00:00:00.00'),
  700. ('September 6, 01', '2001-09-06 00:00:00.00'),
  701. ('30 Apr 2006 20:19:00', '2006-04-30 20:19:00.00'),
  702. # ISO formats
  703. ('1994-11-06 08:49:37', '1994-11-06 08:49:37.00'),
  704. ('010203', '2001-02-03 00:00:00.00'),
  705. ('2001-02-03 00:00:00.00', '2001-02-03 00:00:00.00'),
  706. ('2001-02 00:00:00.00', '2001-02-01 00:00:00.00'),
  707. ('2001-02-03', '2001-02-03 00:00:00.00'),
  708. ('2001-02', '2001-02-01 00:00:00.00'),
  709. ('20000824/2300', '2000-08-24 23:00:00.00'),
  710. ('20000824/0102', '2000-08-24 01:02:00.00'),
  711. ('20000824', '2000-08-24 00:00:00.00'),
  712. ('20000824/020301', '2000-08-24 02:03:01.00'),
  713. ('20000824 020301', '2000-08-24 02:03:01.00'),
  714. ('20000824T020301', '2000-08-24 02:03:01.00'),
  715. ('20000824 020301', '2000-08-24 02:03:01.00'),
  716. ('2000-08-24 02:03:01.00', '2000-08-24 02:03:01.00'),
  717. ('T020311', '%s 02:03:11.00' % _date),
  718. ('2003-12-9', '2003-12-09 00:00:00.00'),
  719. ('03-12-9', '2003-12-09 00:00:00.00'),
  720. ('003-12-9', '0003-12-09 00:00:00.00'),
  721. ('0003-12-9', '0003-12-09 00:00:00.00'),
  722. ('2003-1-9', '2003-01-09 00:00:00.00'),
  723. ('03-1-9', '2003-01-09 00:00:00.00'),
  724. ('003-1-9', '0003-01-09 00:00:00.00'),
  725. ('0003-1-9', '0003-01-09 00:00:00.00'),
  726. # US formats
  727. ('06/11/94 08:49:37', '1994-06-11 08:49:37.00'),
  728. ('11/06/94 08:49:37', '1994-11-06 08:49:37.00'),
  729. ('9/23/2001', '2001-09-23 00:00:00.00'),
  730. ('9-23-2001', '2001-09-23 00:00:00.00'),
  731. ('9/6', '%s-09-06 00:00:00.00' % t.year),
  732. ('09/6', '%s-09-06 00:00:00.00' % t.year),
  733. ('9/06', '%s-09-06 00:00:00.00' % t.year),
  734. ('09/06', '%s-09-06 00:00:00.00' % t.year),
  735. ('9/6/2001', '2001-09-06 00:00:00.00'),
  736. ('09/6/2001', '2001-09-06 00:00:00.00'),
  737. ('9/06/2001', '2001-09-06 00:00:00.00'),
  738. ('09/06/2001', '2001-09-06 00:00:00.00'),
  739. ('9-6-2001', '2001-09-06 00:00:00.00'),
  740. ('09-6-2001', '2001-09-06 00:00:00.00'),
  741. ('9-06-2001', '2001-09-06 00:00:00.00'),
  742. ('09-06-2001', '2001-09-06 00:00:00.00'),
  743. ('2002/05/28 13:10:56.114700 GMT+2', '2002-05-28 13:10:56.114700'),
  744. ('1970/01/01', '1970-01-01 00:00:00.00'),
  745. ('20021025 12:00 PM', '2002-10-25 12:00:00.00'),
  746. ('20021025 12:30 PM', '2002-10-25 12:30:00.00'),
  747. ('20021025 12:00 AM', '2002-10-25 00:00:00.00'),
  748. ('20021025 12:30 AM', '2002-10-25 00:30:00.00'),
  749. ('20021025 1:00 PM', '2002-10-25 13:00:00.00'),
  750. ('20021025 2:00 AM', '2002-10-25 02:00:00.00'),
  751. ('Thursday, February 06, 2003 12:40 PM', '2003-02-06 12:40:00.00'),
  752. ('Mon, 18 Sep 2006 23:03:00', '2006-09-18 23:03:00.00'),
  753. # European formats
  754. ('6.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
  755. ('06.11.2001, 08:49:37', '2001-11-06 08:49:37.00'),
  756. ('06.11. 08:49:37', '%s-11-06 08:49:37.00' % t.year),
  757. #('21/12/2002', '2002-12-21 00:00:00.00'),
  758. #('21/08/2002', '2002-08-21 00:00:00.00'),
  759. #('21-08-2002', '2002-08-21 00:00:00.00'),
  760. #('13/01/03', '2003-01-13 00:00:00.00'),
  761. #('13/1/03', '2003-01-13 00:00:00.00'),
  762. #('13/1/3', '2003-01-13 00:00:00.00'),
  763. #('13/01/3', '2003-01-13 00:00:00.00'),
  764. # Time only formats
  765. ('01:03', '%s 01:03:00.00' % _date),
  766. ('01:03:11', '%s 01:03:11.00' % _date),
  767. ('01:03:11.50', '%s 01:03:11.500000' % _date),
  768. ('01:03:11.50 AM', '%s 01:03:11.500000' % _date),
  769. ('01:03:11.50 PM', '%s 13:03:11.500000' % _date),
  770. ('01:03:11.50 a.m.', '%s 01:03:11.500000' % _date),
  771. ('01:03:11.50 p.m.', '%s 13:03:11.500000' % _date),
  772. # Invalid formats
  773. ('6..2001, 08:49:37', '%s 08:49:37.00' % _date),
  774. ('9//2001', 'ignore'),
  775. ('06--94 08:49:37', 'ignore'),
  776. ('20-03 00:00:00.00', 'ignore'),
  777. ('9/2001', 'ignore'),
  778. ('9-6', 'ignore'),
  779. ('09-6', 'ignore'),
  780. ('9-06', 'ignore'),
  781. ('09-06', 'ignore'),
  782. ('20000824/23', 'ignore'),
  783. ('November 1994 08:49:37', 'ignore'),
  784. ]
  785. # Add Unicode versions
  786. try:
  787. unicode
  788. except NameError:
  789. pass
  790. else:
  791. k = []
  792. for text, result in l:
  793. k.append((unicode(text), result))
  794. l.extend(k)
  795. for text, reference in l:
  796. try:
  797. value = datetime_from_string(text)
  798. except:
  799. if reference is None:
  800. continue
  801. else:
  802. value = str(sys.exc_info()[1])
  803. valid_datetime = validateDateTimeString(text)
  804. valid_date = validateDateString(text)
  805. if reference[-3:] == '.00': reference = reference[:-3]
  806. if str(value) != reference and \
  807. not reference == 'ignore':
  808. print 'Failed to parse "%s"' % text
  809. print ' expected: %s' % (reference or '<exception>')
  810. print ' parsed: %s' % value
  811. elif _debug:
  812. print 'Parsed "%s" successfully' % text
  813. if _debug:
  814. if not valid_datetime:
  815. print ' "%s" failed date/time validation' % text
  816. if not valid_date:
  817. print ' "%s" failed date validation' % text
  818. et = dt.datetime.now()
  819. print 'done. (after %f seconds)' % ((et-t).seconds)
  820. if __name__ == '__main__':
  821. _test()