PageRenderTime 56ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/parsedatetime/__init__.py

http://parsedatetime.googlecode.com/
Python | 1795 lines | 1640 code | 38 blank | 117 comment | 66 complexity | 49474631dbdd987f75b67d3bdb25ce16 MD5 | raw file
Possible License(s): Apache-2.0
  1. """
  2. parsedatetime
  3. Parse human-readable date/time text.
  4. Requires Python 2.6 or later
  5. """
  6. __author__ = 'Mike Taylor (bear@code-bear.com)'
  7. __copyright__ = 'Copyright (c) 2004 Mike Taylor'
  8. __license__ = 'Apache v2.0'
  9. __version__ = '1.0.0'
  10. __contributors__ = [ 'Darshana Chhajed',
  11. 'Michael Lim (lim.ck.michael@gmail.com)',
  12. 'Bernd Zeimetz (bzed@debian.org)',
  13. ]
  14. import re
  15. import time
  16. import datetime
  17. import calendar
  18. import logging
  19. import email.utils
  20. from . import pdt_locales
  21. log = logging.getLogger()
  22. echoHandler = logging.StreamHandler()
  23. echoFormatter = logging.Formatter('%(levelname)-8s %(message)s')
  24. log.addHandler(echoHandler)
  25. # log.addHandler(logging.NullHandler())
  26. log.setLevel(logging.DEBUG)
  27. pdtLocales = { 'icu': pdt_locales.pdtLocale_icu,
  28. 'en_US': pdt_locales.pdtLocale_en,
  29. 'en_AU': pdt_locales.pdtLocale_au,
  30. 'es_ES': pdt_locales.pdtLocale_es,
  31. 'de_DE': pdt_locales.pdtLocale_de,
  32. }
  33. # Copied from feedparser.py
  34. # Universal Feedparser
  35. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  36. # Originally a def inside of _parse_date_w3dtf()
  37. def _extract_date(m):
  38. year = int(m.group('year'))
  39. if year < 100:
  40. year = 100 * int(time.gmtime()[0] / 100) + int(year)
  41. if year < 1000:
  42. return 0, 0, 0
  43. julian = m.group('julian')
  44. if julian:
  45. julian = int(julian)
  46. month = julian / 30 + 1
  47. day = julian % 30 + 1
  48. jday = None
  49. while jday != julian:
  50. t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
  51. jday = time.gmtime(t)[-2]
  52. diff = abs(jday - julian)
  53. if jday > julian:
  54. if diff < day:
  55. day = day - diff
  56. else:
  57. month = month - 1
  58. day = 31
  59. elif jday < julian:
  60. if day + diff < 28:
  61. day = day + diff
  62. else:
  63. month = month + 1
  64. return year, month, day
  65. month = m.group('month')
  66. day = 1
  67. if month is None:
  68. month = 1
  69. else:
  70. month = int(month)
  71. day = m.group('day')
  72. if day:
  73. day = int(day)
  74. else:
  75. day = 1
  76. return year, month, day
  77. # Copied from feedparser.py
  78. # Universal Feedparser
  79. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  80. # Originally a def inside of _parse_date_w3dtf()
  81. def _extract_time(m):
  82. if not m:
  83. return 0, 0, 0
  84. hours = m.group('hours')
  85. if not hours:
  86. return 0, 0, 0
  87. hours = int(hours)
  88. minutes = int(m.group('minutes'))
  89. seconds = m.group('seconds')
  90. if seconds:
  91. seconds = int(seconds)
  92. else:
  93. seconds = 0
  94. return hours, minutes, seconds
  95. # Copied from feedparser.py
  96. # Universal Feedparser
  97. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  98. # Modified to return a tuple instead of mktime
  99. #
  100. # Original comment:
  101. # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
  102. # Drake and licensed under the Python license. Removed all range checking
  103. # for month, day, hour, minute, and second, since mktime will normalize
  104. # these later
  105. def _parse_date_w3dtf(dateString):
  106. # the __extract_date and __extract_time methods were
  107. # copied-out so they could be used by my code --bear
  108. def __extract_tzd(m):
  109. '''Return the Time Zone Designator as an offset in seconds from UTC.'''
  110. if not m:
  111. return 0
  112. tzd = m.group('tzd')
  113. if not tzd:
  114. return 0
  115. if tzd == 'Z':
  116. return 0
  117. hours = int(m.group('tzdhours'))
  118. minutes = m.group('tzdminutes')
  119. if minutes:
  120. minutes = int(minutes)
  121. else:
  122. minutes = 0
  123. offset = (hours*60 + minutes) * 60
  124. if tzd[0] == '+':
  125. return -offset
  126. return offset
  127. __date_re = ('(?P<year>\d\d\d\d)'
  128. '(?:(?P<dsep>-|)'
  129. '(?:(?P<julian>\d\d\d)'
  130. '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
  131. __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
  132. __tzd_rx = re.compile(__tzd_re)
  133. __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
  134. '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
  135. + __tzd_re)
  136. __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
  137. __datetime_rx = re.compile(__datetime_re)
  138. m = __datetime_rx.match(dateString)
  139. if (m is None) or (m.group() != dateString): return
  140. return _extract_date(m) + _extract_time(m) + (0, 0, 0)
  141. _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
  142. 'aug', 'sep', 'oct', 'nov', 'dec',
  143. 'january', 'february', 'march', 'april', 'may', 'june', 'july',
  144. 'august', 'september', 'october', 'november', 'december']
  145. _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
  146. # Copied from feedparser.py
  147. # Universal Feedparser
  148. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  149. # Modified to return a tuple instead of mktime
  150. #
  151. def _parse_date_rfc822(dateString):
  152. '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date'''
  153. data = dateString.split()
  154. if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
  155. del data[0]
  156. if len(data) == 4:
  157. s = data[3]
  158. i = s.find('+')
  159. if i > 0:
  160. data[3:] = [s[:i], s[i+1:]]
  161. else:
  162. data.append('')
  163. dateString = " ".join(data)
  164. if len(data) < 5:
  165. dateString += ' 00:00:00 GMT'
  166. return email.utils.parsedate_tz(dateString)
  167. # # rfc822.py defines several time zones, but we define some extra ones.
  168. # # 'ET' is equivalent to 'EST', etc.
  169. # _additional_timezones = {'AT': -400, 'ET': -500,
  170. # 'CT': -600, 'MT': -700,
  171. # 'PT': -800}
  172. # email.utils._timezones.update(_additional_timezones)
  173. class Calendar:
  174. """
  175. A collection of routines to input, parse and manipulate date and times.
  176. The text can either be 'normal' date values or it can be human readable.
  177. """
  178. def __init__(self, constants=None):
  179. """
  180. Default constructor for the L{Calendar} class.
  181. @type constants: object
  182. @param constants: Instance of the class L{parsedatetime_consts.Constants}
  183. @rtype: object
  184. @return: L{Calendar} instance
  185. """
  186. # if a constants reference is not included, use default
  187. if constants is None:
  188. self.ptc = Constants()
  189. else:
  190. self.ptc = constants
  191. self.weekdyFlag = False # monday/tuesday/...
  192. self.dateStdFlag = False # 07/21/06
  193. self.dateStrFlag = False # July 21st, 2006
  194. self.timeStdFlag = False # 5:50
  195. self.meridianFlag = False # am/pm
  196. self.dayStrFlag = False # tomorrow/yesterday/today/..
  197. self.timeStrFlag = False # lunch/noon/breakfast/...
  198. self.modifierFlag = False # after/before/prev/next/..
  199. self.modifier2Flag = False # after/before/prev/next/..
  200. self.unitsFlag = False # hrs/weeks/yrs/min/..
  201. self.qunitsFlag = False # h/m/t/d..
  202. self.timeFlag = 0
  203. self.dateFlag = 0
  204. def _convertUnitAsWords(self, unitText):
  205. """
  206. Converts text units into their number value
  207. Five = 5
  208. Twenty Five = 25
  209. Two hundred twenty five = 225
  210. Two thousand and twenty five = 2025
  211. Two thousand twenty five = 2025
  212. @type unitText: string
  213. @param unitText: number text to convert
  214. @rtype: integer
  215. @return: numerical value of unitText
  216. """
  217. # TODO: implement this
  218. pass
  219. def _buildTime(self, source, quantity, modifier, units):
  220. """
  221. Take C{quantity}, C{modifier} and C{unit} strings and convert them into values.
  222. After converting, calcuate the time and return the adjusted sourceTime.
  223. @type source: time
  224. @param source: time to use as the base (or source)
  225. @type quantity: string
  226. @param quantity: quantity string
  227. @type modifier: string
  228. @param modifier: how quantity and units modify the source time
  229. @type units: string
  230. @param units: unit of the quantity (i.e. hours, days, months, etc)
  231. @rtype: struct_time
  232. @return: C{struct_time} of the calculated time
  233. """
  234. log.debug('_buildTime: [%s][%s][%s]' % (quantity, modifier, units))
  235. if source is None:
  236. source = time.localtime()
  237. if quantity is None:
  238. quantity = ''
  239. else:
  240. quantity = quantity.strip()
  241. if len(quantity) == 0:
  242. qty = 1
  243. else:
  244. try:
  245. qty = int(quantity)
  246. except ValueError:
  247. qty = 0
  248. if modifier in self.ptc.Modifiers:
  249. qty = qty * self.ptc.Modifiers[modifier]
  250. if units is None or units == '':
  251. units = 'dy'
  252. # plurals are handled by regex's (could be a bug tho)
  253. (yr, mth, dy, hr, mn, sec, _, _, _) = source
  254. start = datetime.datetime(yr, mth, dy, hr, mn, sec)
  255. target = start
  256. if units.startswith('y'):
  257. target = self.inc(start, year=qty)
  258. self.dateFlag = 1
  259. elif units.endswith('th') or units.endswith('ths'):
  260. target = self.inc(start, month=qty)
  261. self.dateFlag = 1
  262. else:
  263. if units.startswith('d'):
  264. target = start + datetime.timedelta(days=qty)
  265. self.dateFlag = 1
  266. elif units.startswith('h'):
  267. target = start + datetime.timedelta(hours=qty)
  268. self.timeFlag = 2
  269. elif units.startswith('m'):
  270. target = start + datetime.timedelta(minutes=qty)
  271. self.timeFlag = 2
  272. elif units.startswith('s'):
  273. target = start + datetime.timedelta(seconds=qty)
  274. self.timeFlag = 2
  275. elif units.startswith('w'):
  276. target = start + datetime.timedelta(weeks=qty)
  277. self.dateFlag = 1
  278. return target.timetuple()
  279. def parseDate(self, dateString):
  280. """
  281. Parse short-form date strings::
  282. '05/28/2006' or '04.21'
  283. @type dateString: string
  284. @param dateString: text to convert to a C{datetime}
  285. @rtype: struct_time
  286. @return: calculated C{struct_time} value of dateString
  287. """
  288. yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
  289. # values pulled from regex's will be stored here and later
  290. # assigned to mth, dy, yr based on information from the locale
  291. # -1 is used as the marker value because we want zero values
  292. # to be passed thru so they can be flagged as errors later
  293. v1 = -1
  294. v2 = -1
  295. v3 = -1
  296. s = dateString
  297. m = self.ptc.CRE_DATE2.search(s)
  298. if m is not None:
  299. index = m.start()
  300. v1 = int(s[:index])
  301. s = s[index + 1:]
  302. m = self.ptc.CRE_DATE2.search(s)
  303. if m is not None:
  304. index = m.start()
  305. v2 = int(s[:index])
  306. v3 = int(s[index + 1:])
  307. else:
  308. v2 = int(s.strip())
  309. v = [ v1, v2, v3 ]
  310. d = { 'm': mth, 'd': dy, 'y': yr }
  311. for i in range(0, 3):
  312. n = v[i]
  313. c = self.ptc.dp_order[i]
  314. if n >= 0:
  315. d[c] = n
  316. # if the year is not specified and the date has already
  317. # passed, increment the year
  318. if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])):
  319. yr = d['y'] + 1
  320. else:
  321. yr = d['y']
  322. mth = d['m']
  323. dy = d['d']
  324. # birthday epoch constraint
  325. if yr < self.ptc.BirthdayEpoch:
  326. yr += 2000
  327. elif yr < 100:
  328. yr += 1900
  329. log.debug('parseDate: %s %s %s %s' % (yr, mth, dy, self.ptc.daysInMonth(mth, yr)))
  330. if (mth > 0 and mth <= 12) and \
  331. (dy > 0 and dy <= self.ptc.daysInMonth(mth, yr)):
  332. sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
  333. else:
  334. self.dateFlag = 0
  335. self.timeFlag = 0
  336. sourceTime = time.localtime() # return current time if date
  337. # string is invalid
  338. return sourceTime
  339. def parseDateText(self, dateString):
  340. """
  341. Parse long-form date strings::
  342. 'May 31st, 2006'
  343. 'Jan 1st'
  344. 'July 2006'
  345. @type dateString: string
  346. @param dateString: text to convert to a datetime
  347. @rtype: struct_time
  348. @return: calculated C{struct_time} value of dateString
  349. """
  350. yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
  351. currentMth = mth
  352. currentDy = dy
  353. s = dateString.lower()
  354. m = self.ptc.CRE_DATE3.search(s)
  355. mth = m.group('mthname')
  356. mth = self.ptc.MonthOffsets[mth]
  357. if m.group('day') != None:
  358. dy = int(m.group('day'))
  359. else:
  360. dy = 1
  361. if m.group('year') != None:
  362. yr = int(m.group('year'))
  363. # birthday epoch constraint
  364. if yr < self.ptc.BirthdayEpoch:
  365. yr += 2000
  366. elif yr < 100:
  367. yr += 1900
  368. elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
  369. # if that day and month have already passed in this year,
  370. # then increment the year by 1
  371. yr += 1
  372. if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
  373. sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
  374. else:
  375. # Return current time if date string is invalid
  376. self.dateFlag = 0
  377. self.timeFlag = 0
  378. sourceTime = time.localtime()
  379. return sourceTime
  380. def evalRanges(self, datetimeString, sourceTime=None):
  381. """
  382. Evaluate the C{datetimeString} text and determine if
  383. it represents a date or time range.
  384. @type datetimeString: string
  385. @param datetimeString: datetime text to evaluate
  386. @type sourceTime: struct_time
  387. @param sourceTime: C{struct_time} value to use as the base
  388. @rtype: tuple
  389. @return: tuple of: start datetime, end datetime and the invalid flag
  390. """
  391. startTime = ''
  392. endTime = ''
  393. startDate = ''
  394. endDate = ''
  395. rangeFlag = 0
  396. s = datetimeString.strip().lower()
  397. if self.ptc.rangeSep in s:
  398. s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep)
  399. s = s.replace(' ', ' ')
  400. m = self.ptc.CRE_TIMERNG1.search(s)
  401. if m is not None:
  402. rangeFlag = 1
  403. else:
  404. m = self.ptc.CRE_TIMERNG2.search(s)
  405. if m is not None:
  406. rangeFlag = 2
  407. else:
  408. m = self.ptc.CRE_TIMERNG4.search(s)
  409. if m is not None:
  410. rangeFlag = 7
  411. else:
  412. m = self.ptc.CRE_TIMERNG3.search(s)
  413. if m is not None:
  414. rangeFlag = 3
  415. else:
  416. m = self.ptc.CRE_DATERNG1.search(s)
  417. if m is not None:
  418. rangeFlag = 4
  419. else:
  420. m = self.ptc.CRE_DATERNG2.search(s)
  421. if m is not None:
  422. rangeFlag = 5
  423. else:
  424. m = self.ptc.CRE_DATERNG3.search(s)
  425. if m is not None:
  426. rangeFlag = 6
  427. log.debug('evalRanges: rangeFlag = %s [%s]' % (rangeFlag, s))
  428. if m is not None:
  429. if (m.group() != s):
  430. # capture remaining string
  431. parseStr = m.group()
  432. chunk1 = s[:m.start()]
  433. chunk2 = s[m.end():]
  434. s = '%s %s' % (chunk1, chunk2)
  435. flag = 1
  436. sourceTime, flag = self.parse(s, sourceTime)
  437. if flag == 0:
  438. sourceTime = None
  439. else:
  440. parseStr = s
  441. if rangeFlag == 1:
  442. m = re.search(self.ptc.rangeSep, parseStr)
  443. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  444. endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  445. if (eflag != 0) and (sflag != 0):
  446. return (startTime, endTime, 2)
  447. elif rangeFlag == 2:
  448. m = re.search(self.ptc.rangeSep, parseStr)
  449. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  450. endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  451. if (eflag != 0) and (sflag != 0):
  452. return (startTime, endTime, 2)
  453. elif rangeFlag == 3 or rangeFlag == 7:
  454. m = re.search(self.ptc.rangeSep, parseStr)
  455. # capturing the meridian from the end time
  456. if self.ptc.usesMeridian:
  457. ampm = re.search(self.ptc.am[0], parseStr)
  458. # appending the meridian to the start time
  459. if ampm is not None:
  460. startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[0]), sourceTime)
  461. else:
  462. startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[1]), sourceTime)
  463. else:
  464. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  465. endTime, eflag = self.parse(parseStr[(m.start() + 1):], sourceTime)
  466. if (eflag != 0) and (sflag != 0):
  467. return (startTime, endTime, 2)
  468. elif rangeFlag == 4:
  469. m = re.search(self.ptc.rangeSep, parseStr)
  470. startDate, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  471. endDate, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  472. if (eflag != 0) and (sflag != 0):
  473. return (startDate, endDate, 1)
  474. elif rangeFlag == 5:
  475. m = re.search(self.ptc.rangeSep, parseStr)
  476. endDate = parseStr[(m.start() + 1):]
  477. # capturing the year from the end date
  478. date = self.ptc.CRE_DATE3.search(endDate)
  479. endYear = date.group('year')
  480. # appending the year to the start date if the start date
  481. # does not have year information and the end date does.
  482. # eg : "Aug 21 - Sep 4, 2007"
  483. if endYear is not None:
  484. startDate = (parseStr[:m.start()]).strip()
  485. date = self.ptc.CRE_DATE3.search(startDate)
  486. startYear = date.group('year')
  487. if startYear is None:
  488. startDate = startDate + ', ' + endYear
  489. else:
  490. startDate = parseStr[:m.start()]
  491. startDate, sflag = self.parse(startDate, sourceTime)
  492. endDate, eflag = self.parse(endDate, sourceTime)
  493. if (eflag != 0) and (sflag != 0):
  494. return (startDate, endDate, 1)
  495. elif rangeFlag == 6:
  496. m = re.search(self.ptc.rangeSep, parseStr)
  497. startDate = parseStr[:m.start()]
  498. # capturing the month from the start date
  499. mth = self.ptc.CRE_DATE3.search(startDate)
  500. mth = mth.group('mthname')
  501. # appending the month name to the end date
  502. endDate = mth + parseStr[(m.start() + 1):]
  503. startDate, sflag = self.parse(startDate, sourceTime)
  504. endDate, eflag = self.parse(endDate, sourceTime)
  505. if (eflag != 0) and (sflag != 0):
  506. return (startDate, endDate, 1)
  507. else:
  508. # if range is not found
  509. sourceTime = time.localtime()
  510. return (sourceTime, sourceTime, 0)
  511. def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
  512. """
  513. Based on the C{style} and C{currentDayStyle} determine what
  514. day-of-week value is to be returned.
  515. @type wd: integer
  516. @param wd: day-of-week value for the current day
  517. @type wkdy: integer
  518. @param wkdy: day-of-week value for the parsed day
  519. @type offset: integer
  520. @param offset: offset direction for any modifiers (-1, 0, 1)
  521. @type style: integer
  522. @param style: normally the value set in C{Constants.DOWParseStyle}
  523. @type currentDayStyle: integer
  524. @param currentDayStyle: normally the value set in C{Constants.CurrentDOWParseStyle}
  525. @rtype: integer
  526. @return: calculated day-of-week
  527. """
  528. if offset == 1:
  529. # modifier is indicating future week eg: "next".
  530. # DOW is calculated as DOW of next week
  531. diff = 7 - wd + wkdy
  532. elif offset == -1:
  533. # modifier is indicating past week eg: "last","previous"
  534. # DOW is calculated as DOW of previous week
  535. diff = wkdy - wd - 7
  536. elif offset == 0:
  537. # modifier is indiacting current week eg: "this"
  538. # DOW is calculated as DOW of this week
  539. diff = wkdy - wd
  540. elif offset == 2:
  541. # no modifier is present.
  542. # i.e. string to be parsed is just DOW
  543. if style == 1:
  544. # next occurance of the DOW is calculated
  545. if currentDayStyle == True:
  546. if wkdy >= wd:
  547. diff = wkdy - wd
  548. else:
  549. diff = 7 - wd + wkdy
  550. else:
  551. if wkdy > wd:
  552. diff = wkdy - wd
  553. else:
  554. diff = 7 - wd + wkdy
  555. elif style == -1:
  556. # last occurance of the DOW is calculated
  557. if currentDayStyle == True:
  558. if wkdy <= wd:
  559. diff = wkdy - wd
  560. else:
  561. diff = wkdy - wd - 7
  562. else:
  563. if wkdy < wd:
  564. diff = wkdy - wd
  565. else:
  566. diff = wkdy - wd - 7
  567. else:
  568. # occurance of the DOW in the current week is calculated
  569. diff = wkdy - wd
  570. log.debug("wd %s, wkdy %s, offset %d, style %d" % (wd, wkdy, offset, style))
  571. return diff
  572. def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
  573. """
  574. Evaluate the C{modifier} string and following text (passed in
  575. as C{chunk1} and C{chunk2}) and if they match any known modifiers
  576. calculate the delta and apply it to C{sourceTime}.
  577. @type modifier: string
  578. @param modifier: modifier text to apply to sourceTime
  579. @type chunk1: string
  580. @param chunk1: first text chunk that followed modifier (if any)
  581. @type chunk2: string
  582. @param chunk2: second text chunk that followed modifier (if any)
  583. @type sourceTime: struct_time
  584. @param sourceTime: C{struct_time} value to use as the base
  585. @rtype: tuple
  586. @return: tuple of: remaining text and the modified sourceTime
  587. """
  588. offset = self.ptc.Modifiers[modifier]
  589. if sourceTime is not None:
  590. (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
  591. else:
  592. (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()
  593. # capture the units after the modifier and the remaining
  594. # string after the unit
  595. m = self.ptc.CRE_REMAINING.search(chunk2)
  596. if m is not None:
  597. index = m.start() + 1
  598. unit = chunk2[:m.start()]
  599. chunk2 = chunk2[index:]
  600. else:
  601. unit = chunk2
  602. chunk2 = ''
  603. flag = False
  604. log.debug("modifier [%s] chunk1 [%s] chunk2 [%s] unit [%s] flag %s" % (modifier, chunk1, chunk2, unit, flag))
  605. if unit == 'month' or \
  606. unit == 'mth' or \
  607. unit == 'm':
  608. if offset == 0:
  609. dy = self.ptc.daysInMonth(mth, yr)
  610. sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
  611. elif offset == 2:
  612. # if day is the last day of the month, calculate the last day
  613. # of the next month
  614. if dy == self.ptc.daysInMonth(mth, yr):
  615. dy = self.ptc.daysInMonth(mth + 1, yr)
  616. start = datetime.datetime(yr, mth, dy, 9, 0, 0)
  617. target = self.inc(start, month=1)
  618. sourceTime = target.timetuple()
  619. else:
  620. start = datetime.datetime(yr, mth, 1, 9, 0, 0)
  621. target = self.inc(start, month=offset)
  622. sourceTime = target.timetuple()
  623. flag = True
  624. self.dateFlag = 1
  625. if unit == 'week' or \
  626. unit == 'wk' or \
  627. unit == 'w':
  628. if offset == 0:
  629. start = datetime.datetime(yr, mth, dy, 17, 0, 0)
  630. target = start + datetime.timedelta(days=(4 - wd))
  631. sourceTime = target.timetuple()
  632. elif offset == 2:
  633. start = datetime.datetime(yr, mth, dy, 9, 0, 0)
  634. target = start + datetime.timedelta(days=7)
  635. sourceTime = target.timetuple()
  636. else:
  637. return self._evalModifier(modifier, chunk1, "monday " + chunk2, sourceTime)
  638. flag = True
  639. self.dateFlag = 1
  640. if unit == 'day' or \
  641. unit == 'dy' or \
  642. unit == 'd':
  643. if offset == 0:
  644. sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
  645. self.timeFlag = 2
  646. elif offset == 2:
  647. start = datetime.datetime(yr, mth, dy, hr, mn, sec)
  648. target = start + datetime.timedelta(days=1)
  649. sourceTime = target.timetuple()
  650. else:
  651. start = datetime.datetime(yr, mth, dy, 9, 0, 0)
  652. target = start + datetime.timedelta(days=offset)
  653. sourceTime = target.timetuple()
  654. flag = True
  655. self.dateFlag = 1
  656. if unit == 'hour' or \
  657. unit == 'hr':
  658. if offset == 0:
  659. sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
  660. else:
  661. start = datetime.datetime(yr, mth, dy, hr, 0, 0)
  662. target = start + datetime.timedelta(hours=offset)
  663. sourceTime = target.timetuple()
  664. flag = True
  665. self.timeFlag = 2
  666. if unit == 'year' or \
  667. unit == 'yr' or \
  668. unit == 'y':
  669. if offset == 0:
  670. sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
  671. elif offset == 2:
  672. sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
  673. else:
  674. sourceTime = (yr + offset, 1, 1, 9, 0, 0, wd, yd, isdst)
  675. flag = True
  676. self.dateFlag = 1
  677. if not flag:
  678. if modifier == 'eom':
  679. self.modifierFlag = False
  680. dy = self.ptc.daysInMonth(mth, yr)
  681. sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
  682. self.dateFlag = 2
  683. flag = True
  684. elif modifier == 'eoy':
  685. self.modifierFlag = False
  686. mth = 12
  687. dy = self.ptc.daysInMonth(mth, yr)
  688. sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
  689. self.dateFlag = 2
  690. flag = True
  691. if not flag:
  692. m = self.ptc.CRE_WEEKDAY.match(unit)
  693. if m is not None:
  694. wkdy = m.group()
  695. self.dateFlag = 1
  696. if modifier == 'eod':
  697. # Calculate the upcoming weekday
  698. self.modifierFlag = False
  699. (sourceTime, _) = self.parse(wkdy, sourceTime)
  700. sources = self.ptc.buildSources(sourceTime)
  701. self.timeFlag = 2
  702. if modifier in sources:
  703. sourceTime = sources[modifier]
  704. else:
  705. wkdy = self.ptc.WeekdayOffsets[wkdy]
  706. diff = self._CalculateDOWDelta(wd, wkdy, offset,
  707. self.ptc.DOWParseStyle,
  708. self.ptc.CurrentDOWParseStyle)
  709. start = datetime.datetime(yr, mth, dy, 9, 0, 0)
  710. target = start + datetime.timedelta(days=diff)
  711. sourceTime = target.timetuple()
  712. flag = True
  713. self.dateFlag = 1
  714. if not flag:
  715. m = self.ptc.CRE_TIME.match(unit)
  716. if m is not None:
  717. self.modifierFlag = False
  718. (yr, mth, dy, hr, mn, sec, wd, yd, isdst), _ = self.parse(unit)
  719. start = datetime.datetime(yr, mth, dy, hr, mn, sec)
  720. target = start + datetime.timedelta(days=offset)
  721. sourceTime = target.timetuple()
  722. flag = True
  723. else:
  724. self.modifierFlag = False
  725. # check if the remaining text is parsable and if so,
  726. # use it as the base time for the modifier source time
  727. t, flag2 = self.parse('%s %s' % (chunk1, unit), sourceTime)
  728. if flag2 != 0:
  729. sourceTime = t
  730. sources = self.ptc.buildSources(sourceTime)
  731. if modifier in sources:
  732. sourceTime = sources[modifier]
  733. flag = True
  734. self.timeFlag = 2
  735. # if the word after next is a number, the string is more than likely
  736. # to be "next 4 hrs" which we will have to combine the units with the
  737. # rest of the string
  738. if not flag:
  739. if offset < 0:
  740. # if offset is negative, the unit has to be made negative
  741. unit = '-%s' % unit
  742. chunk2 = '%s %s' % (unit, chunk2)
  743. self.modifierFlag = False
  744. #return '%s %s' % (chunk1, chunk2), sourceTime
  745. return '%s' % chunk2, sourceTime
  746. def _evalModifier2(self, modifier, chunk1 , chunk2, sourceTime):
  747. """
  748. Evaluate the C{modifier} string and following text (passed in
  749. as C{chunk1} and C{chunk2}) and if they match any known modifiers
  750. calculate the delta and apply it to C{sourceTime}.
  751. @type modifier: string
  752. @param modifier: modifier text to apply to C{sourceTime}
  753. @type chunk1: string
  754. @param chunk1: first text chunk that followed modifier (if any)
  755. @type chunk2: string
  756. @param chunk2: second text chunk that followed modifier (if any)
  757. @type sourceTime: struct_time
  758. @param sourceTime: C{struct_time} value to use as the base
  759. @rtype: tuple
  760. @return: tuple of: remaining text and the modified sourceTime
  761. """
  762. offset = self.ptc.Modifiers[modifier]
  763. digit = r'\d+'
  764. self.modifier2Flag = False
  765. # If the string after the negative modifier starts with digits,
  766. # then it is likely that the string is similar to ' before 3 days'
  767. # or 'evening prior to 3 days'.
  768. # In this case, the total time is calculated by subtracting '3 days'
  769. # from the current date.
  770. # So, we have to identify the quantity and negate it before parsing
  771. # the string.
  772. # This is not required for strings not starting with digits since the
  773. # string is enough to calculate the sourceTime
  774. if chunk2 != '':
  775. if offset < 0:
  776. m = re.match(digit, chunk2.strip())
  777. if m is not None:
  778. qty = int(m.group()) * -1
  779. chunk2 = chunk2[m.end():]
  780. chunk2 = '%d%s' % (qty, chunk2)
  781. sourceTime, flag1 = self.parse(chunk2, sourceTime)
  782. if flag1 == 0:
  783. flag1 = True
  784. else:
  785. flag1 = False
  786. flag2 = False
  787. else:
  788. flag1 = False
  789. if chunk1 != '':
  790. if offset < 0:
  791. m = re.search(digit, chunk1.strip())
  792. if m is not None:
  793. qty = int(m.group()) * -1
  794. chunk1 = chunk1[m.end():]
  795. chunk1 = '%d%s' % (qty, chunk1)
  796. tempDateFlag = self.dateFlag
  797. tempTimeFlag = self.timeFlag
  798. sourceTime2, flag2 = self.parse(chunk1, sourceTime)
  799. else:
  800. return sourceTime, (flag1 and flag2)
  801. # if chunk1 is not a datetime and chunk2 is then do not use datetime
  802. # value returned by parsing chunk1
  803. if not (flag1 == False and flag2 == 0):
  804. sourceTime = sourceTime2
  805. else:
  806. self.timeFlag = tempTimeFlag
  807. self.dateFlag = tempDateFlag
  808. return sourceTime, (flag1 and flag2)
    def _evalString(self, datetimeString, sourceTime=None):
        """
        Calculate the datetime based on flags set by the L{parse()} routine

        Each C{self.*Flag} attribute that is set selects one interpretation
        of C{datetimeString}; every such flag is cleared again once its
        branch has run.  C{self.dateFlag} and C{self.timeFlag} are updated
        as a side effect and are reset to 0 when the text turns out to be
        an invalid time, an unknown time word, or matches nothing at all.

        Examples handled::
            RFC822, W3CDTF formatted dates
            HH:MM[:SS][ am/pm]
            MM/DD/YYYY
            DD MMMM YYYY

        @type  datetimeString: string
        @param datetimeString: text to try and parse as more "traditional"
                               date/time text
        @type  sourceTime:     struct_time
        @param sourceTime:     C{struct_time} value to use as the base

        @rtype:  struct_time
        @return: calculated C{struct_time} value (some branches build a
                 plain 9-item tuple instead) or current C{struct_time}
                 if not parsed
        """
        s = datetimeString.strip()
        now = time.localtime()

        # Given string date is a RFC822 date
        # (only attempted when the caller supplied no base time)
        if sourceTime is None:
            sourceTime = _parse_date_rfc822(s)

            if sourceTime is not None:
                # the RFC822 helper yields a 10-item tuple; the last item
                # is dropped below to get back to a 9-item time tuple
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
                self.dateFlag = 1

                # NOTE(review): timeFlag is only set when hour, minute AND
                # second are all non-zero, so e.g. 10:30:00 is reported as
                # date-only - confirm whether 'or' was intended.
                if (hr != 0) and (mn != 0) and (sec != 0):
                    self.timeFlag = 2

                sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

        # Given string date is a W3CDTF date
        if sourceTime is None:
            sourceTime = _parse_date_w3dtf(s)

            if sourceTime is not None:
                self.dateFlag = 1
                self.timeFlag = 2

        # neither strict format matched: the remaining branches compare
        # against lowercase text
        if sourceTime is None:
            s = s.lower()

        # Given string is in the format HH:MM(:SS)(am/pm)
        if self.meridianFlag:
            # start from the base time (or now) so that fields the text
            # does not mention keep their value
            if sourceTime is None:
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
            else:
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

            m = self.ptc.CRE_TIMEHMS2.search(s)
            if m is not None:
                # text in front of the meridian; one or two characters
                # means a bare hour such as "5 pm"
                dt = s[:m.start('meridian')].strip()
                if len(dt) <= 2:
                    hr = int(dt)
                    mn = 0
                    sec = 0
                else:
                    hr, mn, sec = _extract_time(m)

                # treat 24 as midnight
                if hr == 24:
                    hr = 0

                sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
                meridian = m.group('meridian').lower()

                # if 'am' found and hour is 12 - force hour to 0 (midnight)
                if (meridian in self.ptc.am) and hr == 12:
                    sourceTime = (yr, mth, dy, 0, mn, sec, wd, yd, isdst)

                # if 'pm' found and hour < 12, add 12 to shift to evening
                if (meridian in self.ptc.pm) and hr < 12:
                    sourceTime = (yr, mth, dy, hr + 12, mn, sec, wd, yd, isdst)

            # invalid time
            if hr > 24 or mn > 59 or sec > 59:
                sourceTime = now
                self.dateFlag = 0
                self.timeFlag = 0

            self.meridianFlag = False

        # Given string is in the format HH:MM(:SS)
        if self.timeStdFlag:
            if sourceTime is None:
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
            else:
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

            m = self.ptc.CRE_TIMEHMS.search(s)
            if m is not None:
                hr, mn, sec = _extract_time(m)
            # treat 24 as midnight
            if hr == 24:
                hr = 0

            if hr > 24 or mn > 59 or sec > 59:
                # invalid time
                sourceTime = now
                self.dateFlag = 0
                self.timeFlag = 0
            else:
                sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

            self.timeStdFlag = False

        # Given string is in the format 07/21/2006
        if self.dateStdFlag:
            sourceTime = self.parseDate(s)
            self.dateStdFlag = False

        # Given string is in the format "May 23rd, 2005"
        if self.dateStrFlag:
            sourceTime = self.parseDateText(s)
            self.dateStrFlag = False

        # Given string is a weekday
        # (always measured from the current time, not from sourceTime)
        if self.weekdyFlag:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now

            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            wkdy = self.ptc.WeekdayOffsets[s]

            # NOTE(review): both branches make the identical call, so the
            # comparison currently has no effect - confirm intent.
            if wkdy > wd:
                qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                              self.ptc.DOWParseStyle,
                                              self.ptc.CurrentDOWParseStyle)
            else:
                qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                              self.ptc.DOWParseStyle,
                                              self.ptc.CurrentDOWParseStyle)

            target = start + datetime.timedelta(days=qty)
            wd = wkdy

            sourceTime = target.timetuple()
            self.weekdyFlag = False

        # Given string is a natural language time string like
        # lunch, midnight, etc
        if self.timeStrFlag:
            if s in self.ptc.re_values['now']:
                sourceTime = now
            else:
                sources = self.ptc.buildSources(sourceTime)

                if s in sources:
                    sourceTime = sources[s]
                else:
                    # unknown time word: report as not parsed
                    sourceTime = now
                    self.dateFlag = 0
                    self.timeFlag = 0

            self.timeStrFlag = False

        # Given string is a natural language date string like today, tomorrow..
        if self.dayStrFlag:
            if sourceTime is None:
                sourceTime = now

            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

            # text without a known day offset counts as "no shift"
            if s in self.ptc.dayOffsets:
                offset = self.ptc.dayOffsets[s]
            else:
                offset = 0

            # the result is pinned to 09:00:00 on the target day
            start = datetime.datetime(yr, mth, dy, 9, 0, 0)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()

            self.dayStrFlag = False

        # Given string is a time string with units like "5 hrs 30 min"
        if self.unitsFlag:
            modifier = ''  # TODO

            if sourceTime is None:
                sourceTime = now

            m = self.ptc.CRE_UNITS.search(s)
            if m is not None:
                units = m.group('units')
                # everything in front of the unit name is the quantity
                quantity = s[:m.start('units')]

            # NOTE(review): units/quantity look unbound here if the regex
            # did not match - presumably parse() guarantees a match; verify.
            sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
            self.unitsFlag = False

        # Given string is a time string with single char units like "5 h 30 m"
        if self.qunitsFlag:
            modifier = ''  # TODO

            if sourceTime is None:
                sourceTime = now

            m = self.ptc.CRE_QUNITS.search(s)
            if m is not None:
                units = m.group('qunits')
                # everything in front of the unit character is the quantity
                quantity = s[:m.start('qunits')]

            # NOTE(review): same possible unbound-name concern as the
            # unitsFlag branch above - verify against parse().
            sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
            self.qunitsFlag = False

        # Given string does not match anything
        if sourceTime is None:
            sourceTime = now
            self.dateFlag = 0
            self.timeFlag = 0

        return sourceTime
    def parse(self, datetimeString, sourceTime=None):
        """
        Splits the given C{datetimeString} into tokens, finds the regex
        patterns that match and then calculates a C{struct_time} value from
        the chunks.

        If C{sourceTime} is given then the C{struct_time} value will be
        calculated from that value, otherwise from the current date/time.

        If the C{datetimeString} is parsed and date/time value found then
        the second item of the returned tuple will be a flag to let you know
        what kind of C{struct_time} value is being returned::

            0 = not parsed at all
            1 = parsed as a C{date}
            2 = parsed as a C{time}
            3 = parsed as a C{datetime}

        The flag is the sum of C{self.dateFlag} (0 or 1) and
        C{self.timeFlag} (0 or 2); both attributes, together with the
        per-pattern C{self.*Flag} attributes, are modified while parsing.

        @type  datetimeString: string
        @param datetimeString: date/time text to evaluate
        @type  sourceTime:     struct_time
        @param sourceTime:     C{struct_time} value to use as the base; a
                               C{datetime.datetime} is converted with
                               C{timetuple()} and a plain tuple is accepted

        @rtype:  tuple
        @return: tuple of: modified C{sourceTime} and the result flag
        @raise Exception: if C{sourceTime} is truthy but is not a
                          C{datetime}, C{struct_time} or tuple
        """
        # normalise / validate the optional base time
        if sourceTime:
            if isinstance(sourceTime, datetime.datetime):
                log.debug('coercing datetime to timetuple')
                sourceTime = sourceTime.timetuple()
            else:
                if not isinstance(sourceTime, time.struct_time) and \
                   not isinstance(sourceTime, tuple):
                    raise Exception('sourceTime is not a struct_time')

        s = datetimeString.strip().lower()
        parseStr = ''
        totalTime = sourceTime

        # nothing to parse: hand back the base time with whatever flags are
        # currently stored on the instance, or "now" flagged as not parsed
        if s == '' :
            if sourceTime is not None:
                return (sourceTime, self.dateFlag + self.timeFlag)
            else:
                return (time.localtime(), 0)

        self.timeFlag = 0
        self.dateFlag = 0

        # Each pass tries the patterns below in a fixed order; the first
        # one that matches puts its text into parseStr, which disables the
        # remaining pattern checks for this pass.  Most branches also
        # rebuild s from the text around the match (chunk1 + chunk2).
        while len(s) > 0:
            flag = False
            chunk1 = ''
            chunk2 = ''

            log.debug('parse (top of loop): [%s][%s]' % (s, parseStr))

            if parseStr == '':
                # Modifier like next\prev..
                m = self.ptc.CRE_MODIFIER.search(s)
                if m is not None:
                    self.modifierFlag = True
                    if (m.group('modifier') != s):
                        # capture remaining string
                        parseStr = m.group('modifier')
                        chunk1 = s[:m.start('modifier')].strip()
                        chunk2 = s[m.end('modifier'):].strip()
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Modifier like from\after\prior..
                m = self.ptc.CRE_MODIFIER2.search(s)
                if m is not None:
                    self.modifier2Flag = True
                    if (m.group('modifier') != s):
                        # capture remaining string
                        parseStr = m.group('modifier')
                        chunk1 = s[:m.start('modifier')].strip()
                        chunk2 = s[m.end('modifier'):].strip()
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                valid_date = False
                for match in self.ptc.CRE_DATE3.finditer(s):
                    # to prevent "HH:MM(:SS) time strings" expressions from triggering
                    # this regex, we check if the month field exists in the searched
                    # expression, if it doesn't exist, the date field is not valid
                    if match.group('mthname'):
                        m = self.ptc.CRE_DATE3.search(s, match.start())
                        valid_date = True
                        break

                # String date format
                if valid_date:
                    self.dateStrFlag = True
                    self.dateFlag = 1
                    if (m.group('date') != s):
                        # capture remaining string
                        parseStr = m.group('date')
                        chunk1 = s[:m.start('date')]
                        chunk2 = s[m.end('date'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Standard date format
                m = self.ptc.CRE_DATE.search(s)
                if m is not None:
                    self.dateStdFlag = True
                    self.dateFlag = 1
                    if (m.group('date') != s):
                        # capture remaining string
                        parseStr = m.group('date')
                        chunk1 = s[:m.start('date')]
                        chunk2 = s[m.end('date'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Natural language day strings
                m = self.ptc.CRE_DAY.search(s)
                if m is not None:
                    self.dayStrFlag = True
                    self.dateFlag = 1
                    if (m.group('day') != s):
                        # capture remaining string
                        parseStr = m.group('day')
                        chunk1 = s[:m.start('day')]
                        chunk2 = s[m.end('day'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Quantity + Units
                m = self.ptc.CRE_UNITS.search(s)
                if m is not None:
                    self.unitsFlag = True
                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        # a '-' directly in front of the quantity is a
                        # sign: move it from chunk1 onto parseStr
                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Quantity + single character units
                m = self.ptc.CRE_QUNITS.search(s)
                if m is not None:
                    self.qunitsFlag = True
                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        # a '-' directly in front of the quantity is a
                        # sign: move it from chunk1 onto parseStr
                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # Weekday
                m = self.ptc.CRE_WEEKDAY.search(s)
                if m is not None:
                    gv = m.group('weekday')
                    # text that is itself a day-offset word is not
                    # handled as a weekday
                    if s not in self.ptc.dayOffsets:
                        self.weekdyFlag = True
                        self.dateFlag = 1
                        if (gv != s):
                            # capture remaining string
                            parseStr = gv
                            chunk1 = s[:m.start('weekday')]
                            chunk2 = s[m.end('weekday'):]
                            s = '%s %s' % (chunk1, chunk2)
                            flag = True
                        else:
                            parseStr = s

            if parseStr == '':
                # Natural language time strings
                m = self.ptc.CRE_TIME.search(s)
                if m is not None:
                    self.timeStrFlag = True
                    self.timeFlag = 2
                    if (m.group('time') != s):
                        # capture remaining string
                        parseStr = m.group('time')
                        chunk1 = s[:m.start('time')]
                        chunk2 = s[m.end('time'):]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

            if parseStr == '':
                # HH:MM(:SS) am/pm time strings
                m = self.ptc.CRE_TIMEHMS2.search(s)
                if m is not None:
                    self.meridianFlag = True
                    self.timeFlag = 2
                    # rebuild the time text from the captured groups so
                    # parseStr has one predictable layout
                    if m.group('minutes') is not None:
                        if m.group('seconds') is not None:
                            parseStr = '%s:%s:%s %s' % (m.group('hours'),
                                                        m.group('minutes'),
                                                        m.group('seconds'),
                                                        m.group('meridian'))
                        else:
                            parseStr = '%s:%s %s' % (m.group('hours'),
                                                     m.group('minutes'),
                                                     m.group('meridian'))
                    else:
                        parseStr = '%s %s' % (m.group('hours'),
                                              m.group('meridian'))

                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('meridian'):]

                    s = '%s %s' % (chunk1, chunk2)
                    flag = True

            if parseStr == '':
                # HH:MM(:SS) time strings
                m = self.ptc.CRE_TIMEHMS.search(s)
                if m is not None:
                    self.timeStdFlag = True
                    self.timeFlag = 2
                    if m.group('seconds') is not None:
                        parseStr = '%s:%s:%s' % (m.group('hours'),
                                                 m.group('minutes'),
                                                 m.group('seconds'))
                        chunk1 = s[:m.start('hours')]
                        chunk2 = s[m.end('seconds'):]
                    else:
                        parseStr = '%s:%s' % (m.group('hours'),
                                              m.group('minutes'))
                        chunk1 = s[:m.start('hours')]
                        chunk2 = s[m.end('minutes'):]

                    s = '%s %s' % (chunk1, chunk2)
                    flag = True

            # if string does not match any regex, empty string to
            # come out of the while loop
            if not flag:
                s = ''

            log.debug('parse (bottom) [%s][%s][%s][%s]' % (s, parseStr, chunk1, chunk2))
            log.debug('weekday %s, dateStd %s, dateStr %s, time %s, timeStr %s, meridian %s' % \
                      (self.weekdyFlag, self.dateStdFlag, self.dateStrFlag, self.timeStdFlag, self.timeStrFlag, self.meridianFlag))
            log.debug('dayStr %s, modifier %s, modifier2 %s, units %s, qunits %s' % \
                      (self.dayStrFlag, self.modifierFlag, self.modifier2Flag, self.unitsFlag, self.qunitsFlag))

            # evaluate the matched string
            if parseStr != '':
                # NOTE(review): modifierFlag / modifier2Flag are never
                # cleared in this method - presumably the _evalModifier*
                # helpers reset them; verify.
                if self.modifierFlag == True:
                    t, totalTime = self._evalModifier(parseStr, chunk1, chunk2, totalTime)
                    # t is the unparsed part of the chunks.
                    # If it is not date/time, return current
                    # totalTime as it is; else return the output
                    # after parsing t.
                    if (t != '') and (t != None):
                        # the recursive call resets the date/time flags,
                        # so keep a copy to restore if it finds nothing
                        tempDateFlag = self.dateFlag
                        tempTimeFlag = self.timeFlag
                        (totalTime2, flag) = self.parse(t, totalTime)

                        if flag == 0 and totalTime is not None:
                            self.timeFlag = tempTimeFlag
                            self.dateFlag = tempDateFlag

                            return (totalTime, self.dateFlag + self.timeFlag)
                        else:
                            return (totalTime2, self.dateFlag + self.timeFlag)

                elif self.modifier2Flag == True:
                    totalTime, invalidFlag = self._evalModifier2(parseStr, chunk1, chunk2, totalTime)

                    if invalidFlag == True:
                        self.dateFlag = 0
                        self.timeFlag = 0

                else:
                    totalTime = self._evalString(parseStr, totalTime)
                    # clear parseStr so the next pass looks for a new match
                    parseStr = ''

        # String is not parsed at all
        # (also taken when the result is equal to the supplied base time)
        if totalTime is None or totalTime == sourceTime:
            totalTime = time.localtime()
            self.dateFlag = 0
            self.timeFlag = 0

        return (totalTime, self.dateFlag + self.timeFlag)
  1247. def inc(self, source, month=None, year=None):
  1248. """
  1249. Takes the given C{source} date, or current date if none is
  1250. passed, and increments it according to the values passed in
  1251. by month and/or year.
  1252. This routine is needed because Python's C{timedelta()} function
  1253. does not allow for month or year increments.
  1254. @type source: struct_time
  1255. @param source: C{struct_time} value to increment
  1256. @type month: integer
  1257. @param month: optional number of months to increment
  1258. @type year: integer
  1259. @param year: optional number of years to increment
  1260. @rtype: datetime
  1261. @return: C{source} incremented by the number of months and/or years
  1262. """
  1263. yr = source.year
  1264. mth = source.month
  1265. dy = source.day
  1266. if year:
  1267. try:
  1268. yi = int(year)
  1269. except ValueError:
  1270. yi = 0
  1271. yr += yi
  1272. if month:
  1273. try:
  1274. mi = int(month)
  1275. except ValueError:
  1276. mi = 0
  1277. m = abs(mi)
  1278. y = m // 12 # how many years are in month increment
  1279. m = m % 12 # get remaining months
  1280. if mi < 0:
  1281. mth = mth - m # sub months from start month
  1282. if mth < 1: # cross start-of-year?
  1283. y -= 1 # yes - decrement year
  1284. mth += 12 # and fix month
  1285. else:
  1286. mth = mth + m # add months to start month
  1287. if mth > 12: # cross end-of-year?
  1288. y += 1 # yes - increment year
  1289. mth -= 12 # and fix month
  1290. yr += y
  1291. # if the day ends up past the last day of
  1292. # the new month, set it to the last day
  1293. if dy > self.ptc.daysInMonth(mth, yr):
  1294. dy = self.ptc.daysInMonth(mth, yr)
  1295. d = source.replace(year=yr, month=mth, day=dy)
  1296. return source + (d - source)
  1297. def _initSymbols(ptc):
  1298. """
  1299. Initialize symbols and single character constants.
  1300. """
  1301. # build am and pm lists to contain
  1302. # original case, lowercase and first-char
  1303. # versions of the meridian text
  1304. if len(ptc.locale.meridian) > 0:
  1305. am = ptc.locale.meridian[0]
  1306. ptc.am = [ am ]
  1307. if len(am) > 0:
  1308. ptc.am.append(am[0])
  1309. am = am.lower()
  1310. ptc.am.append(am)
  1311. ptc.am.append(am[0])
  1312. else:
  1313. am = ''
  1314. ptc.am = [ '', '' ]
  1315. if len(ptc.locale.meridian) > 1:
  1316. pm = ptc.locale.meridian[1]
  1317. ptc.pm = [ pm ]
  1318. if len(pm) > 0:
  1319. ptc.pm.append(pm[0])
  1320. pm = pm.lower()
  1321. ptc.pm.append(pm)
  1322. ptc.pm.append(pm[0])
  1323. else:
  1324. pm = ''
  1325. ptc.pm = [ '', '' ]
  1326. class Constants(object):
  1327. """
  1328. Default set of constants for parsedatetime.
  1329. If PyICU is present, then the class will first try to get PyICU
  1330. to return a locale specified by C{localeID}. If either C{localeID} is
  1331. None or if the locale does not exist within PyICU, then each of the
  1332. locales defined in C{fallbackLocales} is tried in order.
  1333. If PyICU is not present or none of the specified locales can be used,
  1334. then the class will initialize itself to the en_US locale.
  1335. if PyICU is not present or not requested, only the locales defined by
  1336. C{pdtLocales} will be searched.
  1337. """
  1338. def __init__(self, localeID=None, usePyICU=True, fallbackLocales=['en_US']):
  1339. self.localeID = localeID
  1340. self.fallbackLocales = fallbackLocales
  1341. if 'en_US' not in self.fallbackLocales:
  1342. self.fallbackLocales.append('en_US')
  1343. # define non-locale specific constants
  1344. self.locale = None
  1345. self.usePyICU = usePyICU
  1346. # starting cache of leap years
  1347. # daysInMonth will add to this if during
  1348. # runtime it gets a request for a year not found
  1349. self._leapYears = [ 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944,
  1350. 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988,
  1351. 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028, 2032,
  1352. 2036, 2040, 2044, 2048, 2052, 2056, 2060, 2064, 2068, 2072, 2076,
  1353. 2080, 2084, 2088, 2092, 2096 ]
  1354. self.Second = 1
  1355. self.Minute = 60 * self.Second
  1356. self.Hour = 60 * self.Minute
  1357. self.Day = 24 * self.Hour
  1358. self.Week = 7 * self.Day
  1359. self.Month = 30 * self.Day
  1360. self.Year = 365 * self.Day
  1361. self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
  1362. self.rangeSep = '-'
  1363. self.BirthdayEpoch = 50
  1364. # DOWParseStyle controls how we parse "Tuesday"
  1365. # If the current day was Thursday and the text to parse is "Tuesday"
  1366. # then the following table shows how each style would be returned
  1367. # -1, 0, +1
  1368. #
  1369. # Current day marked as ***
  1370. #
  1371. # Sun Mon Tue Wed Thu Fri Sat
  1372. # week -1
  1373. # current -1,0 ***
  1374. # week +1 +1
  1375. #
  1376. # If the current day was Monday and the text to parse is "Tuesday"
  1377. # then the following table shows how each style would be returned
  1378. # -1, 0, +1
  1379. #
  1380. # Sun Mon Tue Wed Thu Fri Sat
  1381. # week -1 -1
  1382. # current *** 0,+1
  1383. # week +1
  1384. self.DOWParseStyle = 1
  1385. # CurrentDOWParseStyle controls how we parse "Friday"
  1386. # If the current day was Friday and the text to parse is "Friday"
  1387. # then the following table shows how each style would be returned
  1388. # True/False. This also depends on DOWParseStyle.
  1389. #
  1390. # Current day marked as ***
  1391. #
  1392. # DOWParseStyle = 0
  1393. # Sun Mon Tue Wed Thu Fri Sat
  1394. # week -1
  1395. # current T,F
  1396. # week +1
  1397. #
  1398. # DOWParseStyle = -1
  1399. # Sun Mon Tue Wed Thu Fri Sat
  1400. # week -1 F
  1401. # current T
  1402. # week +1
  1403. #
  1404. # DOWParseStyle = +1
  1405. #
  1406. # Sun Mon Tue Wed Thu Fri Sat
  1407. # week -1
  1408. # current T
  1409. # week +1 F
  1410. self.CurrentDOWParseStyle = False
  1411. if self.usePyICU:
  1412. self.locale = pdtLocales['icu'](self.localeID)
  1413. if self.locale.icu is None:
  1414. self.usePyICU = False
  1415. self.locale = None
  1416. if self.locale is None:
  1417. if not self.localeID in pdtLocales:
  1418. for id in range(0, len(self.fallbackLocales)):
  1419. self.localeID = self.fallbackLocales[id]
  1420. if self.localeID in pdtLocales:
  1421. break
  1422. self.locale = pdtLocales[self.localeID]()
  1423. if self.locale is not None:
  1424. # escape any regex special characters that may be found
  1425. wd = tuple(map(re.escape, self.locale.Weekdays))
  1426. swd = tuple(map(re.escape, self.locale.shortWeekdays))
  1427. mth = tuple(map(re.escape, self.locale.Months))
  1428. smth = tuple(map(re.escape, self.locale.shortMonths))
  1429. self.locale.re_values['months'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % mth
  1430. self.locale.re_values['shortmonths'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % smth
  1431. self.locale.re_values['days'] = '%s|%s|%s|%s|%s|%s|%s' % wd
  1432. self.locale.re_values['shortdays'] = '%s|%s|%s|%s|%s|%s|%s' % swd
  1433. l = []
  1434. for s in self.locale.units:
  1435. l = l + self.locale.units[s]
  1436. self.locale.re_values['units'] = '|'.join(tuple(map(re.escape, l)))
  1437. l = []
  1438. lbefore = []
  1439. lafter = []
  1440. for s in self.locale.Modifiers:
  1441. l.append(s)
  1442. if self.locale.Modifiers[s] < 0:
  1443. lbefore.append(s)
  1444. elif self.locale.Modifiers[s] > 0:
  1445. lafter.append(s)
  1446. self.locale.re_values['modifiers'] = '|'.join(tuple(map(re.escape, l)))
  1447. self.locale.re_values['modifiers-before'] = '|'.join(tuple(map(re.escape, lbefore)))
  1448. self.locale.re_values['modifiers-after'] = '|'.join(tuple(map(re.escape, lafter)))
  1449. l = []
  1450. for s in self.locale.re_sources:
  1451. l.append(s)
  1452. self.locale.re_values['sources'] = '|'.join(tuple(map(re.escape, l)))
  1453. # build weekday offsets - yes, it assumes the Weekday and shortWeekday
  1454. # lists are in the same order and Mon..Sun (Python style)
  1455. o = 0
  1456. for key in self.locale.Weekdays:
  1457. self.locale.WeekdayOffsets[key] = o
  1458. o += 1
  1459. o = 0
  1460. for key in self.locale.shortWeekdays:
  1461. self.locale.WeekdayOffsets[key] = o
  1462. o += 1
  1463. # build month offsets - yes, it assumes the Months and shortMonths
  1464. # lists are in the same order and Jan..Dec
  1465. o = 1
  1466. for key in self.locale.Months:
  1467. self.locale.MonthOffsets[key] = o
  1468. o += 1
  1469. o = 1
  1470. for key in self.locale.shortMonths:
  1471. self.locale.MonthOffsets[key] = o
  1472. o += 1
  1473. # self.locale.DaySuffixes = self.locale.re_values['daysuffix'].split('|')
  1474. _initSymbols(self)
  1475. # TODO add code to parse the date formats and build the regexes up from sub-parts
  1476. # TODO find all hard-coded uses of date/time seperators
  1477. self.RE_DATE4 = r'''(?P<date>(((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?(,)?(\s)?)
  1478. (?P<mthname>(%(months)s|%(shortmonths)s))\s?
  1479. (?P<year>\d\d(\d\d)?)?
  1480. )
  1481. )''' % self.locale.re_values
  1482. # I refactored DATE3 to fix Issue 16 http://code.google.com/p/parsedatetime/issues/detail?id=16
  1483. # I suspect the final line was for a trailing time - but testing shows it's not needed
  1484. # ptc.RE_DATE3 = r'''(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s?
  1485. # ((?P<day>\d\d?)(\s?|%(daysuffix)s|$)+)?
  1486. # (,\s?(?P<year>\d\d(\d\d)?))?))
  1487. # (\s?|$|[^0-9a-zA-Z])''' % ptc.locale.re_values
  1488. self.RE_DATE3 = r'''(?P<date>(
  1489. (((?P<mthname>(%(months)s|%(shortmonths)s))|
  1490. ((?P<