PageRenderTime 35ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/parsedatetime/__init__.py

http://parsedatetime.googlecode.com/
Python | 1795 lines | 1640 code | 38 blank | 117 comment | 66 complexity | 49474631dbdd987f75b67d3bdb25ce16 MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. """
  2. parsedatetime
  3. Parse human-readable date/time text.
  4. Requires Python 2.6 or later
  5. """
  6. __author__ = 'Mike Taylor (bear@code-bear.com)'
  7. __copyright__ = 'Copyright (c) 2004 Mike Taylor'
  8. __license__ = 'Apache v2.0'
  9. __version__ = '1.0.0'
  10. __contributors__ = [ 'Darshana Chhajed',
  11. 'Michael Lim (lim.ck.michael@gmail.com)',
  12. 'Bernd Zeimetz (bzed@debian.org)',
  13. ]
  14. import re
  15. import time
  16. import datetime
  17. import calendar
  18. import logging
  19. import email.utils
  20. from . import pdt_locales
# Module-wide logger used by the debug statements in this file.
# NOTE(review): logging.getLogger() without a name returns the root logger,
# so importing this module adds a stream handler to the root logger and sets
# its level to DEBUG -- confirm that this import-time side effect is wanted.
log = logging.getLogger()
echoHandler = logging.StreamHandler()
# NOTE(review): echoFormatter is not attached to echoHandler anywhere in the
# code visible here -- confirm whether echoHandler.setFormatter() was intended.
echoFormatter = logging.Formatter('%(levelname)-8s %(message)s')
log.addHandler(echoHandler)
# log.addHandler(logging.NullHandler())
log.setLevel(logging.DEBUG)

# Objects exported by pdt_locales, keyed by locale identifier string.
pdtLocales = { 'icu': pdt_locales.pdtLocale_icu,
               'en_US': pdt_locales.pdtLocale_en,
               'en_AU': pdt_locales.pdtLocale_au,
               'es_ES': pdt_locales.pdtLocale_es,
               'de_DE': pdt_locales.pdtLocale_de,
             }
  33. # Copied from feedparser.py
  34. # Universal Feedparser
  35. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  36. # Originally a def inside of _parse_date_w3dtf()
  37. def _extract_date(m):
  38. year = int(m.group('year'))
  39. if year < 100:
  40. year = 100 * int(time.gmtime()[0] / 100) + int(year)
  41. if year < 1000:
  42. return 0, 0, 0
  43. julian = m.group('julian')
  44. if julian:
  45. julian = int(julian)
  46. month = julian / 30 + 1
  47. day = julian % 30 + 1
  48. jday = None
  49. while jday != julian:
  50. t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
  51. jday = time.gmtime(t)[-2]
  52. diff = abs(jday - julian)
  53. if jday > julian:
  54. if diff < day:
  55. day = day - diff
  56. else:
  57. month = month - 1
  58. day = 31
  59. elif jday < julian:
  60. if day + diff < 28:
  61. day = day + diff
  62. else:
  63. month = month + 1
  64. return year, month, day
  65. month = m.group('month')
  66. day = 1
  67. if month is None:
  68. month = 1
  69. else:
  70. month = int(month)
  71. day = m.group('day')
  72. if day:
  73. day = int(day)
  74. else:
  75. day = 1
  76. return year, month, day
  77. # Copied from feedparser.py
  78. # Universal Feedparser
  79. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  80. # Originally a def inside of _parse_date_w3dtf()
  81. def _extract_time(m):
  82. if not m:
  83. return 0, 0, 0
  84. hours = m.group('hours')
  85. if not hours:
  86. return 0, 0, 0
  87. hours = int(hours)
  88. minutes = int(m.group('minutes'))
  89. seconds = m.group('seconds')
  90. if seconds:
  91. seconds = int(seconds)
  92. else:
  93. seconds = 0
  94. return hours, minutes, seconds
  95. # Copied from feedparser.py
  96. # Universal Feedparser
  97. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  98. # Modified to return a tuple instead of mktime
  99. #
  100. # Original comment:
  101. # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
  102. # Drake and licensed under the Python license. Removed all range checking
  103. # for month, day, hour, minute, and second, since mktime will normalize
  104. # these later
  105. def _parse_date_w3dtf(dateString):
  106. # the __extract_date and __extract_time methods were
  107. # copied-out so they could be used by my code --bear
  108. def __extract_tzd(m):
  109. '''Return the Time Zone Designator as an offset in seconds from UTC.'''
  110. if not m:
  111. return 0
  112. tzd = m.group('tzd')
  113. if not tzd:
  114. return 0
  115. if tzd == 'Z':
  116. return 0
  117. hours = int(m.group('tzdhours'))
  118. minutes = m.group('tzdminutes')
  119. if minutes:
  120. minutes = int(minutes)
  121. else:
  122. minutes = 0
  123. offset = (hours*60 + minutes) * 60
  124. if tzd[0] == '+':
  125. return -offset
  126. return offset
  127. __date_re = ('(?P<year>\d\d\d\d)'
  128. '(?:(?P<dsep>-|)'
  129. '(?:(?P<julian>\d\d\d)'
  130. '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
  131. __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
  132. __tzd_rx = re.compile(__tzd_re)
  133. __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
  134. '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
  135. + __tzd_re)
  136. __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
  137. __datetime_rx = re.compile(__datetime_re)
  138. m = __datetime_rx.match(dateString)
  139. if (m is None) or (m.group() != dateString): return
  140. return _extract_date(m) + _extract_time(m) + (0, 0, 0)
  141. _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
  142. 'aug', 'sep', 'oct', 'nov', 'dec',
  143. 'january', 'february', 'march', 'april', 'may', 'june', 'july',
  144. 'august', 'september', 'october', 'november', 'december']
  145. _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
  146. # Copied from feedparser.py
  147. # Universal Feedparser
  148. # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
  149. # Modified to return a tuple instead of mktime
  150. #
  151. def _parse_date_rfc822(dateString):
  152. '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date'''
  153. data = dateString.split()
  154. if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
  155. del data[0]
  156. if len(data) == 4:
  157. s = data[3]
  158. i = s.find('+')
  159. if i > 0:
  160. data[3:] = [s[:i], s[i+1:]]
  161. else:
  162. data.append('')
  163. dateString = " ".join(data)
  164. if len(data) < 5:
  165. dateString += ' 00:00:00 GMT'
  166. return email.utils.parsedate_tz(dateString)
  167. # # rfc822.py defines several time zones, but we define some extra ones.
  168. # # 'ET' is equivalent to 'EST', etc.
  169. # _additional_timezones = {'AT': -400, 'ET': -500,
  170. # 'CT': -600, 'MT': -700,
  171. # 'PT': -800}
  172. # email.utils._timezones.update(_additional_timezones)
  173. class Calendar:
  174. """
  175. A collection of routines to input, parse and manipulate date and times.
  176. The text can either be 'normal' date values or it can be human readable.
  177. """
  178. def __init__(self, constants=None):
  179. """
  180. Default constructor for the L{Calendar} class.
  181. @type constants: object
  182. @param constants: Instance of the class L{parsedatetime_consts.Constants}
  183. @rtype: object
  184. @return: L{Calendar} instance
  185. """
  186. # if a constants reference is not included, use default
  187. if constants is None:
  188. self.ptc = Constants()
  189. else:
  190. self.ptc = constants
  191. self.weekdyFlag = False # monday/tuesday/...
  192. self.dateStdFlag = False # 07/21/06
  193. self.dateStrFlag = False # July 21st, 2006
  194. self.timeStdFlag = False # 5:50
  195. self.meridianFlag = False # am/pm
  196. self.dayStrFlag = False # tomorrow/yesterday/today/..
  197. self.timeStrFlag = False # lunch/noon/breakfast/...
  198. self.modifierFlag = False # after/before/prev/next/..
  199. self.modifier2Flag = False # after/before/prev/next/..
  200. self.unitsFlag = False # hrs/weeks/yrs/min/..
  201. self.qunitsFlag = False # h/m/t/d..
  202. self.timeFlag = 0
  203. self.dateFlag = 0
  204. def _convertUnitAsWords(self, unitText):
  205. """
  206. Converts text units into their number value
  207. Five = 5
  208. Twenty Five = 25
  209. Two hundred twenty five = 225
  210. Two thousand and twenty five = 2025
  211. Two thousand twenty five = 2025
  212. @type unitText: string
  213. @param unitText: number text to convert
  214. @rtype: integer
  215. @return: numerical value of unitText
  216. """
  217. # TODO: implement this
  218. pass
  219. def _buildTime(self, source, quantity, modifier, units):
  220. """
  221. Take C{quantity}, C{modifier} and C{unit} strings and convert them into values.
  222. After converting, calcuate the time and return the adjusted sourceTime.
  223. @type source: time
  224. @param source: time to use as the base (or source)
  225. @type quantity: string
  226. @param quantity: quantity string
  227. @type modifier: string
  228. @param modifier: how quantity and units modify the source time
  229. @type units: string
  230. @param units: unit of the quantity (i.e. hours, days, months, etc)
  231. @rtype: struct_time
  232. @return: C{struct_time} of the calculated time
  233. """
  234. log.debug('_buildTime: [%s][%s][%s]' % (quantity, modifier, units))
  235. if source is None:
  236. source = time.localtime()
  237. if quantity is None:
  238. quantity = ''
  239. else:
  240. quantity = quantity.strip()
  241. if len(quantity) == 0:
  242. qty = 1
  243. else:
  244. try:
  245. qty = int(quantity)
  246. except ValueError:
  247. qty = 0
  248. if modifier in self.ptc.Modifiers:
  249. qty = qty * self.ptc.Modifiers[modifier]
  250. if units is None or units == '':
  251. units = 'dy'
  252. # plurals are handled by regex's (could be a bug tho)
  253. (yr, mth, dy, hr, mn, sec, _, _, _) = source
  254. start = datetime.datetime(yr, mth, dy, hr, mn, sec)
  255. target = start
  256. if units.startswith('y'):
  257. target = self.inc(start, year=qty)
  258. self.dateFlag = 1
  259. elif units.endswith('th') or units.endswith('ths'):
  260. target = self.inc(start, month=qty)
  261. self.dateFlag = 1
  262. else:
  263. if units.startswith('d'):
  264. target = start + datetime.timedelta(days=qty)
  265. self.dateFlag = 1
  266. elif units.startswith('h'):
  267. target = start + datetime.timedelta(hours=qty)
  268. self.timeFlag = 2
  269. elif units.startswith('m'):
  270. target = start + datetime.timedelta(minutes=qty)
  271. self.timeFlag = 2
  272. elif units.startswith('s'):
  273. target = start + datetime.timedelta(seconds=qty)
  274. self.timeFlag = 2
  275. elif units.startswith('w'):
  276. target = start + datetime.timedelta(weeks=qty)
  277. self.dateFlag = 1
  278. return target.timetuple()
  279. def parseDate(self, dateString):
  280. """
  281. Parse short-form date strings::
  282. '05/28/2006' or '04.21'
  283. @type dateString: string
  284. @param dateString: text to convert to a C{datetime}
  285. @rtype: struct_time
  286. @return: calculated C{struct_time} value of dateString
  287. """
  288. yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
  289. # values pulled from regex's will be stored here and later
  290. # assigned to mth, dy, yr based on information from the locale
  291. # -1 is used as the marker value because we want zero values
  292. # to be passed thru so they can be flagged as errors later
  293. v1 = -1
  294. v2 = -1
  295. v3 = -1
  296. s = dateString
  297. m = self.ptc.CRE_DATE2.search(s)
  298. if m is not None:
  299. index = m.start()
  300. v1 = int(s[:index])
  301. s = s[index + 1:]
  302. m = self.ptc.CRE_DATE2.search(s)
  303. if m is not None:
  304. index = m.start()
  305. v2 = int(s[:index])
  306. v3 = int(s[index + 1:])
  307. else:
  308. v2 = int(s.strip())
  309. v = [ v1, v2, v3 ]
  310. d = { 'm': mth, 'd': dy, 'y': yr }
  311. for i in range(0, 3):
  312. n = v[i]
  313. c = self.ptc.dp_order[i]
  314. if n >= 0:
  315. d[c] = n
  316. # if the year is not specified and the date has already
  317. # passed, increment the year
  318. if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])):
  319. yr = d['y'] + 1
  320. else:
  321. yr = d['y']
  322. mth = d['m']
  323. dy = d['d']
  324. # birthday epoch constraint
  325. if yr < self.ptc.BirthdayEpoch:
  326. yr += 2000
  327. elif yr < 100:
  328. yr += 1900
  329. log.debug('parseDate: %s %s %s %s' % (yr, mth, dy, self.ptc.daysInMonth(mth, yr)))
  330. if (mth > 0 and mth <= 12) and \
  331. (dy > 0 and dy <= self.ptc.daysInMonth(mth, yr)):
  332. sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
  333. else:
  334. self.dateFlag = 0
  335. self.timeFlag = 0
  336. sourceTime = time.localtime() # return current time if date
  337. # string is invalid
  338. return sourceTime
  339. def parseDateText(self, dateString):
  340. """
  341. Parse long-form date strings::
  342. 'May 31st, 2006'
  343. 'Jan 1st'
  344. 'July 2006'
  345. @type dateString: string
  346. @param dateString: text to convert to a datetime
  347. @rtype: struct_time
  348. @return: calculated C{struct_time} value of dateString
  349. """
  350. yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
  351. currentMth = mth
  352. currentDy = dy
  353. s = dateString.lower()
  354. m = self.ptc.CRE_DATE3.search(s)
  355. mth = m.group('mthname')
  356. mth = self.ptc.MonthOffsets[mth]
  357. if m.group('day') != None:
  358. dy = int(m.group('day'))
  359. else:
  360. dy = 1
  361. if m.group('year') != None:
  362. yr = int(m.group('year'))
  363. # birthday epoch constraint
  364. if yr < self.ptc.BirthdayEpoch:
  365. yr += 2000
  366. elif yr < 100:
  367. yr += 1900
  368. elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
  369. # if that day and month have already passed in this year,
  370. # then increment the year by 1
  371. yr += 1
  372. if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
  373. sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
  374. else:
  375. # Return current time if date string is invalid
  376. self.dateFlag = 0
  377. self.timeFlag = 0
  378. sourceTime = time.localtime()
  379. return sourceTime
  380. def evalRanges(self, datetimeString, sourceTime=None):
  381. """
  382. Evaluate the C{datetimeString} text and determine if
  383. it represents a date or time range.
  384. @type datetimeString: string
  385. @param datetimeString: datetime text to evaluate
  386. @type sourceTime: struct_time
  387. @param sourceTime: C{struct_time} value to use as the base
  388. @rtype: tuple
  389. @return: tuple of: start datetime, end datetime and the invalid flag
  390. """
  391. startTime = ''
  392. endTime = ''
  393. startDate = ''
  394. endDate = ''
  395. rangeFlag = 0
  396. s = datetimeString.strip().lower()
  397. if self.ptc.rangeSep in s:
  398. s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep)
  399. s = s.replace(' ', ' ')
  400. m = self.ptc.CRE_TIMERNG1.search(s)
  401. if m is not None:
  402. rangeFlag = 1
  403. else:
  404. m = self.ptc.CRE_TIMERNG2.search(s)
  405. if m is not None:
  406. rangeFlag = 2
  407. else:
  408. m = self.ptc.CRE_TIMERNG4.search(s)
  409. if m is not None:
  410. rangeFlag = 7
  411. else:
  412. m = self.ptc.CRE_TIMERNG3.search(s)
  413. if m is not None:
  414. rangeFlag = 3
  415. else:
  416. m = self.ptc.CRE_DATERNG1.search(s)
  417. if m is not None:
  418. rangeFlag = 4
  419. else:
  420. m = self.ptc.CRE_DATERNG2.search(s)
  421. if m is not None:
  422. rangeFlag = 5
  423. else:
  424. m = self.ptc.CRE_DATERNG3.search(s)
  425. if m is not None:
  426. rangeFlag = 6
  427. log.debug('evalRanges: rangeFlag = %s [%s]' % (rangeFlag, s))
  428. if m is not None:
  429. if (m.group() != s):
  430. # capture remaining string
  431. parseStr = m.group()
  432. chunk1 = s[:m.start()]
  433. chunk2 = s[m.end():]
  434. s = '%s %s' % (chunk1, chunk2)
  435. flag = 1
  436. sourceTime, flag = self.parse(s, sourceTime)
  437. if flag == 0:
  438. sourceTime = None
  439. else:
  440. parseStr = s
  441. if rangeFlag == 1:
  442. m = re.search(self.ptc.rangeSep, parseStr)
  443. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  444. endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  445. if (eflag != 0) and (sflag != 0):
  446. return (startTime, endTime, 2)
  447. elif rangeFlag == 2:
  448. m = re.search(self.ptc.rangeSep, parseStr)
  449. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  450. endTime, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  451. if (eflag != 0) and (sflag != 0):
  452. return (startTime, endTime, 2)
  453. elif rangeFlag == 3 or rangeFlag == 7:
  454. m = re.search(self.ptc.rangeSep, parseStr)
  455. # capturing the meridian from the end time
  456. if self.ptc.usesMeridian:
  457. ampm = re.search(self.ptc.am[0], parseStr)
  458. # appending the meridian to the start time
  459. if ampm is not None:
  460. startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[0]), sourceTime)
  461. else:
  462. startTime, sflag = self.parse((parseStr[:m.start()] + self.ptc.meridian[1]), sourceTime)
  463. else:
  464. startTime, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  465. endTime, eflag = self.parse(parseStr[(m.start() + 1):], sourceTime)
  466. if (eflag != 0) and (sflag != 0):
  467. return (startTime, endTime, 2)
  468. elif rangeFlag == 4:
  469. m = re.search(self.ptc.rangeSep, parseStr)
  470. startDate, sflag = self.parse((parseStr[:m.start()]), sourceTime)
  471. endDate, eflag = self.parse((parseStr[(m.start() + 1):]), sourceTime)
  472. if (eflag != 0) and (sflag != 0):
  473. return (startDate, endDate, 1)
  474. elif rangeFlag == 5:
  475. m = re.search(self.ptc.rangeSep, parseStr)
  476. endDate = parseStr[(m.start() + 1):]
  477. # capturing the year from the end date
  478. date = self.ptc.CRE_DATE3.search(endDate)
  479. endYear = date.group('year')
  480. # appending the year to the start date if the start date
  481. # does not have year information and the end date does.
  482. # eg : "Aug 21 - Sep 4, 2007"
  483. if endYear is not None:
  484. startDate = (parseStr[:m.start()]).strip()
  485. date = self.ptc.CRE_DATE3.search(startDate)
  486. startYear = date.group('year')
  487. if startYear is None:
  488. startDate = startDate + ', ' + endYear
  489. else:
  490. startDate = parseStr[:m.start()]
  491. startDate, sflag = self.parse(startDate, sourceTime)
  492. endDate, eflag = self.parse(endDate, sourceTime)
  493. if (eflag != 0) and (sflag != 0):
  494. return (startDate, endDate, 1)
  495. elif rangeFlag == 6:
  496. m = re.search(self.ptc.rangeSep, parseStr)
  497. startDate = parseStr[:m.start()]
  498. # capturing the month from the start date
  499. mth = self.ptc.CRE_DATE3.search(startDate)
  500. mth = mth.group('mthname')
  501. # appending the month name to the end date
  502. endDate = mth + parseStr[(m.start() + 1):]
  503. startDate, sflag = self.parse(startDate, sourceTime)
  504. endDate, eflag = self.parse(endDate, sourceTime)
  505. if (eflag != 0) and (sflag != 0):
  506. return (startDate, endDate, 1)
  507. else:
  508. # if range is not found
  509. sourceTime = time.localtime()
  510. return (sourceTime, sourceTime, 0)
  511. def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
  512. """
  513. Based on the C{style} and C{currentDayStyle} determine what
  514. day-of-week value is to be returned.
  515. @type wd: integer
  516. @param wd: day-of-week value for the current day
  517. @type wkdy: integer
  518. @param wkdy: day-of-week value for the parsed day
  519. @type offset: integer
  520. @param offset: offset direction for any modifiers (-1, 0, 1)
  521. @type style: integer
  522. @param style: normally the value set in C{Constants.DOWParseStyle}
  523. @type currentDayStyle: integer
  524. @param currentDayStyle: normally the value set in C{Constants.CurrentDOWParseStyle}
  525. @rtype: integer
  526. @return: calculated day-of-week
  527. """
  528. if offset == 1:
  529. # modifier is indicating future week eg: "next".
  530. # DOW is calculated as DOW of next week
  531. diff = 7 - wd + wkdy
  532. elif offset == -1:
  533. # modifier is indicating past week eg: "last","previous"
  534. # DOW is calculated as DOW of previous week
  535. diff = wkdy - wd - 7
  536. elif offset == 0:
  537. # modifier is indiacting current week eg: "this"
  538. # DOW is calculated as DOW of this week
  539. diff = wkdy - wd
  540. elif offset == 2:
  541. # no modifier is present.
  542. # i.e. string to be parsed is just DOW
  543. if style == 1:
  544. # next occurance of the DOW is calculated
  545. if currentDayStyle == True:
  546. if wkdy >= wd:
  547. diff = wkdy - wd
  548. else:
  549. diff = 7 - wd + wkdy
  550. else:
  551. if wkdy > wd:
  552. diff = wkdy - wd
  553. else:
  554. diff = 7 - wd + wkdy
  555. elif style == -1:
  556. # last occurance of the DOW is calculated
  557. if currentDayStyle == True:
  558. if wkdy <= wd:
  559. diff = wkdy - wd
  560. else:
  561. diff = wkdy - wd - 7
  562. else:
  563. if wkdy < wd:
  564. diff = wkdy - wd
  565. else:
  566. diff = wkdy - wd - 7
  567. else:
  568. # occurance of the DOW in the current week is calculated
  569. diff = wkdy - wd
  570. log.debug("wd %s, wkdy %s, offset %d, style %d" % (wd, wkdy, offset, style))
  571. return diff
    def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
        """
        Evaluate the C{modifier} string and following text (passed in
        as C{chunk1} and C{chunk2}) and if they match any known modifiers
        calculate the delta and apply it to C{sourceTime}.

        The leading word of C{chunk2} is treated as the unit.  It is tested,
        in order, against the month/week/day/hour/year unit names, the
        C{eom}/C{eoy} modifiers, the weekday pattern and the time pattern.
        Each step is skipped once an earlier one has set the local C{flag}.

        @type  modifier:   string
        @param modifier:   modifier text to apply to sourceTime
        @type  chunk1:     string
        @param chunk1:     first text chunk that followed modifier (if any)
        @type  chunk2:     string
        @param chunk2:     second text chunk that followed modifier (if any)
        @type  sourceTime: struct_time
        @param sourceTime: C{struct_time} value to use as the base

        @rtype:  tuple
        @return: tuple of: remaining text and the modified sourceTime
        """
        # numeric offset registered for this modifier in the constants object
        offset = self.ptc.Modifiers[modifier]

        if sourceTime is not None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()

        # capture the units after the modifier and the remaining
        # string after the unit
        m = self.ptc.CRE_REMAINING.search(chunk2)
        if m is not None:
            # skip one character past the start of the match
            index = m.start() + 1
            unit = chunk2[:m.start()]
            chunk2 = chunk2[index:]
        else:
            unit = chunk2
            chunk2 = ''

        # becomes True as soon as one of the handlers below consumed the unit
        flag = False

        log.debug("modifier [%s] chunk1 [%s] chunk2 [%s] unit [%s] flag %s" % (modifier, chunk1, chunk2, unit, flag))

        if unit == 'month' or \
           unit == 'mth' or \
           unit == 'm':
            if offset == 0:
                # last day of the current month at 09:00
                dy = self.ptc.daysInMonth(mth, yr)
                sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
            elif offset == 2:
                # if day is the last day of the month, calculate the last day
                # of the next month
                # NOTE(review): mth + 1 is 13 when mth is 12, and dy is then
                # used together with the *current* month below, which can
                # be an invalid date (e.g. day 31 in a 28-day month) --
                # confirm how daysInMonth and self.inc are meant to cope.
                if dy == self.ptc.daysInMonth(mth, yr):
                    dy = self.ptc.daysInMonth(mth + 1, yr)
                start = datetime.datetime(yr, mth, dy, 9, 0, 0)
                target = self.inc(start, month=1)
                sourceTime = target.timetuple()
            else:
                # first day of the month, shifted by offset months, at 09:00
                start = datetime.datetime(yr, mth, 1, 9, 0, 0)
                target = self.inc(start, month=offset)
                sourceTime = target.timetuple()
            flag = True
            self.dateFlag = 1

        if unit == 'week' or \
           unit == 'wk' or \
           unit == 'w':
            if offset == 0:
                # move to weekday index 4 of the current week at 17:00
                start = datetime.datetime(yr, mth, dy, 17, 0, 0)
                target = start + datetime.timedelta(days=(4 - wd))
                sourceTime = target.timetuple()
            elif offset == 2:
                # seven days ahead at 09:00
                start = datetime.datetime(yr, mth, dy, 9, 0, 0)
                target = start + datetime.timedelta(days=7)
                sourceTime = target.timetuple()
            else:
                # re-run with "monday" as the unit so the weekday branch
                # further down resolves it
                return self._evalModifier(modifier, chunk1, "monday " + chunk2, sourceTime)
            flag = True
            self.dateFlag = 1

        if unit == 'day' or \
           unit == 'dy' or \
           unit == 'd':
            if offset == 0:
                # same day at 17:00
                sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
                self.timeFlag = 2
            elif offset == 2:
                # one day ahead, keeping the time of day
                start = datetime.datetime(yr, mth, dy, hr, mn, sec)
                target = start + datetime.timedelta(days=1)
                sourceTime = target.timetuple()
            else:
                # offset days away at 09:00
                start = datetime.datetime(yr, mth, dy, 9, 0, 0)
                target = start + datetime.timedelta(days=offset)
                sourceTime = target.timetuple()
            flag = True
            self.dateFlag = 1

        if unit == 'hour' or \
           unit == 'hr':
            if offset == 0:
                # top of the current hour
                sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
            else:
                # top of the hour, shifted by offset hours
                start = datetime.datetime(yr, mth, dy, hr, 0, 0)
                target = start + datetime.timedelta(hours=offset)
                sourceTime = target.timetuple()
            flag = True
            self.timeFlag = 2

        if unit == 'year' or \
           unit == 'yr' or \
           unit == 'y':
            if offset == 0:
                # December 31st of the current year, time of day unchanged
                sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
            elif offset == 2:
                # same date and time one year ahead
                sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
            else:
                # January 1st at 09:00 of the year offset years away
                sourceTime = (yr + offset, 1, 1, 9, 0, 0, wd, yd, isdst)
            flag = True
            self.dateFlag = 1

        if not flag:
            if modifier == 'eom':
                # last day of the current month at 09:00
                self.modifierFlag = False
                dy = self.ptc.daysInMonth(mth, yr)
                sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
                self.dateFlag = 2
                flag = True
            elif modifier == 'eoy':
                # last day of December at 09:00
                self.modifierFlag = False
                mth = 12
                dy = self.ptc.daysInMonth(mth, yr)
                sourceTime = (yr, mth, dy, 9, 0, 0, wd, yd, isdst)
                self.dateFlag = 2
                flag = True

        if not flag:
            # is the unit a weekday name?
            m = self.ptc.CRE_WEEKDAY.match(unit)
            if m is not None:
                wkdy = m.group()
                self.dateFlag = 1
                if modifier == 'eod':
                    # Calculate the upcoming weekday
                    self.modifierFlag = False
                    (sourceTime, _) = self.parse(wkdy, sourceTime)
                    sources = self.ptc.buildSources(sourceTime)
                    self.timeFlag = 2
                    if modifier in sources:
                        sourceTime = sources[modifier]
                else:
                    # shift to the requested weekday at 09:00
                    wkdy = self.ptc.WeekdayOffsets[wkdy]
                    diff = self._CalculateDOWDelta(wd, wkdy, offset,
                                                   self.ptc.DOWParseStyle,
                                                   self.ptc.CurrentDOWParseStyle)
                    start = datetime.datetime(yr, mth, dy, 9, 0, 0)
                    target = start + datetime.timedelta(days=diff)
                    sourceTime = target.timetuple()
                flag = True
                self.dateFlag = 1

        if not flag:
            # is the unit a time value?
            m = self.ptc.CRE_TIME.match(unit)
            if m is not None:
                self.modifierFlag = False
                # note: the unit is parsed without passing sourceTime
                (yr, mth, dy, hr, mn, sec, wd, yd, isdst), _ = self.parse(unit)
                start = datetime.datetime(yr, mth, dy, hr, mn, sec)
                target = start + datetime.timedelta(days=offset)
                sourceTime = target.timetuple()
                flag = True
            else:
                self.modifierFlag = False
                # check if the remaining text is parsable and if so,
                # use it as the base time for the modifier source time
                t, flag2 = self.parse('%s %s' % (chunk1, unit), sourceTime)
                if flag2 != 0:
                    sourceTime = t
                sources = self.ptc.buildSources(sourceTime)
                if modifier in sources:
                    sourceTime = sources[modifier]
                    flag = True
                    self.timeFlag = 2

        # if the word after next is a number, the string is more than likely
        # to be "next 4 hrs" which we will have to combine the units with the
        # rest of the string
        if not flag:
            if offset < 0:
                # if offset is negative, the unit has to be made negative
                unit = '-%s' % unit
            chunk2 = '%s %s' % (unit, chunk2)

        self.modifierFlag = False

        #return '%s %s' % (chunk1, chunk2), sourceTime
        return '%s' % chunk2, sourceTime
  746. def _evalModifier2(self, modifier, chunk1 , chunk2, sourceTime):
  747. """
  748. Evaluate the C{modifier} string and following text (passed in
  749. as C{chunk1} and C{chunk2}) and if they match any known modifiers
  750. calculate the delta and apply it to C{sourceTime}.
  751. @type modifier: string
  752. @param modifier: modifier text to apply to C{sourceTime}
  753. @type chunk1: string
  754. @param chunk1: first text chunk that followed modifier (if any)
  755. @type chunk2: string
  756. @param chunk2: second text chunk that followed modifier (if any)
  757. @type sourceTime: struct_time
  758. @param sourceTime: C{struct_time} value to use as the base
  759. @rtype: tuple
  760. @return: tuple of: remaining text and the modified sourceTime
  761. """
  762. offset = self.ptc.Modifiers[modifier]
  763. digit = r'\d+'
  764. self.modifier2Flag = False
  765. # If the string after the negative modifier starts with digits,
  766. # then it is likely that the string is similar to ' before 3 days'
  767. # or 'evening prior to 3 days'.
  768. # In this case, the total time is calculated by subtracting '3 days'
  769. # from the current date.
  770. # So, we have to identify the quantity and negate it before parsing
  771. # the string.
  772. # This is not required for strings not starting with digits since the
  773. # string is enough to calculate the sourceTime
  774. if chunk2 != '':
  775. if offset < 0:
  776. m = re.match(digit, chunk2.strip())
  777. if m is not None:
  778. qty = int(m.group()) * -1
  779. chunk2 = chunk2[m.end():]
  780. chunk2 = '%d%s' % (qty, chunk2)
  781. sourceTime, flag1 = self.parse(chunk2, sourceTime)
  782. if flag1 == 0:
  783. flag1 = True
  784. else:
  785. flag1 = False
  786. flag2 = False
  787. else:
  788. flag1 = False
  789. if chunk1 != '':
  790. if offset < 0:
  791. m = re.search(digit, chunk1.strip())
  792. if m is not None:
  793. qty = int(m.group()) * -1
  794. chunk1 = chunk1[m.end():]
  795. chunk1 = '%d%s' % (qty, chunk1)
  796. tempDateFlag = self.dateFlag
  797. tempTimeFlag = self.timeFlag
  798. sourceTime2, flag2 = self.parse(chunk1, sourceTime)
  799. else:
  800. return sourceTime, (flag1 and flag2)
  801. # if chunk1 is not a datetime and chunk2 is then do not use datetime
  802. # value returned by parsing chunk1
  803. if not (flag1 == False and flag2 == 0):
  804. sourceTime = sourceTime2
  805. else:
  806. self.timeFlag = tempTimeFlag
  807. self.dateFlag = tempDateFlag
  808. return sourceTime, (flag1 and flag2)
  def _evalString(self, datetimeString, sourceTime=None):
      """
      Calculate the datetime based on flags set by the L{parse()} routine

      Every parser flag found set on the instance (C{meridianFlag},
      C{timeStdFlag}, C{dateStdFlag}, C{dateStrFlag}, C{weekdyFlag},
      C{timeStrFlag}, C{dayStrFlag}, C{unitsFlag}, C{qunitsFlag}) is
      evaluated in that order and reset to C{False} afterwards.
      C{dateFlag} and C{timeFlag} are reset to 0 when the text turns out
      to be an invalid or unrecognised value.

      Examples handled::
          RFC822, W3CDTF formatted dates
          HH:MM[:SS][ am/pm]
          MM/DD/YYYY
          DD MMMM YYYY

      @type  datetimeString: string
      @param datetimeString: text to try and parse as more "traditional"
                             date/time text
      @type  sourceTime:     struct_time
      @param sourceTime:     C{struct_time} value to use as the base; when
                             C{None} the text is first tried as an RFC822
                             and then as a W3CDTF date

      @rtype:  struct_time
      @return: calculated C{struct_time} value (or equivalent 9-item
               tuple) or current C{struct_time} if not parsed
      """
      s = datetimeString.strip()
      now = time.localtime()

      # Given string date is a RFC822 date
      if sourceTime is None:
          sourceTime = _parse_date_rfc822(s)

          if sourceTime is not None:
              # the result is unpacked as ten items; the last one is dropped
              (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
              self.dateFlag = 1

              # any non-zero component means a time of day was present;
              # requiring all three to be non-zero missed e.g. 10:00:00
              if hr != 0 or mn != 0 or sec != 0:
                  self.timeFlag = 2

              sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

      # Given string date is a W3CDTF date
      if sourceTime is None:
          sourceTime = _parse_date_w3dtf(s)

          if sourceTime is not None:
              self.dateFlag = 1
              self.timeFlag = 2

      if sourceTime is None:
          s = s.lower()

      # Given string is in the format HH:MM(:SS)(am/pm)
      if self.meridianFlag:
          if sourceTime is None:
              (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
          else:
              (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

          m = self.ptc.CRE_TIMEHMS2.search(s)
          if m is not None:
              # text in front of the meridian: one or two characters are
              # read as a bare hour, anything longer as HH:MM(:SS)
              dt = s[:m.start('meridian')].strip()
              if len(dt) <= 2:
                  hr = int(dt)
                  mn = 0
                  sec = 0
              else:
                  hr, mn, sec = _extract_time(m)

              if hr == 24:
                  hr = 0

              sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
              meridian = m.group('meridian').lower()

              # if 'am' found and hour is 12 - force hour to 0 (midnight)
              if (meridian in self.ptc.am) and hr == 12:
                  sourceTime = (yr, mth, dy, 0, mn, sec, wd, yd, isdst)

              # if 'pm' found and hour < 12, add 12 to shift to evening
              if (meridian in self.ptc.pm) and hr < 12:
                  sourceTime = (yr, mth, dy, hr + 12, mn, sec, wd, yd, isdst)

          # invalid time
          if hr > 24 or mn > 59 or sec > 59:
              sourceTime = now
              self.dateFlag = 0
              self.timeFlag = 0

          self.meridianFlag = False

      # Given string is in the format HH:MM(:SS)
      if self.timeStdFlag:
          if sourceTime is None:
              (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
          else:
              (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

          m = self.ptc.CRE_TIMEHMS.search(s)
          if m is not None:
              hr, mn, sec = _extract_time(m)
          if hr == 24:
              hr = 0

          if hr > 24 or mn > 59 or sec > 59:
              # invalid time
              sourceTime = now
              self.dateFlag = 0
              self.timeFlag = 0
          else:
              sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

          self.timeStdFlag = False

      # Given string is in the format 07/21/2006
      if self.dateStdFlag:
          sourceTime = self.parseDate(s)
          self.dateStdFlag = False

      # Given string is in the format "May 23rd, 2005"
      if self.dateStrFlag:
          sourceTime = self.parseDateText(s)
          self.dateStrFlag = False

      # Given string is a weekday
      if self.weekdyFlag:
          # the weekday offset is always applied to the current local time
          (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now

          start = datetime.datetime(yr, mth, dy, hr, mn, sec)
          wkdy = self.ptc.WeekdayOffsets[s]

          # the same delta computation applies whether the requested
          # weekday lies before or after the current one
          qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                        self.ptc.DOWParseStyle,
                                        self.ptc.CurrentDOWParseStyle)

          target = start + datetime.timedelta(days=qty)
          sourceTime = target.timetuple()
          self.weekdyFlag = False

      # Given string is a natural language time string like
      # lunch, midnight, etc
      if self.timeStrFlag:
          if s in self.ptc.re_values['now']:
              sourceTime = now
          else:
              sources = self.ptc.buildSources(sourceTime)

              if s in sources:
                  sourceTime = sources[s]
              else:
                  # unknown time word - report as not parsed
                  sourceTime = now
                  self.dateFlag = 0
                  self.timeFlag = 0

          self.timeStrFlag = False

      # Given string is a natural language date string like today, tomorrow..
      if self.dayStrFlag:
          if sourceTime is None:
              sourceTime = now

          (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

          offset = self.ptc.dayOffsets[s] if s in self.ptc.dayOffsets else 0

          # day words resolve to 09:00:00 on the target day
          start = datetime.datetime(yr, mth, dy, 9, 0, 0)
          target = start + datetime.timedelta(days=offset)
          sourceTime = target.timetuple()

          self.dayStrFlag = False

      # Given string is a time string with units like "5 hrs 30 min"
      if self.unitsFlag:
          modifier = ''  # TODO

          if sourceTime is None:
              sourceTime = now

          m = self.ptc.CRE_UNITS.search(s)
          if m is not None:
              units = m.group('units')
              quantity = s[:m.start('units')]
              # only build a new time when the units were actually found,
              # otherwise units/quantity would be unbound
              sourceTime = self._buildTime(sourceTime, quantity,
                                           modifier, units)

          self.unitsFlag = False

      # Given string is a time string with single char units like "5 h 30 m"
      if self.qunitsFlag:
          modifier = ''  # TODO

          if sourceTime is None:
              sourceTime = now

          m = self.ptc.CRE_QUNITS.search(s)
          if m is not None:
              units = m.group('qunits')
              quantity = s[:m.start('qunits')]
              # same guard as for the long unit names above
              sourceTime = self._buildTime(sourceTime, quantity,
                                           modifier, units)

          self.qunitsFlag = False

      # Given string does not match anything
      if sourceTime is None:
          sourceTime = now
          self.dateFlag = 0
          self.timeFlag = 0

      return sourceTime
  975. def parse(self, datetimeString, sourceTime=None):
  976. """
  977. Splits the given C{datetimeString} into tokens, finds the regex
  978. patterns that match and then calculates a C{struct_time} value from
  979. the chunks.
  980. If C{sourceTime} is given then the C{struct_time} value will be
  981. calculated from that value, otherwise from the current date/time.
  982. If the C{datetimeString} is parsed and date/time value found then
  983. the second item of the returned tuple will be a flag to let you know
  984. what kind of C{struct_time} value is being returned::
  985. 0 = not parsed at all
  986. 1 = parsed as a C{date}
  987. 2 = parsed as a C{time}
  988. 3 = parsed as a C{datetime}
  989. @type datetimeString: string
  990. @param datetimeString: date/time text to evaluate
  991. @type sourceTime: struct_time
  992. @param sourceTime: C{struct_time} value to use as the base
  993. @rtype: tuple
  994. @return: tuple of: modified C{sourceTime} and the result flag
  995. """
  996. if sourceTime:
  997. if isinstance(sourceTime, datetime.datetime):
  998. log.debug('coercing datetime to timetuple')
  999. sourceTime = sourceTime.timetuple()
  1000. else:
  1001. if not isinstance(sourceTime, time.struct_time) and \
  1002. not isinstance(sourceTime, tuple):
  1003. raise Exception('sourceTime is not a struct_time')
  1004. s = datetimeString.strip().lower()
  1005. parseStr = ''
  1006. totalTime = sourceTime
  1007. if s == '' :
  1008. if sourceTime is not None:
  1009. return (sourceTime, self.dateFlag + self.timeFlag)
  1010. else:
  1011. return (time.localtime(), 0)
  1012. self.timeFlag = 0
  1013. self.dateFlag = 0
  1014. while len(s) > 0:
  1015. flag = False
  1016. chunk1 = ''
  1017. chunk2 = ''
  1018. log.debug('parse (top of loop): [%s][%s]' % (s, parseStr))
  1019. if parseStr == '':
  1020. # Modifier like next\prev..
  1021. m = self.ptc.CRE_MODIFIER.search(s)
  1022. if m is not None:
  1023. self.modifierFlag = True
  1024. if (m.group('modifier') != s):
  1025. # capture remaining string
  1026. parseStr = m.group('modifier')
  1027. chunk1 = s[:m.start('modifier')].strip()
  1028. chunk2 = s[m.end('modifier'):].strip()
  1029. flag = True
  1030. else:
  1031. parseStr = s
  1032. if parseStr == '':
  1033. # Modifier like from\after\prior..
  1034. m = self.ptc.CRE_MODIFIER2.search(s)
  1035. if m is not None:
  1036. self.modifier2Flag = True
  1037. if (m.group('modifier') != s):
  1038. # capture remaining string
  1039. parseStr = m.group('modifier')
  1040. chunk1 = s[:m.start('modifier')].strip()
  1041. chunk2 = s[m.end('modifier'):].strip()
  1042. flag = True
  1043. else:
  1044. parseStr = s
  1045. if parseStr == '':
  1046. valid_date = False
  1047. for match in self.ptc.CRE_DATE3.finditer(s):
  1048. # to prevent "HH:MM(:SS) time strings" expressions from triggering
  1049. # this regex, we checks if the month field exists in the searched
  1050. # expression, if it doesn't exist, the date field is not valid
  1051. if match.group('mthname'):
  1052. m = self.ptc.CRE_DATE3.search(s, match.start())
  1053. valid_date = True
  1054. break
  1055. # String date format
  1056. if valid_date:
  1057. self.dateStrFlag = True
  1058. self.dateFlag = 1
  1059. if (m.group('date') != s):
  1060. # capture remaining string
  1061. parseStr = m.group('date')
  1062. chunk1 = s[:m.start('date')]
  1063. chunk2 = s[m.end('date'):]
  1064. s = '%s %s' % (chunk1, chunk2)
  1065. flag = True
  1066. else:
  1067. parseStr = s
  1068. if parseStr == '':
  1069. # Standard date format
  1070. m = self.ptc.CRE_DATE.search(s)
  1071. if m is not None:
  1072. self.dateStdFlag = True
  1073. self.dateFlag = 1
  1074. if (m.group('date') != s):
  1075. # capture remaining string
  1076. parseStr = m.group('date')
  1077. chunk1 = s[:m.start('date')]
  1078. chunk2 = s[m.end('date'):]
  1079. s = '%s %s' % (chunk1, chunk2)
  1080. flag = True
  1081. else:
  1082. parseStr = s
  1083. if parseStr == '':
  1084. # Natural language day strings
  1085. m = self.ptc.CRE_DAY.search(s)
  1086. if m is not None:
  1087. self.dayStrFlag = True
  1088. self.dateFlag = 1
  1089. if (m.group('day') != s):
  1090. # capture remaining string
  1091. parseStr = m.group('day')
  1092. chunk1 = s[:m.start('day')]
  1093. chunk2 = s[m.end('day'):]
  1094. s = '%s %s' % (chunk1, chunk2)
  1095. flag = True
  1096. else:
  1097. parseStr = s
  1098. if parseStr == '':
  1099. # Quantity + Units
  1100. m = self.ptc.CRE_UNITS.search(s)
  1101. if m is not None:
  1102. self.unitsFlag = True
  1103. if (m.group('qty') != s):
  1104. # capture remaining string
  1105. parseStr = m.group('qty')
  1106. chunk1 = s[:m.start('qty')].strip()
  1107. chunk2 = s[m.end('qty'):].strip()
  1108. if chunk1[-1:] == '-':
  1109. parseStr = '-%s' % parseStr
  1110. chunk1 = chunk1[:-1]
  1111. s = '%s %s' % (chunk1, chunk2)
  1112. flag = True
  1113. else:
  1114. parseStr = s
  1115. if parseStr == '':
  1116. # Quantity + Units
  1117. m = self.ptc.CRE_QUNITS.search(s)
  1118. if m is not None:
  1119. self.qunitsFlag = True
  1120. if (m.group('qty') != s):
  1121. # capture remaining string
  1122. parseStr = m.group('qty')
  1123. chunk1 = s[:m.start('qty')].strip()
  1124. chunk2 = s[m.end('qty'):].strip()
  1125. if chunk1[-1:] == '-':
  1126. parseStr = '-%s' % parseStr
  1127. chunk1 = chunk1[:-1]
  1128. s = '%s %s' % (chunk1, chunk2)
  1129. flag = True
  1130. else:
  1131. parseStr = s
  1132. if parseStr == '':
  1133. # Weekday
  1134. m = self.ptc.CRE_WEEKDAY.search(s)
  1135. if m is not None:
  1136. gv = m.group('weekday')
  1137. if s not in self.ptc.dayOffsets:
  1138. self.weekdyFlag = True
  1139. self.dateFlag = 1
  1140. if (gv != s):
  1141. # capture remaining string
  1142. parseStr = gv
  1143. chunk1 = s[:m.start('weekday')]
  1144. chunk2 = s[m.end('weekday'):]
  1145. s = '%s %s' % (chunk1, chunk2)
  1146. flag = True
  1147. else:
  1148. parseStr = s
  1149. if parseStr == '':
  1150. # Natural language time strings
  1151. m = self.ptc.CRE_TIME.search(s)
  1152. if m is not None:
  1153. self.timeStrFlag = True
  1154. self.timeFlag = 2
  1155. if (m.group('time') != s):
  1156. # capture remaining string
  1157. parseStr = m.group('time')
  1158. chunk1 = s[:m.start('time')]
  1159. chunk2 = s[m.end('time'):]
  1160. s = '%s %s' % (chunk1, chunk2)
  1161. flag = True
  1162. else:
  1163. parseStr = s
  1164. if parseStr == '':
  1165. # HH:MM(:SS) am/pm time strings
  1166. m = sel

Large files are truncated, but you can click here to view the full file