/parsedatetime/pdt_locales.py

https://github.com/rmecham/parsedatetime · Python · 440 lines · 312 code · 70 blank · 58 comment · 13 complexity · e14efbe6f42204d02df3ac5e747dafb4 MD5 · raw file

  1. """
  2. pdt_locales
  3. All of the included locale classes shipped with pdt.
  4. """
  5. __author__ = 'Mike Taylor (bear@code-bear.com)'
  6. __copyright__ = 'Copyright (c) 2004 Mike Taylor'
  7. __license__ = 'Apache v2.0'
  8. __version__ = '1.0.0'
  9. __contributors__ = [ 'Darshana Chhajed',
  10. 'Michael Lim (lim.ck.michael@gmail.com)',
  11. 'Bernd Zeimetz (bzed@debian.org)',
  12. ]
  13. import datetime
  14. try:
  15. import PyICU as pyicu
  16. except:
  17. pyicu = None
  18. def lcase(x):
  19. return x.lower()
  20. class pdtLocale_base(object):
  21. """
  22. default values for Locales
  23. """
  24. locale_keys = [ 'MonthOffsets', 'Months', 'WeekdayOffsets', 'Weekdays',
  25. 'dateFormats', 'dateSep', 'dayOffsets', 'dp_order',
  26. 'localeID', 'meridian', 'Modifiers', 're_sources', 're_values',
  27. 'shortMonths', 'shortWeekdays', 'timeFormats', 'timeSep', 'units',
  28. 'uses24', 'usesMeridian', 'numbers' ]
  29. def __init__(self):
  30. self.localeID = None # don't use a unicode string
  31. self.dateSep = [ '/', '.' ]
  32. self.timeSep = [ ':' ]
  33. self.meridian = [ 'AM', 'PM' ]
  34. self.usesMeridian = True
  35. self.uses24 = True
  36. self.WeekdayOffsets = {}
  37. self.MonthOffsets = {}
  38. # always lowercase any lookup values - helper code expects that
  39. self.Weekdays = [ 'monday', 'tuesday', 'wednesday',
  40. 'thursday', 'friday', 'saturday', 'sunday',
  41. ]
  42. self.shortWeekdays = [ 'mon', 'tues', 'wed',
  43. 'th', 'fri', 'sat', 'sun',
  44. ]
  45. self.Months = [ 'january', 'february', 'march',
  46. 'april', 'may', 'june',
  47. 'july', 'august', 'september',
  48. 'october', 'november', 'december',
  49. ]
  50. self.shortMonths = [ 'jan', 'feb', 'mar',
  51. 'apr', 'may', 'jun',
  52. 'jul', 'aug', 'sep',
  53. 'oct', 'nov', 'dec',
  54. ]
  55. # use the same formats as ICU by default
  56. self.dateFormats = { 'full': 'EEEE, MMMM d, yyyy',
  57. 'long': 'MMMM d, yyyy',
  58. 'medium': 'MMM d, yyyy',
  59. 'short': 'M/d/yy',
  60. }
  61. self.timeFormats = { 'full': 'h:mm:ss a z',
  62. 'long': 'h:mm:ss a z',
  63. 'medium': 'h:mm:ss a',
  64. 'short': 'h:mm a',
  65. }
  66. self.dp_order = [ 'm', 'd', 'y' ]
  67. # Used to parse expressions like "in 5 hours"
  68. self.numbers = { 'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
  69. 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
  70. 'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13,
  71. 'fourteen': 14, 'fifteen': 15, 'sixteen': 16,
  72. 'seventeen': 17, 'eighteen': 18, 'nineteen': 19,
  73. 'twenty': 20 }
  74. # this will be added to re_values later
  75. self.units = { 'seconds': [ 'second', 'seconds', 'sec', 's' ],
  76. 'minutes': [ 'minute', 'minutes', 'min', 'm' ],
  77. 'hours': [ 'hour', 'hours', 'hr', 'h' ],
  78. 'days': [ 'day', 'days', 'dy', 'd' ],
  79. 'weeks': [ 'week', 'weeks', 'wk', 'w' ],
  80. 'months': [ 'month', 'months', 'mth' ],
  81. 'years': [ 'year', 'years', 'yr', 'y' ],
  82. }
  83. # text constants to be used by later regular expressions
  84. self.re_values = { 'specials': 'in|on|of|at',
  85. 'timeseperator': ':',
  86. 'rangeseperator': '-',
  87. 'daysuffix': 'rd|st|nd|th',
  88. 'meridian': 'am|pm|a.m.|p.m.|a|p',
  89. 'qunits': 'h|m|s|d|w|y',
  90. 'now': [ 'now' ],
  91. }
  92. # Used to adjust the returned date before/after the source
  93. self.Modifiers = { 'from': 1,
  94. 'before': -1,
  95. 'after': 1,
  96. 'ago': -1,
  97. 'prior': -1,
  98. 'prev': -1,
  99. 'last': -1,
  100. 'next': 1,
  101. 'previous': -1,
  102. 'in a': 2,
  103. 'end of': 0,
  104. 'eod': 1,
  105. 'eom': 1,
  106. 'eoy': 1,
  107. }
  108. self.dayOffsets = { 'tomorrow': 1,
  109. 'today': 0,
  110. 'yesterday': -1,
  111. }
  112. # special day and/or times, i.e. lunch, noon, evening
  113. # each element in the dictionary is a dictionary that is used
  114. # to fill in any value to be replace - the current date/time will
  115. # already have been populated by the method buildSources
  116. self.re_sources = { 'noon': { 'hr': 12, 'mn': 0, 'sec': 0 },
  117. 'lunch': { 'hr': 12, 'mn': 0, 'sec': 0 },
  118. 'morning': { 'hr': 6, 'mn': 0, 'sec': 0 },
  119. 'breakfast': { 'hr': 8, 'mn': 0, 'sec': 0 },
  120. 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 },
  121. 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 },
  122. 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 },
  123. 'night': { 'hr': 21, 'mn': 0, 'sec': 0 },
  124. 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 },
  125. 'eod': { 'hr': 17, 'mn': 0, 'sec': 0 },
  126. }
  127. class pdtLocale_icu(pdtLocale_base):
  128. """
  129. Create a locale from pyICU
  130. """
  131. def __init__(self, localeID):
  132. super( pdtLocale_icu, self ).__init__()
  133. self.icu = None
  134. if pyicu is not None:
  135. if localeID is None:
  136. localeID = 'en_US'
  137. self.icu = pyicu.Locale(localeID)
  138. if self.icu is not None:
  139. # grab spelled out format of all numbers from 0 to 100
  140. rbnf = pyicu.RuleBasedNumberFormat(pyicu.URBNFRuleSetTag.SPELLOUT, self.icu)
  141. self.numbers = dict([(rbnf.format(i), i) for i in xrange(0, 100)])
  142. self.symbols = pyicu.DateFormatSymbols(self.icu)
  143. # grab ICU list of weekdays, skipping first entry which
  144. # is always blank
  145. wd = list(map(lcase, self.symbols.getWeekdays()[1:]))
  146. swd = list(map(lcase, self.symbols.getShortWeekdays()[1:]))
  147. # store them in our list with Monday first (ICU puts Sunday first)
  148. self.Weekdays = wd[1:] + wd[0:1]
  149. self.shortWeekdays = swd[1:] + swd[0:1]
  150. self.Months = list(map(lcase, self.symbols.getMonths()))
  151. self.shortMonths = list(map(lcase, self.symbols.getShortMonths()))
  152. self.icu_df = { 'full': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kFull, self.icu),
  153. 'long': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kLong, self.icu),
  154. 'medium': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kMedium, self.icu),
  155. 'short': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kShort, self.icu),
  156. }
  157. self.icu_tf = { 'full': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kFull, self.icu),
  158. 'long': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kLong, self.icu),
  159. 'medium': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kMedium, self.icu),
  160. 'short': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kShort, self.icu),
  161. }
  162. self.dateFormats = { 'full': self.icu_df['full'].toPattern(),
  163. 'long': self.icu_df['long'].toPattern(),
  164. 'medium': self.icu_df['medium'].toPattern(),
  165. 'short': self.icu_df['short'].toPattern(),
  166. }
  167. self.timeFormats = { 'full': self.icu_tf['full'].toPattern(),
  168. 'long': self.icu_tf['long'].toPattern(),
  169. 'medium': self.icu_tf['medium'].toPattern(),
  170. 'short': self.icu_tf['short'].toPattern(),
  171. }
  172. am = ''
  173. pm = ''
  174. ts = ''
  175. # ICU doesn't seem to provide directly the date or time seperator
  176. # so we have to figure it out
  177. o = self.icu_tf['short']
  178. s = self.timeFormats['short']
  179. self.usesMeridian = 'a' in s
  180. self.uses24 = 'H' in s
  181. # '11:45 AM' or '11:45'
  182. s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
  183. # ': AM' or ':'
  184. s = s.replace('11', '').replace('45', '')
  185. if len(s) > 0:
  186. ts = s[0]
  187. if self.usesMeridian:
  188. # '23:45 AM' or '23:45'
  189. am = s[1:].strip()
  190. s = o.format(datetime.datetime(2003, 10, 30, 23, 45))
  191. if self.uses24:
  192. s = s.replace('23', '')
  193. else:
  194. s = s.replace('11', '')
  195. # 'PM' or ''
  196. pm = s.replace('45', '').replace(ts, '').strip()
  197. self.timeSep = [ ts ]
  198. self.meridian = [ am, pm ]
  199. o = self.icu_df['short']
  200. s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
  201. s = s.replace('10', '').replace('30', '').replace('03', '').replace('2003', '')
  202. if len(s) > 0:
  203. ds = s[0]
  204. else:
  205. ds = '/'
  206. self.dateSep = [ ds ]
  207. s = self.dateFormats['short']
  208. l = s.lower().split(ds)
  209. dp_order = []
  210. for s in l:
  211. if len(s) > 0:
  212. dp_order.append(s[:1])
  213. self.dp_order = dp_order
  214. class pdtLocale_en(pdtLocale_base):
  215. """
  216. en_US Locale
  217. """
  218. def __init__(self):
  219. super( pdtLocale_en, self ).__init__()
  220. self.localeID = 'en_US' # don't use a unicode string
  221. self.uses24 = False
  222. class pdtLocale_au(pdtLocale_base):
  223. """
  224. en_AU Locale
  225. """
  226. def __init__(self):
  227. super( pdtLocale_au, self ).__init__()
  228. self.localeID = 'en_A' # don't use a unicode string
  229. self.dateSep = [ '-', '/' ]
  230. self.uses24 = False
  231. self.dateFormats['full'] = 'EEEE, d MMMM yyyy'
  232. self.dateFormats['long'] = 'd MMMM yyyy'
  233. self.dateFormats['medium'] = 'dd/MM/yyyy'
  234. self.dateFormats['short'] = 'd/MM/yy'
  235. self.timeFormats['long'] = self.timeFormats['full']
  236. self.dp_order = [ 'd', 'm', 'y' ]
  237. class pdtLocale_es(pdtLocale_base):
  238. """
  239. es Locale
  240. Note that I don't speak Spanish so many of the items below are still in English
  241. """
  242. def __init__(self):
  243. super( pdtLocale_es, self ).__init__()
  244. self.localeID = 'es' # don't use a unicode string
  245. self.dateSep = [ '/' ]
  246. self.usesMeridian = False
  247. self.uses24 = True
  248. self.Weekdays = [ 'lunes', 'martes', 'mi\xe9rcoles',
  249. 'jueves', 'viernes', 's\xe1bado', 'domingo',
  250. ]
  251. self.shortWeekdays = [ 'lun', 'mar', 'mi\xe9',
  252. 'jue', 'vie', 's\xe1b', 'dom',
  253. ]
  254. self.Months = [ 'enero', 'febrero', 'marzo',
  255. 'abril', 'mayo', 'junio',
  256. 'julio', 'agosto', 'septiembre',
  257. 'octubre', 'noviembre', 'diciembre'
  258. ]
  259. self.shortMonths = [ 'ene', 'feb', 'mar',
  260. 'abr', 'may', 'jun',
  261. 'jul', 'ago', 'sep',
  262. 'oct', 'nov', 'dic'
  263. ]
  264. self.dateFormats['full'] = "EEEE d' de 'MMMM' de 'yyyy"
  265. self.dateFormats['long'] = "d' de 'MMMM' de 'yyyy"
  266. self.dateFormats['medium'] = "dd-MMM-yy"
  267. self.dateFormats['short'] = "d/MM/yy"
  268. self.timeFormats['full'] = "HH'H'mm' 'ss z"
  269. self.timeFormats['long'] = "HH:mm:ss z"
  270. self.timeFormats['medium'] = "HH:mm:ss"
  271. self.timeFormats['short'] = "HH:mm"
  272. self.dp_order = [ 'd', 'm', 'y' ]
  273. class pdtLocale_de(pdtLocale_base):
  274. """
  275. de_DE Locale constants
  276. Contributed by Debian parsedatetime package maintainer Bernd Zeimetz <bzed@debian.org>
  277. """
  278. def __init__(self):
  279. super( pdtLocale_de, self ).__init__()
  280. self.localeID = 'de_DE' # don't use a unicode string
  281. self.dateSep = [ '.' ]
  282. self.timeSep = [ ':' ]
  283. self.meridian = [ ]
  284. self.usesMeridian = False
  285. self.uses24 = True
  286. self.Weekdays = [ 'montag', 'dienstag', 'mittwoch',
  287. 'donnerstag', 'freitag', 'samstag', 'sonntag',
  288. ]
  289. self.shortWeekdays = [ 'mo', 'di', 'mi',
  290. 'do', 'fr', 'sa', 'so',
  291. ]
  292. self.Months = [ 'januar', 'februar', 'm\xe4rz',
  293. 'april', 'mai', 'juni',
  294. 'juli', 'august', 'september',
  295. 'oktober', 'november', 'dezember',
  296. ]
  297. self.shortMonths = [ 'jan', 'feb', 'mrz',
  298. 'apr', 'mai', 'jun',
  299. 'jul', 'aug', 'sep',
  300. 'okt', 'nov', 'dez',
  301. ]
  302. self.dateFormats['full'] = 'EEEE, d. MMMM yyyy'
  303. self.dateFormats['long'] = 'd. MMMM yyyy'
  304. self.dateFormats['medium'] = 'dd.MM.yyyy'
  305. self.dateFormats['short'] = 'dd.MM.yy'
  306. self.timeFormats['full'] = 'HH:mm:ss v'
  307. self.timeFormats['long'] = 'HH:mm:ss z'
  308. self.timeFormats['medium'] = 'HH:mm:ss'
  309. self.timeFormats['short'] = 'HH:mm'
  310. self.dp_order = [ 'd', 'm', 'y' ]
  311. self.units['seconds'] = [ 'sekunden', 'sek', 's' ]
  312. self.units['minutes'] = [ 'minuten', 'min' , 'm' ]
  313. self.units['hours'] = [ 'stunden', 'std', 'h' ]
  314. self.units['days'] = [ 'tag', 'tage', 't' ]
  315. self.units['weeks'] = [ 'wochen', 'w' ]
  316. self.units['months'] = [ 'monat', 'monate' ] #the short version would be a capital M,
  317. #as I understand it we can't distinguis
  318. #between m for minutes and M for months.
  319. self.units['years'] = [ 'jahr', 'jahre', 'j' ]
  320. self.re_values['specials'] = 'am|dem|der|im|in|den|zum'
  321. self.re_values['timeseperator'] = ':'
  322. self.re_values['rangeseperator'] = '-'
  323. self.re_values['daysuffix'] = ''
  324. self.re_values['qunits'] = 'h|m|s|t|w|m|j'
  325. self.re_values['now'] = [ 'jetzt' ]
  326. # Used to adjust the returned date before/after the source
  327. #still looking for insight on how to translate all of them to german.
  328. self.Modifiers['from'] = 1
  329. self.Modifiers['before'] = -1
  330. self.Modifiers['after'] = 1
  331. self.Modifiers['vergangener'] = -1
  332. self.Modifiers['vorheriger'] = -1
  333. self.Modifiers['prev'] = -1
  334. self.Modifiers['letzter'] = -1
  335. self.Modifiers['n\xe4chster'] = 1
  336. self.Modifiers['dieser'] = 0
  337. self.Modifiers['previous'] = -1
  338. self.Modifiers['in a'] = 2
  339. self.Modifiers['end of'] = 0
  340. self.Modifiers['eod'] = 0
  341. self.Modifiers['eo'] = 0
  342. #morgen/abermorgen does not work, see http://code.google.com/p/parsedatetime/issues/detail?id=19
  343. self.dayOffsets['morgen'] = 1
  344. self.dayOffsets['heute'] = 0
  345. self.dayOffsets['gestern'] = -1
  346. self.dayOffsets['vorgestern'] = -2
  347. self.dayOffsets['\xfcbermorgen'] = 2
  348. # special day and/or times, i.e. lunch, noon, evening
  349. # each element in the dictionary is a dictionary that is used
  350. # to fill in any value to be replace - the current date/time will
  351. # already have been populated by the method buildSources
  352. self.re_sources['mittag'] = { 'hr': 12, 'mn': 0, 'sec': 0 }
  353. self.re_sources['mittags'] = { 'hr': 12, 'mn': 0, 'sec': 0 }
  354. self.re_sources['mittagessen'] = { 'hr': 12, 'mn': 0, 'sec': 0 }
  355. self.re_sources['morgen'] = { 'hr': 6, 'mn': 0, 'sec': 0 }
  356. self.re_sources['morgens'] = { 'hr': 6, 'mn': 0, 'sec': 0 }
  357. self.re_sources['fr\e4hst\xe4ck'] = { 'hr': 8, 'mn': 0, 'sec': 0 }
  358. self.re_sources['abendessen'] = { 'hr': 19, 'mn': 0, 'sec': 0 }
  359. self.re_sources['abend'] = { 'hr': 18, 'mn': 0, 'sec': 0 }
  360. self.re_sources['abends'] = { 'hr': 18, 'mn': 0, 'sec': 0 }
  361. self.re_sources['mitternacht'] = { 'hr': 0, 'mn': 0, 'sec': 0 }
  362. self.re_sources['nacht'] = { 'hr': 21, 'mn': 0, 'sec': 0 }
  363. self.re_sources['nachts'] = { 'hr': 21, 'mn': 0, 'sec': 0 }
  364. self.re_sources['heute abend'] = { 'hr': 21, 'mn': 0, 'sec': 0 }
  365. self.re_sources['heute nacht'] = { 'hr': 21, 'mn': 0, 'sec': 0 }
  366. self.re_sources['feierabend'] = { 'hr': 17, 'mn': 0, 'sec': 0 }