PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/other/FetchData/mechanize/_util.py

http://github.com/jbeezley/wrf-fire
Python | 291 lines | 275 code | 7 blank | 9 comment | 1 complexity | 25cfd7904f6ead499cf46ca5e72ca575 MD5 | raw file
Possible License(s): AGPL-1.0
  1. """Utility functions and date/time routines.
  2. Copyright 2002-2006 John J Lee <jjl@pobox.com>
  3. This code is free software; you can redistribute it and/or modify it
  4. under the terms of the BSD or ZPL 2.1 licenses (see the file
  5. COPYING.txt included with the distribution).
  6. """
  7. import re, time, warnings
  8. class ExperimentalWarning(UserWarning):
  9. pass
  10. def experimental(message):
  11. warnings.warn(message, ExperimentalWarning, stacklevel=3)
  12. def hide_experimental_warnings():
  13. warnings.filterwarnings("ignore", category=ExperimentalWarning)
  14. def reset_experimental_warnings():
  15. warnings.filterwarnings("default", category=ExperimentalWarning)
  16. def deprecation(message):
  17. warnings.warn(message, DeprecationWarning, stacklevel=3)
  18. def hide_deprecations():
  19. warnings.filterwarnings("ignore", category=DeprecationWarning)
  20. def reset_deprecations():
  21. warnings.filterwarnings("default", category=DeprecationWarning)
  22. def isstringlike(x):
  23. try: x+""
  24. except: return False
  25. else: return True
  26. ## def caller():
  27. ## try:
  28. ## raise SyntaxError
  29. ## except:
  30. ## import sys
  31. ## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
  32. from calendar import timegm
  33. # Date/time conversion routines for formats used by the HTTP protocol.
  34. EPOCH = 1970
  35. def my_timegm(tt):
  36. year, month, mday, hour, min, sec = tt[:6]
  37. if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
  38. (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
  39. return timegm(tt)
  40. else:
  41. return None
  42. days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
  43. months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
  44. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
  45. months_lower = []
  46. for month in months: months_lower.append(month.lower())
  47. def time2isoz(t=None):
  48. """Return a string representing time in seconds since epoch, t.
  49. If the function is called without an argument, it will use the current
  50. time.
  51. The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
  52. representing Universal Time (UTC, aka GMT). An example of this format is:
  53. 1994-11-24 08:49:37Z
  54. """
  55. if t is None: t = time.time()
  56. year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
  57. return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
  58. year, mon, mday, hour, min, sec)
  59. def time2netscape(t=None):
  60. """Return a string representing time in seconds since epoch, t.
  61. If the function is called without an argument, it will use the current
  62. time.
  63. The format of the returned string is like this:
  64. Wed, DD-Mon-YYYY HH:MM:SS GMT
  65. """
  66. if t is None: t = time.time()
  67. year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
  68. return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
  69. days[wday], mday, months[mon-1], year, hour, min, sec)
  70. UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
  71. timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
  72. def offset_from_tz_string(tz):
  73. offset = None
  74. if UTC_ZONES.has_key(tz):
  75. offset = 0
  76. else:
  77. m = timezone_re.search(tz)
  78. if m:
  79. offset = 3600 * int(m.group(2))
  80. if m.group(3):
  81. offset = offset + 60 * int(m.group(3))
  82. if m.group(1) == '-':
  83. offset = -offset
  84. return offset
  85. def _str2time(day, mon, yr, hr, min, sec, tz):
  86. # translate month name to number
  87. # month numbers start with 1 (January)
  88. try:
  89. mon = months_lower.index(mon.lower())+1
  90. except ValueError:
  91. # maybe it's already a number
  92. try:
  93. imon = int(mon)
  94. except ValueError:
  95. return None
  96. if 1 <= imon <= 12:
  97. mon = imon
  98. else:
  99. return None
  100. # make sure clock elements are defined
  101. if hr is None: hr = 0
  102. if min is None: min = 0
  103. if sec is None: sec = 0
  104. yr = int(yr)
  105. day = int(day)
  106. hr = int(hr)
  107. min = int(min)
  108. sec = int(sec)
  109. if yr < 1000:
  110. # find "obvious" year
  111. cur_yr = time.localtime(time.time())[0]
  112. m = cur_yr % 100
  113. tmp = yr
  114. yr = yr + cur_yr - m
  115. m = m - tmp
  116. if abs(m) > 50:
  117. if m > 0: yr = yr + 100
  118. else: yr = yr - 100
  119. # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
  120. t = my_timegm((yr, mon, day, hr, min, sec, tz))
  121. if t is not None:
  122. # adjust time using timezone string, to get absolute time since epoch
  123. if tz is None:
  124. tz = "UTC"
  125. tz = tz.upper()
  126. offset = offset_from_tz_string(tz)
  127. if offset is None:
  128. return None
  129. t = t - offset
  130. return t
  131. strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
  132. r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
  133. wkday_re = re.compile(
  134. r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
  135. loose_http_re = re.compile(
  136. r"""^
  137. (\d\d?) # day
  138. (?:\s+|[-\/])
  139. (\w+) # month
  140. (?:\s+|[-\/])
  141. (\d+) # year
  142. (?:
  143. (?:\s+|:) # separator before clock
  144. (\d\d?):(\d\d) # hour:min
  145. (?::(\d\d))? # optional seconds
  146. )? # optional clock
  147. \s*
  148. ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
  149. \s*
  150. (?:\(\w+\))? # ASCII representation of timezone in parens.
  151. \s*$""", re.X)
  152. def http2time(text):
  153. """Returns time in seconds since epoch of time represented by a string.
  154. Return value is an integer.
  155. None is returned if the format of str is unrecognized, the time is outside
  156. the representable range, or the timezone string is not recognized. If the
  157. string contains no timezone, UTC is assumed.
  158. The timezone in the string may be numerical (like "-0800" or "+0100") or a
  159. string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
  160. timezone strings equivalent to UTC (zero offset) are known to the function.
  161. The function loosely parses the following formats:
  162. Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
  163. Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
  164. Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
  165. 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
  166. 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
  167. 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
  168. The parser ignores leading and trailing whitespace. The time may be
  169. absent.
  170. If the year is given with only 2 digits, the function will select the
  171. century that makes the year closest to the current date.
  172. """
  173. # fast exit for strictly conforming string
  174. m = strict_re.search(text)
  175. if m:
  176. g = m.groups()
  177. mon = months_lower.index(g[1].lower()) + 1
  178. tt = (int(g[2]), mon, int(g[0]),
  179. int(g[3]), int(g[4]), float(g[5]))
  180. return my_timegm(tt)
  181. # No, we need some messy parsing...
  182. # clean up
  183. text = text.lstrip()
  184. text = wkday_re.sub("", text, 1) # Useless weekday
  185. # tz is time zone specifier string
  186. day, mon, yr, hr, min, sec, tz = [None]*7
  187. # loose regexp parse
  188. m = loose_http_re.search(text)
  189. if m is not None:
  190. day, mon, yr, hr, min, sec, tz = m.groups()
  191. else:
  192. return None # bad format
  193. return _str2time(day, mon, yr, hr, min, sec, tz)
  194. iso_re = re.compile(
  195. """^
  196. (\d{4}) # year
  197. [-\/]?
  198. (\d\d?) # numerical month
  199. [-\/]?
  200. (\d\d?) # day
  201. (?:
  202. (?:\s+|[-:Tt]) # separator before clock
  203. (\d\d?):?(\d\d) # hour:min
  204. (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
  205. )? # optional clock
  206. \s*
  207. ([-+]?\d\d?:?(:?\d\d)?
  208. |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
  209. \s*$""", re.X)
  210. def iso2time(text):
  211. """
  212. As for http2time, but parses the ISO 8601 formats:
  213. 1994-02-03 14:15:29 -0100 -- ISO 8601 format
  214. 1994-02-03 14:15:29 -- zone is optional
  215. 1994-02-03 -- only date
  216. 1994-02-03T14:15:29 -- Use T as separator
  217. 19940203T141529Z -- ISO 8601 compact format
  218. 19940203 -- only date
  219. """
  220. # clean up
  221. text = text.lstrip()
  222. # tz is time zone specifier string
  223. day, mon, yr, hr, min, sec, tz = [None]*7
  224. # loose regexp parse
  225. m = iso_re.search(text)
  226. if m is not None:
  227. # XXX there's an extra bit of the timezone I'm ignoring here: is
  228. # this the right thing to do?
  229. yr, mon, day, hr, min, sec, tz, _ = m.groups()
  230. else:
  231. return None # bad format
  232. return _str2time(day, mon, yr, hr, min, sec, tz)