PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/bundle/ruby/1.9.1/gems/chronic-0.6.7/lib/chronic/chronic.rb

https://bitbucket.org/mulligan/extractext
Ruby | 328 lines | 225 code | 28 blank | 75 comment | 19 complexity | 7cf5559edf18f9f3e22b6e8bb01536cc MD5 | raw file
Possible License(s): Apache-2.0, MIT, GPL-3.0, GPL-2.0, BSD-3-Clause, MPL-2.0-no-copyleft-exception, BSD-2-Clause, JSON
  1. module Chronic
  # Option values that Chronic.parse falls back to when the caller does not
  # supply them. The key set doubles as the whitelist of accepted option
  # names (see the validation in Chronic.parse).
  DEFAULT_OPTIONS = {
    :context                    => :future,            # bias for ambiguous input
    :now                        => nil,                # nil means "use the current time"
    :guess                      => true,               # return a single Time instead of a Span
    :ambiguous_time_range       => 6,                  # see the Chronic.parse documentation
    :endian_precedence          => [:middle, :little], # month/day ordering preference
    :ambiguous_year_future_bias => 50                  # window used for two digit years
  }
  10. class << self
  # Parses a string containing a natural language date or time
  #
  # If the parser can find a date or time, either a Time or Chronic::Span
  # will be returned (depending on the value of `:guess`). If no
  # date or time can be found, `nil` will be returned
  #
  # Note that every call assigns `Chronic.now` (to `opts[:now]`, or to
  # `Chronic.time_class.now` when that is nil) before tokenizing.
  #
  # @param [String] text The text to parse
  #
  # @option opts [Symbol] :context (:future)
  #   * If your string represents a birthday, you can set `:context` to
  #     `:past` and if an ambiguous string is given, it will assume it is
  #     in the past. Specify `:future` or omit to set a future context.
  #     `:none` is accepted as well.
  #
  # @option opts [Object] :now (Time.now)
  #   * By setting `:now` to a Time, all computations will be based off of
  #     that time instead of `Time.now`. If set to nil, Chronic will use
  #     `Time.now`
  #
  # @option opts [Boolean] :guess (true)
  #   * By default, the parser will guess a single point in time for the
  #     given date or time. If you'd rather have the entire time span
  #     returned, set `:guess` to `false` and a {Chronic::Span} will
  #     be returned
  #
  # @option opts [Integer] :ambiguous_time_range (6)
  #   * If an Integer is given, ambiguous times (like 5:00) will be
  #     assumed to be within the range of that time in the AM to that time
  #     in the PM. For example, if you set it to `7`, then the parser
  #     will look for the time between 7am and 7pm. In the case of 5:00, it
  #     would assume that means 5:00pm. If `:none` is given, no
  #     assumption will be made, and the first matching instance of that
  #     time will be used
  #
  # @option opts [Array] :endian_precedence ([:middle, :little])
  #   * By default, Chronic will parse "03/04/2011" as the fourth day
  #     of the third month. Alternatively you can tell Chronic to parse
  #     this as the third day of the fourth month by altering the
  #     `:endian_precedence` to `[:little, :middle]`
  #
  # @option opts [Integer] :ambiguous_year_future_bias (50)
  #   * When parsing two digit years (ie 79) unlike Ruby's Time class,
  #     Chronic will attempt to assume the full year using this figure.
  #     Chronic will look x amount of years into the future and past. If
  #     the two digit year is `now + x years` it's assumed to be the
  #     future, `now - x years` is assumed to be the past
  #
  # @raise [ArgumentError] if `opts` contains a key that is not present in
  #   DEFAULT_OPTIONS, or if `:context` is not `:past`, `:future` or `:none`
  # @return [Time, Chronic::Span, nil]
  def parse(text, opts={})
    options = DEFAULT_OPTIONS.merge opts

    # ensure the specified options are valid
    (opts.keys - DEFAULT_OPTIONS.keys).each do |key|
      raise ArgumentError, "#{key} is not a valid option key."
    end

    # the message lists every value the check actually accepts
    unless [:past, :future, :none].include?(options[:context])
      raise ArgumentError, "Invalid context, :past/:future/:none only"
    end

    # the raw text travels with the options so later stages can read it
    options[:text] = text
    Chronic.now = options[:now] || Chronic.time_class.now

    # tokenize words
    tokens = tokenize(text, options)

    if Chronic.debug
      puts "+#{'-' * 51}\n| #{tokens}\n+#{'-' * 51}"
    end

    span = tokens_to_span(tokens, options)

    # implicit nil when no handler produced a span
    if span
      options[:guess] ? guess(span) : span
    end
  end
  # Clean up the specified text ready for parsing
  #
  # Clean up the string by stripping unwanted characters, converting
  # idioms to their canonical form, converting number words to numbers
  # (three => 3), and converting ordinal words to numeric
  # ordinals (third => 3rd)
  #
  # The substitutions are order dependent: later rules operate on the
  # output of earlier ones (e.g. "noon" becomes "12:00pm", which a later
  # rule splits into "12:00 pm").
  #
  # @example
  #   Chronic.pre_normalize('first day in May')
  #     #=> "1st day in may"
  #
  #   Chronic.pre_normalize('tomorrow after noon')
  #     #=> "next day future 12:00 pm"
  #
  #   Chronic.pre_normalize('one hundred and thirty six days from now')
  #     #=> "136 days future this second"
  #
  # @param [String] text The string to normalize (`to_s` is called on it)
  # @return [String] A new string ready for Chronic to parse
  def pre_normalize(text)
    text = text.to_s.downcase
    text.gsub!(/['"\.]/, '')
    text.gsub!(/,/, ' ')
    text.gsub!(/\bsecond (of|day|month|hour|minute|second)\b/, '2nd \1')
    text = Numerizer.numerize(text)
    text.gsub!(/ \-(\d{4})\b/, ' tzminus\1')
    text.gsub!(/([\/\-\,\@])/) { ' ' + $1 + ' ' }
    # Drop the leading zero of times such as "05:00pm". The boundary
    # (start of string or whitespace) is captured and written back so the
    # time is not glued onto the preceding word ("at 05:00pm" must not
    # become "at5:00pm").
    text.gsub!(/(^|\s)0(\d+:\d+\s*pm?\b)/, '\1\2')
    text.gsub!(/\btoday\b/, 'this day')
    text.gsub!(/\btomm?orr?ow\b/, 'next day')
    text.gsub!(/\byesterday\b/, 'last day')
    text.gsub!(/\bnoon\b/, '12:00pm')
    text.gsub!(/\bmidnight\b/, '24:00')
    text.gsub!(/\bnow\b/, 'this second')
    text.gsub!(/\b(?:ago|before(?: now)?)\b/, 'past')
    text.gsub!(/\bthis (?:last|past)\b/, 'last')
    text.gsub!(/\b(?:in|during) the (morning)\b/, '\1')
    text.gsub!(/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1')
    text.gsub!(/\btonight\b/, 'this night')
    # "7p" / "7:30a" => "7pm" / "7:30am"
    text.gsub!(/\b\d+:?\d*[ap]\b/,'\0m')
    # separate the meridian (or "oclock") from the digits: "7pm" => "7 pm"
    text.gsub!(/(\d)([ap]m|oclock)\b/, '\1 \2')
    text.gsub!(/\b(hence|after|from)\b/, 'future')
    text
  end
  # Deprecated alias for Numerizer.numerize (three => 3, fourth => 4th).
  # Emits a deprecation warning via Kernel#warn on every call and then
  # delegates unchanged.
  #
  # @see Numerizer.numerize
  # @param [String] text The string to convert
  # @return [String] Whatever Numerizer.numerize returns for `text`
  def numericize_numbers(text)
    deprecation_notice = "Chronic.numericize_numbers will be deprecated in version 0.7.0. Please use Chronic::Numerizer.numerize instead"
    warn(deprecation_notice)
    Numerizer.numerize(text)
  end
  # Pick a single point in time that represents the given span.
  #
  # Spans whose width is greater than 1 resolve to their midpoint
  # (`begin + width / 2`); anything narrower resolves to its start.
  #
  # @param [Span] span
  # @return [Time]
  def guess(span)
    return span.begin unless span.width > 1

    span.begin + (span.width / 2)
  end
  # List of {Handler} definitions. See {parse} for a list of options this
  # method accepts
  #
  # The table is built once and memoized in `@definitions`; only the
  # `:endian` entry is recomputed on every call, from the first element of
  # `options[:endian_precedence]`. The caller's `options` hash is read but
  # never modified.
  #
  # Handlers within a group are listed in the order they are tried, so the
  # order of the entries below is significant.
  #
  # @see parse
  # @param [Hash] options Only `:endian_precedence` is consulted here;
  #   defaults to `[:middle, :little]` when missing or nil
  # @raise [ArgumentError] if the first endian precedence entry is neither
  #   `:little` nor `:middle`
  # @return [Hash] A Hash of Handler definitions
  def definitions(options={})
    # use a local default instead of writing into the caller's hash
    endian_precedence = options[:endian_precedence] || [:middle, :little]

    @definitions ||= {
      :time => [
        Handler.new([:repeater_time, :repeater_day_portion?], nil)
      ],

      :date => [
        Handler.new([:repeater_day_name, :repeater_month_name, :scalar_day, :repeater_time, :separator_slash_or_dash?, :time_zone, :scalar_year], :handle_rdn_rmn_sd_t_tz_sy),
        Handler.new([:repeater_day_name, :repeater_month_name, :scalar_day], :handle_rdn_rmn_sd),
        Handler.new([:repeater_day_name, :repeater_month_name, :scalar_day, :scalar_year], :handle_rdn_rmn_sd_sy),
        Handler.new([:repeater_day_name, :repeater_month_name, :ordinal_day], :handle_rdn_rmn_od),
        Handler.new([:scalar_year, :separator_slash_or_dash, :scalar_month, :separator_slash_or_dash, :scalar_day, :repeater_time, :time_zone], :handle_sy_sm_sd_t_tz),
        Handler.new([:repeater_month_name, :scalar_day, :scalar_year], :handle_rmn_sd_sy),
        Handler.new([:repeater_month_name, :ordinal_day, :scalar_year], :handle_rmn_od_sy),
        Handler.new([:repeater_month_name, :scalar_day, :scalar_year, :separator_at?, 'time?'], :handle_rmn_sd_sy),
        Handler.new([:repeater_month_name, :ordinal_day, :scalar_year, :separator_at?, 'time?'], :handle_rmn_od_sy),
        Handler.new([:repeater_month_name, :scalar_day, :separator_at?, 'time?'], :handle_rmn_sd),
        Handler.new([:repeater_time, :repeater_day_portion?, :separator_on?, :repeater_month_name, :scalar_day], :handle_rmn_sd_on),
        Handler.new([:repeater_month_name, :ordinal_day, :separator_at?, 'time?'], :handle_rmn_od),
        Handler.new([:ordinal_day, :repeater_month_name, :scalar_year, :separator_at?, 'time?'], :handle_od_rmn_sy),
        Handler.new([:ordinal_day, :repeater_month_name, :separator_at?, 'time?'], :handle_od_rmn),
        Handler.new([:scalar_year, :repeater_month_name, :ordinal_day], :handle_sy_rmn_od),
        Handler.new([:repeater_time, :repeater_day_portion?, :separator_on?, :repeater_month_name, :ordinal_day], :handle_rmn_od_on),
        Handler.new([:repeater_month_name, :scalar_year], :handle_rmn_sy),
        Handler.new([:scalar_day, :repeater_month_name, :scalar_year, :separator_at?, 'time?'], :handle_sd_rmn_sy),
        Handler.new([:scalar_day, :repeater_month_name, :separator_at?, 'time?'], :handle_sd_rmn),
        Handler.new([:scalar_year, :separator_slash_or_dash, :scalar_month, :separator_slash_or_dash, :scalar_day, :separator_at?, 'time?'], :handle_sy_sm_sd),
        Handler.new([:scalar_month, :separator_slash_or_dash, :scalar_year], :handle_sm_sy)
      ],

      # tonight at 7pm
      :anchor => [
        Handler.new([:grabber?, :repeater, :separator_at?, :repeater?, :repeater?], :handle_r),
        Handler.new([:grabber?, :repeater, :repeater, :separator_at?, :repeater?, :repeater?], :handle_r),
        Handler.new([:repeater, :grabber, :repeater], :handle_r_g_r)
      ],

      # 3 weeks from now, in 2 months
      :arrow => [
        Handler.new([:scalar, :repeater, :pointer], :handle_s_r_p),
        Handler.new([:pointer, :scalar, :repeater], :handle_p_s_r),
        Handler.new([:scalar, :repeater, :pointer, 'anchor'], :handle_s_r_p_a)
      ],

      # 3rd week in march
      :narrow => [
        Handler.new([:ordinal, :repeater, :separator_in, :repeater], :handle_o_r_s_r),
        Handler.new([:ordinal, :repeater, :grabber, :repeater], :handle_o_r_g_r)
      ]
    }

    # month/day/year first, day/month/year second; reversed for :little
    endians = [
      Handler.new([:scalar_month, :separator_slash_or_dash, :scalar_day, :separator_slash_or_dash, :scalar_year, :separator_at?, 'time?'], :handle_sm_sd_sy),
      Handler.new([:scalar_day, :separator_slash_or_dash, :scalar_month, :separator_slash_or_dash, :scalar_year, :separator_at?, 'time?'], :handle_sd_sm_sy)
    ]

    # Array() lets callers pass a bare symbol as well as an array
    case endian = Array(endian_precedence).first
    when :little
      @definitions[:endian] = endians.reverse
    when :middle
      @definitions[:endian] = endians
    else
      raise ArgumentError, "Unknown endian option '#{endian}'"
    end

    @definitions
  end
  # Construct a time Object
  #
  # Builds a local time via `Chronic.time_class.local`, first carrying
  # overflowing components upwards: seconds >= 60 into minutes, minutes
  # >= 60 into hours, hours >= 24 into days, days past the end of the
  # month into the next month, and months > 12 into following years.
  # Negative or zero components are not normalized here.
  #
  # @param [Integer] year
  # @param [Integer] month 1-based month; values above 12 roll into later years
  # @param [Integer] day day of month; at most 56 after hour overflow is applied
  # @param [Integer] hour
  # @param [Integer] minute
  # @param [Integer] second
  # @raise [RuntimeError] if the (carried) day is greater than 56
  # @return [Time]
  def construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
    if second >= 60
      minute += second / 60
      second = second % 60
    end

    if minute >= 60
      hour += minute / 60
      minute = minute % 60
    end

    if hour >= 24
      day += hour / 24
      hour = hour % 24
    end

    # determine if there is a day overflow. this is complicated by our crappy calendar
    # system (non-constant number of days per month)
    day <= 56 || raise("day must be no more than 56 (makes month resolution easier)")

    # Folds months above 12 into the year: 13 => next January, 24 => next
    # December. Values of 12 or less are returned untouched.
    roll_months = lambda do |y, m|
      m > 12 ? [y + (m - 1) / 12, (m - 1) % 12 + 1] : [y, m]
    end

    # Normalize the month BEFORE looking up its length so that the table
    # index is valid and the leap year test is made against the right year.
    year, month = roll_months.call(year, month)

    if day > 28
      # no month ever has fewer than 28 days, so only do this if necessary
      february = Date.leap?(year) ? 29 : 28
      days_this_month = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][month - 1]
      if day > days_this_month
        # day <= 56 and every month has >= 28 days, so a single step is
        # enough and the remainder (<= 28) is valid in any month.
        # Subtracting (rather than taking the modulo) avoids producing
        # day 0 for day 56 in a 28-day February.
        month += 1
        day -= days_this_month
      end
    end

    # the day carry may have pushed December into month 13
    year, month = roll_months.call(year, month)

    Chronic.time_class.local(year, month, day, hour, minute, second)
  end
  249. private
  # Turn raw text into the list of tagged tokens the handlers match on.
  #
  # The text is normalized with pre_normalize, split on whitespace into
  # Token objects, offered to every tokenizer class in turn (each one's
  # `scan` receives the full token list and the options), and finally
  # filtered down to the tokens that report `tagged?`.
  #
  # @param [String] text The text to tokenize
  # @param [Hash] options The parse options, forwarded to each `scan`
  # @return [Array] The tokens for which `tagged?` is truthy
  def tokenize(text, options)
    words = pre_normalize(text).split(' ')
    tokens = words.map { |word| Token.new(word) }

    scanners = [Repeater, Grabber, Pointer, Scalar, Ordinal, Separator, TimeZone]
    scanners.each { |scanner| scanner.scan(tokens, options) }

    tokens.select(&:tagged?)
  end
  # Find the first handler whose pattern matches the tokens and invoke it.
  #
  # Handler groups are tried in a fixed order: endian + date, then anchor,
  # then arrow, then narrow. Within a group the first matching handler
  # wins and its result is returned immediately. Before invoking:
  #
  # * date and anchor handlers receive the tokens with every token that
  #   carries a Separator tag removed;
  # * arrow handlers receive the tokens minus those tagged SeparatorAt,
  #   SeparatorSlashOrDash or SeparatorComma;
  # * narrow handlers receive the tokens unfiltered.
  #
  # @param [Array] tokens The tagged tokens produced by tokenize
  # @param [Hash] options The parse options
  # @return [Object, nil] The handler's result, or nil when nothing matched
  def tokens_to_span(tokens, options)
    defs = definitions(options)

    date_handler = (defs[:endian] + defs[:date]).find { |h| h.match(tokens, defs) }
    if date_handler
      date_tokens = tokens.reject { |token| token.get_tag(Separator) }
      return date_handler.invoke(:date, date_tokens, options)
    end

    anchor_handler = defs[:anchor].find { |h| h.match(tokens, defs) }
    if anchor_handler
      anchor_tokens = tokens.reject { |token| token.get_tag(Separator) }
      return anchor_handler.invoke(:anchor, anchor_tokens, options)
    end

    arrow_handler = defs[:arrow].find { |h| h.match(tokens, defs) }
    if arrow_handler
      arrow_tokens = tokens.reject do |token|
        token.get_tag(SeparatorAt) || token.get_tag(SeparatorSlashOrDash) || token.get_tag(SeparatorComma)
      end
      return arrow_handler.invoke(:arrow, arrow_tokens, options)
    end

    narrow_handler = defs[:narrow].find { |h| h.match(tokens, defs) }
    return narrow_handler.invoke(:narrow, tokens, options) if narrow_handler

    puts "-none" if Chronic.debug
    nil
  end
  286. end
  # Internal exception
  #
  # Derives from StandardError rather than directly from Exception so that
  # a plain `rescue` (and `rescue => e`) in calling code can handle it;
  # `rescue ChronicPain` and `rescue Exception` keep working as before.
  class ChronicPain < StandardError
  end
  290. end