PageRenderTime 4184ms CodeModel.GetById 31ms RepoModel.GetById 2ms app.codeStats 0ms

/ebpub/ebpub/geocoder/base.py

https://github.com/frankk00/openblock
Python | 331 lines | 227 code | 34 blank | 70 comment | 59 complexity | c6a47b36853b51bde90e1ac93688b5af MD5 | raw file
  1. # Copyright 2007,2008,2009,2011 Everyblock LLC, OpenPlans, and contributors
  2. #
  3. # This file is part of ebpub
  4. #
  5. # ebpub is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # ebpub is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with ebpub. If not, see <http://www.gnu.org/licenses/>.
  17. #
  18. from django.core.exceptions import ObjectDoesNotExist
  19. from django.db.models import Q
  20. from ebpub.geocoder.parser.parsing import normalize, parse, ParsingError
  21. from ebpub.geocoder.models import GeocoderCache
  22. from ebpub.streets.models import Block, StreetMisspelling, Intersection
  23. import re
  24. block_re = re.compile(r'^(\d+)[-\s]+(?:blk|block)\s+(?:of\s+)?(.*)$', re.IGNORECASE)
  25. intersection_re = re.compile(r'(?<=.) (?:and|\&|at|near|@|around|towards?|off|/|(?:just )?(?:north|south|east|west) of|(?:just )?past) (?=.)', re.IGNORECASE)
  26. # segment_re = re.compile(r'^.{1,40}?\b(?:between .{1,40}? and|from .{1,40}? to) .{1,40}?$', re.IGNORECASE) # TODO
  27. class GeocodingException(Exception):
  28. pass
  29. class AmbiguousResult(GeocodingException):
  30. def __init__(self, choices, message=None):
  31. self.choices = choices
  32. if message is None:
  33. message = "Address DB returned %s results" % len(choices)
  34. self.message = message
  35. def __str__(self):
  36. return self.message
  37. class DoesNotExist(GeocodingException):
  38. pass
  39. class UnparseableLocation(GeocodingException):
  40. pass
  41. class InvalidBlockButValidStreet(GeocodingException):
  42. def __init__(self, block_number, street_name, block_list):
  43. self.block_number = block_number
  44. self.street_name = street_name
  45. self.block_list = block_list
  46. class Address(dict):
  47. "A simple container class for representing a single street address."
  48. def __init__(self, *args, **kwargs):
  49. dict.__init__(self, *args, **kwargs)
  50. self._cache_hit = False
  51. @property
  52. def latitude(self):
  53. if self["point"]:
  54. return self["point"].y
  55. lat = latitude
  56. @property
  57. def longitude(self):
  58. if self["point"]:
  59. return self["point"].x
  60. lng = longitude
  61. def __unicode__(self):
  62. return u", ".join([self[k] for k in ["address", "city", "state", "zip"]])
  63. @classmethod
  64. def from_cache(cls, cached):
  65. """
  66. Builds an Address object from a GeocoderCache result object.
  67. """
  68. fields = {
  69. 'address': cached.address,
  70. 'city': cached.city,
  71. 'state': cached.state,
  72. 'zip': cached.zip,
  73. 'point': cached.location,
  74. 'intersection_id': cached.intersection_id,
  75. }
  76. try:
  77. block_obj = cached.block
  78. except ObjectDoesNotExist:
  79. fields.update({'block': None})
  80. else:
  81. fields.update({'block': block_obj})
  82. try:
  83. intersection_obj = cached.intersection
  84. except ObjectDoesNotExist:
  85. fields.update({'intersection': None})
  86. else:
  87. fields.update({'intersection': intersection_obj})
  88. obj = cls(fields)
  89. obj._cache_hit = True
  90. return obj
  91. class Geocoder(object):
  92. """
  93. Generic Geocoder class.
  94. Subclasses must override the following attribute:
  95. _do_geocode(self, location_string)
  96. Actually performs the geocoding. The base class implementation of
  97. geocode() calls this behind the scenes.
  98. """
  99. def __init__(self, use_cache=True):
  100. self.use_cache = use_cache
  101. def geocode(self, location):
  102. """
  103. Geocodes the given location, handling caching behind the scenes.
  104. """
  105. location = normalize(location)
  106. result, cache_hit = None, False
  107. # Get the result (an Address instance), either from the cache or by
  108. # calling _do_geocode().
  109. if self.use_cache:
  110. try:
  111. cached = GeocoderCache.objects.filter(normalized_location=location)[0]
  112. except IndexError:
  113. pass
  114. else:
  115. result = Address.from_cache(cached)
  116. cache_hit = True
  117. if result is None:
  118. try:
  119. result = self._do_geocode(location)
  120. except AmbiguousResult, e:
  121. # If multiple results were found, check whether they have the
  122. # same point. If they all have the same point, don't raise the
  123. # AmbiguousResult exception -- just return the first one.
  124. #
  125. # An edge case is if result['point'] is None. This could happen
  126. # if the geocoder found locations, not points. In that case,
  127. # just raise the AmbiguousResult.
  128. result = e.choices[0]
  129. if result['point'] is None:
  130. raise
  131. for i in e.choices[1:]:
  132. if i['point'] != result['point']:
  133. raise
  134. # Save the result to the cache if it wasn't in there already.
  135. if not cache_hit and self.use_cache:
  136. GeocoderCache.populate(location, result)
  137. return result
  138. class AddressGeocoder(Geocoder):
  139. def _do_geocode(self, location_string):
  140. # Parse the address.
  141. try:
  142. locations = parse(location_string)
  143. except ParsingError, e:
  144. raise
  145. all_results = []
  146. for loc in locations:
  147. loc_results = self._db_lookup(loc)
  148. # If none were found, maybe the street was misspelled. Check that.
  149. if not loc_results and loc['street']:
  150. try:
  151. misspelling = StreetMisspelling.objects.get(incorrect=loc['street'])
  152. loc['street'] = misspelling.correct
  153. except StreetMisspelling.DoesNotExist:
  154. pass
  155. else:
  156. loc_results = self._db_lookup(loc)
  157. # Next, try removing the street suffix, in case an incorrect
  158. # one was given.
  159. if not loc_results and loc['suffix']:
  160. loc_results = self._db_lookup(dict(loc, suffix=None))
  161. # Next, try looking for the street, in case the street
  162. # exists but the address doesn't.
  163. if not loc_results and loc['number']:
  164. kwargs = {'street': loc['street']}
  165. sided_filters = []
  166. if loc['city']:
  167. city_filter = Q(left_city=loc['city']) | Q(right_city=loc['city'])
  168. sided_filters.append(city_filter)
  169. b_list = Block.objects.filter(*sided_filters, **kwargs).order_by('predir', 'from_num', 'to_num')
  170. if b_list:
  171. raise InvalidBlockButValidStreet(loc['number'], b_list[0].street_pretty_name, b_list)
  172. all_results.extend(loc_results)
  173. if not all_results:
  174. raise DoesNotExist("Geocoder db couldn't find this location: %r" % location_string)
  175. elif len(all_results) == 1:
  176. return all_results[0]
  177. else:
  178. raise AmbiguousResult(all_results)
  179. def _db_lookup(self, location):
  180. """
  181. Given a location dict as returned by parse(), looks up the address in
  182. the DB. Always returns a list of Address dictionaries (or an empty list
  183. if no results are found).
  184. """
  185. if not location['number']:
  186. return []
  187. # Query the blocks database.
  188. try:
  189. blocks = Block.objects.search(
  190. street=location['street'],
  191. number=location['number'],
  192. predir=location['pre_dir'],
  193. suffix=location['suffix'],
  194. postdir=location['post_dir'],
  195. city=location['city'],
  196. state=location['state'],
  197. zipcode=location['zip'],
  198. )
  199. except:
  200. # TODO: replace with Block-specific exception?
  201. raise
  202. return [self._build_result(location, block, geocoded_pt) for block, geocoded_pt in blocks]
  203. def _build_result(self, location, block, geocoded_pt):
  204. return Address({
  205. 'address': unicode(" ".join([str(s) for s in [location['number'], block.predir, block.street_pretty_name, block.postdir] if s])),
  206. 'city': block.city.title(),
  207. 'state': block.state,
  208. 'zip': block.zip,
  209. 'block': block,
  210. 'intersection_id': None,
  211. 'point': geocoded_pt,
  212. 'url': block.url(),
  213. 'wkt': str(block.location),
  214. })
  215. class BlockGeocoder(AddressGeocoder):
  216. def _do_geocode(self, location_string):
  217. m = block_re.search(location_string)
  218. if not m:
  219. raise ParsingError("BlockGeocoder somehow got an address it can't parse: %r" % location_string)
  220. new_location_string = ' '.join(m.groups())
  221. return AddressGeocoder._do_geocode(self, new_location_string)
  222. class IntersectionGeocoder(Geocoder):
  223. def _do_geocode(self, location_string):
  224. sides = intersection_re.split(location_string)
  225. if len(sides) != 2:
  226. raise ParsingError("Couldn't parse intersection: %r" % location_string)
  227. # Parse each side of the intersection to a list of possibilities.
  228. # Let the ParseError exception propagate, if it's raised.
  229. left_side = parse(sides[0])
  230. right_side = parse(sides[1])
  231. all_results = []
  232. seen_intersections = set()
  233. for street_a in left_side:
  234. street_a['street'] = StreetMisspelling.objects.make_correction(street_a['street'])
  235. for street_b in right_side:
  236. street_b['street'] = StreetMisspelling.objects.make_correction(street_b['street'])
  237. for result in self._db_lookup(street_a, street_b):
  238. if result["intersection_id"] not in seen_intersections:
  239. seen_intersections.add(result["intersection_id"])
  240. all_results.append(result)
  241. if not all_results:
  242. raise DoesNotExist("Geocoder db couldn't find this intersection: %r" % location_string)
  243. elif len(all_results) == 1:
  244. return all_results.pop()
  245. else:
  246. raise AmbiguousResult(list(all_results), "Intersections DB returned %s results" % len(all_results))
  247. def _db_lookup(self, street_a, street_b):
  248. try:
  249. intersections = Intersection.objects.search(
  250. predir_a=street_a["pre_dir"],
  251. street_a=street_a["street"],
  252. suffix_a=street_a["suffix"],
  253. postdir_a=street_a["post_dir"],
  254. predir_b=street_b["pre_dir"],
  255. street_b=street_b["street"],
  256. suffix_b=street_b["suffix"],
  257. postdir_b=street_b["post_dir"]
  258. )
  259. except Exception, e:
  260. raise DoesNotExist("Intersection db query failed: %r" % e)
  261. return [self._build_result(i) for i in intersections]
  262. def _build_result(self, intersection):
  263. return Address({
  264. 'address': intersection.pretty_name,
  265. 'city': intersection.city,
  266. 'state': intersection.state,
  267. 'zip': intersection.zip,
  268. 'intersection_id': intersection.id,
  269. 'intersection': intersection,
  270. 'block': None,
  271. 'point': intersection.location,
  272. 'url': intersection.url(),
  273. 'wkt': str(intersection.location),
  274. })
  275. # THIS IS NOT YET FINISHED
  276. #
  277. # class SegmentGeocoder(Geocoder):
  278. # def _do_geocode(self, location_string):
  279. # bits = segment_re.findall(location_string)
  280. # g = IntersectionGeocoder()
  281. # try:
  282. # point1 = g.geocode('%s and %s' % (bits[0], bits[1]))
  283. # point2 = g.geocode('%s and %s' % (bits[0], bits[2]))
  284. # except DoesNotExist, e:
  285. # raise DoesNotExist("Segment query failed: %r" % e)
  286. # # TODO: Make a line from the two points, and return that.
  287. class SmartGeocoder(Geocoder):
  288. def _do_geocode(self, location_string):
  289. if intersection_re.search(location_string):
  290. geocoder = IntersectionGeocoder()
  291. elif block_re.search(location_string):
  292. geocoder = BlockGeocoder()
  293. else:
  294. geocoder = AddressGeocoder()
  295. return geocoder._do_geocode(location_string)