/django/contrib/gis/utils/layermapping.py

https://code.google.com/p/mango-py/ · Python · 608 lines · 397 code · 61 blank · 150 comment · 94 complexity · f804a0fec011db3c88b4eed870635376 MD5 · raw file

  1. # LayerMapping -- A Django Model/OGR Layer Mapping Utility
  2. """
  3. The LayerMapping class provides a way to map the contents of OGR
  4. vector files (e.g. SHP files) to Geographic-enabled Django models.
  5. For more information, please consult the GeoDjango documentation:
  6. http://geodjango.org/docs/layermapping.html
  7. """
  8. import sys
  9. from datetime import date, datetime
  10. from decimal import Decimal
  11. from django.core.exceptions import ObjectDoesNotExist
  12. from django.db import connections, DEFAULT_DB_ALIAS
  13. from django.contrib.gis.db.models import GeometryField
  14. from django.contrib.gis.gdal import CoordTransform, DataSource, \
  15. OGRException, OGRGeometry, OGRGeomType, SpatialReference
  16. from django.contrib.gis.gdal.field import \
  17. OFTDate, OFTDateTime, OFTInteger, OFTReal, OFTString, OFTTime
  18. from django.db import models, transaction
  19. from django.contrib.localflavor.us.models import USStateField
  20. # LayerMapping exceptions.
  21. class LayerMapError(Exception): pass
  22. class InvalidString(LayerMapError): pass
  23. class InvalidDecimal(LayerMapError): pass
  24. class InvalidInteger(LayerMapError): pass
  25. class MissingForeignKey(LayerMapError): pass
  26. class LayerMapping(object):
  27. "A class that maps OGR Layers to GeoDjango Models."
  28. # Acceptable 'base' types for a multi-geometry type.
  29. MULTI_TYPES = {1 : OGRGeomType('MultiPoint'),
  30. 2 : OGRGeomType('MultiLineString'),
  31. 3 : OGRGeomType('MultiPolygon'),
  32. OGRGeomType('Point25D').num : OGRGeomType('MultiPoint25D'),
  33. OGRGeomType('LineString25D').num : OGRGeomType('MultiLineString25D'),
  34. OGRGeomType('Polygon25D').num : OGRGeomType('MultiPolygon25D'),
  35. }
  36. # Acceptable Django field types and corresponding acceptable OGR
  37. # counterparts.
  38. FIELD_TYPES = {
  39. models.AutoField : OFTInteger,
  40. models.IntegerField : (OFTInteger, OFTReal, OFTString),
  41. models.FloatField : (OFTInteger, OFTReal),
  42. models.DateField : OFTDate,
  43. models.DateTimeField : OFTDateTime,
  44. models.EmailField : OFTString,
  45. models.TimeField : OFTTime,
  46. models.DecimalField : (OFTInteger, OFTReal),
  47. models.CharField : OFTString,
  48. models.SlugField : OFTString,
  49. models.TextField : OFTString,
  50. models.URLField : OFTString,
  51. USStateField : OFTString,
  52. # This is a reminder that XMLField is deprecated
  53. # and this needs to be removed in 1.4
  54. models.XMLField : OFTString,
  55. models.BigIntegerField : (OFTInteger, OFTReal, OFTString),
  56. models.SmallIntegerField : (OFTInteger, OFTReal, OFTString),
  57. models.PositiveSmallIntegerField : (OFTInteger, OFTReal, OFTString),
  58. }
  59. # The acceptable transaction modes.
  60. TRANSACTION_MODES = {'autocommit' : transaction.autocommit,
  61. 'commit_on_success' : transaction.commit_on_success,
  62. }
  63. def __init__(self, model, data, mapping, layer=0,
  64. source_srs=None, encoding=None,
  65. transaction_mode='commit_on_success',
  66. transform=True, unique=None, using=DEFAULT_DB_ALIAS):
  67. """
  68. A LayerMapping object is initialized using the given Model (not an instance),
  69. a DataSource (or string path to an OGR-supported data file), and a mapping
  70. dictionary. See the module level docstring for more details and keyword
  71. argument usage.
  72. """
  73. # Getting the DataSource and the associated Layer.
  74. if isinstance(data, basestring):
  75. self.ds = DataSource(data)
  76. else:
  77. self.ds = data
  78. self.layer = self.ds[layer]
  79. self.using = using
  80. self.spatial_backend = connections[using].ops
  81. # Setting the mapping & model attributes.
  82. self.mapping = mapping
  83. self.model = model
  84. # Checking the layer -- intitialization of the object will fail if
  85. # things don't check out before hand.
  86. self.check_layer()
  87. # Getting the geometry column associated with the model (an
  88. # exception will be raised if there is no geometry column).
  89. if self.spatial_backend.mysql:
  90. transform = False
  91. else:
  92. self.geo_field = self.geometry_field()
  93. # Checking the source spatial reference system, and getting
  94. # the coordinate transformation object (unless the `transform`
  95. # keyword is set to False)
  96. if transform:
  97. self.source_srs = self.check_srs(source_srs)
  98. self.transform = self.coord_transform()
  99. else:
  100. self.transform = transform
  101. # Setting the encoding for OFTString fields, if specified.
  102. if encoding:
  103. # Making sure the encoding exists, if not a LookupError
  104. # exception will be thrown.
  105. from codecs import lookup
  106. lookup(encoding)
  107. self.encoding = encoding
  108. else:
  109. self.encoding = None
  110. if unique:
  111. self.check_unique(unique)
  112. transaction_mode = 'autocommit' # Has to be set to autocommit.
  113. self.unique = unique
  114. else:
  115. self.unique = None
  116. # Setting the transaction decorator with the function in the
  117. # transaction modes dictionary.
  118. if transaction_mode in self.TRANSACTION_MODES:
  119. self.transaction_decorator = self.TRANSACTION_MODES[transaction_mode]
  120. self.transaction_mode = transaction_mode
  121. else:
  122. raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode)
  123. if using is None:
  124. pass
  125. #### Checking routines used during initialization ####
  126. def check_fid_range(self, fid_range):
  127. "This checks the `fid_range` keyword."
  128. if fid_range:
  129. if isinstance(fid_range, (tuple, list)):
  130. return slice(*fid_range)
  131. elif isinstance(fid_range, slice):
  132. return fid_range
  133. else:
  134. raise TypeError
  135. else:
  136. return None
  137. def check_layer(self):
  138. """
  139. This checks the Layer metadata, and ensures that it is compatible
  140. with the mapping information and model. Unlike previous revisions,
  141. there is no need to increment through each feature in the Layer.
  142. """
  143. # The geometry field of the model is set here.
  144. # TODO: Support more than one geometry field / model. However, this
  145. # depends on the GDAL Driver in use.
  146. self.geom_field = False
  147. self.fields = {}
  148. # Getting lists of the field names and the field types available in
  149. # the OGR Layer.
  150. ogr_fields = self.layer.fields
  151. ogr_field_types = self.layer.field_types
  152. # Function for determining if the OGR mapping field is in the Layer.
  153. def check_ogr_fld(ogr_map_fld):
  154. try:
  155. idx = ogr_fields.index(ogr_map_fld)
  156. except ValueError:
  157. raise LayerMapError('Given mapping OGR field "%s" not found in OGR Layer.' % ogr_map_fld)
  158. return idx
  159. # No need to increment through each feature in the model, simply check
  160. # the Layer metadata against what was given in the mapping dictionary.
  161. for field_name, ogr_name in self.mapping.items():
  162. # Ensuring that a corresponding field exists in the model
  163. # for the given field name in the mapping.
  164. try:
  165. model_field = self.model._meta.get_field(field_name)
  166. except models.fields.FieldDoesNotExist:
  167. raise LayerMapError('Given mapping field "%s" not in given Model fields.' % field_name)
  168. # Getting the string name for the Django field class (e.g., 'PointField').
  169. fld_name = model_field.__class__.__name__
  170. if isinstance(model_field, GeometryField):
  171. if self.geom_field:
  172. raise LayerMapError('LayerMapping does not support more than one GeometryField per model.')
  173. # Getting the coordinate dimension of the geometry field.
  174. coord_dim = model_field.dim
  175. try:
  176. if coord_dim == 3:
  177. gtype = OGRGeomType(ogr_name + '25D')
  178. else:
  179. gtype = OGRGeomType(ogr_name)
  180. except OGRException:
  181. raise LayerMapError('Invalid mapping for GeometryField "%s".' % field_name)
  182. # Making sure that the OGR Layer's Geometry is compatible.
  183. ltype = self.layer.geom_type
  184. if not (ltype.name.startswith(gtype.name) or self.make_multi(ltype, model_field)):
  185. raise LayerMapError('Invalid mapping geometry; model has %s%s, '
  186. 'layer geometry type is %s.' %
  187. (fld_name, (coord_dim == 3 and '(dim=3)') or '', ltype))
  188. # Setting the `geom_field` attribute w/the name of the model field
  189. # that is a Geometry. Also setting the coordinate dimension
  190. # attribute.
  191. self.geom_field = field_name
  192. self.coord_dim = coord_dim
  193. fields_val = model_field
  194. elif isinstance(model_field, models.ForeignKey):
  195. if isinstance(ogr_name, dict):
  196. # Is every given related model mapping field in the Layer?
  197. rel_model = model_field.rel.to
  198. for rel_name, ogr_field in ogr_name.items():
  199. idx = check_ogr_fld(ogr_field)
  200. try:
  201. rel_field = rel_model._meta.get_field(rel_name)
  202. except models.fields.FieldDoesNotExist:
  203. raise LayerMapError('ForeignKey mapping field "%s" not in %s fields.' %
  204. (rel_name, rel_model.__class__.__name__))
  205. fields_val = rel_model
  206. else:
  207. raise TypeError('ForeignKey mapping must be of dictionary type.')
  208. else:
  209. # Is the model field type supported by LayerMapping?
  210. if not model_field.__class__ in self.FIELD_TYPES:
  211. raise LayerMapError('Django field type "%s" has no OGR mapping (yet).' % fld_name)
  212. # Is the OGR field in the Layer?
  213. idx = check_ogr_fld(ogr_name)
  214. ogr_field = ogr_field_types[idx]
  215. # Can the OGR field type be mapped to the Django field type?
  216. if not issubclass(ogr_field, self.FIELD_TYPES[model_field.__class__]):
  217. raise LayerMapError('OGR field "%s" (of type %s) cannot be mapped to Django %s.' %
  218. (ogr_field, ogr_field.__name__, fld_name))
  219. fields_val = model_field
  220. self.fields[field_name] = fields_val
  221. def check_srs(self, source_srs):
  222. "Checks the compatibility of the given spatial reference object."
  223. if isinstance(source_srs, SpatialReference):
  224. sr = source_srs
  225. elif isinstance(source_srs, self.spatial_backend.spatial_ref_sys()):
  226. sr = source_srs.srs
  227. elif isinstance(source_srs, (int, basestring)):
  228. sr = SpatialReference(source_srs)
  229. else:
  230. # Otherwise just pulling the SpatialReference from the layer
  231. sr = self.layer.srs
  232. if not sr:
  233. raise LayerMapError('No source reference system defined.')
  234. else:
  235. return sr
  236. def check_unique(self, unique):
  237. "Checks the `unique` keyword parameter -- may be a sequence or string."
  238. if isinstance(unique, (list, tuple)):
  239. # List of fields to determine uniqueness with
  240. for attr in unique:
  241. if not attr in self.mapping: raise ValueError
  242. elif isinstance(unique, basestring):
  243. # Only a single field passed in.
  244. if unique not in self.mapping: raise ValueError
  245. else:
  246. raise TypeError('Unique keyword argument must be set with a tuple, list, or string.')
  247. #### Keyword argument retrieval routines ####
  248. def feature_kwargs(self, feat):
  249. """
  250. Given an OGR Feature, this will return a dictionary of keyword arguments
  251. for constructing the mapped model.
  252. """
  253. # The keyword arguments for model construction.
  254. kwargs = {}
  255. # Incrementing through each model field and OGR field in the
  256. # dictionary mapping.
  257. for field_name, ogr_name in self.mapping.items():
  258. model_field = self.fields[field_name]
  259. if isinstance(model_field, GeometryField):
  260. # Verify OGR geometry.
  261. try:
  262. val = self.verify_geom(feat.geom, model_field)
  263. except OGRException:
  264. raise LayerMapError('Could not retrieve geometry from feature.')
  265. elif isinstance(model_field, models.base.ModelBase):
  266. # The related _model_, not a field was passed in -- indicating
  267. # another mapping for the related Model.
  268. val = self.verify_fk(feat, model_field, ogr_name)
  269. else:
  270. # Otherwise, verify OGR Field type.
  271. val = self.verify_ogr_field(feat[ogr_name], model_field)
  272. # Setting the keyword arguments for the field name with the
  273. # value obtained above.
  274. kwargs[field_name] = val
  275. return kwargs
  276. def unique_kwargs(self, kwargs):
  277. """
  278. Given the feature keyword arguments (from `feature_kwargs`) this routine
  279. will construct and return the uniqueness keyword arguments -- a subset
  280. of the feature kwargs.
  281. """
  282. if isinstance(self.unique, basestring):
  283. return {self.unique : kwargs[self.unique]}
  284. else:
  285. return dict((fld, kwargs[fld]) for fld in self.unique)
  286. #### Verification routines used in constructing model keyword arguments. ####
  287. def verify_ogr_field(self, ogr_field, model_field):
  288. """
  289. Verifies if the OGR Field contents are acceptable to the Django
  290. model field. If they are, the verified value is returned,
  291. otherwise the proper exception is raised.
  292. """
  293. if (isinstance(ogr_field, OFTString) and
  294. isinstance(model_field, (models.CharField, models.TextField))):
  295. if self.encoding:
  296. # The encoding for OGR data sources may be specified here
  297. # (e.g., 'cp437' for Census Bureau boundary files).
  298. val = unicode(ogr_field.value, self.encoding)
  299. else:
  300. val = ogr_field.value
  301. if len(val) > model_field.max_length:
  302. raise InvalidString('%s model field maximum string length is %s, given %s characters.' %
  303. (model_field.name, model_field.max_length, len(val)))
  304. elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField):
  305. try:
  306. # Creating an instance of the Decimal value to use.
  307. d = Decimal(str(ogr_field.value))
  308. except:
  309. raise InvalidDecimal('Could not construct decimal from: %s' % ogr_field.value)
  310. # Getting the decimal value as a tuple.
  311. dtup = d.as_tuple()
  312. digits = dtup[1]
  313. d_idx = dtup[2] # index where the decimal is
  314. # Maximum amount of precision, or digits to the left of the decimal.
  315. max_prec = model_field.max_digits - model_field.decimal_places
  316. # Getting the digits to the left of the decimal place for the
  317. # given decimal.
  318. if d_idx < 0:
  319. n_prec = len(digits[:d_idx])
  320. else:
  321. n_prec = len(digits) + d_idx
  322. # If we have more than the maximum digits allowed, then throw an
  323. # InvalidDecimal exception.
  324. if n_prec > max_prec:
  325. raise InvalidDecimal('A DecimalField with max_digits %d, decimal_places %d must round to an absolute value less than 10^%d.' %
  326. (model_field.max_digits, model_field.decimal_places, max_prec))
  327. val = d
  328. elif isinstance(ogr_field, (OFTReal, OFTString)) and isinstance(model_field, models.IntegerField):
  329. # Attempt to convert any OFTReal and OFTString value to an OFTInteger.
  330. try:
  331. val = int(ogr_field.value)
  332. except:
  333. raise InvalidInteger('Could not construct integer from: %s' % ogr_field.value)
  334. else:
  335. val = ogr_field.value
  336. return val
  337. def verify_fk(self, feat, rel_model, rel_mapping):
  338. """
  339. Given an OGR Feature, the related model and its dictionary mapping,
  340. this routine will retrieve the related model for the ForeignKey
  341. mapping.
  342. """
  343. # TODO: It is expensive to retrieve a model for every record --
  344. # explore if an efficient mechanism exists for caching related
  345. # ForeignKey models.
  346. # Constructing and verifying the related model keyword arguments.
  347. fk_kwargs = {}
  348. for field_name, ogr_name in rel_mapping.items():
  349. fk_kwargs[field_name] = self.verify_ogr_field(feat[ogr_name], rel_model._meta.get_field(field_name))
  350. # Attempting to retrieve and return the related model.
  351. try:
  352. return rel_model.objects.get(**fk_kwargs)
  353. except ObjectDoesNotExist:
  354. raise MissingForeignKey('No ForeignKey %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs))
  355. def verify_geom(self, geom, model_field):
  356. """
  357. Verifies the geometry -- will construct and return a GeometryCollection
  358. if necessary (for example if the model field is MultiPolygonField while
  359. the mapped shapefile only contains Polygons).
  360. """
  361. # Downgrade a 3D geom to a 2D one, if necessary.
  362. if self.coord_dim != geom.coord_dim:
  363. geom.coord_dim = self.coord_dim
  364. if self.make_multi(geom.geom_type, model_field):
  365. # Constructing a multi-geometry type to contain the single geometry
  366. multi_type = self.MULTI_TYPES[geom.geom_type.num]
  367. g = OGRGeometry(multi_type)
  368. g.add(geom)
  369. else:
  370. g = geom
  371. # Transforming the geometry with our Coordinate Transformation object,
  372. # but only if the class variable `transform` is set w/a CoordTransform
  373. # object.
  374. if self.transform: g.transform(self.transform)
  375. # Returning the WKT of the geometry.
  376. return g.wkt
  377. #### Other model methods ####
  378. def coord_transform(self):
  379. "Returns the coordinate transformation object."
  380. SpatialRefSys = self.spatial_backend.spatial_ref_sys()
  381. try:
  382. # Getting the target spatial reference system
  383. target_srs = SpatialRefSys.objects.get(srid=self.geo_field.srid).srs
  384. # Creating the CoordTransform object
  385. return CoordTransform(self.source_srs, target_srs)
  386. except Exception, msg:
  387. raise LayerMapError('Could not translate between the data source and model geometry: %s' % msg)
  388. def geometry_field(self):
  389. "Returns the GeometryField instance associated with the geographic column."
  390. # Use the `get_field_by_name` on the model's options so that we
  391. # get the correct field instance if there's model inheritance.
  392. opts = self.model._meta
  393. fld, model, direct, m2m = opts.get_field_by_name(self.geom_field)
  394. return fld
  395. def make_multi(self, geom_type, model_field):
  396. """
  397. Given the OGRGeomType for a geometry and its associated GeometryField,
  398. determine whether the geometry should be turned into a GeometryCollection.
  399. """
  400. return (geom_type.num in self.MULTI_TYPES and
  401. model_field.__class__.__name__ == 'Multi%s' % geom_type.django)
  402. def save(self, verbose=False, fid_range=False, step=False,
  403. progress=False, silent=False, stream=sys.stdout, strict=False):
  404. """
  405. Saves the contents from the OGR DataSource Layer into the database
  406. according to the mapping dictionary given at initialization.
  407. Keyword Parameters:
  408. verbose:
  409. If set, information will be printed subsequent to each model save
  410. executed on the database.
  411. fid_range:
  412. May be set with a slice or tuple of (begin, end) feature ID's to map
  413. from the data source. In other words, this keyword enables the user
  414. to selectively import a subset range of features in the geographic
  415. data source.
  416. step:
  417. If set with an integer, transactions will occur at every step
  418. interval. For example, if step=1000, a commit would occur after
  419. the 1,000th feature, the 2,000th feature etc.
  420. progress:
  421. When this keyword is set, status information will be printed giving
  422. the number of features processed and sucessfully saved. By default,
  423. progress information will pe printed every 1000 features processed,
  424. however, this default may be overridden by setting this keyword with an
  425. integer for the desired interval.
  426. stream:
  427. Status information will be written to this file handle. Defaults to
  428. using `sys.stdout`, but any object with a `write` method is supported.
  429. silent:
  430. By default, non-fatal error notifications are printed to stdout, but
  431. this keyword may be set to disable these notifications.
  432. strict:
  433. Execution of the model mapping will cease upon the first error
  434. encountered. The default behavior is to attempt to continue.
  435. """
  436. # Getting the default Feature ID range.
  437. default_range = self.check_fid_range(fid_range)
  438. # Setting the progress interval, if requested.
  439. if progress:
  440. if progress is True or not isinstance(progress, int):
  441. progress_interval = 1000
  442. else:
  443. progress_interval = progress
  444. # Defining the 'real' save method, utilizing the transaction
  445. # decorator created during initialization.
  446. @self.transaction_decorator
  447. def _save(feat_range=default_range, num_feat=0, num_saved=0):
  448. if feat_range:
  449. layer_iter = self.layer[feat_range]
  450. else:
  451. layer_iter = self.layer
  452. for feat in layer_iter:
  453. num_feat += 1
  454. # Getting the keyword arguments
  455. try:
  456. kwargs = self.feature_kwargs(feat)
  457. except LayerMapError, msg:
  458. # Something borked the validation
  459. if strict: raise
  460. elif not silent:
  461. stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg))
  462. else:
  463. # Constructing the model using the keyword args
  464. is_update = False
  465. if self.unique:
  466. # If we want unique models on a particular field, handle the
  467. # geometry appropriately.
  468. try:
  469. # Getting the keyword arguments and retrieving
  470. # the unique model.
  471. u_kwargs = self.unique_kwargs(kwargs)
  472. m = self.model.objects.using(self.using).get(**u_kwargs)
  473. is_update = True
  474. # Getting the geometry (in OGR form), creating
  475. # one from the kwargs WKT, adding in additional
  476. # geometries, and update the attribute with the
  477. # just-updated geometry WKT.
  478. geom = getattr(m, self.geom_field).ogr
  479. new = OGRGeometry(kwargs[self.geom_field])
  480. for g in new: geom.add(g)
  481. setattr(m, self.geom_field, geom.wkt)
  482. except ObjectDoesNotExist:
  483. # No unique model exists yet, create.
  484. m = self.model(**kwargs)
  485. else:
  486. m = self.model(**kwargs)
  487. try:
  488. # Attempting to save.
  489. m.save(using=self.using)
  490. num_saved += 1
  491. if verbose: stream.write('%s: %s\n' % (is_update and 'Updated' or 'Saved', m))
  492. except SystemExit:
  493. raise
  494. except Exception, msg:
  495. if self.transaction_mode == 'autocommit':
  496. # Rolling back the transaction so that other model saves
  497. # will work.
  498. transaction.rollback_unless_managed()
  499. if strict:
  500. # Bailing out if the `strict` keyword is set.
  501. if not silent:
  502. stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid)
  503. stream.write('%s\n' % kwargs)
  504. raise
  505. elif not silent:
  506. stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg))
  507. # Printing progress information, if requested.
  508. if progress and num_feat % progress_interval == 0:
  509. stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved))
  510. # Only used for status output purposes -- incremental saving uses the
  511. # values returned here.
  512. return num_saved, num_feat
  513. nfeat = self.layer.num_feat
  514. if step and isinstance(step, int) and step < nfeat:
  515. # Incremental saving is requested at the given interval (step)
  516. if default_range:
  517. raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.')
  518. beg, num_feat, num_saved = (0, 0, 0)
  519. indices = range(step, nfeat, step)
  520. n_i = len(indices)
  521. for i, end in enumerate(indices):
  522. # Constructing the slice to use for this step; the last slice is
  523. # special (e.g, [100:] instead of [90:100]).
  524. if i+1 == n_i: step_slice = slice(beg, None)
  525. else: step_slice = slice(beg, end)
  526. try:
  527. num_feat, num_saved = _save(step_slice, num_feat, num_saved)
  528. beg = end
  529. except:
  530. stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice))
  531. raise
  532. else:
  533. # Otherwise, just calling the previously defined _save() function.
  534. _save()