PageRenderTime 130ms CodeModel.GetById 50ms app.highlight 50ms RepoModel.GetById 20ms app.codeStats 1ms

/django/contrib/gis/utils/layermapping.py

https://code.google.com/p/mango-py/
Python | 608 lines | 496 code | 32 blank | 80 comment | 35 complexity | f804a0fec011db3c88b4eed870635376 MD5 | raw file
  1# LayerMapping -- A Django Model/OGR Layer Mapping Utility
  2"""
  3 The LayerMapping class provides a way to map the contents of OGR
  4 vector files (e.g. SHP files) to Geographic-enabled Django models.
  5
  6 For more information, please consult the GeoDjango documentation:
  7   http://geodjango.org/docs/layermapping.html
  8"""
  9import sys
 10from datetime import date, datetime
 11from decimal import Decimal
 12from django.core.exceptions import ObjectDoesNotExist
 13from django.db import connections, DEFAULT_DB_ALIAS
 14from django.contrib.gis.db.models import GeometryField
 15from django.contrib.gis.gdal import CoordTransform, DataSource, \
 16    OGRException, OGRGeometry, OGRGeomType, SpatialReference
 17from django.contrib.gis.gdal.field import \
 18    OFTDate, OFTDateTime, OFTInteger, OFTReal, OFTString, OFTTime
 19from django.db import models, transaction
 20from django.contrib.localflavor.us.models import USStateField
 21
 22# LayerMapping exceptions.
 23class LayerMapError(Exception): pass
 24class InvalidString(LayerMapError): pass
 25class InvalidDecimal(LayerMapError): pass
 26class InvalidInteger(LayerMapError): pass
 27class MissingForeignKey(LayerMapError): pass
 28
class LayerMapping(object):
    "A class that maps OGR Layers to GeoDjango Models."

    # Acceptable 'base' types for a multi-geometry type.  The keys are OGR
    # geometry type numbers (the `num` attribute of an OGRGeomType) of the
    # single-geometry types, and the values are the multi-geometry
    # OGRGeomType used to wrap such a geometry (see `make_multi` and
    # `verify_geom`).  The literal keys 1, 2 and 3 are presumably the numbers
    # for Point, LineString and Polygon, mirroring the 25D entries below.
    MULTI_TYPES = {1 : OGRGeomType('MultiPoint'),
                   2 : OGRGeomType('MultiLineString'),
                   3 : OGRGeomType('MultiPolygon'),
                   OGRGeomType('Point25D').num : OGRGeomType('MultiPoint25D'),
                   OGRGeomType('LineString25D').num : OGRGeomType('MultiLineString25D'),
                   OGRGeomType('Polygon25D').num : OGRGeomType('MultiPolygon25D'),
                   }

    # Acceptable Django field types and corresponding acceptable OGR
    # counterparts.  Each value is either a single OGR field class or a tuple
    # of them; `check_layer` tests the layer's field type against it with
    # `issubclass`, and looks the model field up by its exact class.
    FIELD_TYPES = {
        models.AutoField : OFTInteger,
        models.IntegerField : (OFTInteger, OFTReal, OFTString),
        models.FloatField : (OFTInteger, OFTReal),
        models.DateField : OFTDate,
        models.DateTimeField : OFTDateTime,
        models.EmailField : OFTString,
        models.TimeField : OFTTime,
        models.DecimalField : (OFTInteger, OFTReal),
        models.CharField : OFTString,
        models.SlugField : OFTString,
        models.TextField : OFTString,
        models.URLField : OFTString,
        USStateField : OFTString,
        # This is a reminder that XMLField is deprecated
        # and this needs to be removed in 1.4
        models.XMLField : OFTString,
        models.BigIntegerField : (OFTInteger, OFTReal, OFTString),
        models.SmallIntegerField : (OFTInteger, OFTReal, OFTString),
        models.PositiveSmallIntegerField : (OFTInteger, OFTReal, OFTString),
        }

    # The acceptable transaction modes.  The keys are the values accepted for
    # the `transaction_mode` keyword of `__init__`; the selected value is the
    # decorator that `save` applies to its inner `_save` function.
    TRANSACTION_MODES = {'autocommit' : transaction.autocommit,
                         'commit_on_success' : transaction.commit_on_success,
                         }
 69
 70    def __init__(self, model, data, mapping, layer=0,
 71                 source_srs=None, encoding=None,
 72                 transaction_mode='commit_on_success',
 73                 transform=True, unique=None, using=DEFAULT_DB_ALIAS):
 74        """
 75        A LayerMapping object is initialized using the given Model (not an instance),
 76        a DataSource (or string path to an OGR-supported data file), and a mapping
 77        dictionary.  See the module level docstring for more details and keyword
 78        argument usage.
 79        """
 80        # Getting the DataSource and the associated Layer.
 81        if isinstance(data, basestring):
 82            self.ds = DataSource(data)
 83        else:
 84            self.ds = data
 85        self.layer = self.ds[layer]
 86
 87        self.using = using
 88        self.spatial_backend = connections[using].ops
 89
 90        # Setting the mapping & model attributes.
 91        self.mapping = mapping
 92        self.model = model
 93
 94        # Checking the layer -- intitialization of the object will fail if
 95        # things don't check out before hand.
 96        self.check_layer()
 97
 98        # Getting the geometry column associated with the model (an
 99        # exception will be raised if there is no geometry column).
100        if self.spatial_backend.mysql:
101            transform = False
102        else:
103            self.geo_field = self.geometry_field()
104
105        # Checking the source spatial reference system, and getting
106        # the coordinate transformation object (unless the `transform`
107        # keyword is set to False)
108        if transform:
109            self.source_srs = self.check_srs(source_srs)
110            self.transform = self.coord_transform()
111        else:
112            self.transform = transform
113
114        # Setting the encoding for OFTString fields, if specified.
115        if encoding:
116            # Making sure the encoding exists, if not a LookupError
117            # exception will be thrown.
118            from codecs import lookup
119            lookup(encoding)
120            self.encoding = encoding
121        else:
122            self.encoding = None
123
124        if unique:
125            self.check_unique(unique)
126            transaction_mode = 'autocommit' # Has to be set to autocommit.
127            self.unique = unique
128        else:
129            self.unique = None
130
131        # Setting the transaction decorator with the function in the
132        # transaction modes dictionary.
133        if transaction_mode in self.TRANSACTION_MODES:
134            self.transaction_decorator = self.TRANSACTION_MODES[transaction_mode]
135            self.transaction_mode = transaction_mode
136        else:
137            raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode)
138
139        if using is None:
140            pass
141
142    #### Checking routines used during initialization ####
143    def check_fid_range(self, fid_range):
144        "This checks the `fid_range` keyword."
145        if fid_range:
146            if isinstance(fid_range, (tuple, list)):
147                return slice(*fid_range)
148            elif isinstance(fid_range, slice):
149                return fid_range
150            else:
151                raise TypeError
152        else:
153            return None
154
155    def check_layer(self):
156        """
157        This checks the Layer metadata, and ensures that it is compatible
158        with the mapping information and model.  Unlike previous revisions,
159        there is no need to increment through each feature in the Layer.
160        """
161        # The geometry field of the model is set here.
162        # TODO: Support more than one geometry field / model.  However, this
163        # depends on the GDAL Driver in use.
164        self.geom_field = False
165        self.fields = {}
166
167        # Getting lists of the field names and the field types available in
168        # the OGR Layer.
169        ogr_fields = self.layer.fields
170        ogr_field_types = self.layer.field_types
171
172        # Function for determining if the OGR mapping field is in the Layer.
173        def check_ogr_fld(ogr_map_fld):
174            try:
175                idx = ogr_fields.index(ogr_map_fld)
176            except ValueError:
177                raise LayerMapError('Given mapping OGR field "%s" not found in OGR Layer.' % ogr_map_fld)
178            return idx
179
180        # No need to increment through each feature in the model, simply check
181        # the Layer metadata against what was given in the mapping dictionary.
182        for field_name, ogr_name in self.mapping.items():
183            # Ensuring that a corresponding field exists in the model
184            # for the given field name in the mapping.
185            try:
186                model_field = self.model._meta.get_field(field_name)
187            except models.fields.FieldDoesNotExist:
188                raise LayerMapError('Given mapping field "%s" not in given Model fields.' % field_name)
189
190            # Getting the string name for the Django field class (e.g., 'PointField').
191            fld_name = model_field.__class__.__name__
192
193            if isinstance(model_field, GeometryField):
194                if self.geom_field:
195                    raise LayerMapError('LayerMapping does not support more than one GeometryField per model.')
196
197                # Getting the coordinate dimension of the geometry field.
198                coord_dim = model_field.dim
199
200                try:
201                    if coord_dim == 3:
202                        gtype = OGRGeomType(ogr_name + '25D')
203                    else:
204                        gtype = OGRGeomType(ogr_name)
205                except OGRException:
206                    raise LayerMapError('Invalid mapping for GeometryField "%s".' % field_name)
207
208                # Making sure that the OGR Layer's Geometry is compatible.
209                ltype = self.layer.geom_type
210                if not (ltype.name.startswith(gtype.name) or self.make_multi(ltype, model_field)):
211                    raise LayerMapError('Invalid mapping geometry; model has %s%s, '
212                                        'layer geometry type is %s.' %
213                                        (fld_name, (coord_dim == 3 and '(dim=3)') or '', ltype))
214
215                # Setting the `geom_field` attribute w/the name of the model field
216                # that is a Geometry.  Also setting the coordinate dimension
217                # attribute.
218                self.geom_field = field_name
219                self.coord_dim = coord_dim
220                fields_val = model_field
221            elif isinstance(model_field, models.ForeignKey):
222                if isinstance(ogr_name, dict):
223                    # Is every given related model mapping field in the Layer?
224                    rel_model = model_field.rel.to
225                    for rel_name, ogr_field in ogr_name.items():
226                        idx = check_ogr_fld(ogr_field)
227                        try:
228                            rel_field = rel_model._meta.get_field(rel_name)
229                        except models.fields.FieldDoesNotExist:
230                            raise LayerMapError('ForeignKey mapping field "%s" not in %s fields.' %
231                                                (rel_name, rel_model.__class__.__name__))
232                    fields_val = rel_model
233                else:
234                    raise TypeError('ForeignKey mapping must be of dictionary type.')
235            else:
236                # Is the model field type supported by LayerMapping?
237                if not model_field.__class__ in self.FIELD_TYPES:
238                    raise LayerMapError('Django field type "%s" has no OGR mapping (yet).' % fld_name)
239
240                # Is the OGR field in the Layer?
241                idx = check_ogr_fld(ogr_name)
242                ogr_field = ogr_field_types[idx]
243
244                # Can the OGR field type be mapped to the Django field type?
245                if not issubclass(ogr_field, self.FIELD_TYPES[model_field.__class__]):
246                    raise LayerMapError('OGR field "%s" (of type %s) cannot be mapped to Django %s.' %
247                                        (ogr_field, ogr_field.__name__, fld_name))
248                fields_val = model_field
249
250            self.fields[field_name] = fields_val
251
252    def check_srs(self, source_srs):
253        "Checks the compatibility of the given spatial reference object."
254
255        if isinstance(source_srs, SpatialReference):
256            sr = source_srs
257        elif isinstance(source_srs, self.spatial_backend.spatial_ref_sys()):
258            sr = source_srs.srs
259        elif isinstance(source_srs, (int, basestring)):
260            sr = SpatialReference(source_srs)
261        else:
262            # Otherwise just pulling the SpatialReference from the layer
263            sr = self.layer.srs
264
265        if not sr:
266            raise LayerMapError('No source reference system defined.')
267        else:
268            return sr
269
270    def check_unique(self, unique):
271        "Checks the `unique` keyword parameter -- may be a sequence or string."
272        if isinstance(unique, (list, tuple)):
273            # List of fields to determine uniqueness with
274            for attr in unique:
275                if not attr in self.mapping: raise ValueError
276        elif isinstance(unique, basestring):
277            # Only a single field passed in.
278            if unique not in self.mapping: raise ValueError
279        else:
280            raise TypeError('Unique keyword argument must be set with a tuple, list, or string.')
281
282    #### Keyword argument retrieval routines ####
283    def feature_kwargs(self, feat):
284        """
285        Given an OGR Feature, this will return a dictionary of keyword arguments
286        for constructing the mapped model.
287        """
288        # The keyword arguments for model construction.
289        kwargs = {}
290
291        # Incrementing through each model field and OGR field in the
292        # dictionary mapping.
293        for field_name, ogr_name in self.mapping.items():
294            model_field = self.fields[field_name]
295
296            if isinstance(model_field, GeometryField):
297                # Verify OGR geometry.
298                try:
299                    val = self.verify_geom(feat.geom, model_field)
300                except OGRException:
301                    raise LayerMapError('Could not retrieve geometry from feature.')
302            elif isinstance(model_field, models.base.ModelBase):
303                # The related _model_, not a field was passed in -- indicating
304                # another mapping for the related Model.
305                val = self.verify_fk(feat, model_field, ogr_name)
306            else:
307                # Otherwise, verify OGR Field type.
308                val = self.verify_ogr_field(feat[ogr_name], model_field)
309
310            # Setting the keyword arguments for the field name with the
311            # value obtained above.
312            kwargs[field_name] = val
313
314        return kwargs
315
316    def unique_kwargs(self, kwargs):
317        """
318        Given the feature keyword arguments (from `feature_kwargs`) this routine
319        will construct and return the uniqueness keyword arguments -- a subset
320        of the feature kwargs.
321        """
322        if isinstance(self.unique, basestring):
323            return {self.unique : kwargs[self.unique]}
324        else:
325            return dict((fld, kwargs[fld]) for fld in self.unique)
326
327    #### Verification routines used in constructing model keyword arguments. ####
328    def verify_ogr_field(self, ogr_field, model_field):
329        """
330        Verifies if the OGR Field contents are acceptable to the Django
331        model field.  If they are, the verified value is returned,
332        otherwise the proper exception is raised.
333        """
334        if (isinstance(ogr_field, OFTString) and
335            isinstance(model_field, (models.CharField, models.TextField))):
336            if self.encoding:
337                # The encoding for OGR data sources may be specified here
338                # (e.g., 'cp437' for Census Bureau boundary files).
339                val = unicode(ogr_field.value, self.encoding)
340            else:
341                val = ogr_field.value
342                if len(val) > model_field.max_length:
343                    raise InvalidString('%s model field maximum string length is %s, given %s characters.' %
344                                        (model_field.name, model_field.max_length, len(val)))
345        elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField):
346            try:
347                # Creating an instance of the Decimal value to use.
348                d = Decimal(str(ogr_field.value))
349            except:
350                raise InvalidDecimal('Could not construct decimal from: %s' % ogr_field.value)
351
352            # Getting the decimal value as a tuple.
353            dtup = d.as_tuple()
354            digits = dtup[1]
355            d_idx = dtup[2] # index where the decimal is
356
357            # Maximum amount of precision, or digits to the left of the decimal.
358            max_prec = model_field.max_digits - model_field.decimal_places
359
360            # Getting the digits to the left of the decimal place for the
361            # given decimal.
362            if d_idx < 0:
363                n_prec = len(digits[:d_idx])
364            else:
365                n_prec = len(digits) + d_idx
366
367            # If we have more than the maximum digits allowed, then throw an
368            # InvalidDecimal exception.
369            if n_prec > max_prec:
370                raise InvalidDecimal('A DecimalField with max_digits %d, decimal_places %d must round to an absolute value less than 10^%d.' %
371                                     (model_field.max_digits, model_field.decimal_places, max_prec))
372            val = d
373        elif isinstance(ogr_field, (OFTReal, OFTString)) and isinstance(model_field, models.IntegerField):
374            # Attempt to convert any OFTReal and OFTString value to an OFTInteger.
375            try:
376                val = int(ogr_field.value)
377            except:
378                raise InvalidInteger('Could not construct integer from: %s' % ogr_field.value)
379        else:
380            val = ogr_field.value
381        return val
382
383    def verify_fk(self, feat, rel_model, rel_mapping):
384        """
385        Given an OGR Feature, the related model and its dictionary mapping,
386        this routine will retrieve the related model for the ForeignKey
387        mapping.
388        """
389        # TODO: It is expensive to retrieve a model for every record --
390        #  explore if an efficient mechanism exists for caching related
391        #  ForeignKey models.
392
393        # Constructing and verifying the related model keyword arguments.
394        fk_kwargs = {}
395        for field_name, ogr_name in rel_mapping.items():
396            fk_kwargs[field_name] = self.verify_ogr_field(feat[ogr_name], rel_model._meta.get_field(field_name))
397
398        # Attempting to retrieve and return the related model.
399        try:
400            return rel_model.objects.get(**fk_kwargs)
401        except ObjectDoesNotExist:
402            raise MissingForeignKey('No ForeignKey %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs))
403
404    def verify_geom(self, geom, model_field):
405        """
406        Verifies the geometry -- will construct and return a GeometryCollection
407        if necessary (for example if the model field is MultiPolygonField while
408        the mapped shapefile only contains Polygons).
409        """
410        # Downgrade a 3D geom to a 2D one, if necessary.
411        if self.coord_dim != geom.coord_dim:
412            geom.coord_dim = self.coord_dim
413
414        if self.make_multi(geom.geom_type, model_field):
415            # Constructing a multi-geometry type to contain the single geometry
416            multi_type = self.MULTI_TYPES[geom.geom_type.num]
417            g = OGRGeometry(multi_type)
418            g.add(geom)
419        else:
420            g = geom
421
422        # Transforming the geometry with our Coordinate Transformation object,
423        # but only if the class variable `transform` is set w/a CoordTransform
424        # object.
425        if self.transform: g.transform(self.transform)
426
427        # Returning the WKT of the geometry.
428        return g.wkt
429
430    #### Other model methods ####
431    def coord_transform(self):
432        "Returns the coordinate transformation object."
433        SpatialRefSys = self.spatial_backend.spatial_ref_sys()
434        try:
435            # Getting the target spatial reference system
436            target_srs = SpatialRefSys.objects.get(srid=self.geo_field.srid).srs
437
438            # Creating the CoordTransform object
439            return CoordTransform(self.source_srs, target_srs)
440        except Exception, msg:
441            raise LayerMapError('Could not translate between the data source and model geometry: %s' % msg)
442
443    def geometry_field(self):
444        "Returns the GeometryField instance associated with the geographic column."
445        # Use the `get_field_by_name` on the model's options so that we
446        # get the correct field instance if there's model inheritance.
447        opts = self.model._meta
448        fld, model, direct, m2m = opts.get_field_by_name(self.geom_field)
449        return fld
450
451    def make_multi(self, geom_type, model_field):
452        """
453        Given the OGRGeomType for a geometry and its associated GeometryField,
454        determine whether the geometry should be turned into a GeometryCollection.
455        """
456        return (geom_type.num in self.MULTI_TYPES and
457                model_field.__class__.__name__ == 'Multi%s' % geom_type.django)
458
459    def save(self, verbose=False, fid_range=False, step=False,
460             progress=False, silent=False, stream=sys.stdout, strict=False):
461        """
462        Saves the contents from the OGR DataSource Layer into the database
463        according to the mapping dictionary given at initialization.
464
465        Keyword Parameters:
466         verbose:
467           If set, information will be printed subsequent to each model save
468           executed on the database.
469
470         fid_range:
471           May be set with a slice or tuple of (begin, end) feature ID's to map
472           from the data source.  In other words, this keyword enables the user
473           to selectively import a subset range of features in the geographic
474           data source.
475
476         step:
477           If set with an integer, transactions will occur at every step
478           interval. For example, if step=1000, a commit would occur after
479           the 1,000th feature, the 2,000th feature etc.
480
481         progress:
482           When this keyword is set, status information will be printed giving
483           the number of features processed and sucessfully saved.  By default,
484           progress information will pe printed every 1000 features processed,
485           however, this default may be overridden by setting this keyword with an
486           integer for the desired interval.
487
488         stream:
489           Status information will be written to this file handle.  Defaults to
490           using `sys.stdout`, but any object with a `write` method is supported.
491
492         silent:
493           By default, non-fatal error notifications are printed to stdout, but
494           this keyword may be set to disable these notifications.
495
496         strict:
497           Execution of the model mapping will cease upon the first error
498           encountered.  The default behavior is to attempt to continue.
499        """
500        # Getting the default Feature ID range.
501        default_range = self.check_fid_range(fid_range)
502
503        # Setting the progress interval, if requested.
504        if progress:
505            if progress is True or not isinstance(progress, int):
506                progress_interval = 1000
507            else:
508                progress_interval = progress
509
510        # Defining the 'real' save method, utilizing the transaction
511        # decorator created during initialization.
512        @self.transaction_decorator
513        def _save(feat_range=default_range, num_feat=0, num_saved=0):
514            if feat_range:
515                layer_iter = self.layer[feat_range]
516            else:
517                layer_iter = self.layer
518
519            for feat in layer_iter:
520                num_feat += 1
521                # Getting the keyword arguments
522                try:
523                    kwargs = self.feature_kwargs(feat)
524                except LayerMapError, msg:
525                    # Something borked the validation
526                    if strict: raise
527                    elif not silent:
528                        stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg))
529                else:
530                    # Constructing the model using the keyword args
531                    is_update = False
532                    if self.unique:
533                        # If we want unique models on a particular field, handle the
534                        # geometry appropriately.
535                        try:
536                            # Getting the keyword arguments and retrieving
537                            # the unique model.
538                            u_kwargs = self.unique_kwargs(kwargs)
539                            m = self.model.objects.using(self.using).get(**u_kwargs)
540                            is_update = True
541
542                            # Getting the geometry (in OGR form), creating
543                            # one from the kwargs WKT, adding in additional
544                            # geometries, and update the attribute with the
545                            # just-updated geometry WKT.
546                            geom = getattr(m, self.geom_field).ogr
547                            new = OGRGeometry(kwargs[self.geom_field])
548                            for g in new: geom.add(g)
549                            setattr(m, self.geom_field, geom.wkt)
550                        except ObjectDoesNotExist:
551                            # No unique model exists yet, create.
552                            m = self.model(**kwargs)
553                    else:
554                        m = self.model(**kwargs)
555
556                    try:
557                        # Attempting to save.
558                        m.save(using=self.using)
559                        num_saved += 1
560                        if verbose: stream.write('%s: %s\n' % (is_update and 'Updated' or 'Saved', m))
561                    except SystemExit:
562                        raise
563                    except Exception, msg:
564                        if self.transaction_mode == 'autocommit':
565                            # Rolling back the transaction so that other model saves
566                            # will work.
567                            transaction.rollback_unless_managed()
568                        if strict:
569                            # Bailing out if the `strict` keyword is set.
570                            if not silent:
571                                stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid)
572                                stream.write('%s\n' % kwargs)
573                            raise
574                        elif not silent:
575                            stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg))
576
577                # Printing progress information, if requested.
578                if progress and num_feat % progress_interval == 0:
579                    stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved))
580
581            # Only used for status output purposes -- incremental saving uses the
582            # values returned here.
583            return num_saved, num_feat
584
585        nfeat = self.layer.num_feat
586        if step and isinstance(step, int) and step < nfeat:
587            # Incremental saving is requested at the given interval (step)
588            if default_range:
589                raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.')
590            beg, num_feat, num_saved = (0, 0, 0)
591            indices = range(step, nfeat, step)
592            n_i = len(indices)
593
594            for i, end in enumerate(indices):
595                # Constructing the slice to use for this step; the last slice is
596                # special (e.g, [100:] instead of [90:100]).
597                if i+1 == n_i: step_slice = slice(beg, None)
598                else: step_slice = slice(beg, end)
599
600                try:
601                    num_feat, num_saved = _save(step_slice, num_feat, num_saved)
602                    beg = end
603                except:
604                    stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice))
605                    raise
606        else:
607            # Otherwise, just calling the previously defined _save() function.
608            _save()