PageRenderTime 110ms CodeModel.GetById 45ms app.highlight 58ms RepoModel.GetById 1ms app.codeStats 0ms

/Lib/imputil.py

http://unladen-swallow.googlecode.com/
Python | 734 lines | 645 code | 29 blank | 60 comment | 28 complexity | ed0042d97a023effa9b53c2770cad1ad MD5 | raw file
  1"""
  2Import utilities
  3
  4Exported classes:
  5    ImportManager   Manage the import process
  6
  7    Importer        Base class for replacing standard import functions
  8    BuiltinImporter Emulate the import mechanism for builtin and frozen modules
  9
 10    DynLoadSuffixImporter
 11"""
 12from warnings import warnpy3k
 13warnpy3k("the imputil module has been removed in Python 3.0", stacklevel=2)
 14del warnpy3k
 15
 16# note: avoid importing non-builtin modules
 17import imp                      ### not available in JPython?
 18import sys
 19import __builtin__
 20
 21# for the DirectoryImporter
 22import struct
 23import marshal
 24
 25__all__ = ["ImportManager","Importer","BuiltinImporter"]
 26
 27_StringType = type('')
 28_ModuleType = type(sys)         ### doesn't work in JPython...
 29
 30class ImportManager:
 31    "Manage the import process."
 32
 33    def install(self, namespace=vars(__builtin__)):
 34        "Install this ImportManager into the specified namespace."
 35
 36        if isinstance(namespace, _ModuleType):
 37            namespace = vars(namespace)
 38
 39        # Note: we have no notion of "chaining"
 40
 41        # Record the previous import hook, then install our own.
 42        self.previous_importer = namespace['__import__']
 43        self.namespace = namespace
 44        namespace['__import__'] = self._import_hook
 45
 46        ### fix this
 47        #namespace['reload'] = self._reload_hook
 48
 49    def uninstall(self):
 50        "Restore the previous import mechanism."
 51        self.namespace['__import__'] = self.previous_importer
 52
 53    def add_suffix(self, suffix, importFunc):
 54        assert callable(importFunc)
 55        self.fs_imp.add_suffix(suffix, importFunc)
 56
 57    ######################################################################
 58    #
 59    # PRIVATE METHODS
 60    #
 61
 62    clsFilesystemImporter = None
 63
 64    def __init__(self, fs_imp=None):
 65        # we're definitely going to be importing something in the future,
 66        # so let's just load the OS-related facilities.
 67        if not _os_stat:
 68            _os_bootstrap()
 69
 70        # This is the Importer that we use for grabbing stuff from the
 71        # filesystem. It defines one more method (import_from_dir) for our use.
 72        if fs_imp is None:
 73            cls = self.clsFilesystemImporter or _FilesystemImporter
 74            fs_imp = cls()
 75        self.fs_imp = fs_imp
 76
 77        # Initialize the set of suffixes that we recognize and import.
 78        # The default will import dynamic-load modules first, followed by
 79        # .py files (or a .py file's cached bytecode)
 80        for desc in imp.get_suffixes():
 81            if desc[2] == imp.C_EXTENSION:
 82                self.add_suffix(desc[0],
 83                                DynLoadSuffixImporter(desc).import_file)
 84        self.add_suffix('.py', py_suffix_importer)
 85
 86    def _import_hook(self, fqname, globals=None, locals=None, fromlist=None):
 87        """Python calls this hook to locate and import a module."""
 88
 89        parts = fqname.split('.')
 90
 91        # determine the context of this import
 92        parent = self._determine_import_context(globals)
 93
 94        # if there is a parent, then its importer should manage this import
 95        if parent:
 96            module = parent.__importer__._do_import(parent, parts, fromlist)
 97            if module:
 98                return module
 99
100        # has the top module already been imported?
101        try:
102            top_module = sys.modules[parts[0]]
103        except KeyError:
104
105            # look for the topmost module
106            top_module = self._import_top_module(parts[0])
107            if not top_module:
108                # the topmost module wasn't found at all.
109                raise ImportError, 'No module named ' + fqname
110
111        # fast-path simple imports
112        if len(parts) == 1:
113            if not fromlist:
114                return top_module
115
116            if not top_module.__dict__.get('__ispkg__'):
117                # __ispkg__ isn't defined (the module was not imported by us),
118                # or it is zero.
119                #
120                # In the former case, there is no way that we could import
121                # sub-modules that occur in the fromlist (but we can't raise an
122                # error because it may just be names) because we don't know how
123                # to deal with packages that were imported by other systems.
124                #
125                # In the latter case (__ispkg__ == 0), there can't be any sub-
126                # modules present, so we can just return.
127                #
128                # In both cases, since len(parts) == 1, the top_module is also
129                # the "bottom" which is the defined return when a fromlist
130                # exists.
131                return top_module
132
133        importer = top_module.__dict__.get('__importer__')
134        if importer:
135            return importer._finish_import(top_module, parts[1:], fromlist)
136
137        # Grrr, some people "import os.path" or do "from os.path import ..."
138        if len(parts) == 2 and hasattr(top_module, parts[1]):
139            if fromlist:
140                return getattr(top_module, parts[1])
141            else:
142                return top_module
143
144        # If the importer does not exist, then we have to bail. A missing
145        # importer means that something else imported the module, and we have
146        # no knowledge of how to get sub-modules out of the thing.
147        raise ImportError, 'No module named ' + fqname
148
149    def _determine_import_context(self, globals):
150        """Returns the context in which a module should be imported.
151
152        The context could be a loaded (package) module and the imported module
153        will be looked for within that package. The context could also be None,
154        meaning there is no context -- the module should be looked for as a
155        "top-level" module.
156        """
157
158        if not globals or not globals.get('__importer__'):
159            # globals does not refer to one of our modules or packages. That
160            # implies there is no relative import context (as far as we are
161            # concerned), and it should just pick it off the standard path.
162            return None
163
164        # The globals refer to a module or package of ours. It will define
165        # the context of the new import. Get the module/package fqname.
166        parent_fqname = globals['__name__']
167
168        # if a package is performing the import, then return itself (imports
169        # refer to pkg contents)
170        if globals['__ispkg__']:
171            parent = sys.modules[parent_fqname]
172            assert globals is parent.__dict__
173            return parent
174
175        i = parent_fqname.rfind('.')
176
177        # a module outside of a package has no particular import context
178        if i == -1:
179            return None
180
181        # if a module in a package is performing the import, then return the
182        # package (imports refer to siblings)
183        parent_fqname = parent_fqname[:i]
184        parent = sys.modules[parent_fqname]
185        assert parent.__name__ == parent_fqname
186        return parent
187
188    def _import_top_module(self, name):
189        # scan sys.path looking for a location in the filesystem that contains
190        # the module, or an Importer object that can import the module.
191        for item in sys.path:
192            if isinstance(item, _StringType):
193                module = self.fs_imp.import_from_dir(item, name)
194            else:
195                module = item.import_top(name)
196            if module:
197                return module
198        return None
199
200    def _reload_hook(self, module):
201        "Python calls this hook to reload a module."
202
203        # reloading of a module may or may not be possible (depending on the
204        # importer), but at least we can validate that it's ours to reload
205        importer = module.__dict__.get('__importer__')
206        if not importer:
207            ### oops. now what...
208            pass
209
210        # okay. it is using the imputil system, and we must delegate it, but
211        # we don't know what to do (yet)
212        ### we should blast the module dict and do another get_code(). need to
213        ### flesh this out and add proper docco...
214        raise SystemError, "reload not yet implemented"
215
216
class Importer:
    """Base class for replacing standard import functions.

    Subclasses override get_code() to locate a module.  Every module that
    is imported through an Importer gets two extra attributes:
    ``__importer__`` (the Importer instance) and ``__ispkg__`` (the package
    flag returned by get_code()).
    """

    def import_top(self, name):
        "Import a top-level module."
        return self._import_one(None, name, name)

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _finish_import(self, top, parts, fromlist):
        """Complete an import whose top-level module is already loaded.

        top is the top-level module, parts the remaining dotted components
        below it.  Returns top when there is no fromlist, otherwise the
        bottom module (after importing any sub-modules named in fromlist).
        """
        # if "a.b.c" was provided, then load the ".b.c" portion down from
        # below the top-level module.
        bottom = self._load_tail(top, parts)

        # if the form is "import a.b.c", then return "a"
        if not fromlist:
            # no fromlist: return the top of the import tree
            return top

        # the top module was imported by self.
        #
        # this means that the bottom module was also imported by self (just
        # now, or in the past and we fetched it from sys.modules).
        #
        # since we imported/handled the bottom module, this means that we can
        # also handle its fromlist (and reliably use __ispkg__).

        # if the bottom node is a package, then (potentially) import some
        # modules.
        #
        # note: if it is not a package, then "fromlist" refers to names in
        #       the bottom module rather than modules.
        # note: for a mix of names and modules in the fromlist, we will
        #       import all modules and insert those into the namespace of
        #       the package module. Python will pick up all fromlist names
        #       from the bottom (package) module; some will be modules that
        #       we imported and stored in the namespace, others are expected
        #       to be present already.
        if bottom.__ispkg__:
            self._import_fromlist(bottom, fromlist)

        # if the form is "from a.b import c, d" then return "b"
        return bottom

    def _import_one(self, parent, modname, fqname):
        """Import a single module.

        Returns the module, taken from sys.modules when it is already
        there, or None when get_code() cannot find it.  A freshly imported
        module is also stored as attribute modname on parent (if any).
        """
        # has the module already been imported?
        try:
            return sys.modules[fqname]
        except KeyError:
            pass

        # load the module's code, or fetch the module itself
        result = self.get_code(parent, modname, fqname)
        if result is None:
            return None

        module = self._process_result(result, fqname)

        # insert the module into its parent
        if parent:
            setattr(parent, modname, module)
        return module

    def _process_result(self, result, fqname):
        """Turn a get_code() result into a module registered in sys.modules.

        result is the (ispkg, code, values) 3-tuple described in get_code().
        Returns whatever object sys.modules holds under fqname once the
        module's code has run.
        """
        ispkg, code, values = result

        # did get_code() return an actual module? (rather than a code object)
        is_module = isinstance(code, _ModuleType)

        # use the returned module, or create a new one to exec code into
        if is_module:
            module = code
        else:
            module = imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = ispkg

        # insert additional values into the module (before executing the code)
        module.__dict__.update(values)

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        # execute the code within the module's namespace
        if not is_module:
            try:
                exec(code, module.__dict__)
            except:
                # Deliberately catches everything: a half-initialised module
                # must not stay registered, and the error is re-raised as is.
                if fqname in sys.modules:
                    del sys.modules[fqname]
                raise

        # fetch from sys.modules instead of returning module directly.
        # also make module's __name__ agree with fqname, in case
        # the executed code played games on us.
        module = sys.modules[fqname]
        module.__name__ = fqname
        return module

    def _load_tail(self, m, parts):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules.  Raises
        ImportError as soon as one component cannot be imported.
        """
        for part in parts:
            fqname = "%s.%s" % (m.__name__, part)
            m = self._import_one(m, part, fqname)
            if not m:
                raise ImportError("No module named " + fqname)
        return m

    def _import_fromlist(self, package, fromlist):
        'Import any sub-modules in the "from" list.'

        # if '*' is present in the fromlist, then look for the '__all__'
        # variable to find additional items (modules) to import.
        if '*' in fromlist:
            fromlist = list(fromlist) + \
                       list(package.__dict__.get('__all__', []))

        for sub in fromlist:
            # if the name is already present, then don't try to import it (it
            # might not be a module!).
            if sub != '*' and not hasattr(package, sub):
                subname = "%s.%s" % (package.__name__, sub)
                submod = self._import_one(package, sub, subname)
                if not submod:
                    raise ImportError("cannot import name " + subname)

    def _do_import(self, parent, parts, fromlist):
        """Attempt to import the module relative to parent.

        This method is used when the import context specifies that <self>
        imported the parent module.  Returns None when the first component
        of parts cannot be found below parent.
        """
        top_name = parts[0]
        top_fqname = parent.__name__ + '.' + top_name
        top_module = self._import_one(parent, top_name, top_fqname)
        if not top_module:
            # this importer and parent could not find the module (relatively)
            return None

        return self._finish_import(top_module, parts[1:], fromlist)

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing. It
        may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a
        (potentially) dotted name from the "root" of the module namespace
        down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 3-tuple should be the integer 0 or 1,
            specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
            executed within the new module's namespace). This item can also
            be a fully-loaded module object (e.g. loaded from a shared lib).

        * The third item is a dictionary of name/value pairs that will be
            inserted into new module before the code object is executed. This
            is provided in case the module's code expects certain values (such
            as where the module was found). When the second item is a module
            object, then these names/values will be inserted *after* the module
            has been loaded/initialized.

        The base implementation raises NotImplementedError, which is a
        subclass of RuntimeError.
        """
        raise NotImplementedError("get_code not implemented")
401
402
403######################################################################
404#
405# Some handy stuff for the Importers
406#
407
# Last character of a byte-compiled file's extension: 'c' while __debug__
# is true, 'o' otherwise.
_suffix_char = 'c' if __debug__ else 'o'

# Complete extension of a byte-compiled file ('.pyc' or '.pyo').
_suffix = '.py' + _suffix_char
413
def _compile(pathname, timestamp):
    """Compile (and cache) a Python source file.

    The file specified by <pathname> is compiled to a code object and
    returned.

    Presuming the appropriate privileges exist, the bytecodes will be
    saved back to the filesystem for future imports. The source file's
    modification timestamp must be provided as a Long value; it is stored
    in the cache file as a little-endian unsigned 32-bit integer.

    If the cache file cannot be opened for writing, caching is silently
    skipped.  Errors while compiling, or while writing to an opened cache
    file, propagate to the caller.
    """
    # Read the whole source, making sure the handle is released even when
    # read() fails.
    source = open(pathname, 'rU')
    try:
        codestring = source.read()
    finally:
        source.close()

    if codestring and codestring[-1] != '\n':
        codestring = codestring + '\n'
    code = __builtin__.compile(codestring, pathname, 'exec')

    # try to cache the compiled code
    try:
        f = open(pathname + _suffix_char, 'wb')
    except IOError:
        pass
    else:
        try:
            # A placeholder goes where the magic number belongs; the real
            # magic is written last, so an interrupted write leaves a file
            # that will not be mistaken for valid bytecode.
            f.write('\0\0\0\0')
            f.write(struct.pack('<I', timestamp))
            marshal.dump(code, f)
            f.flush()
            f.seek(0, 0)
            f.write(imp.get_magic())
        finally:
            # always release the handle, also when a write or dump fails
            f.close()

    return code
444
445_os_stat = _os_path_join = None
def _os_bootstrap():
    """Set up 'os' module replacement functions for use during import bootstrap.

    Picks the platform module among the interpreter's builtin modules,
    then publishes its stat() as the global _os_stat and a matching
    path-joining function as the global _os_path_join.  Raises ImportError
    when none of the known platform modules is built in.
    """
    global _os_stat, _os_path_join

    builtin_names = sys.builtin_module_names

    # separator stays None only for 'mac', which needs its own join rules
    separator = None
    if 'posix' in builtin_names:
        from posix import stat
        separator = '/'
    elif 'nt' in builtin_names:
        from nt import stat
        separator = '\\'
    elif 'dos' in builtin_names:
        from dos import stat
        separator = '\\'
    elif 'os2' in builtin_names:
        from os2 import stat
        separator = '\\'
    elif 'mac' in builtin_names:
        from mac import stat
    else:
        raise ImportError('no os specific module found')

    if separator is None:
        def join(a, b):
            # colon-separated paths: a gets a leading and a trailing ':'
            # as needed before b is appended
            if a == '':
                return b
            if ':' not in a:
                a = ':' + a
            if a[-1:] != ':':
                a = a + ':'
            return a + b
    else:
        def join(a, b, sep=separator):
            if a == '':
                return b
            # a trailing '/' or platform separator is not doubled
            if a[-1:] in ('/', sep):
                return a + b
            return a + sep + b

    _os_stat = stat
    _os_path_join = join
491
def _os_path_isdir(pathname):
    """Local replacement for os.path.isdir().

    Returns None (rather than False) when pathname cannot be stat'ed.
    """
    try:
        mode = _os_stat(pathname).st_mode
    except OSError:
        return None
    # keep only the file-type bits and compare them with the directory type
    return (mode & 0o170000) == 0o040000
499
def _timestamp(pathname):
    """Return the file modification time as a Long.

    Returns None when pathname cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        mtime = None
    else:
        mtime = long(info.st_mtime)
    return mtime
507
508
509######################################################################
510#
511# Emulate the import mechanism for builtin and frozen modules
512#
class BuiltinImporter(Importer):
    """Importer that handles builtin and frozen modules only."""

    def get_code(self, parent, modname, fqname):
        """Load modname if it is a builtin or frozen top-level module.

        Returns (0, module, {}) on success and None when a parent is given
        or the name is neither builtin nor frozen.
        """
        # these modules definitely do not occur within a package context
        if parent:
            return None

        # classify the module; builtin is checked before frozen
        kind = None
        if imp.is_builtin(modname):
            kind = imp.C_BUILTIN
        elif imp.is_frozen(modname):
            kind = imp.PY_FROZEN

        if kind is None:
            # not found
            return None

        # got it. now load and return it.
        description = ('', '', kind)
        loaded = imp.load_module(modname, None, modname, description)
        return 0, loaded, {}
531
532
533######################################################################
534#
535# Internal importer used for importing from the filesystem
536#
class _FilesystemImporter(Importer):
    """Importer that finds modules and packages in filesystem directories.

    File types are recognised through (suffix, import function) pairs
    registered with add_suffix(); they are tried in registration order.
    """

    def __init__(self):
        # list of (suffix, importFunc) pairs
        self.suffixes = []

    def add_suffix(self, suffix, importFunc):
        """Register importFunc as the handler for files ending in suffix."""
        assert callable(importFunc)
        self.suffixes += [(suffix, importFunc)]

    def import_from_dir(self, dir, fqname):
        """Import top-level module fqname from directory dir.

        Returns the module, or None when dir does not contain it.
        """
        found = self._import_pathname(_os_path_join(dir, fqname), fqname)
        if not found:
            return None
        return self._process_result(found, fqname)

    def get_code(self, parent, modname, fqname):
        """Look for modname inside the package parent.

        Each directory of parent.__path__ is tried first; parent.__pkgdir__
        is the last resort.
        """
        # This importer is never used with an empty parent. Its existence is
        # private to the ImportManager. The ImportManager uses the
        # import_from_dir() method to import top-level modules/packages.
        # This method is only used when we look for a module within a package.
        assert parent

        for directory in parent.__path__:
            found = self._import_pathname(_os_path_join(directory, modname),
                                          fqname)
            if found is None:
                continue
            return found

        fallback = _os_path_join(parent.__pkgdir__, modname)
        return self._import_pathname(fallback, fqname)

    def _import_pathname(self, pathname, fqname):
        """Import whatever lives at pathname (given without a suffix).

        A directory is treated as a package and imported through its
        '__init__' file; the result is then flagged as a package and gains
        '__pkgdir__' and '__path__' values.  Otherwise the first registered
        suffix for which pathname + suffix can be stat'ed decides which
        import function is called.  Returns None when nothing matches.
        """
        if _os_path_isdir(pathname):
            init_path = _os_path_join(pathname, '__init__')
            found = self._import_pathname(init_path, fqname)
            if not found:
                return None
            pkg_values = found[2]
            pkg_values['__pkgdir__'] = pathname
            pkg_values['__path__'] = [pathname]
            return 1, found[1], pkg_values

        for suffix, importFunc in self.suffixes:
            candidate = pathname + suffix
            try:
                finfo = _os_stat(candidate)
            except OSError:
                # no such file; try the next suffix
                continue
            return importFunc(candidate, finfo, fqname)
        return None
585
586######################################################################
587#
588# SUFFIX-BASED IMPORTERS
589#
590
def py_suffix_importer(filename, finfo, fqname):
    """Import function for '.py' files, using cached bytecode when valid.

    filename is the path of the source file and finfo its stat result
    (item 8 is read as the modification time).  fqname is not used.

    The byte-compiled file next to the source is used only when it is at
    least as new as the source, starts with the current magic number and
    records exactly the source's modification time.  In every other case,
    including a cache file that cannot be opened or whose header is
    truncated, the source is compiled via _compile().

    Returns (0, code, {'__file__': path}) where path names the file the
    code was actually taken from.
    """
    cache_path = filename[:-3] + _suffix
    t_py = long(finfo[8])
    t_pyc = _timestamp(cache_path)

    code = None
    origin = cache_path
    if t_pyc is not None and t_pyc >= t_py:
        try:
            f = open(cache_path, 'rb')
        except IOError:
            # cache vanished or is unreadable: fall back to the source
            f = None
        if f is not None:
            try:
                if f.read(4) == imp.get_magic():
                    stamp = f.read(4)
                    # a short read means a truncated header; unpacking it
                    # would raise struct.error instead of recompiling
                    if len(stamp) == 4 and \
                       struct.unpack('<I', stamp)[0] == t_py:
                        code = marshal.load(f)
            finally:
                # release the handle even when marshal.load() fails
                f.close()

    if code is None:
        origin = filename
        code = _compile(origin, t_py)

    return 0, code, {'__file__': origin}
609
class DynLoadSuffixImporter:
    """Import function provider for dynamically loaded modules.

    desc is a suffix description tuple as produced by imp.get_suffixes();
    its second item is used as the mode for opening the file.
    """

    def __init__(self, desc):
        self.desc = desc

    def import_file(self, filename, finfo, fqname):
        """Load the module stored in filename under the name fqname.

        finfo is not used.  Returns (0, module, {}).
        """
        fp = open(filename, self.desc[1])
        try:
            module = imp.load_module(fqname, fp, filename, self.desc)
        finally:
            # imp.load_module() leaves closing the file to its caller,
            # also when loading fails.
            fp.close()
        module.__file__ = filename
        return 0, module, {}
619
620
621######################################################################
622
def _print_importers():
    """Print every sys.modules entry together with its importer.

    Entries are listed in sorted order.  Modules without an __importer__
    attribute are shown as '-- no importer'; false entries (such as None)
    as '-- non-existent module'.
    """
    for name, module in sorted(sys.modules.items()):
        if not module:
            print("%s %s" % (name, '-- non-existent module'))
            continue
        importer = module.__dict__.get('__importer__', '-- no importer')
        print("%s %s" % (name, importer))
631
def _test_revamp():
    """Switch the running interpreter over to the imputil machinery.

    Installs a fresh ImportManager with its default namespace and puts a
    BuiltinImporter at the front of sys.path.
    """
    manager = ImportManager()
    manager.install()

    builtin_importer = BuiltinImporter()
    sys.path.insert(0, builtin_importer)
635
636######################################################################
637
638#
639# TODO
640#
641# from Finn Bock:
642#   type(sys) is not a module in JPython. what to use instead?
643#   imp.C_EXTENSION is not in JPython. same for get_suffixes and new_module
644#
645#   given foo.py of:
646#      import sys
647#      sys.modules['foo'] = sys
648#
649#   ---- standard import mechanism
650#   >>> import foo
651#   >>> foo
652#   <module 'sys' (built-in)>
653#
654#   ---- revamped import mechanism
655#   >>> import imputil
656#   >>> imputil._test_revamp()
657#   >>> import foo
658#   >>> foo
659#   <module 'foo' from 'foo.py'>
660#
661#
662# from MAL:
663#   should BuiltinImporter exist in sys.path or hard-wired in ImportManager?
664#   need __path__ processing
665#   performance
666#   move chaining to a subclass [gjs: it's been nuked]
667#   deinstall should be possible
668#   query mechanism needed: is a specific Importer installed?
669#   py/pyc/pyo piping hooks to filter/process these files
670#   wish list:
671#     distutils importer hooked to list of standard Internet repositories
672#     module->file location mapper to speed FS-based imports
673#     relative imports
674#     keep chaining so that it can play nice with other import hooks
675#
676# from Gordon:
677#   push MAL's mapper into sys.path[0] as a cache (hard-coded for apps)
678#
679# from Guido:
680#   need to change sys.* references for rexec environs
681#   need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy
682#   watch out for sys.modules[...] is None
683#   flag to force absolute imports? (speeds _determine_import_context and
684#       checking for a relative module)
685#   insert names of archives into sys.path  (see quote below)
686#   note: reload does NOT blast module dict
687#   shift import mechanisms and policies around; provide for hooks, overrides
688#       (see quote below)
689#   add get_source stuff
690#   get_topcode and get_subcode
691#   CRLF handling in _compile
692#   race condition in _compile
693#   refactoring of os.py to deal with _os_bootstrap problem
694#   any special handling to do for importing a module with a SyntaxError?
695#       (e.g. clean up the traceback)
696#   implement "domain" for path-type functionality using pkg namespace
697#       (rather than FS-names like __path__)
698#   don't use the word "private"... maybe "internal"
699#
700#
701# Guido's comments on sys.path caching:
702#
703# We could cache this in a dictionary: the ImportManager can have a
704# cache dict mapping pathnames to importer objects, and a separate
705# method for coming up with an importer given a pathname that's not yet
706# in the cache.  The method should do a stat and/or look at the
707# extension to decide which importer class to use; you can register new
708# importer classes by registering a suffix or a Boolean function, plus a
709# class.  If you register a new importer class, the cache is zapped.
710# The cache is independent from sys.path (but maintained per
711# ImportManager instance) so that rearrangements of sys.path do the
712# right thing.  If a path is dropped from sys.path the corresponding
713# cache entry is simply no longer used.
714#
715# My/Guido's comments on factoring ImportManager and Importer:
716#
717# > However, we still have a tension occurring here:
718# >
719# > 1) implementing policy in ImportManager assists in single-point policy
720# >    changes for app/rexec situations
721# > 2) implementing policy in Importer assists in package-private policy
722# >    changes for normal, operating conditions
723# >
724# > I'll see if I can sort out a way to do this. Maybe the Importer class will
725# > implement the methods (which can be overridden to change policy) by
726# > delegating to ImportManager.
727#
728# Maybe also think about what kind of policies an Importer would be
729# likely to want to change.  I have a feeling that a lot of the code
730# there is actually not so much policy but a *necessity* to get things
731# working given the calling conventions for the __import__ hook: whether
732# to return the head or tail of a dotted name, or when to do the "finish
733# fromlist" stuff.
734#