/tools/relic/relic
Python | 2491 lines | 2324 code | 38 blank | 129 comment | 84 complexity | 7af0a6d52a286b932e70663517416222 MD5 | raw file
Possible License(s): LGPL-2.1, MPL-2.0-no-copyleft-exception, BSD-3-Clause, GPL-2.0, Apache-2.0, MIT, JSON, 0BSD, BSD-2-Clause, LGPL-3.0, AGPL-1.0
- #!/usr/bin/python
- # ***** BEGIN LICENSE BLOCK *****
- # Version: MPL 1.1/GPL 2.0/LGPL 2.1
- #
- # The contents of this file are subject to the Mozilla Public License Version
- # 1.1 (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- # http://www.mozilla.org/MPL/
- #
- # Software distributed under the License is distributed on an "AS IS" basis,
- # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- # for the specific language governing rights and limitations under the
- # License.
- #
- # The Original Code is the relic relicensing tool.
- #
- # The Initial Developer of the Original Code is
- # Trent Mick <TrentM@ActiveState.com>.
- # Portions created by the Initial Developer are Copyright (C) 2003-2005
- # the Initial Developer. All Rights Reserved.
- #
- # Contributor(s):
- # Gervase Markham <gerv@gerv.net>
- # Patrick Fey <bugzilla@nachtarbeiter.net>
- #
- # Alternatively, the contents of this file may be used under the terms of
- # either the GNU General Public License Version 2 or later (the "GPL"), or
- # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- # in which case the provisions of the GPL or the LGPL are applicable instead
- # of those above. If you wish to allow use of your version of this file only
- # under the terms of either the GPL or the LGPL, and not to allow others to
- # use your version of this file under the terms of the MPL, indicate your
- # decision by deleting the provisions above and replace them with the notice
- # and other provisions required by the GPL or the LGPL. If you do not delete
- # the provisions above, a recipient may use your version of this file under
- # the terms of any one of the MPL, the GPL or the LGPL.
- #
- # ***** END LICENSE BLOCK *****
- # Adapted from the 'lick' and 'ripl' Python scripts. (See:
- # <http://bugzilla.mozilla.org/show_bug.cgi?id=98089>)
- """
- relic - RE-LICense a given file, set of files, or directory of files
- from the Mozilla source tree
- Usage:
- relic [options...] [files...]
- relic [options...] < files...
- Options to Select Mode (use one):
- <none> List the licenses in each file.
- -s, --statistics Show a summary table of licenses in each file.
- The -x, --extended option may be added to show
- some additional detail to the stats.
- -r, --relicense Modify the given files to include the
- appropriate Mozilla license, where
- "appropriate" is either the NPL/GPL/LGPL
- tri-license if it was already under the NPL or
- the MPL/GPL/LGPL license in all other cases.
- -R, --force-relicense
- Relicenses files (as -r|--relicense), but
- does NOT skip files that already appear to
- have a complete license.
- -A, --add Add a license to files that do not appear to
- have one.
- -I, --initial-developers
- Display initial developer for each file.
- General Options:
- -h, --help dump this help and exit
- -V, --version dump this script's version and exit
- -v, --verbose verbose output
- -d, --debug more verbose output
- -f, --force Continue processing after an error. (Errors
- are summarized at end.)
- -q, --quick Quick scanning. Use only basic license checks
- (only use in report mode).
- -M, --MPL Replace NPL licenses with MPL ones.
- -a, --all Check all files (only skip CVS directories).
- --dry-run Go through motions but don't actually change
- any files.
- --backup Make backups of changed files when
- relicensing. Backup filenames are the
- original filename suffixed with a ~# where
- "#" is the lowest number to avoid a file
- conflict.
- -o <orig_code_is> Provide fallback value for the "Original
- Code is" block.
- -D <orig_code_date> Provide fallback value for the date
- that is part of the "Original Code is" block.
- -i <initial_dev> Provide fallback value for the "Initial
- Developer of the Original Code is" block.
- -y <year> Provide fallback value for "Initial
- Developer" copyright year.
- --defaults Use the following default fallback values:
- original_code_is: "mozilla.org Code"
- initial_copyright_date: "2001"
- initial_developer: "Netscape Communications
- Corporation"
- Note: the "Original Code" date is generally
- not required, so a default is not included
- here.
- Examples:
- # List license in files under mozilla/js/src.
- relic mozilla/js/src # list licenses in files
- relic -s mozilla/js/src # show summary stats on licenses
- relic -r mozilla/js/src # re-license files
- """
- import os
- import sys
- import re
- import getopt
- import pprint
- import shutil
class RelicError(Exception):
    """Error raised by relic when it cannot make sense of a license block."""
- #---- setup logging
try:
    # The logging package is part of the standard library from Python 2.3
    # on, but many Python 2.2 installations will not have it.
    import logging
    logging.basicConfig()
except ImportError:
    # Local fallback logging module.
    try:
        import _logging as logging
    except ImportError:
        # Neither implementation is importable: explain why on stderr, then
        # re-raise so the original ImportError still terminates the script.
        sys.stderr.write("Your Python installation does not have the logging "
                         "package, nor could the fallback _logging module be "
                         "found. One of the two is required to run this "
                         "script.\n\n")
        raise
# Logger shared by the routines below.
log = logging.getLogger("relic")
- #---- globals
# Version of this script, as a tuple of integers.
_version_ = (0, 7, 2)

# When processing files, 'relic' skips files and directories according
# to these settings. Note: files identified in .cvsignore files are also
# skipped.

# File extensions (as returned by os.path.splitext, i.e. including the
# leading dot) that _should_skip_file rejects.
_g_skip_exts = [".mdp", ".order", ".dsp", ".dsw", ".uf"]
# A file is skipped by _should_skip_file when os.path.basename(path) is
# exactly equal to one of these entries (the comparison is case-sensitive).
_g_skip_file_basenames = [
    # Used by CVS (and this script)
    ".cvsignore",

    # GPL with autoconf exception
    "config.guess",
    "config.sub",
    # Auto-generated from other files
    "configure",
    # license and readme files
    "license",
    "readme",
    "copyright",
    "LICENSE-MPL",
    "MPL-1.1.txt",
]
# A file is skipped by _should_skip_file when its path, with os.sep replaced
# by '/', ends with one of these entries. Entries therefore always use '/'
# as the separator, regardless of platform.
_g_skip_files = [
    # TODO: update with MPL block - or CVS remove (check history)
    "tools/wizards/templates/licenses/MPL/lic.mak",
    "tools/wizards/templates/licenses/MPL/lic.pl",
    ###########################################################################
    # Everything in _g_skip_files below this line needs no further work.
    ###########################################################################

    # Files containing copies of licence text which confuses the script
    "LICENSE",
    "js2/COPYING",
    "security/svrcore/LICENSE",
    "extensions/xmlterm/doc/MPL",
    "gfx/cairo/cairo/COPYING-LGPL-2.1",
    "gfx/cairo/cairo/COPYING-MPL-1.1",

    # Files containing global licensing information
    "xpfe/global/resources/content/license.html",
    "toolkit/content/license.html",

    # Ben Bucksch - files are tri-licensed with an extra clause.
    "netwerk/streamconv/converters/mozTXTToHTMLConv.cpp",
    "netwerk/streamconv/converters/mozTXTToHTMLConv.h",
    "netwerk/streamconv/public/mozITXTToHTMLConv.idl",

    # GPLed build tools
    "config/preprocessor.pl",
    "intl/uconv/tools/parse-mozilla-encoding-table.pl",
    "intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl",
    "js2/missing",

    # Files which the script doesn't handle well. All have been relicensed
    # manually.
    "xpinstall/wizard/windows/builder/readme.txt",
    "xpfe/bootstrap/icons/windows/readme.txt",
    "embedding/qa/testembed/README.TXT",
    "security/nss/lib/freebl/ecl/README.FP",
    "nsprpub/pkg/linux/sun-nspr.spec",
    "security/nss/pkg/linux/sun-nss.spec",
    "security/jss/pkg/linux/sun-jss.spec",
    "security/nss/lib/freebl/mpi/utils/README",
    "security/nss/lib/freebl/ecl/README",
    "security/nss/lib/freebl/mpi/README",
    "lib/mac/UserInterface/Tables/TableClasses.doc",
    "parser/htmlparser/tests/html/bug23680.html",
    "security/nss/lib/freebl/mpi/montmulfv9.s",
    "tools/performance/pageload/base/lxr.mozilla.org/index.html",
    "testing/performance/win32/page_load_test/" +\
    "base/lxr.mozilla.org/index.html",
    "testing/performance/win32/page_load_test/" +\
    "base/lxr.mozilla.org/20001028.html.orig",

    # Not sure what to do with this...
    "gfx/cairo/stdint.diff",

    # GPL with autoconf exception (same license as files distributed with)
    "build/autoconf/codeset.m4",
    "toolkit/airbag/airbag/autotools/depcomp",
    "toolkit/airbag/airbag/autotools/missing",
    "toolkit/airbag/airbag/autotools/ltmain.sh",
    "js/tamarin/pcre/ltmain.sh",
    "security/svrcore/compile",
    "security/svrcore/ltmain.sh",
    "security/svrcore/missing",
    "security/svrcore/depcomp",
    "security/svrcore/aclocal.m4",

    # Public domain or equivalent
    "nsprpub/config/nspr.m4",
    "toolkit/airbag/airbag/aclocal.m4",
    "security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s",

    # GSSAPI has BSD-like licence requiring some attribution
    "extensions/auth/gssapi.h",
    # This script
    "tools/relic/relic",
]
# A directory is skipped by _should_skip_dir when os.path.basename(path) is
# one of these entries.
_g_skip_dir_basenames = [
    "CVS",
]
# Basenames that _should_skip_dir still rejects when _g_check_all is set
# (in that mode this is the only directory filter applied).
_g_skip_dir_basenames_cvs_only = [
    "CVS",
]
# Complete path from mozilla dir to a dir to skip.
# _should_skip_dir compares the whole directory path (normalized to '/'
# separators, with a trailing '/' and a leading './' removed) for equality
# against each entry; a partial or suffix match does not count.
_g_skip_dirs = [
    # Test files for this script, which cause it to crash!
    "tools/relic/test",

    # License template files (TODO: this directory may disappear)
    "tools/wizards/templates/licenses",

    # As per the "New Original Source Files" section of:
    # http://www.mozilla.org/MPL/license-policy.html
    # with obsolete or now-relicensed directories removed
    "apache", # Obsolete mod_gzip code
    "cck", # mkaply's baby; not core code anyway.
    "dbm",
    "js/rhino", # Currently MPL/GPL - may end up BSD
    "webtools", # Various MPLed webtools

    # These could be done, but no-one's clamouring for it, and it's a hassle
    # sorting it all out, so let sleeping dogs lie.
    "msgsdk",
    "java",
    "privacy",
    # These have their own BSD-like license
    "media/libjpeg",

    # The following are not supposed to be relicensed, but they do have a
    # few files in we care about (like makefiles)
    "media/libpng",
    "modules/zlib",
    "gc/boehm",
    "other-licenses",

    # Copy of GPLed tool
    "tools/buildbot",

    # Other directories we want to exclude
    "embedding/tests", # Agreed as BSD
    "calendar/libical", # LGPL/MPL
    "gfx/cairo/cairo/src", # LGPL/MPL
]
# Maps a file basename to a tuple of comment delimiter sets, each set being
# a list of strings. As interpreted by _get_license_info, a three-item set
# is (block open, continuation, block close) and a one-item set is a
# line-comment prefix; an empty-string prefix means "no comment marker".
# NOTE(review): presumably consulted by _get_comment_delim_sets, which is
# defined elsewhere in this file -- confirm lookup order there.
_g_basename_to_comment_info = {
    "configure": (["dnl"], ),
    "Makefile": (["#"], ),
    "makefile": (["#"], ),
    "nfspwd": (["#"], ),
    "typemap": (["#"], ),
    "xmplflt.conf": (["#"], ),
    "ldapfriendly": (["#"], ),
    "ldaptemplates.conf": (["#"], ),
    "ldapsearchprefs.conf": (["#"], ),
    "ldapfilter.conf": (["#"], ),
    "README.configure": (["#"], ),
    "Options.txt": (["#"], ),
    "fdsetsize.txt": (["#"], ),
    "prototype": (["#"], ),
    "prototype_i386": (["#"], ),
    "prototype3_i386": (["#"], ),
    "prototype_com": (["#"], ),
    "prototype3_com": (["#"], ),
    "prototype_sparc": (["#"], ),
    "prototype3_sparc": (["#"], ),
    "nglayout.mac": (["#"], ),
    "pkgdepend": (["#"], ),
    "Maketests": (["#"], ),
    "depend": (["#"], ),
    "csh-aliases": (["#"], ),
    "csh-env": (["#"], ),
    ".cshrc": (["#"], ),
    "MANIFEST": (["#"], ),
    "mozconfig": (["#"], ),
    "makecommon": (["#"], ),
    "bld_awk_pkginfo": (["#"], ),
    "prototype_i86pc": (["#"], ),
    "pkgdepend_5_6": (["#"], ),
    "awk_pkginfo-i386": (["#"], ),
    "awk_pkginfo-sparc": (["#"], ),
    "pkgdepend_64bit": (["#"], ),
    "WIN32": (["#"], ),
    "WIN16": (["#"], ),
    "Makefile.linux": (["#"], ),
    "README": ([""], ["#"]),
    "copyright": ([""], ),
    "xptcstubs_asm_ppc_darwin.s.m4": (["/*", "*", "*/"], ),
    "xptcstubs_asm_mips.s.m4": (["/*", "*", "*/"], ),
    "nsIDocCharsetTest.txt": (["<!--", "-", "-->"], ),
    "nsIFontListTest.txt": (["<!--", "-", "-->"], ),
    "ComponentListTest.txt": (["<!--", "-", "-->"], ),
    "nsIWebBrowserPersistTest1.txt": (["<!--", "-", "-->"], ),
    "nsIWebBrowserPersistTest2.txt": (["<!--", "-", "-->"], ),
    "nsIWebBrowserPersistTest3.txt": (["<!--", "-", "-->"], ),
    "plugins.txt": (["<!--", "-", "-->"], ),
    "NsISHistoryTestCase1.txt": (["<!--", "-", "-->"], ),
    "EmbedSmokeTest.txt": (["<!--", "-", "-->"], ),
    "lineterm_LICENSE": (["/*", "*", "*/"], ),
    "XMLterm_LICENSE": (["/*", "*", "*/"], ),
    "BrowserView.cpp.mod": (["/*", "*", "*/"], ),
    "header_template": (["/*", "*", "*/"], ),
    "cpp_template": (["/*", "*", "*/"], ),
    "abcFormat470.txt": (["//"], ),
    "opcodes.tbl": (["//"], ),
}
- _g_ext_to_comment_info = {
- ".txt": (["##", "#", ], ["#"]),
- ".TXT": (["##", "#", ]),
- ".doc": (["", ]),
- ".build": (["", ]),
- ".1st": (["", ]),
- ".lsm": (["", ]),
- ".FP": (["", ]),
- ".spec": (["", ]),
- ".CPP": (["/*", "*", "*/"], ),
- ".cpp": (["/*", "*", "*/"], ),
- ".H": (["/*", "*", "*/"], ),
- ".h": (["/*", "*", "*/"], ),
- ".hxx": (["/*", "*", "*/"], ),
- ".c": (["/*", "*", "*/"], ),
- ".css": (["/*", "*", "*/"], ['#']),
- ".js": (["/*", "*", "*/"], ['#']),
- ".idl": (["/*", "*", "*/"], ),
- ".ut": (["/*", "*", "*/"], ),
- ".rc": (["/*", "*", "*/"], ),
- ".rc2": (["/*", "*", "*/"], ),
- ".RC": (["/*", "*", "*/"], ),
- ".Prefix": (["/*", "*", "*/"], ),
- ".prefix": (["/*", "*", "*/"], ),
- ".cfg": (["/*", "*", "*/"], ["#"]),
- ".cp": (["/*", "*", "*/"], ),
- ".cs": (["/*", "*", "*/"], ),
- ".java": (["/*", "*", "*/"], ),
- ".jst": (["/*", "*", "*/"], ),
- ".tbl": (["/*", "*", "*/"], ),
- ".tab": (["/*", "*", "*/"], ),
- ".cc": (["/*", "*", "*/"], ),
- ".msg": (["/*", "*", "*/"], ),
- ".y": (["/*", "*", "*/"], ),
- ".r": (["/*", "*", "*/"], ),
- ".mm": (["/*", "*", "*/"], ),
- ".x-ccmap":(["/*", "*", "*/"], ),
- ".ccmap": (["/*", "*", "*/"], ),
- ".sql": (["/*", "*", "*/"], ),
- ".pch++": (["/*", "*", "*/"], ),
- ".xpm": (["/*", "*", "*/"], ),
- ".uih": (["/*", "*", "*/"], ),
- ".uil": (["/*", "*", "*/"], ),
- ".ccmap": (["/*", "*", "*/"], ),
- ".map": (["/*", "*", "*/"], ),
- ".win98": (["/*", "*", "*/"], ),
- ".php": (["/*", "*", "*/"], ),
- ".m": (["/*", "*", "*/"], ),
- ".jnot": (["/*", "*", "*/"], ),
- ".l": (["/*", "*", "*/"], ),
- ".htp": (["/*", "*", "*/"], ),
- ".xs": (["/*", "*", "*/"], ),
- ".as": (["/*", "*", "*/"], ),
- ".api": (["/*", "*", "*/"], ['#']),
- ".applescript": (["(*", "*", "*)"], ["--"], ["#"]),
- ".html": (["<!--", "-", "-->"], ["#"]),
- ".xml": (["<!--", "-", "-->"], ["#"]),
- ".xbl": (["<!--", "-", "-->"], ["#"]),
- ".xsl": (["<!--", "-", "-->"], ),
- ".xul": (["<!--", "-", "-->"], ["#"]),
- ".dtd": (["<!--", "-", "-->"], ["#"]),
- ".rdf": (["<!--", "-", "-->"], ["#"]),
- ".htm": (["<!--", "-", "-->"], ),
- ".out": (["<!--", "-", "-->"], ),
- ".resx": (["<!--", "-", "-->"], ),
- ".bl": (["<!--", "-", "-->"], ),
- ".xif": (["<!--", "-", "-->"], ),
- ".xhtml":(["<!--", "-", "-->"], ["#"]),
- ".inc": (["<!--", "-", "-->"],
- ["#"],
- ["@!"],
- ["/*", "*", "*/"]),
- ".properties": (["#"], ),
- ".win": (["#"], ),
- ".dsp": (["#"], ),
- ".exp": (["#"], ),
- ".mk": (["#"], ),
- ".mn": (["#"], ),
- ".mak": (["#"], ),
- ".MAK": (["#"], ),
- ".perl": (["#"], ),
- ".pl": (["#"], ),
- ".PL": (["#"], ),
- ".sh": (["#"], ),
- ".dsw": (["#"], ),
- ".cgi": (["#"], ),
- ".pm": (["#"], ),
- ".pod": (["#"], ),
- ".src": (["#"], ),
- ".csh": (["#"], ),
- ".DLLs": (["#"], ),
- ".ksh": (["#"], ),
- ".toc": (["#"], ),
- ".am": (["#"], ),
- ".df": (["#"], ),
- ".client": (["#"], ),
- ".ref": (["#"], ), # all of them "Makefile.ref"
- ".ldif": (["#"], ),
- ".ex": (["#"], ),
- ".reg": (["#"], ),
- ".py": (["#"], ),
- ".adb": (["#"], ),
- ".dtksh": (["#"], ),
- ".pkg": (["#"], ),
- ".et": (["#"], ),
- ".stub": (["#"], ),
- ".nss": (["#"], ),
- ".os2": (["#"], ),
- ".Solaris": (["#"], ),
- ".rep": (["#"], ),
- ".NSS": (["#"], ),
- ".server": (["#"], ),
- ".awk": (["#"], ),
- ".targ": (["#"], ),
- ".gnuplot": (["#"], ),
- ".bash": (["#"], ),
- ".tmpl": (["#"], ),
- ".com": (["#"], ),
- ".dat": (["#"], ),
- ".rpm": (["#"], ),
- ".nsi": (["#"], ),
- ".nsh": (["#"], ),
- ".template": (["#"], ),
- ".ldkd": (["#"], ),
- ".ldku": (["#"], ),
- ".arm": (["#"], ),
- ".tdf": ([";"], ),
- ".def": ([";+#"], [";"]),
- ".DEF": ([";+#"], [";"]),
- ".ini": ([";"], ),
- ".it": ([";"], ),
- ".lisp": ([";;;"], ),
- ".cmd": (["rem"], ["REM"]),
- ".bat": (["rem"], ["REM"]),
- ".tex": (["%"], ),
- ".texi": (["%"], ),
- ".m4": (["dnl"], ),
- ".asm": ([";"], ),
- ".vbs": (["'"], ),
- ".il": (["!"], ),
- ".ad": (["!"], ),
- ".script": (["(*", " *", "*)"], ),
- ".3x": (['.\\"'], ),
-
- # What a mess...
- ".s": (["#"], ["//"], ["/*", "*", "*/"], ["!"], [";"], ["/"]),
- }
- _g_shebang_pattern_to_comment_info = [
- (re.compile(ur'\A#!.*/bin/(ba)?sh.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*perl.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*php.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*python.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*ruby.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*tclsh.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*wish.*$', re.IGNORECASE), (["#"], )),
- (re.compile(ur'\A#!.*expect.*$', re.IGNORECASE), (["#"], )),
- ]
# Text fragments from which a tri-license block is built, keyed by fragment
# name. Fragments are stored without comment delimiters. Several contain
# %-format placeholders: "original_code_is", "original_code_is_with_date"
# and "initial_developer" use named %(...)s fields, while "contributors"
# takes a single positional %s.
# NOTE(review): the license header at the top of this file separates its
# paragraphs with empty comment lines, whereas these fragments show no
# separators -- confirm the intended paragraph spacing of the templates.
_g_trilicense_parts = {
    "mpl": """\
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
""",
    "npl": """\
***** BEGIN LICENSE BLOCK *****
Version: NPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Netscape Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/NPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
""",
    "original_code_is": """\
The Original Code is %(original_code_is)s.
""",
    "original_code_is_with_date": """\
The Original Code is %(original_code_is)s, released
%(original_code_date)s.
""",
    "initial_developer": """\
The Initial Developer of the Original Code is
%(initial_developer)s.
Portions created by the Initial Developer are Copyright (C) %(initial_copyright_date)s
the Initial Developer. All Rights Reserved.
""",
    "contributors": """\
Contributor(s):
%s
""",
    "gpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
MPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the MPL or the GPL.
***** END LICENSE BLOCK *****""",
    "gpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
NPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the NPL or the GPL.
***** END LICENSE BLOCK *****""",
    "gpl/lgpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****""",
    "gpl/lgpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the NPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the NPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****""",
}
# Module-wide run-time switches; all default to off (0).
_g_dry_run = 0 # iff true, don't modify any files
# NOTE(review): presumably set by -f/--force ("continue processing after an
# error" per the usage text); not referenced in the routines shown here.
_g_force = 0
# When true, _should_skip_file skips nothing and _should_skip_dir only
# skips directories listed in _g_skip_dir_basenames_cvs_only.
_g_check_all = 0
- #---- internal support routines
- def _is_binary(filename):
- """Return true iff the given filename is binary.
- Raises an EnvironmentError if the file does not exist or cannot be
- accessed.
- """
- fin = open(filename, 'rb')
- try:
- CHUNKSIZE = 1024
- while 1:
- chunk = fin.read(CHUNKSIZE)
- if '\0' in chunk: # found null byte
- return 1
- if len(chunk) < CHUNKSIZE:
- break # done
- finally:
- fin.close()
- return 0
- _g_cvsignore_cache = {} # optimization: keep a cache of .cvsignore content
- def _should_skip_according_to_cvsignore(path):
- dirname, basename = os.path.split(path)
- cvsignore = os.path.join(dirname, ".cvsignore")
- if not os.path.exists(cvsignore):
- return 0
- elif cvsignore not in _g_cvsignore_cache:
- fin = open(cvsignore, 'r')
- to_ignore = []
- try:
- for f in fin:
- if f[-1] == "\n": f = f[:-1] # chomp
- if not f: continue # skip empty lines
- to_ignore.append(f)
- finally:
- fin.close()
- _g_cvsignore_cache[cvsignore] = to_ignore
- # At this point .cvsignore exists and its contents are in the cache.
- to_ignore = _g_cvsignore_cache[cvsignore]
- if basename in to_ignore:
- return 1
- else:
- return 0
- _g_backup_pattern = re.compile("~\d+$")
def _should_skip_file(path):
    """Return 1 if the file at "path" should not be processed, else 0.

    Nothing is skipped when _g_check_all is set. Otherwise the checks are
    applied in this order, and the first that matches is logged at info
    level:
      - the extension is in _g_skip_exts
      - the path (with '/' separators) ends with an entry of _g_skip_files
      - the basename is in _g_skip_file_basenames
      - the basename is listed in the directory's .cvsignore
      - the path looks like a backup file (_g_backup_pattern)
    """
    log.debug("_should_skip_file(path='%s')", path)
    if _g_check_all:
        return 0

    extension = os.path.splitext(path)[1]
    if extension in _g_skip_exts:
        log.info("Skipping '%s' (according to '_g_skip_exts').", path)
        return 1

    # use same sep as in _g_skip_files
    normalized = path.replace(os.sep, '/')
    for skip_suffix in _g_skip_files:
        if normalized.endswith(skip_suffix):
            log.info("Skipping '%s' (according to '_g_skip_files').", path)
            return 1

    leaf = os.path.basename(path)
    if leaf in _g_skip_file_basenames:
        log.info("Skipping '%s' (according to '_g_skip_file_basenames').", path)
        return 1

    if _should_skip_according_to_cvsignore(path):
        log.info("Skipping '%s' (according to .cvsignore).", path)
        return 1

    if _g_backup_pattern.search(path):
        log.info("Skipping '%s' (looks like backup file).", path)
        return 1

    return 0
def _should_skip_dir(path):
    """Return 1 if the directory at "path" should not be descended, else 0.

    When _g_check_all is set, only basenames listed in
    _g_skip_dir_basenames_cvs_only are skipped. Otherwise a directory is
    skipped when its basename is in _g_skip_dir_basenames, when its whole
    path equals an entry of _g_skip_dirs, or when it is listed in the
    .cvsignore of its parent directory.
    """
    log.debug("_should_skip_dir(path='%s')", path)
    leaf = os.path.basename(path)

    if _g_check_all:
        if leaf in _g_skip_dir_basenames_cvs_only:
            return 1
        return 0

    if leaf in _g_skip_dir_basenames:
        log.info("Skipping '%s' (according to _g_skip_dir_basenames).", path)
        return 1

    # use same sep as in _g_skip_dirs
    normalized = path.replace(os.sep, '/')
    # These could do with being a proper path canonicalisation function...
    if normalized[-1] == '/':
        # treat "calendar/" the same as "calendar"
        normalized = normalized[:-1]
    if normalized.startswith('./'):
        # treat "./calendar" the same as "calendar"
        normalized = normalized[2:]

    # Changed by gerv to make skip_dirs require whole path
    if normalized in _g_skip_dirs:
        log.info("Skipping '%s' (according to _g_skip_dirs).", path)
        return 1

    if _should_skip_according_to_cvsignore(path):
        log.info("Skipping '%s' (according to .cvsignore).", path)
        return 1

    return 0
- def _get_license_info(filename, show_initial=0, quick=0):
- """Return license block information for the given file.
- "filename" is the path to the file to scan.
- "show_initial" is a boolean that indicates if initial developer info
- should be displayed.
- "quick" is a boolean that can be set for a quick scan. In this
- case, only the "parts" field of the return dictionary will
- be filled out.
-
- Returns a dictionary adequately describing the license block in the
- given file for the purpose of determining whether to patch the
- license block and how. Returns a dictionary of the following form:
- {"parts": <list of zero or more of "mpl", "npl", "gpl", "lgpl",
- "unknown", "block_begin", "block_end" in the
- order in which they were found>,
- # if necessary, the following keys are included as well
- "begin_line": <(0-based) index at which license block starts>,
- "end_line": <(0-based) index at which license block ends>,
- "first_prefix": <prefix to use for new license block first line>,
- "subsequent_prefix": <prefix to use for subsequent lines>,
- "last_suffix": <suffix to use for last line>,
- # The following fields are correspond to the file specific
- # portions of the license template as described here:
- # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
- # If the associated block is not found, then the value is None.
- "original_code_is": ...,
- "original_code_date": ...,
- "initial_developer": ...,
- "initial_copyright_date": ...,
- "contributors": ...,
- }
- precondition: should not be called on binary files
- """
- lic_info = {
- "parts": [],
- }
- fin = open(filename, 'r')
- try:
- content = fin.read()
- finally:
- fin.close()
-
- # Help me find filena
- log.info("Next file is: %s", filename)
- # do quick search to see if any of the desired licenses is in here
- # - if it looks like all the parts are there, good, done
- # - if some but not all parts, continue
- parts_pattern = re.compile("""(
- (?P<block_begin>\*\*\*\*\*\ BEGIN\ LICENSE\ BLOCK\ \*\*\*\*\*)
- | (?P<mpl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla)
- | (?P<npl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape)
- | (?P<gpl>GNU\ (General\ )?Public\ License)
- | (?P<lgpl>(Library|Lesser)\ General\ Public\ License)
- | (?P<block_end>\*\*\*\*\*\ END\ LICENSE\ BLOCK\ \*\*\*\*\*)
- )""",
- re.VERBOSE)
- parts = [] # found license parts in this file
- start = 0
- blocks = 0
- while 1:
- match = parts_pattern.search(content, start)
- if match:
- # Skip this block, if the last license block is more than 10 lines
- # away (file is probably used for autogeneration of files then).
- if blocks == 1 and (match.start()-start) > 10:
- break
- else:
- parts = match.groupdict()
- for part in parts:
- if parts[part]:
- lic_info["parts"].append(part)
- log.info("%s license/delimeter found", part)
- start = match.end()
- if part == "block_end":
- blocks = blocks + 1
- else:
- blocks = 0
- break
- else:
- raise RelicError("unexpected license part: %r" % parts)
- else:
- break
- # no license block at all
- if not parts:
- # - if not, check to see if License or Copyright shows up in the
- # file; if so, then error out; if not, skip out
- any_lic_pattern = re.compile("(Copyright|Licen[sc]e)", re.IGNORECASE)
- match = any_lic_pattern.search(content)
- if match:
- lic_info["parts"].append("unknown")
- log.info("unknown license found: %r",
- content[max(match.start()-20,0):match.end()+20])
- else:
- log.info("no license found")
- return lic_info
- # license block with non-tri-license version headers
- elif lic_info["parts"] == ["block_begin", "block_end"]:
- lic_info["parts"].append("unknown")
- log.info("unknown license found (license block with non-tri-license)")
- return lic_info
- # license block with tri-license version headers
- elif (lic_info["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
- lic_info["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
- log.info("license looks good, no changes necessary")
- if quick:
- return lic_info
- # Otherwise, the license needs to be fixed, so gather more detailed
- # information. Here is the algorithm we will use:
- # - find first license line
- # - find the end of this comment block (assumption: from the first
- # license line to the end of the comment block is the full
- # license block)
- # This is a bad assumption in two cases and steps have been taken
- # to try to deal with those cases:
- # - There could be a trailing part bit of comment that is
- # NOT part of the license but is part of the same comment
- # block. A common example are the:
- # This Original Code has been modified by IBM...
- # files (about 130 of them in the moz tree).
- # (c.f. test_relicense_ibm_copyright_suffix.c)
- # - Some files have split up the license paragraphs into
- # multiple comment blocks, e.g.
- # "mozilla/build/unix/abs2rel.pl":
- # # The contents of this file are subject to the
- # # ...
- # # the License at http://www.mozilla.org/MPL/
- #
- # # The Initial Developer of the Original Code
- # # ...
- # # Rights Reserved.
- # (c.f. test_relicense_separated_license_comment_blocks.pl)
- # - these are the lines to replace
- # - gather embedded lic data
- # - use second line to determine line prefix
- # ? Should we only allow processing of unknown-delimiter-files with
- # an option?
- # Get comment delimiter info for this file.
- comment_delim_sets = _get_comment_delim_sets(filename)
- # - find first license line (and determine which set of comment
- # delimiters are in use)
- lines = content.splitlines()
- for comment_delims in comment_delim_sets:
- if len(comment_delims) == 3:
- # Note: allow for whitespace before continuation character
- prefix_pattern = "%s|\s*%s|" % (re.escape(comment_delims[0]),
- re.escape(comment_delims[1]))
- suffix_pattern = "%s" % re.escape(comment_delims[2])
- elif len(comment_delims) == 2:
- prefix_pattern = "%s|" % re.escape(comment_delims[0])
- suffix_pattern = "%s" % re.escape(comment_delims[1])
- elif len(comment_delims) == 1:
- prefix_pattern = re.escape(comment_delims[-1])
- suffix_pattern = ""
- else: # len(comment_delims) == 0
- prefix_pattern = ""
- suffix_pattern = ""
- lic_begin_pattern = re.compile("""
- ^(?P<prefix>%s)
- (?P<space>\s*)
- (\*+\ BEGIN\ LICENSE\ BLOCK\ \*+
- |\-+\ BEGIN\ LICENSE\ BLOCK\ \-+
- | Version:\ MPL\ \d+\.\d+/GPL\ \d+\.\d+/LGPL\ \d+\.\d+
- | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla[\w ]*
- | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape[\w ]*
- | Alternatively,\ the\ contents\ of\ this\ file\ may\ be\ used\ under\ the[\w ]*)
- (?P<suffix>%s|)\s*?$
- """ % (prefix_pattern, suffix_pattern), re.VERBOSE)
- for i in range(len(lines)):
- match = lic_begin_pattern.search(lines[i])
- if match:
- beginline = {
- "content": lines[i],
- "linenum": i,
- "prefix": match.group("prefix"),
- "space": match.group("space"),
- "suffix": match.group("suffix")
- }
- # Optimization: If the line before the "beginline" is simply
- # a block comment open the include that line in parsed out
- # license block. E.g.,
- # <!--
- # - ***** BEGIN LICENSE BLOCK *****
- # ...
- if (len(comment_delims) > 1 # only for block comments
- and beginline["prefix"] != comment_delims[0]
- and i-1 >= 0
- and lines[i-1].strip() == comment_delims[0]):
- beginline["linenum"] -= 1
- beginline["prefix"] = comment_delims[0]
- break
- if match: break
- else:
- raise RelicError("couldn't find start line with this pattern (even "
- "though it looks like there is a license block in "
- "%s): %s" % (filename, lic_begin_pattern.pattern))
- log.info("comment delimiters: %s", comment_delims)
- log.debug("beginline dict: %s", beginline)
- lic_info["comment_delims"] = comment_delims
- lic_info["begin_line"] = beginline["linenum"]
- lic_info["first_prefix"] = beginline["prefix"]
- log.info("prefix for first line: '%s'", beginline["prefix"])
- # - get second license line
- lic_middle_pattern = re.compile("""
- ^(?P<prefix>%s|)
- (?P<space>\s*)
- (?P<content>.*)
- (?P<suffix>%s|)\s*?$
- """ % (prefix_pattern, suffix_pattern),
- re.VERBOSE)
- # skip empty lines which might result in bogus scanning later, e.g.:
- # mozilla/layout/html/tests/table/marvin/x_thead_align_center.xml
- second_linenum = beginline["linenum"]+1
- while second_linenum < len(lines):
- if lines[second_linenum].strip():
- break
- log.debug("skip blank 'second' line: %d", second_linenum)
- second_linenum +=1
- else:
- raise RelicError("all lines after the first license block line (%d) "
- "were empty" % (beginline["linenum"]+1))
- match = lic_middle_pattern.search(lines[second_linenum])
- if match:
- secondline = {
- "content": lines[second_linenum],
- "linenum": second_linenum,
- "prefix": match.group("prefix"),
- "space": match.group("space"),
- "suffix": match.group("suffix")
- }
- else:
- raise RelicError("didn't find second line with pattern: %s"
- % lic_middle_pattern.pattern)
- log.debug("secondline dict: %s", secondline)
- lic_info["subsequent_prefix"] = secondline["prefix"]
- log.info("prefix for subsequent lines: '%s'", secondline["prefix"])
- # - find block comment end
- orig_code_modified_pattern = re.compile("This Original Code has been "
- "modified", re.I)
- non_lic_content_in_same_comment_block = 0
- if len(comment_delims) == 1:
- # line-style comments: The comment block "end" is defined as the
- # last line before a line NOT using the block comment delimiter.
- #XXX:BUG: This is not good enough for:
- # test/inputs/separated_license_comment_blocks.pl
- if comment_delims[0] == "":
- raise RelicError(
- "Don't know how to find the end of a line-style comment "
- "block when the delimiter is the empty string. (Basically "
- "this script cannot handle this type of file.)")
- for i in range(beginline["linenum"]+1, len(lines)):
- if not lines[i].startswith(comment_delims[0]):
- endlinenum = i-1
- break
- elif lines[i].find("END LICENSE BLOCK") != -1:
- endlinenum = i
- break
- # As per "test_relicense_trailing_orig_code_modified.pl", a
- # paragraph starting with:
- # This Original Code has been modified
- # is deemed to be OUTside the license block, i.e. it is not
- # replaced for relicensing.
- if orig_code_modified_pattern.search(lines[i]):
- non_lic_content_in_same_comment_block = 1
- # The endline is the first non-blank line before this one.
- endlinenum = i-1
- while 1:
- line = lines[endlinenum]
- match = lic_middle_pattern.search(line)
- if not match:
- raise RelicError("Line did not match lic_middle_pattern "
- "unexpectedly: %r" % line)
- if match.group("content").strip(): # non-empty line
- break
- endlinenum -= 1
- break
- else:
- raise RelicError("Could not find license comment block end "
- "line in '%s'." % filename)
- elif len(comment_delims) >= 2: # block-style comments
- for i in range(beginline["linenum"]+1, len(lines)):
- if lines[i].find(comment_delims[-1]) != -1:
- endlinenum = i
- break
- elif lines[i].find("END LICENSE BLOCK") != -1:
- endlinenum = i
- non_lic_content_in_same_comment_block = 1
- break
- # As per "test_relicense_ibm_copyright_suffix.c", a
- # paragraph starting with:
- # This Original Code has been modified
- # is deemed to be OUTside the license block, i.e. it is not
- # replaced for relicensing.
- if orig_code_modified_pattern.search(lines[i]):
- non_lic_content_in_same_comment_block = 1
- # The endline is the first non-blank line before this one.
- endlinenum = i-1
- while 1:
- line = lines[endlinenum]
- match = lic_middle_pattern.search(line)
- if not match:
- raise RelicError("Line did not match lic_middle_pattern "
- "unexpectedly: %r" % line)
- if match.group("content").strip(): # non-empty line
- break
- endlinenum -= 1
- break
- else:
- raise RelicError("Could not find license comment block end "
- "line in '%s'." % filename)
- if not non_lic_content_in_same_comment_block\
- and not lines[endlinenum].strip().endswith(comment_delims[-1]):
- raise RelicError(
- "There is text AFTER the license block comment end "
- "delimiter, but on the SAME LINE. This is unexpected. "
- "Bailing.\n%s:%s:%r"
- % (filename, endlinenum, lines[endlinenum]))
- else: # len(comment_delims) == 0
- # For files without a comment character to help out, we ONLY
- # successfully break on the full, correct "END LICENSE BLOCK"
- # token.
- for i in range(beginline["linenum"]+1, len(lines)):
- if lines[i].find("END LICENSE BLOCK") != -1:
- endlinenum = i
- break
- elif i > beginline["linenum"]+1+50:
- raise RelicError("Haven't found 'END LICENSE BLOCK' marker "
- "within 50 lines of the start of the "
- "license block on line %d. Aborting."
- % (beginline["linenum"]+1))
- # As per "test_relicense_trailing_orig_code_modified.pl", a
- # paragraph starting with:
- # This Original Code has been modified
- # is deemed to be OUTside the license block, i.e. it is not
- # replaced for relicensing.
- if orig_code_modified_pattern.search(lines[i]):
- non_lic_content_in_same_comment_block = 1
- # The endline is the first non-blank line before this one.
- endlinenum = i-1
- while 1:
- line = lines[endlinenum]
- match = lic_middle_pattern.search(line)
- if not match:
- raise RelicError("Line did not match lic_middle_pattern "
- "unexpectedly: %r" % line)
- if match.group("content").strip(): # non-empty line
- break
- endlinenum -= 1
- break
- else:
- raise RelicError("Could not find license comment block end "
- "line in '%s'." % filename)
- # Test case: test_relicense_separated_license_comment_blocks.pl
- # It is possible that a separate comment block immediately following
- # the license block we just parsed should be included in the license
- # block.
- if (not non_lic_content_in_same_comment_block
- and len(comment_delims) == 1): # only do this for line-style comments
- lic_indicators = [
- re.compile("^The content of this file are subject to", re.I),
- re.compile("^Software distributed under the License", re.I),
- re.compile("^The Original Code is", re.I),
- re.compile("^The Initial Developer", re.I),
- re.compile("^Contributor", re.I),
- re.compile("^Alternatively, the content of this file", re.I),
- ]
- comment_line_pattern = re.compile("""
- ^(?P<prefix>%s|)
- (?P<space>\s*)
- (?P<content>.*)
- (?P<suffix>%s|)\s*?$
- """ % (prefix_pattern, suffix_pattern),
- re.VERBOSE)
- i = endlinenum
- while i+1 < len(lines):
- i += 1; line = lines[i]
- comment_index = line.find(comment_delims[0])
- if comment_index != -1:
- content = line[:comment_index].strip()
- comment = line[comment_index+len(comment_delims[0]):].strip()
- else:
- content = line.strip()
- comment = ""
- if content: # if non-comment content, then skip out
- break
- if not comment:
- continue
- for indicator in lic_indicators:
- if indicator.search(comment):
- # include this paragraph in the lic block
- while i < len(lines):
- i += 1; line = lines[i]
- if not line.strip().startswith(comment_delims[0]):
- break
- if not line.strip()[len(comment_delims[0]):]:
- break
- endlinenum = i-1
- break
- else:
- break # this is a non-lic-related comment
-
- # Get the end-line data.
- if non_lic_content_in_same_comment_block:
- lic_end_pattern = re.compile(
- "^(?P<prefix>%s)(?P<space>\s*).*?\s*?$"
- % prefix_pattern)
- else:
- lic_end_pattern = re.compile(
- "^(?P<prefix>%s)(?P<space>\s*).*?(?P<suffix>%s)\s*?$"
- % (prefix_pattern, suffix_pattern))
- match = lic_end_pattern.match(lines[endlinenum])
- if match:
- endline = {
- "content": lines[endlinenum],
- "linenum": endlinenum,
- "prefix": match.group("prefix"),
- "space": match.group("space"),
- "suffix": match.groupdict().get("suffix", ""),
- }
- else:
- raise RelicError("license block end line did not match: line='%s', "
- "pattern='%s'"
- % (lines[endlinenum], lic_end_pattern.pattern))
- log.debug("endline dict: %s", endline)
- lic_info["last_suffix"] = endline["suffix"]
- log.info("suffix for last line: '%s'", endline["suffix"])
- lic_info["end_line"] = endline["linenum"]
- log.info("license lines: %d-%d", beginline["linenum"], endline["linenum"])
- # So at this point we have the beginline, secondline, and endline
- # dicts describing and bounding the license block.
-
- # - gather embedded lic data
- # As described here:
- # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
- # we have to parse out the following possible fields:
- # original_code_is
- # original_code_date
- # initial_developer
- # initial_copyright_date
- # contributors
- lic_line_pattern = re.compile( # regex to parse out the line _body_
- "^(?P<prefix>%s)(?P<space>\s*)(?P<body>.*?)(?P<suffix>%s|)\s*?$"
- % (prefix_pattern, suffix_pattern))
- original_code_is = None
- original_code_date = None
- # Parse out the "The Original Code is ..." paragraph _content_.
- paragraph = ""
- in_paragraph = 0
- for i in range(beginline["linenum"], endline["linenum"]+1):
- body = lic_line_pattern.match(lines[i]).group("body")
- if (not in_paragraph and body.startswith("The Original Code is")):
- in_paragraph = 1
- if in_paragraph:
- if not body.strip(): # i.e. a blank line, end of paragraph
- break
- # ensure one space btwn lines
- if paragraph: paragraph = paragraph.rstrip() + " "
- paragraph += body
- if paragraph:
- pattern1 = re.compile('^The Original Code is (.*), released (.*)\.')
- match = pattern1.search(paragraph)
- if match:
- original_code_is = match.group(1)
- original_code_date = match.group(2)
- else:
- pattern2 = re.compile('^The Original Code is (.*?)\.?$')
- match = pattern2.search(paragraph)
- if match:
- original_code_is = match.group(1)
- else:
- raise RelicError(
- "%s: 'The Original Code is' paragraph did not match the "
- "expected patterns. paragraph=\n\t%r\n"
- "pattern1=\n\t%r\npattern2=\n\t%r"
- % (filename, paragraph, pattern1.pattern, pattern2.pattern))
- lic_info["original_code_is"] = original_code_is
- lic_info["original_code_date"] = original_code_date
- log.info("original code is: %s", original_code_is)
- log.info("original_code_date: %s", original_code_date)
- initial_developer = None
- initial_copyright_date = None
- # Parse out the "The Initial Developer..." paragraph _content_.
- paragraph = ""
- in_paragraph = 0
- for i in range(beginline["linenum"], endline["linenum"]+1):
- body = lic_line_pattern.match(lines[i]).group("body")
- if (not in_paragraph and
- (body.startswith("The Initial Developer of") or
- body.startswith("The Initial Developers of"))):
- in_paragraph = 1
- if in_paragraph:
- if not body.strip(): # i.e. a blank line, end of paragraph
- # Catch the possible case where there is an empty line
- # but the paragraph picks up on the next line with
- # "Portions created by"
- # (test_relicense_no_period_after_origcodeis.cpp).
- try:
- nextlinebody = lic_line_pattern.match(lines[i+1]).group("body")
- except:
- nextlinebody = ""
- if not nextlinebody.startswith("Portions created by"):
- break
- # ensure one space btwn lines
- if paragraph: paragraph = paragraph.rstrip() + " "
- paragraph += body
- if paragraph:
- pattern = re.compile("""^
- The\ Initial\ Developers?\ of\
- (the\ Original\ Code\ (is\ |are\ |is\.)|this\ code\ under\ the\ [MN]PL\ (is|are)\ )
- (?P<developer>.*?)
- \.? # maybe a trailing period
- (
- \s+Portions\ created\ by\ .*?
- are\ Copyright\ \(C\)\[?\ (?P<date>[\d-]+)
- .*? # maybe a trailing period
- (\s+All\ Rights\ Reserved\.)?
- )?
- $""", re.VERBOSE)
- match = pattern.search(paragraph)
- if not match:
- raise RelicError(
- "%s: 'This Initial Developer' paragraph did not match the "
- "expected pattern. paragraph=\n\t%r\npattern=\n\t%s"
- % (filename, paragraph, pattern.pattern))
- initial_developer = match.group("developer")
- initial_copyright_date = match.group("date")
- lic_info["initial_developer"] = initial_developer
- lic_info["initial_copyright_date"] = initial_copyright_date
- log.info("initial developer paragraph: %r", paragraph)
- log.info("initial developer: %r", initial_developer)
- log.info("initial copyright date: %r", initial_copyright_date)
- contributors = []
- normal_leading_space = None
- in_contributors_block = 0
- contrib_end = endline["linenum"]
- # If line-style comment, include the last line in the block in the
- # range we examine; if block-style comment, we only allow it if the
- # comment-block doesn't end on the endline. On top of these
- # conditions we don't search the last line if it includes the
- # special end-of-license marker.
- if len(comment_delims) == 1 or not endline["suffix"]:
- if endline["content"].find("END LICENSE BLOCK") == -1:
- contrib_end += 1
- for i in range(beginline["linenum"], contrib_end):
- match = lic_line_pattern.match(lines[i])
- body = match.group("body")
- space = match.group("space").replace('\t', ' '*8)
- if not in_contributors_block \
- and body.startswith("Contributor"):
- in_contributors_block = 1
- normal_leading_space = space
- # Try to pickup "foo@bar.org" as a contributor for a
- # possible line like this:
- # Contributor(s): foo@bar.org
- pivot = body.find(':')
- if pivot != -1:
- remainder = body[pivot+1:].strip()
- if remainder:
- contributors.append(remainder)
- elif in_contributors_block:
- if not body.strip():
- # i.e. a blank line, end of paragraph
- #XXX:BUG This condition causes the latter two
- # contributor lines to be lost from, e.g.,
- # test/x_thead_align_center.xml.
- break
- if len(space) <= len(normal_leading_space):
- # A line in the "Contributor(s) paragraph is not
- # indented. This is considered an error. Likely this is
- # a (not indented) contributor, but it might also be the
- # start of another paragraph (i.e. no blank line
- # terminating the "Contributor(s):" paragraph). We could
- # just error out here, but this is very common in the
- # Moz tree (~500) so lets try to deal with it.
- # - Heuristic #1: if the line contains what looks like
- # an email address then it is a contributor.
- # - Heuristic #2 (to accommodate js/rhino): if the line
- # looks like just a person's name.
- # Otherwise, error out.
- words = body.split()
- if '@' in body:
- lic_info["unindented_contributor_lines"] = 1
- elif (2 <= len(words) <= 3 and
- words == [word[0].upper()+word[1:] for word in words]):
- # Try to accept the following names:
- # Norris Boyd
- # Mike McCabe
- # George C. Scott
- lic_info["unindented_contributor_lines"] = 1
- else:
- raise RelicError("This line is part of the "
- "'Contributor(s):' paragraph but (1) is not indented "
- "and (2) does not look like it contains an email "
- "address: %s:%s: %r" % (filename, i, lines[i]))
- contributors.append(body.strip())
- log.info("contributors: %s", contributors)
- lic_info["contributors"] = contributors
- ## Optimization: The only content in the remaining license block lines
- ## (i.e. after the contributors block) should really be the GPL/LGPL
- ## or nothing. Trapping this will avoid losing the latter two
- ## contributor lines in test/x_thead_align_center.xml.
- #gpl_lgpl_lines = _g_trilicense_parts["gpl/lgpl"].splitlines(0)
- #gpl_lgpl = " ".join(gpl_lgpl_lines)
- #for i in range(i, endline["linenum"]):
- # match = lic_line_pattern.match(lines[i])
- # body = match.group("body")
- # space = match.group("space").replace('\t', ' '*8)
- # if not body.strip():
- # continue
- # #XXX This test is not robust enough to use.
- # if (gpl_lgpl.find(body) == -1 and
- # body.find(gpl_lgpl) == -1):
- # print "QQQ: bogus following text: %r" % body
- return lic_info
- def _report_on_file(path, (results, switch_to_mpl, show_initial, quick, _errors)):
- log.debug("_report_on_file(path='%s', results)", path)
- output = path + "\n"
- lic_info = {}
-
- if _is_binary(path):
- output += "... binary, skipping this file\n"
- else:
- try:
- lic_info = _get_license_info(path, show_initial, quick)
- except RelicError, ex:
- return _relicensing_error(ex, path, _errors)
-
- if log.isEnabledFor(logging.DEBUG):
- pprint.pprint(lic_info)
- parts = lic_info["parts"]
- if not parts:
- output += "... no license found\n"
- elif "unknown" in parts:
- output += "... unknown license (possibly) found\n"
- elif ((parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
- parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
- not lic_info.get("unindented_contributor_lines")):
- if (switch_to_mpl and
- parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
- output += "... %s found (looks complete, but is not MPL)"\
- % "/".join(parts) + "\n"
- else:
- output += "... %s found (looks complete)"\
- % "/".join(parts) + "\n"
- else:
- output += "... %s found" % "/".join(parts) + "\n"
- if not quick:
- if "begin_line" in lic_info and "end_line" in lic_info:
- output += "... license block lines: %(begin_line)d-%(end_line)d"\
- % lic_info + "\n"
- if "original_code_is" in lic_info:
- output += "... original code is: %(original_code_is)s"\
- % lic_info + "\n"
- if "original_code_date" in lic_info:
- output += "... original code date: %(original_code_date)s"\
- % lic_info + "\n"
- if "initial_developer" in lic_info:
- output += "... initial developer: %(initial_developer)s"\
- % lic_info + "\n"
- if "initial_copyright_date" in lic_info:
- output += "... initial copyright date: %(initial_copyright_date)s"\
- % lic_info + "\n"
- if "contributors" in lic_info:
- output += "... contributors: %s"\
- % ", ".join(lic_info["contributors"]) + "\n"
- if lic_info.get("unindented_contributor_lines"):
- output += "... one or more contributor lines were not indented properly"\
- + "\n"
- if show_initial:
- if "initial_developer" in lic_info:
- print lic_info["initial_developer"]
- else:
- print output;
- def _gather_info_on_file(path, (results, _errors)):
- log.debug("_gather_info_on_file(path='%s', results)", path)
- # Skip binary files.
- try:
- if _is_binary(path):
- log.debug("Skipping binary file '%s'.", path)
- return
- except Exception, ex:
- return _relicensing_error(
- "error determining if file is binary: %s" % ex,
- path, _errors)
- try:
- results[path] = _get_license_info(path)
- except RelicError, ex:
- return _relicensing_error(ex, path, _errors, 1)
def _make_backup_path(path):
    """Return the first of "<path>~0" .. "<path>~99" that does not exist.

    Raises RelicError if every one of the candidate paths already
    exists.
    """
    suffix = 0
    while suffix < 100:
        candidate = "%s~%d" % (path, suffix)
        if not os.path.exists(candidate):
            return candidate
        suffix += 1
    raise RelicError("Could not find an unused backup path for '%s'." % path)
- def _relicensing_error(err, path, cache=None, quiet=1):
- """Handle an error during relicensing.
-
- "err" may be an error string or an exception instance.
- "path" is the path of the file on which this error occured.
- "cache" is a mapping of path to errors on which errors may be
- stored for later reporting.
- "quiet" optionally allows one to silence the stdout output when
- force is in effect.
- If the --force option is in-effect then errors may be remembered and
- processing continues, rather than halting the whole process.
- """
- if _g_force:
- if not quiet:
- print "...", err
- if cache is not None:
- cache[path] = err
- elif isinstance(err, Exception):
- raise
- else:
- raise RelicError("%s: %s" % (path, err))
def _get_comment_delim_sets(filename):
    """Return the comment delimiter info registered for the given file.

    The lookups are tried in this order, stopping at the first one
    that yields a non-empty result:
      1. the file's basename in '_g_basename_to_comment_info',
      2. the file's extension in '_g_ext_to_comment_info',
      3. the file's first line, if it is a shebang ("#!") line,
         against the patterns in '_g_shebang_pattern_to_comment_info'.
    A trailing ".in" extension is dropped before lookups 1 and 2.

    Raises RelicError if none of the lookups yields a result.
    """
    root, last_ext = os.path.splitext(filename)
    if last_ext == ".in":
        # "<foo>.in" is generally a precursor for a filetype
        # identifiable without the ".in". Drop it.
        xfilename = root
    else:
        xfilename = filename
    basename = os.path.basename(xfilename)
    # Computed up front so that it is always bound for the error
    # message below.
    ext = os.path.splitext(xfilename)[1]

    comment_delims = None
    # special cases for some basenames
    try:
        comment_delims = _g_basename_to_comment_info[basename]
    except KeyError:
        pass
    if not comment_delims: # use the file extension
        try:
            comment_delims = _g_ext_to_comment_info[ext]
        except KeyError:
            pass
    if not comment_delims: # try to use the shebang line, if any
        fin = open(filename, 'r')
        try:
            # Close the file even if the read fails.
            firstline = fin.readline()
        finally:
            fin.close()
        if firstline.startswith("#!"):
            for pattern, cds in _g_shebang_pattern_to_comment_info:
                if pattern.match(firstline):
                    comment_delims = cds
                    break
    if not comment_delims:
        raise RelicError("%s: couldn't determine file type (and "
                         "comment delimiter info) from basename '%s' or "
                         "extension '%s'): you may need to add to "
                         "'_g_basename_to_comment_info', "
                         "'_g_ext_to_comment_info', "
                         "'_g_shebang_pattern_to_comment_info' "
                         "or one of the '_g_skip_*' globals"
                         % (filename, basename, ext))
    return comment_delims
-
- def _relicense_file(original_path,
- (fallback_initial_copyright_date,
- fallback_initial_developer,
- fallback_original_code_is,
- fallback_original_code_date,
- switch_to_mpl,
- backup,
- results,
- force_relicensing,
- _errors)):
- """Relicense the given file.
- "original_path" is the file to relicense
- "fallback_initial_copyright_date"
- "fallback_initial_developer"
- "fallback_original_code_is"
- "fallback_original_code_date"
- User-specified fallback values to use for the new license
- block if they cannot be found in the original.
- "switch_to_mpl" is a boolean indicating if an NPL-based license
- should be converted to MPL.
- "backup" (optional, default false) is a boolean indicating if
- backups should be made
- "results" is a dictionary in which to store statistics and errors.
- See relicense() for schema.
- "force_relicensing" is a boolean indicating if relicensing
- should be done even if the license block looks complete.
- "_errors" is a dictionary on which errors are reported
- (keyed by file path) when the force option is in effect.
- The function does not return anything.
- """
- log.debug("_relicense_file(original_path='%s')", original_path)
- print original_path
- # Ensure can access file.
- if not os.access(original_path, os.R_OK|os.W_OK):
- return _relicensing_error("cannot access", original_path, _errors)
- else:
- log.info("have read/write access")
- # Skip binary files.
- try:
- if _is_binary(original_path):
- print "... binary, skipping this file"
- results["binary"] += 1
- return
- except Exception, ex:
- return _relicensing_error(
- "error determining if file is binary: %s" % ex,
- original_path, _errors)
- try:
- lic_info = _get_license_info(original_path, 0)
- except RelicError, ex:
- return _relicensing_error(ex, original_path, _errors)
- # Load fallback info if necessary.
- if not lic_info.get("initial_copyright_date"):
- lic_info["initial_copyright_date"] = fallback_initial_copyright_date
- if not lic_info.get("initial_developer"):
- lic_info["initial_developer"] = fallback_initial_developer
- if not lic_info.get("original_code_is"):
- lic_info["original_code_is"] = fallback_original_code_is
- if not lic_info.get("original_code_date"):
- lic_info["original_code_date"] = fallback_original_code_date
- # Return/abort if cannot or do not need to re-license.
- parts = lic_info["parts"]
- if not parts:
- results["no license"] += 1
- print "... no license found, skipping this file"
- return
- elif "unknown" in parts:
- return _relicensing_error("unknown license (possibly) found",
- original_path, _errors)
- elif parts.count("block_begin") > 1: # sanity check
- return _relicensing_error(
- "'BEGIN LICENSE BLOCK' delimiter found more than once",
- original_path, _errors)
- elif parts.count("block_end") > 1: # sanity check
- return _relicensing_error(
- "'END LICENSE BLOCK' delimiter found more than once",
- original_path, _errors)
- elif not lic_info["initial_developer"]:
- return _relicensing_error(
- "no 'Initial Developer' section was found -- use "
- "the -i option to specify your own",
- original_path, _errors)
- elif not lic_info["initial_copyright_date"]:
- return _relicensing_error(
- "no initial copyright year was found -- use "
- "the -y option to specify your own",
- original_path, _errors)
- elif not lic_info["original_code_is"]:
- return _relicensing_error(
- "no 'Original Code is' section was found -- use "
- "the -o option to specify your own",
- original_path, _errors)
- elif ((parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
- parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
- not lic_info.get("unindented_contributor_lines")):
- #XXX Should add an option to relicense anyway because matching
- # is not super-strict. E.g. nsWidgetFactory.cpp.
- if (switch_to_mpl and
- parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
- print "... %s found (looks complete, but is not MPL)"\
- % "/".join(parts)
- elif force_relicensing:
- print "... %s found (looks complete, but forcing relicensing)"\
- % "/".join(parts)
- else:
- results["good"] += 1
- print "... %s found (looks complete), nothing to do"\
- % "/".join(parts)
- return
- # We need to re-license this file.
- print "... %s found, need to relicense" % "/".join(parts)
- if lic_info["original_code_is"]:
- print "... original code is: %(original_code_is)s" % lic_info
- if lic_info["original_code_date"]:
- print "... original code date: %(original_code_date)s" % lic_info
- if lic_info["initial_developer"]:
- print "... initial developer: %(initial_developer)s" % lic_info
- if lic_info["initial_copyright_date"]:
- print "... initial copyright date: %(initial_copyright_date)s" % lic_info
- if lic_info["contributors"]:
- print "... contributors: %s" % ", ".join(lic_info["contributors"])
- # Put the license block together.
- # - build up the license block from the appropriate parts
- trilicense = ""
- if (not switch_to_mpl) and ( "npl" in parts ):
- trilicense_name = "NPL/GPL/LGPL"
- trilicense += _g_trilicense_parts["npl"]
- else:
- trilicense_name = "MPL/GPL/LGPL"
- trilicense += _g_trilicense_parts["mpl"]
- print "... replacing lines %d-%d with %s tri-license"\
- % (lic_info["begin_line"], lic_info["end_line"], trilicense_name)
- if lic_info["original_code_is"] is not None:
- if lic_info["original_code_date"] is not None:
- trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
- else:
- trilicense += _g_trilicense_parts["original_code_is"] % lic_info
- #else:
- # raise RelicError("Gerv, how should the new license block handle no "
- # "'Originial Code is...' information? --TM")
- if (lic_info["initial_developer"] is not None
- and lic_info["initial_copyright_date"] is not None):
- trilicense += _g_trilicense_parts["initial_developer"] % lic_info
- #else:
- # raise RelicError("Gerv, how should the new license block handle no "
- # "'Initial Developer is...' information? --TM")
- if lic_info["contributors"]:
- contributors = " " + "\n ".join(lic_info["contributors"]) + "\n"
- else:
- contributors = ""
- trilicense += _g_trilicense_parts["contributors"] % contributors
- if trilicense_name == "NPL/GPL/LGPL":
- trilicense += _g_trilicense_parts["gpl/lgpl for npl"]
- else: # trilicense_name == "MPL/GPL/LGPL"
- trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]
- # get fallback comment subsequent prefix
- fallback_prefix = _get_comment_delim_sets(original_path)
-
- # - add the comment delimiters
- lines = trilicense.splitlines()
- for i in range(len(lines)):
- if i == 0:
- prefix = lic_info["first_prefix"]
- else:
- if lic_info["subsequent_prefix"]:
- prefix = lic_info["subsequent_prefix"]
- else:
- prefix = fallback_prefix[0][1]
- if lines[i]:
- if len(lic_info["comment_delims"]) == 0:
- lines[i] = prefix + lines[i]
- else:
- lines[i] = prefix + ' ' + lines[i]
- else: # don't add trailing whitespace
- lines[i] = prefix
- if lic_info["last_suffix"]: # don't add that ' ' if there is no suffix
- lines[-1] += ' ' + lic_info["last_suffix"]
- for i in range(len(lines)): lines[i] += '\n'
- trilicense_lines = lines
- ##### uncomment to debug license block
- # pprint.pprint(lines)
- # return
- # Skip out now if doing a dry-run.
- if _g_dry_run:
- results["relicensed"] += 1
- return
- # Make a backup.
- if backup:
- backup_path = _make_backup_path(original_path)
- print "... backing up to '%s'" % backup_path
- try:
- shutil.copy(original_path, backup_path)
- except EnvironmentError, ex:
- return _relicensing_error(ex, original_path, _errors)
- # Re-license the file.
- try:
- fin = open(original_path, "r")
- try:
- lines = fin.readlines()
- finally:
- fin.close()
- lines[lic_info["begin_line"]:lic_info["end_line"]+1] = trilicense_lines
- fout = open(original_path, "w")
- try:
- fout.write(''.join(lines))
- finally:
- fout.close()
- results["relicensed"] += 1
- print "... done relicensing '%s'" % original_path
- except:
- if backup:
- print "... error relicensing, restoring original"
- if os.path.exists(original_path):
- os.remove(original_path)
- os.rename(backup_path, original_path)
- else:
- print "... error relicensing, file may be corrupted"
- # fallback to type_ for string exceptions
- type_, value, tb = sys.exc_info()
- return _relicensing_error(value or type_,
- original_path, _errors)
- def _add_license_to_file(original_path,
- (initial_copyright_date,
- initial_developer,
- original_code_is,
- original_code_date,
- backup,
- results,
- _errors)):
- """Add a license block to the given file.
- "original_path" is the file to add a license to
- "initial_copyright_date"
- "initial_developer"
- "original_code_is"
- "original_code_date"
- User-specified values to use for the new license. All but
- "original_code_date" are required.
- "backup" (optional, default false) is a boolean indicating if
- backups should be made
- "results" is a dictionary in which to store statistics and errors.
- See relicense() for schema.
- "_errors" is a dictionary on which errors are reported
- (keyed by file path) when the force option is in effect.
- The function does not return anything.
- """
- log.debug("_add_license_to_file(original_path='%s')", original_path)
- print original_path
- # Ensure can access file.
- if not os.access(original_path, os.R_OK|os.W_OK):
- return _relicensing_error("cannot access", original_path, _errors)
- else:
- log.info("have read/write access")
- # Skip binary files.
- try:
- if _is_binary(original_path):
- print "... binary, skipping this file"
- results["binary"] += 1
- return
- except Exception, ex:
- return _relicensing_error(
- "error determining if file is binary: %s" % ex,
- original_path, _errors)
- try:
- lic_info = _get_license_info(original_path, 0)
- except RelicError, ex:
- return _relicensing_error(ex, original_path, _errors)
- # Return/abort if cannot or do not need to re-license.
- parts = lic_info["parts"]
- if lic_info["parts"]: # has a license
- results["license"] += 1
- print "... license found, skipping this file"
- return
- #... else we need to add a license to this file.
- print "... no license found, need to add one"
- # Load license info.
- lic_info["initial_developer"] = initial_developer
- print "... initial developer: %(initial_developer)s" % lic_info
- lic_info["initial_copyright_date"] = initial_copyright_date
- print "... initial copyright date: %(initial_copyright_date)s" % lic_info
- lic_info["original_code_is"] = original_code_is
- print "... original code is: %(original_code_is)s" % lic_info
- if original_code_date:
- lic_info["original_code_date"] = original_code_date
- print "... original code date: %(original_code_date)s" % lic_info
- else:
- lic_info["original_code_date"] = None
- # Determine what line we can start putting the license block on.
- # Typically this would be line 0, but for the following exceptions:
- # - Shebang (#!) lines
- # - Emacs local variables line:
- # /* -*- Mode: C++; ... -*- */
- # This line does not HAVE to be first, but that seems to be a
- # trend, so might as well honour it.
- # - XML magic "number": <?xml version="2.0" ... ?>
- # where "..." might include newlines
- startline = 0
- try:
- comment_delim_sets = _get_comment_delim_sets(original_path)
- except RelicError, ex:
- return _relicensing_error(ex, original_path, _errors, 1)
- fin = open(original_path, 'r')
- try:
- lines = fin.readlines()
- finally:
- fin.close()
- # If this is an XML file, advance past the magic number tag.
- if lines and lines[0].find("<?xml") != -1:
- line = lines[0]
- if (line.find('encoding="utf-8"') != -1
- and line.startswith("\xef\xbb\xbf")):
- # remove UTF-8 BOM
- # Note: this is hardly robust Unicode XML handling :)
- line = line[3:]
- if line.startswith("<?xml"):
- end_index = lines[startline].find("?>")
- while startline < len(lines):
- startline += 1
- if end_index != -1: # found end of tag
- break
- # Note: this does not catch something like this:
- # <?xml version="2.0"?> <?stylesheet ...
- # ...?>
- # but that is just crazy.
- # else, advance past a possible shebang line.
- else:
- for comment_delims in comment_delim_sets:
- if (len(comment_delims) == 1 and comment_delims[0] == "#"
- and lines[0].startswith("#!")):
- startline += 1
- # Advance past an Emacs local variable line.
- comment_delims = None
- if lines[startline].find("-*-") != -1:
- for comment_delims in comment_delim_sets:
- if lines[startline].find(comment_delims[0]) != -1:
- break
- else:
- # We were hoping to be able to determine which of the set of
- # possible commenting styles was in use by finding the
- # comment start token on the same line as the -*-
- # Emacs-modeline signifier, but could not. This likely means
- # that this file uses a block-style comment but the block
- # doesn't start on the same line. Fallback to the
- # block-style comment delimiter set, if there is one.
- for comment_delims in comment_delim_sets:
- if len(comment_delims) == 3:
- break
- else:
- comment_delims = comment_delim_sets[0]
- if len(comment_delims) == 1: # line-style comments
- startline += 1
- else: # block-style comments
- in_comment = 0
- while startline < len(lines):
- line = lines[startline]
- linepos = 0
- while linepos < len(line):
- if not in_comment:
- i = line.find(comment_delims[0], linepos)
- if i == -1:
- break
- else:
- in_comment = 1
- linepos = i+1
- else:
- i = line.find(comment_delims[-1], linepos)
- if i == -1:
- break
- else:
- in_comment = 0
- linepos = i+1
- startline += 1
- if not in_comment:
- break
-
- # Put the license block together.
- # - build up the license block from the appropriate parts
- trilicense_name = "MPL/GPL/LGPL"
- print "... adding %s tri-license starting at line %s (zero-based)"\
- % (trilicense_name, startline)
- trilicense = _g_trilicense_parts["mpl"]
- if lic_info["original_code_date"] is not None:
- trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
- else:
- trilicense += _g_trilicense_parts["original_code_is"] % lic_info
- trilicense += _g_trilicense_parts["initial_developer"] % lic_info
- if lic_info.get("contributors"):
- contributors = " " + "\n ".join(lic_info["contributors"]) + "\n"
- else:
- contributors = ""
- trilicense += _g_trilicense_parts["contributors"] % contributors
- trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]
- # - add the comment delimiters
- if comment_delims is None:
- for comment_delims in comment_delim_sets:
- if lines[startline].find(comment_delims[0]) != -1:
- break
- elif len(comment_delims) == 3 and lines[startline].find(comment_delims[1]) != -1:
- break
- else:
- # We were hoping to be able to determine which of the set of
- # possible commenting styles was in use by finding the
- # comment start token on the same line as the -*-
- # Emacs-modeline signifier, but could not. This likely means
- # that this file uses a block-style comment but the block
- # doesn't start on the same line. Fallback to the
- # block-style comment delimiter set, if there is one.
- for comment_delims in comment_delim_sets:
- if len(comment_delims) == 3:
- break
- else:
- comment_delims = comment_delim_sets[0]
- print "comment delims were none: %r" % comment_delims
- t_lines = trilicense.splitlines()
- if len(comment_delims) == 1: # line-style comments
- for i in range(len(t_lines)):
- if t_lines[i]:
- t_lines[i] = comment_delims[0] + ' ' + t_lines[i]
- else: # don't add trailing whitespace
- t_lines[i] = comment_delims[0]
- else: # block-style comments
- if t_lines[0]:
- t_lines[0] = comment_delims[0] + ' ' + t_lines[0]
- else: # don't add trailing whitespace
- t_lines[0] = comment_delims[0]
- for i in range(1, len(t_lines)):
- if t_lines[i]:
- t_lines[i] = comment_delims[1] + ' ' + t_lines[i]
- else: # don't add trailing whitespace
- t_lines[i] = comment_delims[1]
- t_lines[-1] += ' ' + comment_delims[-1]
- for i in range(len(t_lines)): t_lines[i] += '\n'
- t_lines[-1] += '\n' # add a blank line at end of lic block
- trilicense_lines = t_lines
- #pprint.pprint(t_lines)
- # Skip out now if doing a dry-run.
- if _g_dry_run:
- results["added"] += 1
- return
- # Make a backup.
- if backup:
- backup_path = _make_backup_path(original_path)
- print "... backing up to '%s'" % backup_path
- try:
- shutil.copy(original_path, backup_path)
- except EnvironmentError, ex:
- return _relicensing_error(ex, original_path, _errors)
- # Add the license to the file.
- try:
- lines[startline:startline] = trilicense_lines
- fout = open(original_path, "w")
- try:
- fout.write(''.join(lines))
- finally:
- fout.close()
- results["added"] += 1
- print "... done adding license to '%s'" % original_path
- except:
- if backup:
- print "... error adding license, restoring original"
- if os.path.exists(original_path):
- os.remove(original_path)
- os.rename(backup_path, original_path)
- else:
- print "... error adding license, file may be corrupted"
- # fallback to type_ for string exceptions
- type_, value, tb = sys.exc_info()
- return _relicensing_error(value or type_,
- original_path, _errors)
def _traverse_dir(handler_and_results, dirname, names):
    """os.path.walk visitor: filter and process the entries of one directory.

    "handler_and_results" is the (file_handler, results) pair that was
        given to os.path.walk as its "arg". "file_handler" is either
        None or a callable invoked as file_handler(path, results) for
        every file in this directory that is not skipped.
    "dirname" is the directory currently being visited.
    "names" is the list of entry names found in "dirname". It is
        modified in-place: skipped entries are removed so that they are
        not traversed by os.path.walk.

    This function does not return anything.
    """
    # The pair is unpacked here instead of in the signature: tuple
    # parameters are a Python 2-only construct (removed by PEP 3113).
    # os.path.walk passes the pair positionally, so callers are unaffected.
    file_handler, results = handler_and_results
    log.debug("_traverse_dir((file_handler, results), dirname='%s', "
              "names=%s)", dirname, names)

    # Go through the indices from last to first so that deleting
    # names[i] never shifts an entry that has yet to be examined.
    for i in range(len(names)-1, -1, -1):
        path = os.path.join(dirname, names[i])
        if os.path.isdir(path):
            if _should_skip_dir(path):
                del names[i]
                continue
        if os.path.isfile(path):
            if _should_skip_file(path):
                del names[i]
                continue
            if file_handler is not None:
                file_handler(path, results)
def _traverse(paths, file_handler, arg):
    """Visit each of the given path(s), calling "file_handler" on the files.

    "paths" is either a list of files or directories, or it is an
        input stream with a path on each line (a trailing newline is
        stripped from each entry).
    "file_handler" is None or a callable to be called on each file
        traversed. It is called with the following signature:
            file_handler(path, arg)
    "arg" is some object passed to each callback. This is useful for
        recording results.

    Paths that do not exist are logged and skipped. Files rejected by
    _should_skip_file() and directories rejected by _should_skip_dir()
    are not processed. Directories are walked with os.path.walk using
    _traverse_dir as the visitor. RelicError is raised for an existing
    path that is neither a file nor a directory. This function does not
    return anything.
    """
    log.debug("_traverse(paths=%s, file_handler=%s, arg=%s)",
              paths, file_handler, arg)
    for path in paths:
        # Chomp the newline if 'paths' is a stream. endswith() is used
        # rather than path[-1] so that an empty path does not raise
        # IndexError; it falls through to the "does not exist" warning.
        if path.endswith("\n"):
            path = path[:-1]
        if not os.path.exists(path):
            log.warn("'%s' does not exist, skipping", path)
        elif os.path.isfile(path):
            if _should_skip_file(path):
                continue
            if file_handler is not None:
                file_handler(path, arg)
        elif os.path.isdir(path):
            if _should_skip_dir(path):
                continue
            os.path.walk(path, _traverse_dir, (file_handler, arg))
        else:
            raise RelicError("unexpected path type '%s'" % path)
- #---- public routines
def relicense(paths,
              fallback_initial_copyright_date=None,
              fallback_initial_developer=None,
              fallback_original_code_is=None,
              fallback_original_code_date=None,
              switch_to_mpl=0,
              backup=0,
              force_relicensing=0,
              _errors=None):
    """Relicense the given file(s) (or the files in the given dirs) and
    print a summary of what was done.

    "paths" is either a list of files or directories, or it is an
        input stream with a path on each line.
    "fallback_initial_copyright_date"
    "fallback_initial_developer"
    "fallback_original_code_is"
    "fallback_original_code_date"
        User-specified fallback values to use for the new license
        block if they cannot be found in the original.
    "switch_to_mpl" (optional, default false) is a boolean indicating
        if an NPL-based license should be converted to MPL.
    "backup" (optional, default false) is a boolean indicating if
        backups should be made.
    "force_relicensing" (optional, default false) is a boolean
        indicating if relicensing should happen even if the license
        block looks complete.
    "_errors" (optional) is a dictionary on which errors are reported
        (keyed by file path) when the force option is in effect.

    All of the above are handed, together with the counters dictionary,
    to _relicense_file for every file traversed. This function does not
    return anything. It will raise RelicError if there is a problem.
    Note that OSError/IOError may also be raised.
    """
    log.debug("relicense(paths=%s, backup=%r)", paths, backup)

    # Counters that are reported in the summary below.
    results = {"relicensed": 0, "no license": 0, "good": 0, "binary": 0}
    handler_arg = (fallback_initial_copyright_date,
                   fallback_initial_developer,
                   fallback_original_code_is,
                   fallback_original_code_date,
                   switch_to_mpl,
                   backup,
                   results,
                   force_relicensing,
                   _errors)
    _traverse(paths, _relicense_file, handler_arg)

    # Each summary row is "<label> <count>".
    skipped_rows = (
        ("Files skipped b/c they are binary:", "binary"),
        ("Files skipped b/c they already had proper license:", "good"),
        ("Files skipped b/c they had no license:", "no license"),
    )
    print("")
    print("--------------------- Summary of Results ------------------------")
    for label, key in skipped_rows:
        print("%s %s" % (label, results[key]))
    if _g_dry_run:
        print("Files re-licensed: %d (dry-run)" % results["relicensed"])
    else:
        print("%s %s" % ("Files re-licensed:", results["relicensed"]))
    print("-----------------------------------------------------------------")
def addlicense(paths,
               initial_copyright_date,
               initial_developer,
               original_code_is,
               original_code_date=None,
               backup=0,
               _errors=None):
    """Add a license to those of the given file(s) that do not appear
    to have one, then print a summary of what was done.

    "paths" is either a list of files or directories, or it is an
        input stream with a path on each line.
    "initial_copyright_date"
    "initial_developer"
    "original_code_is"
    "original_code_date"
        User-specified values to use for the new license block. All
        but "original_code_date" are required.
    "backup" (optional, default false) is a boolean indicating if
        backups should be made.
    "_errors" (optional) is a dictionary on which errors are reported
        (keyed by file path) when the force option is in effect.

    This function does not return anything. It raises RelicError if a
    required value is missing or if there is a problem. Note that
    OSError/IOError may also be raised.
    """
    log.debug("addlicense(paths=%s, backup=%r)", paths, backup)

    # Refuse to start without the mandatory license fields. They are
    # checked in this order, so the first missing one is what gets
    # reported.
    mandatory = (
        (initial_copyright_date,
         "no Initial Developer copyright year was "
         "specified -- use the -y option"),
        (initial_developer,
         "no 'Initial Developer' section was specified "
         "-- use the -i option"),
        (original_code_is,
         "no 'Original Code is' section was specified "
         "-- use the -o option"),
    )
    for value, complaint in mandatory:
        if not value:
            raise RelicError(complaint)

    # Counters that are reported in the summary below.
    results = {"added": 0, "license": 0, "binary": 0}
    handler_arg = (initial_copyright_date,
                   initial_developer,
                   original_code_is,
                   original_code_date,
                   backup,
                   results,
                   _errors)
    _traverse(paths, _add_license_to_file, handler_arg)

    # Each "skipped" row is "<label> <count>".
    skipped_rows = (
        ("Files skipped b/c they are binary:", "binary"),
        ("Files skipped b/c they already had a license:", "license"),
    )
    print("")
    print("--------------------- Summary of Results ------------------------")
    for label, key in skipped_rows:
        print("%s %s" % (label, results[key]))
    if _g_dry_run:
        print("Files to which a license was added: %d (dry-run)" % results["added"])
    else:
        print("Files to which a license was added: %d" % results["added"])
    print("-----------------------------------------------------------------")
def report(paths, switch_to_mpl=0, show_initial=1, quick=0, _errors=None):
    """Report on the existing licenses in the given file(s).

    "paths" is either a list of files or directories, or it is an
        input stream with a path on each line.
    "switch_to_mpl" (optional, default false) is a boolean indicating
        if an NPL-based license should be converted to MPL.
    "show_initial" (optional, default true) is a boolean indicating
        if the initial developer should be displayed for each file.
    "quick" (optional, default false) is a boolean indicating if only
        basic license checking should be applied.
    "_errors" (optional) is a dictionary on which errors are reported
        (keyed by file path) when the force option is in effect.

    The actual per-file work is done by _report_on_file, which receives
    all of the options above plus a fresh results dictionary. This
    function does not return anything. It will raise RelicError if
    there is a problem.
    """
    log.debug("report(paths=%s)", paths)
    results = {}
    handler_arg = (results, switch_to_mpl, show_initial, quick, _errors)
    _traverse(paths, _report_on_file, handler_arg)
def statistics(paths, extended=0, quick=0, _errors=None):
    """Show a summary table of licenses in files in the given path(s).

    "paths" is either a list of files or directories, or it is an
        input stream with a path on each line.
    "extended" (optional) is a boolean indicating if extended
        statistics should be shown: the files behind each count are
        listed as well.
    "quick" (optional) is a boolean indicating if quick scan mode
        should be enabled. NOTE(review): it is accepted but not used
        anywhere in this function.
    "_errors" (optional) is a dictionary on which errors are reported
        (keyed by file path) when the force option is in effect.

    This function does not return anything.
    """
    #XXX Info gathering returns a lot more info now. We might be able to
    #    output more interesting stats.
    log.debug("statistics(paths=%s, extended=%s)",
              paths, extended)
    results = {}
    _traverse(paths, _gather_info_on_file, (results, _errors))
    total = len(results)

    # Bucket the files by license category:
    #   <category name> -> [<number of hits>, [<files>...]]
    stats = {}
    block_markers = ["block_begin", "block_end"]
    for path, info in results.items():
        parts = info["parts"]
        lic_types = [p for p in parts if p not in block_markers]
        if not lic_types:
            name = "<none found>"
        elif "unknown" in lic_types:
            name = "<unknown license>"
        # A complete mpl/gpl/lgpl (i.e. one with the block begin and
        # end tokens) is told apart from an incomplete one. Likewise
        # for NPL.
        elif parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"]:
            name = "mpl/gpl/lgpl (complete)"
        elif parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]:
            name = "npl/gpl/lgpl (complete)"
        else:
            name = "/".join(lic_types)
        bucket = stats.setdefault(name, [0, []])
        bucket[0] += 1
        bucket[1].append(path)

    # Order the categories so that the most common one comes first.
    statslist = []
    for name, bucket in stats.items():
        statslist.append((bucket[0], name, bucket[1]))
    statslist.sort()
    statslist.reverse()

    print("Summary of Licenses in Files")
    print("============================")
    print(" Number Percent License")
    print("------- -------- -----------")
    for hits, name, files in statslist:
        print("%7d %7.2f%% %s" % (hits, (hits*100.0/total), name))
        #XXX A per-file-extension breakdown for extended mode used to be
        #    printed here. It was removed because it was not clear that
        #    it was at all useful.
    print("----------------------------")
    print("%7d files processed" % total)

    # Some other interesting statistics: files whose gathered info has
    # the given key present but set to None, and files flagged as having
    # unindented contributor lines.
    no_initial_developer = [
        path for path, info in results.items()
        if "initial_developer" in info and info["initial_developer"] is None]
    no_original_code_is = [
        path for path, info in results.items()
        if "original_code_is" in info and info["original_code_is"] is None]
    unindented_contributor_lines = [
        path for path, info in results.items()
        if info.get("unindented_contributor_lines")]

    sections = (
        ("Licensed files with no 'Initial Developer...' info: %d",
         no_initial_developer),
        ("Licensed files with no 'Original Code is...' info: %d",
         no_original_code_is),
        ("Licensed files with improperly indented 'Contributor(s):' line(s): %d",
         unindented_contributor_lines),
    )
    print("")
    for heading, offenders in sections:
        print(heading % len(offenders))
        if extended:
            for path in offenders:
                print(" %s" % path)

    # In extended mode, finish with the sorted file list of every
    # license category.
    if extended:
        for hits, name, files in statslist:
            print("Files in license category '%s'" % name)
            ordered = files[:]
            ordered.sort()
            for path in ordered:
                print(" %s" % path)
            print("")
- #---- mainline
def main(argv):
    """Command-line entry point: parse "argv" and run the requested action.

    "argv" is the full argument vector; argv[0] is the program name
        and is only used in the "Try `... --help'." hint.

    The action defaults to "report"; -s, -r/-R and -A select
    "statistics", "relicense" and "add" respectively. If no paths are
    given on the command line, they are read from stdin. Options that
    getopt accepts but that have no handler here (-L/--license) are
    ignored.

    Returns 2 if the options cannot be parsed and 1 if a RelicError is
    raised. Returns None otherwise: after -h or -V, after a completed
    run, and after a KeyboardInterrupt.
    """
    global _g_check_all, _g_dry_run, _g_force

    short_opts = "VvadhqfML:sxry:i:o:D:ARI"
    long_opts = ["version", "verbose", "all", "help", "debug",
                 "dry-run", "force", "MPL", "license=",
                 "statistics", "relicense", "backup", "add", "defaults",
                 "force-relicense", "initial-developers", "quick"]
    try:
        opts, args = getopt.getopt(argv[1:], short_opts, long_opts)
    except getopt.GetoptError:
        problem = sys.exc_info()[1]
        log.error(str(problem))
        log.error("Try `%s --help'.", argv[0])
        return 2

    # Settings that the options below may override.
    debug = 0
    mode = "report"
    extended = 0
    backup = 0
    quick = 0
    force_relicensing = 0
    fallback_initial_copyright_date = None
    fallback_initial_developer = None
    fallback_original_code_is = None
    fallback_original_code_date = None
    switch_to_mpl = 0
    show_initial = 0

    # Options whose main effect is to select the action to run.
    mode_for_opt = {
        "-s": "statistics", "--statistics": "statistics",
        "-r": "relicense", "--relicense": "relicense",
        "-R": "relicense", "--force-relicense": "relicense",
        "-A": "add", "--add": "add",
    }

    for opt, optarg in opts:
        # -h and -V end the run as soon as they are encountered.
        if opt in ("-h", "--help"):
            sys.stdout.write(__doc__)
            return
        if opt in ("-V", "--version"):
            print("relic %s" % '.'.join([str(part) for part in _version_]))
            return

        if opt in mode_for_opt:
            mode = mode_for_opt[opt]
            if opt in ("-R", "--force-relicense"):
                force_relicensing = 1
        # Logging verbosity.
        elif opt in ("-v", "--verbose"):
            log.setLevel(logging.INFO)
        elif opt in ("-d", "--debug"):
            log.setLevel(logging.DEBUG)
            debug = 1
        # Module-wide switches.
        elif opt in ("-a", "--all"):
            _g_check_all = 1
        elif opt == "--dry-run":
            _g_dry_run = 1
        elif opt in ("-f", "--force"):
            _g_force = 1
        # Simple flags.
        elif opt in ("-M", "--MPL"):
            switch_to_mpl = 1
        elif opt == "-x":
            extended = 1
        elif opt in ("-q", "--quick"):
            quick = 1
        elif opt in ("-I", "--initial-developers"):
            show_initial = 1
        elif opt == "--backup":
            backup = 1
        # Values for the license block.
        elif opt == "-y":
            fallback_initial_copyright_date = optarg
        elif opt == "-i":
            fallback_initial_developer = optarg
        elif opt == "-o":
            fallback_original_code_is = optarg
        elif opt == "-D":
            fallback_original_code_date = optarg
        elif opt == "--defaults":
            fallback_original_code_is = "mozilla.org Code"
            fallback_initial_copyright_date = "2001"
            fallback_initial_developer = "Netscape Communications Corporation"

    try:
        # Prepare the input.
        if args:
            paths = args
        else:
            log.debug("no given files, trying stdin")
            paths = sys.stdin

        # Invoke the requested action. Errors that an action chooses to
        # delay rather than raise are collected in _errors.
        _errors = {}
        if mode == "report":
            report(paths, switch_to_mpl, show_initial, quick, _errors=_errors)
        elif mode == "statistics":
            statistics(paths, extended, quick, _errors=_errors)
        elif mode == "relicense":
            relicense(paths,
                      fallback_initial_copyright_date,
                      fallback_initial_developer,
                      fallback_original_code_is,
                      fallback_original_code_date,
                      switch_to_mpl,
                      backup,
                      force_relicensing,
                      _errors=_errors)
        elif mode == "add":
            addlicense(paths,
                       fallback_initial_copyright_date,
                       fallback_initial_developer,
                       fallback_original_code_is,
                       fallback_original_code_date,
                       backup,
                       _errors=_errors)
        else:
            raise RelicError("unexpected mode: '%s'" % mode)

        # Report any delayed errors.
        if _errors:
            print("")
            print("=================== Summary of Errors ===========================")
            print("%s %s" % ("Files with processing errors:", len(_errors)))
            print("=================================================================")
            for path, error in _errors.items():
                print("%s: %s" % (path, error))
                print("")
            print("=================================================================")
    except RelicError:
        failure = sys.exc_info()[1]
        log.error(str(failure) +
                  " (the --force option can be used to skip problematic "
                  "files and continue processing rather than aborting)")
        if debug:
            print("")
            import traceback
            traceback.print_exception(*sys.exc_info())
        return 1
    except KeyboardInterrupt:
        # An interrupt from the user ends the run quietly.
        pass
if __name__ == "__main__":
    # Run the command-line interface and hand main()'s return value to
    # sys.exit() as this process's exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)