/tools/win/split_link/split_link.py
Python | 420 lines | 372 code | 29 blank | 19 comment | 31 complexity | da0e8f492dc32737440660184c0a40fb MD5 | raw file
- # Copyright 2013 The Chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- """Takes the same arguments as Windows link.exe, and a definition of libraries
- to split into subcomponents. Does multiple passes of link.exe invocation to
- determine exports between parts and generates .def and import libraries to
- cause symbols to be available to other parts."""
- import _winreg
- import ctypes
- import os
- import re
- import shutil
- import subprocess
- import sys
- import tempfile
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- # Set to a true value to ignore data exports: .def entries are then written
- # without the DATA tag. The resulting DLLs will probably not run, but at least
- # they can be generated. The log of data exports will still be output.
- IGNORE_DATA = 0
def Log(message):
  """Prints |message| to stdout, prefixed with 'split_link: '."""
  print('split_link: %s' % (message,))
def _ClassifyLinkArgs(args):
  """Splits parsed link.exe arguments into (flags, inputs, manifest path)."""
  inputs = []
  flags = []
  intermediate_manifest = ''
  for arg in args:
    lower_arg = arg.lower()
    # /OUT and /PDB are dropped here; RunLinker supplies them for each part.
    if lower_arg.startswith(('/out:', '/pdb:')):
      continue
    # /MANIFESTFILE is replaced too, but the requested path is remembered.
    if lower_arg.startswith('/manifestfile:'):
      intermediate_manifest = arg[arg.index(':') + 1:]
      continue
    if (not lower_arg.startswith('/') and
        lower_arg.endswith(('.obj', '.lib', '.res'))):
      inputs.append(arg)
    else:
      flags.append(arg)
  return flags, inputs, intermediate_manifest


def GetFlagsAndInputs(argv):
  """Parses the command line intended for link.exe and returns the flags and
  input files.

  Arguments of the form @file are replaced by the lines of that response file.
  The expanded command line is then tokenized with CommandLineToArgvW.

  Returns a (flags, inputs, intermediate_manifest) tuple. |inputs| holds the
  .obj/.lib/.res arguments, |flags| everything else except /OUT, /PDB and
  /MANIFESTFILE, and |intermediate_manifest| the /MANIFESTFILE path ('' if
  none was given).

  Raises WindowsError if CommandLineToArgvW fails."""
  rsp_expanded = []
  for arg in argv:
    # startswith() rather than arg[0] so an empty argument cannot raise.
    if arg.startswith('@'):
      with open(arg[1:]) as rsp:
        rsp_expanded.extend(rsp.read().splitlines())
    else:
      rsp_expanded.append(arg)

  command_line = ' '.join(rsp_expanded)
  args = []
  # CommandLineToArgvW returns the path of the current executable when handed
  # an empty string, so a blank command line is treated as "no arguments".
  if command_line.strip():
    # Use CommandLineToArgvW so we match link.exe parsing.
    parse = ctypes.windll.shell32.CommandLineToArgvW
    # Declare pointer types explicitly; the default int return type would
    # truncate the result on 64-bit Python.
    parse.argtypes = [ctypes.c_wchar_p, ctypes.POINTER(ctypes.c_int)]
    parse.restype = ctypes.POINTER(ctypes.c_wchar_p)
    local_free = ctypes.windll.kernel32.LocalFree
    local_free.argtypes = [ctypes.c_void_p]
    local_free.restype = ctypes.c_void_p

    size = ctypes.c_int()
    ptr = parse(ctypes.create_unicode_buffer(command_line),
                ctypes.byref(size))
    if not ptr:
      raise ctypes.WinError()
    try:
      # Copy the strings out before the block that holds them is released.
      args = [ptr[i] for i in range(size.value)]
    finally:
      local_free(ptr)

  return _ClassifyLinkArgs(args)
def GetRegistryValue(subkey):
  """Returns the value stored for Software\\Chromium\\<subkey> in
  HKEY_CURRENT_USER, provided it names an existing path.

  Exits with an error if the key can't be read or the path doesn't exist."""
  key_path = 'Software\\Chromium\\' + subkey
  try:
    value = _winreg.QueryValue(_winreg.HKEY_CURRENT_USER, key_path)
  except WindowsError:
    pass
  else:
    if os.path.exists(value):
      return value
  raise SystemExit("Couldn't read from registry")
def GetOriginalLinkerPath():
  """Returns the path registered under 'split_link_installed'; every linker
  invocation in this file runs the executable at that path."""
  linker_path = GetRegistryValue('split_link_installed')
  return linker_path
def GetMtPath():
  """Returns the path registered under 'split_link_mt_path'; main() runs the
  executable at that path to embed manifests."""
  mt_path = GetRegistryValue('split_link_mt_path')
  return mt_path
def PartFor(input_file, description_parts, description_all):
  """Determines which part a given link input should be put into (or all).

  The lowercased |input_file| is searched with each regex. Returns -1 if any
  spec in |description_all| matches, otherwise the index of the first spec
  list in |description_parts| with a match. Raises ValueError if nothing
  matches."""
  lowered = input_file.lower()

  def matches_any(specs):
    for spec in specs:
      if re.search(spec, lowered):
        return True
    return False

  # Inputs destined for every part take precedence.
  if matches_any(description_all):
    return -1

  # Otherwise the first part that claims the input wins.
  for index, specs in enumerate(description_parts):
    if matches_any(specs):
      return index

  raise ValueError("couldn't find location for %s" % lowered)
# Styles of messages for unresolved externals, and a boolean to indicate
# whether the first character of the captured symbol is dropped (the
# leading underscore of an undecorated name).
_UNRESOLVED_REGEXES = (
    (re.compile(r' : error LNK2019: unresolved external symbol ".*" \((.*)\)'
                r' referenced in function'),
     False),
    (re.compile(r' : error LNK2001: unresolved external symbol ".*" \((.*)\)$'),
     False),
    (re.compile(r' : error LNK2019: unresolved external symbol (.*)'
                r' referenced in function '),
     True),
    (re.compile(r' : error LNK2001: unresolved external symbol (.*)$'),
     True),
)

# Summary line in which the linker reports how many externals are unresolved.
_UNRESOLVED_COUNT_REGEX = re.compile(
    r'fatal error LNK1120: (\d+) unresolved externals')


def ParseOutExternals(output):
  """Given the stdout of link.exe, parses the error messages to retrieve all
  symbols that are unresolved.

  Returns the sorted list of unique symbols.

  Exits with |output| as the message if the linker printed no LNK1120
  summary, i.e. the link failed for a reason other than unresolved externals.
  Raises AssertionError if the number of parsed symbols differs from the
  count in the LNK1120 summary."""
  result = set()
  for line in output.splitlines():
    line = line.strip()
    # Only the first matching message style is used for each line.
    for regex, strip_leading_underscore in _UNRESOLVED_REGEXES:
      mo = regex.search(line)
      if mo:
        symbol = mo.group(1)
        # NOTE(review): the first character is dropped unconditionally for
        # undecorated names; confirm it is always an underscore.
        result.add(symbol[1:] if strip_leading_underscore else symbol)
        break

  mo = _UNRESOLVED_COUNT_REGEX.search(output)
  if mo is None:
    # No summary to check against. Surface the raw linker output instead of
    # failing with an AttributeError on the missing match.
    raise SystemExit(output)

  # Make sure we have the same number that the linker thinks we have.
  expected = int(mo.group(1))
  if len(result) != expected:
    mismatch = 'Expecting %d, got %d' % (expected, len(result))
    print(output)
    print(mismatch)
    # Raised explicitly so the check also holds when asserts are disabled.
    raise AssertionError(mismatch)
  return sorted(result)
def AsCommandLineArgs(items):
  """Formats |items| for a response file: each item is wrapped in double
  quotes and the items are separated by newlines."""
  quoted = []
  for item in items:
    quoted.append('"' + item + '"')
  return '\n'.join(quoted)
def OutputNameForIndex(index):
  """Returns the name of the final DLL for the zero-based part |index|:
  chrome.dll for part 0, chrome<index>.dll for every other part."""
  return "chrome.dll" if index == 0 else 'chrome%d.dll' % index
def ManifestNameForIndex(index):
  """Returns the intermediate manifest file name for part |index|."""
  return '%s.intermediate.manifest' % OutputNameForIndex(index)
def PdbNameForIndex(index):
  """Returns the .pdb file name for part |index|."""
  return '%s.pdb' % OutputNameForIndex(index)
def RunLinker(flags, index, inputs, phase, intermediate_manifest):
  """Invokes the linker and returns the stdout, returncode and target name.

  The inputs and flags, followed by the per-part /ENTRY, /OUT, /MANIFESTFILE
  and /PDB options, are written to part<index>_<phase>.rsp, which is then
  passed to the linker."""
  rspfile = 'part%d_%s.rsp' % (index, phase)
  with open(rspfile, 'w') as f:
    output_name = OutputNameForIndex(index)
    manifest_name = ManifestNameForIndex(index)
    rsp_lines = [
        AsCommandLineArgs(inputs),
        AsCommandLineArgs(flags),
        '/ENTRY:ChromeEmptyEntry@12',
        '/OUT:' + output_name,
        '/MANIFESTFILE:' + manifest_name,
        '/PDB:' + PdbNameForIndex(index),
    ]
    f.write('\n'.join(rsp_lines) + '\n')

  linker = subprocess.Popen(
      [GetOriginalLinkerPath(), '@' + rspfile], stdout=subprocess.PIPE)
  stdout = linker.communicate()[0]

  # Hack for ninja build. After the linker runs, it does some manifest
  # things and expects there to be a file in this location. We just put it
  # there so it's happy. This is a no-op.
  wants_manifest_copy = (
      index == 0 and linker.returncode == 0 and intermediate_manifest)
  if wants_manifest_copy:
    if os.path.isdir(os.path.dirname(intermediate_manifest)):
      shutil.copyfile(manifest_name, intermediate_manifest)

  return stdout, linker.returncode, output_name
def GetLibObjList(lib):
  """Returns the lines printed by the linker's /lib /list mode for |lib|,
  i.e. the object files contained in the .lib."""
  command = [GetOriginalLinkerPath(), '/lib', '/nologo', '/list', lib]
  lister = subprocess.Popen(command, stdout=subprocess.PIPE)
  listing = lister.communicate()[0]
  return listing.splitlines()
def ExtractObjFromLib(lib, obj):
  """Extracts the .obj file |obj| contained in the .lib file |lib| into a
  newly created temp file and returns the path of that temp file. The temp
  file is not deleted here."""
  link_exe = GetOriginalLinkerPath()
  # The temp file is created only to reserve a unique name. It is closed
  # right away so the linker can write to that path.
  with tempfile.NamedTemporaryFile(
      prefix='split_link_', suffix='.obj', delete=False) as reserved:
    extracted_path = reserved.name
  extract_command = [
      link_exe, '/lib', '/nologo', '/extract:' + obj, lib,
      '/out:' + extracted_path]
  subprocess.check_call(extract_command)
  return extracted_path
def Unmangle(export):
  """Returns the human-presentable name of a mangled symbol.

  Raises WindowsError if the symbol can't be undecorated."""
  # Use dbghelp.dll to demangle the name.
  # TODO(scottmg): Perhaps a simple cache? Seems pretty fast though.
  undecorate = ctypes.windll.dbghelp.UnDecorateSymbolName
  capacity = 2048
  demangled = ctypes.create_string_buffer(capacity)
  succeeded = undecorate(export, ctypes.byref(demangled), capacity, 0)
  if not succeeded:
    raise ctypes.WinError()
  return demangled.value
def IsDataDefinition(export):
  """Determines if a given name is data rather than a function. Always returns
  False for C-style (as opposed to C++-style) names, including an empty
  name."""
  # Only names that start with '?' are demangled; startswith() also copes
  # with an empty string, where export[0] would raise.
  if not export.startswith('?'):
    return False
  # If it contains a '(' we assume it's a function.
  return '(' not in Unmangle(export)
def GenerateDefFiles(unresolved_by_part):
  """Writes one part<N>.def file per part and returns the list of file names.

  Each file exports the symbols that the *other* parts reported as
  unresolved. Data symbols are tagged DATA unless IGNORE_DATA is set."""
  Log('generating .def files')
  deffiles = []
  for index, _ in enumerate(unresolved_by_part):
    deffile = 'part%d.def' % index
    with open(deffile, 'w') as f:
      f.write('LIBRARY %s\n' % OutputNameForIndex(index))
      f.write('EXPORTS\n')
      for other_index, exports in enumerate(unresolved_by_part):
        # A part doesn't export what it failed to resolve itself.
        if other_index == index:
          continue
        entries = []
        for export in exports:
          if IsDataDefinition(export) and not IGNORE_DATA:
            suffix = ' DATA'
          else:
            suffix = ''
          entries.append(' ' + export + suffix)
        f.write('\n'.join(entries) + '\n')
    deffiles.append(deffile)
  return deffiles
def BuildImportLibs(flags, inputs_by_part, deffiles):
  """Runs the linker once per part with /IMPLIB and /DEF added, and returns
  the list of part<N>.lib names. The linker's result is not inspected."""
  Log('building import libs')
  import_libs = []
  for index, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    libfile = 'part%d.lib' % index
    implib_options = ['/IMPLIB:%s' % libfile, '/DEF:%s' % deffile]
    RunLinker(flags + implib_options, index, inputs, 'implib', None)
    import_libs.append(libfile)
  return import_libs
def AttemptLink(flags, inputs_by_part, unresolved_by_part, deffiles,
                import_libs, intermediate_manifest):
  """Tries to run the linker for all parts using the current round of
  generated import libs and .def files. If the link fails, updates the
  unresolved externals list per part.

  Returns (all_succeeded, dlls, combined_externals). |dlls| holds the output
  name of every part. |combined_externals| is, per part, the sorted union of
  the previous unresolved symbols and those reported by this round."""
  dlls = []
  all_succeeded = True
  new_externals = []
  Log('unresolveds now: %r' % [len(part) for part in unresolved_by_part])
  for i, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    Log('running link, part %d' % i)
    # Link against the import libs of every other part that has one so far.
    others_implibs = [
        lib for j, lib in enumerate(import_libs) if j != i and lib]
    inputs_with_implib = inputs + others_implibs
    # Build the flags for this part only. Extending |flags| itself would leak
    # this part's /DEF (and a duplicate /LTCG) into the following parts.
    part_flags = list(flags)
    if deffile:
      part_flags += ['/DEF:%s' % deffile, '/LTCG']
    stdout, rc, output = RunLinker(
        part_flags, i, inputs_with_implib, 'final', intermediate_manifest)
    if rc != 0:
      all_succeeded = False
      new_externals.append(ParseOutExternals(stdout))
    else:
      new_externals.append([])
    dlls.append(output)
  combined_externals = [sorted(set(prev) | set(new))
                        for prev, new in zip(unresolved_by_part, new_externals)]
  return all_succeeded, dlls, combined_externals
def ExtractSubObjsTargetedAtAll(
    inputs,
    num_parts,
    description_parts,
    description_all,
    description_all_from_libs):
  """For each (lib, obj) regex pair in the all_from_libs section, extracts
  the matching objs out of the matching libs. Returns a list with one list
  per part, holding the obj names that belong in that part (which is
  whichever part the .lib isn't in).

  Exits if a matching lib is already in all parts, or if a lib matched but
  none of its objs did."""
  by_parts = [[] for _ in range(num_parts)]
  for lib_spec, obj_spec in description_all_from_libs:
    for input_file in inputs:
      if not re.search(lib_spec, input_file):
        continue
      matched = 0
      for obj in GetLibObjList(input_file):
        if not re.search(obj_spec, obj, re.I):
          continue
        # NOTE(review): the path returned by the extraction is not used; the
        # obj name as listed in the lib is what gets recorded below. Confirm
        # that this is intended.
        ExtractObjFromLib(input_file, obj)
        lib_part = PartFor(input_file, description_parts, description_all)
        if lib_part == -1:
          raise SystemExit(
              '%s is already in all parts, but matched '
              '%s in all_from_libs' % (input_file, obj))
        # With exactly two parts, "the other part" is simply 1 - lib_part.
        assert num_parts == 2, "Can't handle > 2 dlls currently"
        by_parts[1 - lib_part].append(obj)
        matched += 1
      if matched == 0:
        raise SystemExit(
            '%s, %s matched a lib, but no objs' % (lib_spec, obj_spec))
  return by_parts
def main():
  """Splits the link described by the command line into two DLLs.

  Inputs are distributed over the parts as described by
  build/split_link_partition.py. The parts are then linked for up to five
  passes, regenerating .def files and import libs from the unresolved
  externals after each failed pass. On success the manifests are embedded
  into the DLLs. Returns 0 on success, 1 if the parts still don't link after
  the last pass."""
  flags, inputs, intermediate_manifest = GetFlagsAndInputs(sys.argv[1:])

  partition_file = os.path.normpath(
      os.path.join(BASE_DIR, '../../../build/split_link_partition.py'))
  with open(partition_file) as partition:
    # NOTE(review): eval() runs whatever the partition file contains; this is
    # only acceptable while that file is trusted.
    description = eval(partition.read())
  description_parts = description['parts']

  # We currently assume that if a symbol isn't in dll 0, then it's in dll 1
  # when generating def files. Otherwise, we'd need to do more complex things
  # to figure out where each symbol actually is to assign it to the correct
  # .def file.
  num_parts = len(description_parts)
  assert num_parts == 2, "Can't handle > 2 dlls currently"

  # The part descriptions are matched in reverse order; every per-part list
  # built from them is reversed again afterwards.
  description_parts.reverse()
  objs_from_libs = ExtractSubObjsTargetedAtAll(
      inputs,
      num_parts,
      description_parts,
      description['all'],
      description['all_from_libs'])
  objs_from_libs.reverse()

  inputs_by_part = [[] for _ in range(num_parts)]
  for input_file in inputs:
    part_index = PartFor(input_file, description_parts, description['all'])
    if part_index == -1:
      for part_inputs in inputs_by_part:
        part_inputs.append(input_file)
    else:
      inputs_by_part[part_index].append(input_file)
  inputs_by_part.reverse()

  # Put the subobjs on to the main list.
  for part_index, sub_objs in enumerate(objs_from_libs):
    Log('%d sub .objs added to part %d' % (len(sub_objs), part_index))
    inputs_by_part[part_index].extend(sub_objs)

  unresolved_by_part = [[] for _ in range(num_parts)]
  import_libs = [None] * num_parts
  deffiles = [None] * num_parts

  data_exports = 0
  for pass_number in range(5):
    Log('--- starting pass %d' % pass_number)
    ok, dlls, unresolved_by_part = AttemptLink(
        flags, inputs_by_part, unresolved_by_part, deffiles, import_libs,
        intermediate_manifest)
    if ok:
      break
    # Report the data exports of this round, then prepare the next one.
    data_exports = 0
    for part_index, exports in enumerate(unresolved_by_part):
      for export in exports:
        if IsDataDefinition(export):
          print('part %d contains data export: %s (aka %s)' % (
              part_index, Unmangle(export), export))
          data_exports += 1
    deffiles = GenerateDefFiles(unresolved_by_part)
    import_libs = BuildImportLibs(flags, inputs_by_part, deffiles)
  else:
    # Every pass failed.
    if data_exports and not IGNORE_DATA:
      print('%d data exports found, see report above.' % data_exports)
      print('These cannot be exported, and must be either duplicated to the '
            'target DLL (if constant), or wrapped in a function.')
    return 1

  mt_exe = GetMtPath()
  for dll_index, dll in enumerate(dlls):
    Log('embedding manifest in %s' % dll)
    mt_command = [
        mt_exe, '-nologo', '-manifest',
        ManifestNameForIndex(dll_index),
        description['manifest'],
        '-outputresource:%s;2' % dll,
    ]
    subprocess.check_call(mt_command)
  Log('built %r' % dlls)

  return 0
# Run only when executed as a script; main()'s result is the exit status.
if __name__ == '__main__':
  raise SystemExit(main())