igor.erl | searchcode

/lib/syntax_tools/src/igor.erl

https://github.com/bmizerany/jungerl · Erlang · 3033 lines · 1739 code · 281 blank · 1013 comment · 56 complexity · ff484948db07f89ac55ea1732e1dd4a7 MD5 · raw file
Large files are truncated click here to view the full file

%% =====================================================================
%% Igor, the Module Merger
%%
%% Copyright (C) 1998-2001 Richard Carlsson
%%
%% This library is free software; you can redistribute it and/or modify
%% it under the terms of the GNU Lesser General Public License as
%% published by the Free Software Foundation; either version 2 of the
%% License, or (at your option) any later version.
%%
%% This library is distributed in the hope that it will be useful, but
%% WITHOUT ANY WARRANTY; without even the implied warranty of
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
%% Lesser General Public License for more details.
%%
%% You should have received a copy of the GNU Lesser General Public
%% License along with this library; if not, write to the Free Software
%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
%% USA
%%
%% Author contact: richardc@csd.uu.se
%%
%% $Id$
%%
%% =====================================================================
%%
%% @doc Igor: the Module Merger and Renamer.
%%
%% <p>The program Igor merges the source code of one or more Erlang
%% modules into a single module, which can then replace the original set
%% of modules. Igor is also able to rename a set of (possibly
%% interdependent) modules, without joining them into a single
%% module.</p>
%%
%% <p>The main user interface consists of the functions <a
%% href="#merge-3"><code>merge</code></a> and <a
%% href="#rename-3"><code>rename</code></a>. See also the <a
%% href="#parse_transform-2"><code>parse_transform</code></a>
%% function.</p>
%%
%% <p>A note of warning: Igor cannot do anything about the case when the
%% name of a remote function is passed to the built-in functions
%% <code>apply</code> and <code>spawn</code> <em>unless</em> the module
%% and function names are explicitly stated in the call, as in e.g.
%% <code>apply(lists, reverse, [Xs])</code>. In all other cases, Igor
%% leaves such calls unchanged, and warns the user that manual editing
%% might be necessary.</p>
%%
%% <p>Also note that Erlang records will be renamed as necessary to
%% avoid non-equivalent definitions using the same record name. This
%% does not work if the source code accesses the name field of such
%% record tuples by <code>element/2</code> or similar methods. Always
%% use the record syntax to handle record tuples, if possible.</p>
%%
%% <p>Disclaimer: the author of this program takes no responsibility for
%% the correctness of the produced output, or for any effects of its
%% execution. In particular, the author may not be held responsible
%% should Igor include the code of a deceased madman in the result.</p>
%%
%% <p>For further information on Igors in general, see e.g. "Young
%% Frankenstein", Mel Brooks, 1974, and "The Fifth Elephant", Terry
%% Pratchett, 1999.</p>
%% @end
%% 
%% =====================================================================
%%
%% This program is named after the character Igor, assistant to Dr.
%% Frankenstein, in the 1939 film "Son of Frankenstein" (with Boris
%% Karloff playing The Monster for the last time; Igor was played by
%% Bela Lugosi). Igor's job (in the film) was mainly to bring reasonably
%% fresh parts of various human corpses to the good Doctor, for his
%% purpose of reanimating them in the shape of a new formidable, living
%% creature.
%%
%% Merging code is done by joining the sources, possibly changing the
%% order of declarations as necessary, renaming functions and records to
%% avoid name clashes, and changing remote calls to local calls where
%% possible. Stub modules may be automatically generated to redirect any
%% calls that still use the old names. Indirectly, code merging can be
%% used to simply rename a set of modules.
%%
%% What Igor does not do is to optimise the resulting code, which
%% typically can benefit from techniques such as inlining, constant
%% folding, specialisation, etc. This task is left to the Doctor.
%% (Luckily, Igor can call on Inga to do some cleanup; cf. 'erl_tidy'.)

%% FIXME: don't remove module qualifier if name is (auto-)imported!
%% TODO: handle merging of parameterized modules (somehow).
%% TODO: check for redefinition of macros; check equivalence; comment out.
%% TODO: {export, [E]}, E = atom() | {atom(), atom(), integer()}.
%% TODO: improve documentation. 
%% TODO: optionally rename all functions from specified (or all) modules.

-module(igor).

-export([create_stubs/2, merge/2, merge/3, merge_files/3, merge_files/4,
	 merge_sources/3, parse_transform/2, rename/2, rename/3]).

-include_lib("kernel/include/file.hrl").


%% =====================================================================
%% Global Constants

%% Text fragments. NOTE(review): these look like the prefixes and
%% separators used for comments that Igor writes into the generated
%% code; their uses are outside this part of the file - confirm.
-define(NOTE_HEADER, "Note from Igor: ").
-define(COMMENT_PREFIX, "% ").
-define(COMMENT_BAR,
	"======================="
	"======================="
	"=======================").
-define(NOTE_PREFIX, "%! ").
-define(KILL_PREFIX, "%<<< ").
%% Defaults for the `includes' and `macros' options; appended to the
%% option list by merge_files/4.
-define(DEFAULT_INCLUDES, ["."]).
-define(DEFAULT_MACROS, []).
%% Defaults for the `suffix', `backup_suffix', `dir' and `stub_dir'
%% options; see ?DEFAULT_MERGE_OPTS.
-define(DEFAULT_SUFFIX, ".erl").
-define(DEFAULT_BACKUP_SUFFIX, ".bak").
-define(DEFAULT_DIR, "").
-define(DEFAULT_STUB_DIR, "stubs").
%% NOTE(review): presumably the options handed to `erl_tidy' when the
%% `tidy' option is enabled - confirm against the use site.
-define(TIDY_OPTS, [quiet]).

%% Wraps a record name R as the tagged tuple {record, R}. This may also
%% be used in patterns. R must not be an integer, i.e., the structure
%% must be distinct from function names, which are represented as
%% {atom(), int()} pairs (cf. the `functions' field of #module{}).

-define(record_name(R), {record, R}).


%% Data structure for module information. One such record describes a
%% single input module; it is what get_module_info/1 returns (see
%% parse_transform/2 and merge_sources/3). The type of each field is
%% given in the comment to its right.

-record(module, {name,		% = atom()
		 vars = none,	% = [atom()] | none
		 functions,	% = ordset({atom(), int()})
		 exports,	% = ordset({atom(), int()})
				% | ordset({{atom(), int()},
				%	    term()})
		 aliases,	% = ordset({{atom(), int()},
				%	    {atom(),
				%	     {atom(), int()}}})
		 attributes,	% = ordset({atom(), term()})
		 records	% = [{atom(), [{atom(), term()}]}]
		}).

%% Pretty-printer used when no `printer' option is supplied (see
%% ?DEFAULT_MERGE_OPTS): formats the syntax tree with the standard
%% `erl_prettypr' module, passing the option list through unchanged.

default_printer(SyntaxTree, PrinterOpts) ->
    Text = erl_prettypr:format(SyntaxTree, PrinterOpts),
    Text.


%% =====================================================================
%% @spec parse_transform(Forms::[syntaxTree()], Options::[term()]) ->
%%           [syntaxTree()]
%%
%%         syntaxTree() = erl_syntax:syntaxTree()
%%
%% @doc Allows Igor to work as a component of the Erlang compiler.
%% Including the term <code>{parse_transform, igor}</code> in the
%% compile options when compiling an Erlang module (cf.
%% <code>compile:file/2</code>), will call upon Igor to process the
%% source code, allowing automatic inclusion of other source files. No
%% files are created or overwritten when this function is used.
%%
%% <p>Igor will look for terms <code>{igor, List}</code> in the compile
%% options, where <code>List</code> is a list of Igor-specific options,
%% as follows:
%% <dl>
%%  <dt><code>{files, [filename()]}</code></dt>
%%    <dd>The value specifies a list of source files to be merged with
%%    the file being compiled; cf. <code>merge_files/4</code>.</dd>
%% </dl>
%%
%% See <code>merge_files/4</code> for further options. Note, however,
%% that some options are preset by this function and cannot be
%% overridden by the user; in particular, all cosmetic features are
%% turned off, for efficiency. Preprocessing is turned on.</p>
%%
%% @see merge_files/4
%% @see compile:file/2

parse_transform(Forms, Options) ->
    %% The module being compiled is both the target of the merge and
    %% the (only) module whose interface is exported.
    Target = (get_module_info(Forms))#module.name,
    %% Igor-specific settings are given as `{igor, List}' terms in the
    %% compiler options; the files to merge in are listed under `files'.
    IgorOpts = proplists:append_values(igor, Options),
    ExtraFiles = proplists:append_values(files, IgorOpts),
    %% Everything that is purely cosmetic is switched off, and `file'
    %% attributes are preserved. The preset entries are placed in front
    %% of the user's options so that they cannot be overridden.
    Preset = [{comments, false},
              {notes, no},
              {no_imports, true},
              {file_attributes, yes},
              {preprocess, true},
              {export, [Target]}],
    MergeOpts = Preset ++ IgorOpts,
    {Merged, _Stubs} = merge_files(Target, [Forms], ExtraFiles, MergeOpts),
    verbose("done.", MergeOpts),
    erl_syntax:revert_forms(Merged).


%% =====================================================================
%% @spec merge(Name::atom(), Files::[filename()]) -> [filename()]
%% @equiv merge(Name, Files, [])

merge(Name, Files) ->
    %% No user options: merge/3 supplies all the defaults.
    NoOptions = [],
    merge(Name, Files, NoOptions).

%% =====================================================================
%% @spec merge(Name::atom(), Files::[filename()], Options::[term()]) ->
%%           [filename()]
%%
%%	    filename() = file:filename()
%%
%% @doc Merges source code files to a single file. <code>Name</code>
%% specifies the name of the resulting module - not the name of the
%% output file. <code>Files</code> is a list of file names and/or module
%% names of source modules to be read and merged (see
%% <code>merge_files/4</code> for details). All the input modules must
%% be distinctly named.
%%
%% <p>The resulting source code is written to a file named
%% "<code><em>Name</em>.erl</code>" in the current directory, unless
%% otherwise specified by the options <code>dir</code> and
%% <code>outfile</code> described below.</p>
%%
%% <p>Examples:
%% <ul>
%%   <li>given a module <code>m</code> in file "<code>m.erl</code>"
%%   which uses the standard library module <code>lists</code>, calling
%%   <code>igor:merge(m, [m, lists])</code> will create a new file
%%   "<code>m.erl</code>" which contains the code from <code>m</code> and
%%   exports the same functions, and which includes the referenced code
%%   from the <code>lists</code> module. The original file will be
%%   renamed to "<code>m.erl.bak</code>".</li>
%%
%%   <li>given modules <code>m1</code> and <code>m2</code>, in
%%   corresponding files, calling <code>igor:merge(m, [m1, m2])</code>
%%   will create a file "<code>m.erl</code>" which contains the code
%%   from <code>m1</code> and <code>m2</code> and exports the functions
%%   of <code>m1</code>.</li>
%% </ul></p>
%%
%% <p>Stub module files are created for those modules that are to be
%% exported by the target module (see options <code>export</code>,
%% <code>stubs</code> and <code>stub_dir</code>).</p>
%%
%% <p>The function returns the list of file names of all created
%% modules, including any automatically created stub modules. The file
%% name of the target module is always first in the list.</p>
%%
%% <p>Note: If you get a "syntax error" message when trying to merge
%% files (and you know those files to be correct), then try the
%% <code>preprocess</code> option. It typically means that your code
%% contains too strange macros to be handled without actually performing
%% the preprocessor expansions.</p>
%% 
%% <p>Options:
%% <dl>
%%   <dt><code>{backup_suffix, string()}</code></dt>
%%
%%     <dd>Specifies the file name suffix to be used when a backup file
%%     is created; the default value is <code>".bak"</code>.</dd>
%%
%%   <dt><code>{backups, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, existing files will be
%%     renamed before new files are opened for writing. The new names
%%     are formed by appending the string given by the
%%     <code>backup_suffix</code> option to the original name. The
%%     default value is <code>true</code>.</dd>
%%
%%   <dt><code>{dir, filename()}</code></dt>
%%
%%     <dd>Specifies the name of the directory in which the output file
%%     is to be written. An empty string is interpreted as the current
%%     directory. By default, the current directory is used.</dd>
%%
%%   <dt><code>{outfile, filename()}</code></dt>
%%
%%     <dd>Specifies the name of the file (without suffix) to which the
%%     resulting source code is to be written. By default, this is the
%%     same as the <code>Name</code> argument.</dd>
%%
%%   <dt><code>{preprocess, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, preprocessing will be done
%%     when reading the source code. See <code>merge_files/4</code> for
%%     details.</dd>
%%
%%   <dt><code>{printer, Function}</code></dt>
%%     <dd><ul>
%%       <li><code>Function = (syntaxTree()) -> string()</code></li>
%%     </ul>
%%     Specifies a function for prettyprinting Erlang syntax trees.
%%     This is used for outputting the resulting module definition, as
%%     well as for creating stub files. The function is assumed to
%%     return formatted text for the given syntax tree, and should raise
%%     an exception if an error occurs. The default formatting function
%%     calls <code>erl_prettypr:format/2</code>.</dd>
%%
%%   <dt><code>{stub_dir, filename()}</code></dt>
%%
%%     <dd>Specifies the name of the directory to which any generated
%%     stub module files are written. The default value is
%%     <code>"stubs"</code>.</dd>
%%
%%   <dt><code>{stubs, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, stub module files will be
%%     automatically generated for all exported modules that do not have
%%     the same name as the target module. The default value is
%%     <code>true</code>.</dd>
%%
%%   <dt><code>{suffix, string()}</code></dt>
%%
%%     <dd>Specifies the suffix to be used for the output file names;
%%     the default value is <code>".erl"</code>.</dd>
%% </dl>
%%
%% See <code>merge_files/4</code> for further options.</p>
%%
%% @see merge/2
%% @see merge_files/4

%% The defaults for 'merge' are also used for 'create_stubs'.
%%
%% merge/3 appends this list to the options given by the caller, so an
%% entry here only takes effect for single-valued lookups when the
%% caller has not supplied that option. The bare atoms `backups' and
%% `stubs' are the usual property-list shorthand for `{backups, true}'
%% and `{stubs, true}'.

-define(DEFAULT_MERGE_OPTS,
	[{backup_suffix, ?DEFAULT_BACKUP_SUFFIX},
	 backups, 
	 {dir, ?DEFAULT_DIR},
	 {printer, fun default_printer/2},
	 {stub_dir, ?DEFAULT_STUB_DIR},
	 stubs,
	 {suffix, ?DEFAULT_SUFFIX},
	 {verbose, false}]).

merge(Name, Files, Opts) ->
    %% User options go first, so they win over the defaults.
    AllOpts = Opts ++ ?DEFAULT_MERGE_OPTS,
    %% Where the merged module is written; the file name defaults to
    %% the name of the target module.
    OutDir = proplists:get_value(dir, AllOpts, ""),
    OutName = proplists:get_value(outfile, AllOpts, Name),
    {Tree, Stubs} = merge_files(Name, Files, AllOpts),
    TargetFile = write_module(Tree, OutName, OutDir, AllOpts),
    StubFiles = maybe_create_stubs(Stubs, AllOpts),
    %% The file of the target module is always first in the result.
    [TargetFile | StubFiles].


%% =====================================================================
%% @spec merge_files(Name::atom(), Files::[filename()],
%%                   Options::[term()]) ->
%%           {syntaxTree(), [stubDescriptor()]}
%% @equiv merge_files(Name, [], Files, Options)

merge_files(Name, Files, Options) ->
    %% No pre-parsed sources: everything is read from the listed files.
    NoTrees = [],
    merge_files(Name, NoTrees, Files, Options).


%% =====================================================================
%% @spec merge_files(Name::atom(), Sources::[Forms],
%%                   Files::[filename()], Options::[term()]) ->
%%           {syntaxTree(), [stubDescriptor()]}
%%     Forms = syntaxTree() | [syntaxTree()]
%%
%% @doc Merges source code files and syntax trees to a single syntax
%% tree. This is a file-reading front end to
%% <code>merge_sources/3</code>. <code>Name</code> specifies the name of
%% the resulting module - not the name of the output file.
%% <code>Sources</code> is a list of syntax trees and/or lists of
%% "source code form" syntax trees, each entry representing a module
%% definition. <code>Files</code> is a list of file names and/or module
%% names of source modules to be read and included. All the input
%% modules must be distinctly named.
%%
%% <p>If a name in <code>Files</code> is not the name of an existing
%% file, Igor assumes it represents a module name, and tries to locate
%% and read the corresponding source file. The parsed files are appended
%% to <code>Sources</code> and passed on to
%% <code>merge_sources/3</code>, i.e., entries in <code>Sources</code>
%% are listed before entries read from files.</p>
%%
%% <p>If no exports are listed by an <code>export</code> option (see
%% <code>merge_sources/3</code> for details), then if <code>Name</code>
%% is also the name of one of the input modules, that module will be
%% exported; otherwise, the first listed module will be exported. Cf.
%% the examples under <code>merge/3</code>.</p>
%%
%% <p>The result is a pair <code>{Tree, Stubs}</code>, where
%% <code>Tree</code> represents the source code that is the result of
%% merging all the code in <code>Sources</code> and <code>Files</code>,
%% and <code>Stubs</code> is a list of stub module descriptors (see
%% <code>merge_sources/3</code> for details).</p>
%%
%% <p>Options:
%% <dl>
%%   <dt><code>{comments, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, source code comments in
%%     the original files will be preserved in the output. The default
%%     value is <code>true</code>.</dd>
%%
%%   <dt><code>{find_src_rules, [{string(), string()}]}</code></dt>
%%
%%     <dd>Specifies a list of rules for associating object files with
%%     source files, to be passed to the function
%%     <code>filename:find_src/2</code>. This can be used to change the
%%     way Igor looks for source files. If this option is not specified,
%%     the default system rules are used. The first occurrence of this
%%     option completely overrides any later in the option list.</dd>
%%
%%   <dt><code>{includes, [filename()]}</code></dt>
%%
%%     <dd>Specifies a list of directory names for the Erlang
%%     preprocessor, if used, to search for include files (cf. the
%%     <code>preprocess</code> option). The default value is the empty
%%     list. The directory of the source file and the current directory
%%     are automatically appended to the list.</dd>
%%
%%   <dt><code>{macros, [{atom(), term()}]}</code></dt>
%%
%%     <dd>Specifies a list of "pre-defined" macro definitions for the
%%     Erlang preprocessor, if used (cf. the <code>preprocess</code>
%%     option). The default value is the empty list.</dd>
%%
%%   <dt><code>{preprocess, bool()}</code></dt>
%%
%%     <dd>If the value is <code>false</code>, Igor will read source
%%     files without passing them through the Erlang preprocessor
%%     (<code>epp</code>), in order to avoid expansion of preprocessor
%%     directives such as <code>-include(...).</code>,
%%     <code>-define(...).</code> and <code>-ifdef(...)</code>, and
%%     macro calls such as <code>?LINE</code> and <code>?MY_MACRO(x,
%%     y)</code>. The default value is <code>false</code>, i.e.,
%%     preprocessing is not done. (See the module
%%     <code>epp_dodger</code> for details.)
%%
%%     <p>Notes: If a file contains too exotic definitions or uses of
%%     macros, it will not be possible to read it without preprocessing.
%%     Furthermore, Igor does not currently try to sort out multiple
%%     inclusions of the same file, or redefinitions of the same macro
%%     name. Therefore, when preprocessing is turned off, it may become
%%     necessary to edit the resulting source code, removing such
%%     re-inclusions and redefinitions.</p></dd>
%% </dl>
%%
%% See <code>merge_sources/3</code> for further options.</p>
%%
%% @see merge/3
%% @see merge_files/3
%% @see merge_sources/3
%% @see filename:find_src/2
%% @see epp_dodger

merge_files(Name, Trees, Files, Opts) ->
    case Files of
        [] ->
            %% An empty file list is rejected, whatever Trees contains.
            report_error("no files to merge."),
            exit(badarg);
        _ ->
            %% Defaults are appended, so options from the caller take
            %% precedence.
            Defaults = [{includes, ?DEFAULT_INCLUDES},
                        {macros, ?DEFAULT_MACROS},
                        {preprocess, false},
                        comments],
            Opts1 = Opts ++ Defaults,
            Parsed = [read_module(File, Opts1) || File <- Files],
            %% Pre-parsed sources are listed before those read from
            %% files.
            merge_sources(Name, Trees ++ Parsed, Opts1)
    end.


%% =====================================================================
%% @spec merge_sources(Name::atom(), Sources::[Forms],
%%                     Options::[term()]) ->
%%           {syntaxTree(), [stubDescriptor()]}
%%
%%     Forms = syntaxTree() | [syntaxTree()]
%%
%% @type stubDescriptor() = [{ModuleName, Functions, [Attribute]}]
%%	    ModuleName = atom()
%%	    Functions = [{FunctionName, {ModuleName, FunctionName}}]
%%	    FunctionName = {atom(), integer()}
%%	    Attribute = {atom(), term()}.
%%
%%      A stub module descriptor contains the module name, a list of
%%      exported functions, and a list of module attributes. Each
%%      function is described by its name (which includes its arity),
%%      and the corresponding module and function that it calls. (The
%%      arities should always match.) The attributes are simply
%%      described by key-value pairs.
%%
%% @doc Merges syntax trees to a single syntax tree. This is the main
%% code merging "engine". <code>Name</code> specifies the name of the
%% resulting module. <code>Sources</code> is a list of syntax trees of
%% type <code>form_list</code> and/or lists of "source code form" syntax
%% trees, each entry representing a module definition. All the input
%% modules must be distinctly named.
%%
%% <p>Unless otherwise specified by the options, all modules are assumed
%% to be at least "static", and all except the target module are assumed
%% to be "safe". See the <code>static</code> and <code>safe</code>
%% options for details.</p>
%%
%% <p>If <code>Name</code> is also the name of one of the input modules,
%% the code from that module will occur at the top of the resulting
%% code, and no extra "header" comments will be added. In other words,
%% the look of that module will be preserved.</p>
%%
%% <p>The result is a pair <code>{Tree, Stubs}</code>, where
%% <code>Tree</code> represents the source code that is the result of
%% merging all the code in <code>Sources</code>, and <code>Stubs</code>
%% is a list of stub module descriptors (see below).</p>
%%
%% <p><code>Stubs</code> contains one entry for each exported input
%% module (cf. the <code>export</code> option), each entry describing a
%% stub module that redirects calls of functions in the original module
%% to the corresponding (possibly renamed) functions in the new module.
%% The stub descriptors can be used to automatically generate stub
%% modules; see <code>create_stubs/2</code>.</p>
%%
%% <p>Options:
%% <dl>
%%   <dt><code>{export, [atom()]}</code></dt>
%%
%%     <dd>Specifies a list of names of input modules whose interfaces
%%     should be exported by the output module. A stub descriptor is
%%     generated for each specified module, unless its name is
%%     <code>Name</code>. If no modules are specified, then if
%%     <code>Name</code> is also the name of an input module, that
%%     module will be exported; otherwise the first listed module in
%%     <code>Sources</code> will be exported. The default value is the
%%     empty list.</dd>
%%
%%   <dt><code>{export_all, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, this is equivalent to
%%     listing all of the input modules in the <code>export</code>
%%     option. The default value is <code>false</code>.</dd>
%%
%% <dt><code>{file_attributes, Preserve}</code></dt>
%%     <dd><ul>
%%       <li><code>Preserve = yes | comment | no</code></li>
%%     </ul>
%%     If the value is <code>yes</code>, all file attributes
%%     <code>-file(...)</code> in the input sources will be preserved in
%%     the resulting code. If the value is <code>comment</code>, they
%%     will be turned into comments, but remain in their original
%%     positions in the code relative to the other source code forms. If
%%     the value is <code>no</code>, all file attributes will be removed
%%     from the code, unless they have attached comments, in which case
%%     they will be handled as in the <code>comment</code> case. The
%%     default value is <code>no</code>.</dd>
%%
%% <dt><code>{no_banner, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, no banner comment will be
%%     added at the top of the resulting module, even if the target
%%     module does not have the same name as any of the input modules.
%%     Instead, Igor will try to preserve the look of the module whose
%%     code is at the top of the output. The default value is
%%     <code>false</code>.</dd>
%%
%% <dt><code>{no_headers, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, no header comments will be
%%     added to the resulting module at the beginning of each section of
%%     code that originates from a particular input module. The default
%%     value is <code>false</code>, which means that section headers are
%%     normally added whenever two or more modules are merged.</dd>
%%
%% <dt><code>{no_imports, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, all
%%     <code>-import(...)</code> declarations in the original code will
%%     be expanded in the result; otherwise, as much as possible of the
%%     original import declarations will be preserved. The default value
%%     is <code>false</code>.</dd>
%%
%% <dt><code>{notes, Notes}</code></dt>
%%     <dd><ul>
%%       <li><code>Notes = always | yes | no</code></li>
%%     </ul>
%%     If the value is <code>yes</code>, comments will be inserted where
%%     important changes have been made in the code. If the value is
%%     <code>always</code>, <em>all</em> changes to the code will be
%%     commented. If the value is <code>no</code>, changes will be made
%%     without comments. The default value is <code>yes</code>.</dd>
%%
%% <dt><code>{redirect, [{atom(), atom()}]}</code></dt>
%%
%%     <dd>Specifies a list of pairs of module names, representing a
%%     mapping from old names to new. <em>The set of old names may not
%%     include any of the names of the input modules.</em> All calls to
%%     the listed old modules will be rewritten to refer to the
%%     corresponding new modules. <em>The redirected calls will not be
%%     further processed, even if the new destination is in one of the
%%     input modules.</em> This option mainly exists to support module
%%     renaming; cf. <code>rename/3</code>. The default value is the
%%     empty list.</dd>
%%
%% <dt><code>{safe, [atom()]}</code></dt>
%%
%%     <dd>Specifies a list of names of input modules such that calls to
%%     these "safe" modules may be turned into direct local calls, that
%%     do not test for code replacement. Typically, this can be done for
%%     e.g. standard library modules. If a module is "safe", it is per
%%     definition also "static" (cf. below). The list may be empty. By
%%     default, all involved modules <em>except the target module</em>
%%     are considered "safe".</dd>
%%
%% <dt><code>{static, [atom()]}</code></dt>
%%
%%     <dd>Specifies a list of names of input modules which will be
%%     assumed never to be replaced (reloaded) unless the target module
%%     is also first replaced. The list may be empty. The target module
%%     itself (which may also be one of the input modules) is always
%%     regarded as "static", regardless of the value of this option. By
%%     default, all involved modules are assumed to be static.</dd>
%%
%% <dt><code>{tidy, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, the resulting code will be
%%     processed using the <code>erl_tidy</code> module, which removes
%%     unused functions and does general code cleanup. (See
%%     <code>erl_tidy:module/2</code> for additional options.) The
%%     default value is <code>true</code>.</dd>
%%
%% <dt><code>{verbose, bool()}</code></dt>
%%
%%     <dd>If the value is <code>true</code>, progress messages will be
%%     output while the program is running; the default value is
%%     <code>false</code>.</dd>
%% </dl></p>
%%
%% <p>Note: The distinction between "static" and "safe" modules is
%% necessary in order not to break the semantics of dynamic code
%% replacement. A "static" source module will not be replaced unless the
%% target module also is. Now imagine a state machine implemented by
%% placing the code for each state in a separate module, and suppose
%% that we want to merge this into a single target module, marking all
%% source modules as static. At each point in the original code where a
%% call is made from one of the modules to another (i.e., the state
%% transitions), code replacement is expected to be detected. Then, if
%% we in the merged code do not check at these points if the
%% <em>target</em> module (the result of the merge) has been replaced,
%% we can not be sure in general that we will be able to do code
%% replacement of the merged state machine - it could run forever
%% without detecting the code change. Therefore, all such calls must
%% remain remote-calls (detecting code changes), but may call the target
%% module directly.</p>
%%
%% <p>If we are sure that this kind of situation cannot ensue, we may
%% specify the involved modules as "safe", and all calls between them
%% will become local. Note that if the target module itself is specified
%% as safe, "remote" calls to itself will be turned into local calls.
%% This would destroy the code replacement properties of e.g. a typical
%% server loop.</p>
%%
%% @see create_stubs/2
%% @see rename/3
%% @see erl_tidy:module/2

%% Currently, there is no run-time support in Erlang for detecting
%% whether some module has been changed since the current module was
%% loaded. Therefore, if a source module is specified as non-static, not
%% much will be gained from merging: a call to a non-static module will
%% remain a remote call using the old module name, even when it is
%% performed from within the merged code. If that module is specified as
%% exported, the old name could then refer to an auto-generated stub,
%% redirecting the call back to the corresponding function in the target
%% module. This could possibly be useful in some cases, but efficiency
%% is not improved by such a transformation. If support for efficient
%% testing for module updates is added to Erlang in future versions,
%% code merging will be able to use local calls even for non-static
%% source modules, opening the way for compiler optimisations over the
%% module boundaries.

%% Data structure for merging environment. The type of each field is
%% given in the comment to its right.

-record(merge, {target,		% = atom()
		sources,	% = ordset(atom())
		export,		% = ordset(atom())
		static,		% = ordset(atom())
		safe,		% = ordset(atom())
		preserved,	% = bool()
		no_headers,	% = bool()
		notes,		% = always | yes | no (not bool();
				%   presumably the value of the
				%   `notes' option - TODO confirm)
		redirect,	% = dict(atom(), atom())
		no_imports,	% = ordset(atom())
		options		% = [term()]
	       }).

%% Entry point of the merging engine. Fills in the option defaults,
%% turns every entry of Sources into a single syntax tree, extracts the
%% module information of each tree with get_module_info/1, and passes
%% everything on to merge_sources_1/4. If Sources is empty, an error is
%% reported and the process exits with reason `badarg'.

merge_sources(Name, Sources, Opts) ->
    %% Prepare the options and the inputs. The defaults are appended,
    %% so options given by the caller come first in the list.
    Opts1 = Opts ++ [{export_all, false},
                     {file_attributes, no},
                     {no_imports, false},
                     {notes, yes},
                     tidy,
                     {verbose, false}],
    %% There must be at least one module to work with.
    Trees = case Sources of
                [] ->
                    report_error("no sources to merge."),
                    exit(badarg);
                _ ->
                    %% An entry is either a list of forms, which is
                    %% wrapped as a `form_list' tree, or already a
                    %% syntax tree, which is used as it is. The test
                    %% uses is_list/1; the old-style guard test list/1
                    %% is obsolete.
                    [if is_list(M) -> erl_syntax:form_list(M);
                        true -> M
                     end
                     || M <- Sources]
            end,
    Modules = [get_module_info(T) || T <- Trees],
    merge_sources_1(Name, Modules, Trees, Opts1).

%% State threaded through the transformation. The `export' field is a
%% set (module `sets') of {Name, Arity} pairs.

-record(state, {export}).

%% Returns the state S with {Name, Arity} added to its `export' set.

state__add_export(Name, Arity, S) ->
    Before = S#state.export,
    After = sets:add_element({Name, Arity}, Before),
    S#state{export = After}.

%% Validates the merge inputs and options, builds the `#merge{}'
%% environment and continues with merge_sources_2/4.
%%
%% `Name' is the target module name, `Modules' the `#module{}'
%% descriptions of the sources (in the given order), `Trees' the
%% corresponding syntax trees and `Opts' the option list. A failed
%% validation is reported through report_error and terminates the
%% process with `exit(error)'.
merge_sources_1(Name, Modules, Trees, Opts) ->
    %% Get the (nonempty) list of source module names, in the given
    %% order. Multiple occurrences of the same source module name are
    %% not accepted.
    Ns = [M#module.name || M <- Modules],
    case duplicates(Ns) of
        [] ->
            ok;
        Ns1 ->
            report_error("same module names repeated in input: ~p.",
                         [Ns1]),
            exit(error)
    end,
    Sources = ordsets:from_list(Ns),
    All = ordsets:add_element(Name, Sources),

    %% Initialise the merging environment from the given options.
    %%
    %% If the `export' option is the empty list, then if the target
    %% module is the same as one of the sources, that module will be
    %% exported; otherwise the first listed source module is exported.
    %% This simplifies use in most cases, and guarantees that the
    %% generated module has a well-defined interface. If `export_all' is
    %% `true', we expand it here by including the set of source module
    %% names.
    Es = case proplists:append_values(export, Opts) of
             [] ->
                 case ordsets:is_element(Name, Sources) of
                     true ->
                         [Name];
                     false ->
                         [hd(Ns)]
                 end;
             Es1 when is_list(Es1) ->
                 ordsets:from_list(Es1);
             Es1 ->
                 %% Like every other bad option value, this must abort
                 %% the merge instead of binding `Es' to whatever
                 %% report_error returns.
                 report_error("bad value for `export' option: ~P.",
                              [Es1, 5]),
                 exit(error)
         end,
    Export = case proplists:get_bool(export_all, Opts) of
                 false ->
                     Es;
                 true ->
                     ordsets:union(Sources, Es)
             end,
    check_module_names(Export, Sources, "declared as exported"),
    verbose("modules exported from `~w': ~p.", [Name, Export], Opts),

    %% The target module is always "static". (Particularly useful when
    %% the target is the same as one of the source modules). It is
    %% however not "safe" by default. If no modules are explicitly
    %% specified as static, it is assumed that *all* are static.
    Static0 = ordsets:from_list(proplists:append_values(static, Opts)),
    Static = case proplists:is_defined(static, Opts) of
                 false ->
                     All;
                 true ->
                     ordsets:add_element(Name, Static0)
             end,
    check_module_names(Static, All, "declared 'static'"),
    verbose("static modules: ~p.", [Static], Opts),

    %% If no modules are explicitly specified as "safe", it is assumed
    %% that *all* source modules are "safe" except the target module and
    %% those explicitly specified as "static".
    Safe = case proplists:is_defined(safe, Opts) of
               false ->
                   ordsets:subtract(Sources,
                                    ordsets:add_element(Name, Static0));
               true ->
                   ordsets:from_list(
                     proplists:append_values(safe, Opts))
           end,
    check_module_names(Safe, All, "declared 'safe'"),
    verbose("safe modules: ~p.", [Safe], Opts),

    %% The original header is preserved when the target is itself an
    %% exported source module, or when the `no_banner' option is set.
    Preserved = (ordsets:is_element(Name, Sources)
                 andalso ordsets:is_element(Name, Export))
        orelse proplists:get_bool(no_banner, Opts),
    NoHeaders = proplists:get_bool(no_headers, Opts),
    Notes = proplists:get_value(notes, Opts, always),

    %% Redirections must map atoms to atoms, and may not redirect any
    %% of the modules that are being merged.
    Rs = proplists:append_values(redirect, Opts),
    Redirect = case is_atom_map(Rs) of
                   true ->
                       Ms = ordsets:from_list([M || {M, _} <- Rs]),
                       case ordsets:intersection(Sources, Ms) of
                           [] ->
                               ok;
                           Ms1 ->
                               report_error("cannot redirect calls to "
                                            "modules in input set: ~p.",
                                            [Ms1]),
                               exit(error)
                       end,
                       dict:from_list(Rs);
                   false ->
                       report_error("bad value for `redirect' option: "
                                    "~P.",
                                    [Rs, 10]),
                       exit(error)
               end,
    NoImports = case proplists:get_bool(no_imports, Opts) of
                    true ->
                        ordsets:from_list(Sources ++
                                          dict:fetch_keys(Redirect));
                    false ->
                        ordsets:from_list(dict:fetch_keys(Redirect))
                end,
    Env = #merge{target = Name,
                 sources = Sources,
                 export = Export,
                 safe = Safe,
                 static = Static,
                 preserved = Preserved,
                 no_headers = NoHeaders,
                 notes = Notes,
                 redirect = Redirect,
                 no_imports = NoImports,
                 options = Opts},
    merge_sources_2(Env, Modules, Trees, Opts).

%% Returns `true' if the argument is a proper list in which every
%% element is a pair of atoms, and `false' for any other term.
is_atom_map([{A1, A2} | As]) when is_atom(A1), is_atom(A2) ->
    is_atom_map(As);
is_atom_map([]) ->
    true;
is_atom_map(_) ->
    false.

%% Checks that every element of `Names' also occurs in `Sources'.
%% Returns `ok' if so; otherwise the offending names are reported
%% (with `Txt' describing how they were declared) and the process
%% exits with reason `error'.
check_module_names(Names, Sources, Txt) ->
    Unknown = Names -- Sources,
    if Unknown =:= [] ->
            ok;
       true ->
            report_error("unknown modules ~s: ~p.", [Txt, Unknown]),
            exit(error)
    end.

%% This function performs all the stages of the actual merge:

%% Runs the merge stages in sequence and returns `{Tree, Stubs}': the
%% tidied syntax tree of the merged module and the list of stub
%% descriptors.
merge_sources_2(Env, Modules, Trees, Opts) ->
    %% Stage 1: the merged name space and the renaming function.
    {NameSpace, Renaming} = merge_namespaces(Modules, Env),

    %% Stage 2: a description of the resulting module, plus the table
    %% of aliases that have to be expanded in the code.
    {Merged, Expansions} =
        merge_info(Modules, NameSpace, Renaming, Env),

    %% Stage 3: the merged source code, together with the "original
    %% header" (for the first code section in the output) and the
    %% final transformation state.
    State0 = #state{export = sets:new()},
    {Code, Header, State1} =
        merge_code(Trees, Modules, Expansions, Renaming, Env, State0),

    %% Stage 4: put a preamble in front of the filtered program forms,
    %% which yields a complete module.
    Preamble = make_preamble(Merged, Header, Env, State1),
    Body = filter_forms(Code, Env),
    Complete = erl_syntax:form_list([Preamble, Body]),

    %% Stage 5: tidy the result (removing unused functions) and pair it
    %% with the stub descriptors.
    Tidied = tidy(Complete, Opts),
    Stubs = make_stubs(Modules, Renaming, Env),
    {Tidied, Stubs}.

%% Builds the form list that opens the merged module: the module
%% header, the export declarations (including the extra exports noted
%% in the state `St'), the import declarations and the remaining
%% attributes, in that order.
make_preamble(Module, Header, Env, St) ->
    ModName = Module#module.name,
    ModVars = Module#module.vars,
    Noted = ordsets:from_list(sets:to_list(St#state.export)),
    ExportForms = make_exports(Module#module.exports, Noted),
    ImportForms = make_imports(Module#module.aliases),
    AttrForms = make_attributes(Module#module.attributes),
    HeaderForms = module_header(Header, ModName, ModVars, Env),
    Preamble = HeaderForms ++ ExportForms ++ ImportForms ++ AttrForms,
    erl_syntax:form_list(Preamble).

%% If the target preserves one of the source modules, we do not generate
%% a new header, but use the original.

%% Returns the list of header forms for the merged module. When the
%% environment says that a source module is preserved, the original
%% header `Forms' are reused with an updated module declaration;
%% otherwise a banner comment listing the source modules and a
%% timestamp is generated, followed by a fresh module declaration.
module_header(Forms, Name, Vars, Env) ->
    case Env#merge.preserved of
        false ->
            SourceLines =
                [lists:flatten(io_lib:fwrite("\t\t`~w'", [Src]))
                 || Src <- Env#merge.sources],
            Banner = comment([?COMMENT_BAR,
                              "This module was formed by merging "
                              "the following modules:",
                              ""]
                             ++ SourceLines
                             ++ ["",
                                 timestamp(),
                                 ""]),
            Declaration =
                erl_syntax:attribute(erl_syntax:atom('module'),
                                     [erl_syntax:atom(Name)]),
            [Banner, Declaration];
        true ->
            update_header(Forms, Name, Vars)
    end.

%% Replaces the last form of the nonempty list `Fs' by a module
%% declaration for `Name'. Unless `Vars' is the atom `none', the
%% declaration also gets a list of the variables named in `Vars' as a
%% second argument. The new declaration is combined with the old form
%% through rewrite/2.
update_header(Fs, Name, Vars) ->
    [Last | Earlier] = lists:reverse(Fs),
    Params = case Vars of
                 none ->
                     [];
                 _ ->
                     [erl_syntax:list([erl_syntax:variable(V)
                                       || V <- Vars])]
             end,
    Declaration = erl_syntax:attribute(erl_syntax:atom('module'),
                                       [erl_syntax:atom(Name) | Params]),
    Updated = rewrite(Last, Declaration),
    lists:reverse([Updated | Earlier]).

%% Some functions may have been noted as necessary to export (because of
%% how they are called) even though the user did not specify that the
%% modules in which these functions originated should be part of the
%% interface of the resulting module.

%% Returns the list of forms declaring the exports. `Exports' holds the
%% official interface; names in `Extras' that are not already in
%% `Exports' get a separate export declaration, preceded by a comment
%% marking them as unofficial.
make_exports(Exports, Extras) ->
    Unofficial = ordsets:subtract(Extras, Exports),
    Official = make_export(Exports),
    if Unofficial =:= [] ->
            [Official];
       true ->
            [Official,
             comment(["** The following exports "
                      "are not official: **"]),
             make_export(Unofficial)]
    end.

%% Returns an `export' attribute for the `{Function, Arity}' pairs in
%% `Names', or a comment form stating that nothing is exported if
%% there are no such pairs.
make_export(Names) ->
    case [erl_syntax:arity_qualifier(erl_syntax:atom(F),
                                     erl_syntax:integer(A))
          || {F, A} <- Names] of
        [] ->
            comment(["** Nothing is officially exported "
                     "from this module! **"]);
        Qualifiers ->
            erl_syntax:attribute(erl_syntax:atom('export'),
                                 [erl_syntax:list(Qualifiers)])
    end.

%% Any aliases that cannot be expressed using `import' (i.e. those not
%% of the form `{F, {M, F}}') are ignored.

%% Returns one `import' attribute per module for the aliases in `As'.
%% Only aliases of the form `{F, {M, F}}' whose `F' is not an
%% auto-import are considered; all others are dropped.
make_imports(As) ->
    [make_import(Module, Functions)
     || {Module, Functions} <-
            group_imports([Alias || {F, {_M, F}} = Alias <- As,
                                    not is_auto_import(F)])].

%% Returns an `import' attribute syntax tree for the module `Module'
%% and the `{Function, Arity}' pairs in `Names'.
make_import(Module, Names) ->
    Qualifiers = [erl_syntax:arity_qualifier(erl_syntax:atom(Fun),
                                             erl_syntax:integer(Arity))
                  || {Fun, Arity} <- Names],
    Arguments = [erl_syntax:atom(Module), erl_syntax:list(Qualifiers)],
    erl_syntax:attribute(erl_syntax:atom('import'), Arguments).

%% Group aliases by module.

%% Takes a list of aliases of the form `{F, {M, F}}' and returns a list
%% of `{M, Fs}' pairs, one per module `M', where `Fs' is the ordered
%% set of names imported from that module.
group_imports(Imports) ->
    Collect =
        fun ({F, {M, F}}, Groups) ->
                dict:update(M,
                            fun (Fs) -> ordsets:add_element(F, Fs) end,
                            [F],
                            Groups)
        end,
    dict:to_list(lists:foldl(Collect, dict:new(), Imports)).


%% ---------------------------------------------------------------------
%% Making stub descriptors
%%
%% These are generated for all exported modules that are not the target
%% module.

%% Returns the stub descriptors, in the order of `Modules', for every
%% module that is exported according to `Env' and is not the target
%% module itself.
make_stubs(Modules, Renaming, Env) ->
    make_stubs_1(Modules, Renaming, Env).

make_stubs_1([], _, _) ->
    [];
make_stubs_1([M | Ms], Renaming, Env) ->
    Name = M#module.name,
    %% A stub is needed only for an exported module other than the
    %% target.
    NeedsStub = if Name /= Env#merge.target ->
                        ordsets:is_element(Name, Env#merge.export);
                   true ->
                        false
                end,
    case NeedsStub of
        true ->
            [make_stub(M, Renaming(Name), Env)
             | make_stubs_1(Ms, Renaming, Env)];
        false ->
            make_stubs_1(Ms, Renaming, Env)
    end.

%% Returns the stub descriptor `{ModuleName, Redirections, Attributes}'
%% for module `M'. Each exported function `F' is redirected to
%% `{Target, Map(F)}', i.e. to its (possibly renamed) counterpart in
%% the target module.
make_stub(M, Map, Env) ->
    TargetModule = Env#merge.target,
    Redirections = [{Exported, {TargetModule, Map(Exported)}}
                    || Exported <- M#module.exports],
    {M#module.name, Redirections, M#module.attributes}.


%% ---------------------------------------------------------------------
%% Removing and/or out-commenting program forms. The returned form
%% sequence tree is not necessarily flat.

%% State used while filtering program forms:
%%   records         - set (module `sets') of the record names whose
%%                     definitions have already been kept
%%   file_attributes - action for `file' attributes:
%%                     `keep', `delete' or `kill'
%%   attributes      - action for all other attributes that are not
%%                     handled specially: `delete' or `kill'
-record(filter, {records, file_attributes, attributes}).

%% Flattens the form list `Tree', filters its forms according to `Env'
%% and returns the result as a new form list.
filter_forms(Tree, Env) ->
    Flat = erl_syntax:flatten_form_list(Tree),
    Forms = erl_syntax:form_list_elements(Flat),
    Filtered = filter_forms_1(Forms, Env),
    erl_syntax:form_list(Filtered).

%% Returns the filtered forms in their original order. The list from
%% filter_forms_2/2 is accumulated back to front, so it is reversed
%% here; the final filter state is discarded.
filter_forms_1(Forms, Env) ->
    {Reversed, _FinalState} = filter_forms_2(Forms, Env),
    lists:reverse(Reversed).

%% Folds over `Forms', deciding for each form whether it is kept,
%% out-commented ("killed") or deleted. Returns `{Fs, State}', where
%% `Fs' holds the resulting forms in reverse order and `State' is the
%% final `#filter{}' record. An invalid `file_attributes' option value
%% is reported and terminates the process with `exit(error)'.
filter_forms_2(Forms, Env) ->
    Option = proplists:get_value(file_attributes,
                                 Env#merge.options, comment),
    %% Check the option value and translate it to an action.
    FileAction = case Option of
                     yes ->
                         keep;
                     no ->
                         delete;
                     comment ->
                         kill;
                     _ ->
                         report_error("invalid value for option "
                                      "`file_attributes': ~w.",
                                      [Option]),
                         exit(error)
                 end,
    %% With exactly one source module, keeping the out-commented
    %% original attributes looks weird, so they are deleted instead.
    OtherAction = case Env#merge.sources of
                      [_] ->
                          delete;
                      _ ->
                          kill
                  end,
    Initial = #filter{records = sets:new(),
                      file_attributes = FileAction,
                      attributes = OtherAction},
    Step =
        fun (Form, {Kept, St0}) ->
                case filter_form(Form, St0) of
                    {keep, St1} ->
                        {[Form | Kept], St1};
                    {kill, St1} ->
                        {[kill_form(Form) | Kept], St1};
                    {delete, St1} ->
                        %% A deleted form that carries comments is
                        %% killed instead of removed (only top-level
                        %% comments are examined).
                        case erl_syntax:has_comments(Form) of
                            true ->
                                {[kill_form(Form) | Kept], St1};
                            false ->
                                {Kept, St1}
                        end
                end
        end,
    lists:foldl(Step, {[], Initial}, Forms).

%% Decides what to do with the form `F' given the filter state `S'.
%% Returns `{Action, NewState}', where `Action' is `keep', `kill' or
%% `delete'. The state changes only when a record definition is seen
%% for the first time.
filter_form(F, S) ->
    case erl_syntax_lib:analyze_form(F) of
        {attribute, {'file', _}} ->
            %% The action was selected by the `file_attributes' option.
            {S#filter.file_attributes, S};
        {attribute, {'module', _}} ->
            {delete, S};
        {attribute, {'export', _}} ->
            {delete, S};
        {attribute, {'import', _}} ->
            {delete, S};
        {attribute, {'record', {RecordName, _}}} ->
            Seen = S#filter.records,
            case sets:is_element(RecordName, Seen) of
                false ->
                    %% First definition of this record name: keep it
                    %% and remember the name.
                    Seen1 = sets:add_element(RecordName, Seen),
                    {keep, S#filter{records = Seen1}};
                true ->
                    %% The record has already been defined above.
                    {kill, S}
            end;
        {attribute, preprocessor} ->
            %% All preprocessor attributes are kept.
            {keep, S};
        {attribute, _} ->
            %% Every other attribute gets the default action.
            {S#filter.attributes, S};
        {error_marker, _} ->
            {delete, S};
        {warning_marker, _} ->
            {delete, S};
        eof_marker ->
            %% These must be deleted!
            {delete, S};
        _ ->
            %% All other Erlang forms are kept.
            {keep, S}
    end.

%% This out-comments (kills) a program form. Any top-level pre-comments
%% are moved out, to avoid "nested" comments.

%% Turns the form `F' into a comment, using the kill prefix. The
%% top-level precomments of `F' are detached first and then attached
%% to the resulting comment, so that they are not out-commented
%% themselves.
kill_form(F) ->
    Precomments = erl_syntax:get_precomments(F),
    Bare = erl_syntax:set_precomments(F, []),
    Killed = erl_syntax_lib:to_comment(Bare, ?KILL_PREFIX),
    erl_syntax:set_precomments(Killed, Precomments).


%% ---------------------------------------------------------------------
%% Merging the name spaces of a set of modules. Returns the final set
%% (see module `sets') of names and a total renaming function (atom())
%% -> ({atom(), integer()}) -> {atom(), integer()}.
%%
%% Names are added in two passes, in order to avoid renaming the
%% interface functions whenever possible: all exported functions are
%% added to the name space before any nonexported are added, and
%% "exported" modules are taken before any other. Thus, the order is:
%%
%%   - exported functions of exported modules
%%   - exported functions of nonexported modules
%%   - internal functions of exported modules
%%   - internal functions of nonexported modules
%%
%% In fact, only the first group is important, but there might be some
%% point in establishing the above order, for better readability of the
%% final code.

%% Returns `{Names, Renaming}': the final set of names and the renaming
%% function built from the per-module renaming tables. A warning is
%% issued if any exported function of an exported module had to be
%% renamed.
merge_namespaces(Modules, Env) ->
    Exported = Env#merge.export,
    IsExported = fun (M) ->
                         ordsets:is_element(M#module.name, Exported)
                 end,
    {ExportedMods, OtherMods} = split_list(IsExported, Modules),
    NoMaps = dict:new(),
    Acc0 = {sets:new(), NoMaps},

    %% Pass 1a: exported functions of the exported modules.
    {Tagged1, Acc1} = merge_namespaces_1(ExportedMods, Acc0),

    %% Detect and warn about renamed interface functions.
    {_, InterfaceMaps} = Acc1,
    Renamed = [{M, dict:to_list(Map)}
               || {M, Map} <- dict:to_list(InterfaceMaps),
                  dict:size(Map) /= 0],
    case Renamed of
        [] ->
            ok;
        _ ->
            report_warning("interface functions renamed:\n\t~p.",
                           [Renamed])
    end,

    %% Pass 1b: exported functions of the remaining modules.
    {Tagged2, Acc2} = merge_namespaces_1(OtherMods, Acc1),
    Tagged = Tagged1 ++ Tagged2,

    %% Pass 2: nonexported functions; pass 3: record names.
    Acc3 = merge_namespaces_2(Tagged, Acc2),
    {{Names, Maps}, _} = merge_namespaces_3(Tagged, Acc3),
    {Names, make_renaming_function(Maps)}.

%% Adding exported names. (Note that the list gets a new temporary
%% format also containing the exports.) This first step initialises the
%% Maps "dict-of-dicts" structure.

%% Adds the exported functions of each module to the name space.
%% Returns `{Tagged, {Names, Maps}}', where `Tagged' is the module list
%% with each module paired with its set of exports, and each module
%% gets a fresh entry in `Maps'.
merge_namespaces_1(Modules, Acc) ->
    AddExports =
        fun (Module, {Names, Maps}) ->
                ExportSet = sets:from_list(Module#module.exports),
                ModName = Module#module.name,
                {Names1, Renamings} =
                    add_function_renamings(ModName, ExportSet, Names,
                                           dict:new()),
                {{Module, ExportSet},
                 {Names1, dict:store(ModName, Renamings, Maps)}}
        end,
    lists:mapfoldl(AddExports, Acc, Modules).

%% Adding nonexported names.

%% Adds the nonexported functions of each `{Module, Exports}' pair to
%% the name space, extending the renaming table already stored for the
%% module. Returns the updated `{Names, Maps}'.
merge_namespaces_2(Modules, Acc) ->
    AddInternal =
        fun ({Module, ExportSet}, {Names, Maps}) ->
                AllFunctions = sets:from_list(Module#module.functions),
                Internal = sets:subtract(AllFunctions, ExportSet),
                ModName = Module#module.name,
                Renamings0 = dict:fetch(ModName, Maps),
                {Names1, Renamings1} =
                    add_function_renamings(ModName, Internal, Names,
                                           Renamings0),
                {Names1, dict:store(ModName, Renamings1, Maps)}
        end,
    lists:foldl(AddInternal, Acc, Modules).

%% Adding record names. We need to keep a global
%% "record-definition-to-new-record-name" mapping RMap while doing this.

%% Adds the record names of each module to the name space. The fold
%% starts from an empty record-definition mapping and returns
%% `{{Names, Maps}, RMap}'.
merge_namespaces_3(Modules, Acc) ->
    AddRecords =
        fun ({Module, _ExportSet}, {{Names, Maps}, RMap}) ->
                RecordDefs = Module#module.records,
                ModName = Module#module.name,
                Renamings0 = dict:fetch(ModName, Maps),
                {Names1, Renamings1, RMap1} =
                    add_record_renamings(ModName, RecordDefs, Names,
                                         Renamings0, RMap),
                {{Names1, dict:store(ModName, Renamings1, Maps)},
                 RMap1}
        end,
    lists:foldl(AddRecords, {Acc, dict:new()}, Modules).

%% This takes the set of added function names together with the existing
%% name set, creates new function names where necessary, and returns the
%% final name set together with the list of renamings.

%% Adds the set `New' of `{Function, Arity}' names from `Module' to the
%% name set `Names'. Each name in `New' that is already in `Names' gets
%% a fresh name, which is added to the name set and recorded in `Map'
%% as a renaming from the old name. Returns `{Names1, Map1}'.
add_function_renamings(Module, New, Names, Map) ->
    Clashes = sets:to_list(sets:intersection(New, Names)),
    lists:foldl(
      %% The accumulator has its own variable names, so it does not
      %% shadow the function arguments.
      fun (F = {_, A}, {NamesAcc, MapAcc}) when is_integer(A) ->
              F1 = new_function_name(Module, F, NamesAcc),
              {sets:add_element(F1, NamesAcc),
               dict:store(F, F1, MapAcc)}
      end,
      {sets:union(New, Names), Map}, Clashes).

%% This is similar to the above, but for record names. Note that we add
%% both the record name and the whole definition to the namespace.

%% Adds the record definitions `Records' (pairs `{Name, Fields}') of
%% `Module' to the name space. Returns `{Names1, Map1, RMap1}': the
%% updated name set, the updated renaming table of the module, and the
%% updated mapping from record definitions to renamed definitions.
add_record_renamings(Module, Records, Names, Map, RMap) ->
    AddRecord =
        fun (Def = {R, Fs}, {NamesAcc, MapAcc, RMapAcc}) ->
                case sets:is_element(?record_name(R), NamesAcc) of
                    false ->
                        %% A previously unused record name: register
                        %% both the name and the whole definition.
                        WithName = sets:add_element(?record_name(R),
                                                    NamesAcc),
                        WithDef = sets:add_element(?record_name(Def),
                                                   WithName),
                        {WithDef, MapAcc, RMapAcc};
                    true ->
                        %% The name is already in use.
                        case sets:is_element(?record_name(Def),
                                             NamesAcc) of
                            false ->
                                %% Redefinition of an existing name.
                                %% Create a new name and set up the
                                %% renamings.
                                NewDef = {NewName, _} =
                                    new_record_name(Module, R, Fs,
                                                    NamesAcc),
                                MapAcc1 =
                                    dict:store(?record_name(R),
                                               ?record_name(NewName),
                                               MapAcc),
                                RMapAcc1 = dict:store(Def, NewDef,
                                                      RMapAcc),
                                NamesAcc1 =
                                    sets:add_element(
                                      ?record_name(NewDef), NamesAcc),
                                {NamesAcc1, MapAcc1, RMapAcc1};
                            true ->
                                %% This definition has been seen
                                %% before; make sure the same name is
                                %% used for it.
                                {SameName, _} =
                                    remap_record_name(Def, RMapAcc),
                                MapAcc1 =
                                    dict:store(?record_name(R),
                                               ?record_name(SameName),
                                               MapAcc),
                                {NamesAcc, MapAcc1, RMapAcc}
                        end
                end
        end,
    lists:foldl(AddRecord, {Names, Map, RMap}, Records).

%% Looks up `N' in the dictionary `Map'. Returns the stored value if
%% there is one, and `N' itself otherwise.
remap_record_name(N, Map) ->
    case dict:is_key(N, Map) of
        true ->
            dict:fetch(N, Map);
        false ->
            N
    end.

%% This hides the implementation of the record namespace. Since Map
%% yields identity for non-remapped names, the remapped names must be
%% stored in wrapped form.

%% Applies the renaming function `Map' to the record name `R'. The name
%% is wrapped before `Map' is applied and the result is unwrapped
%% again.
map_record_name(R, Map) ->
    Wrapped = ?record_name(R),
    ?record_name(Renamed) = Map(Wrapped),
    Renamed.

%% When we rename a function, we want the new name to be as close as
%% possible to the old, and as informative as possible. Therefore, we
%% first prefix it with the name of the originating module, followed by
%% two underscore characters, and then if there still is a name clash,
%% we suffix the name by "_N", where N is the smallest possible positive
%% integer that does not cause a clash.

%% Returns a new name `{Atom, A}' for the function `{F, A}' of module
%% `M' that is not an element of the set `Names'. The first candidate
%% is the module name and the function name joined by two underscores;
%% if it is taken, numbered candidates are tried, starting from 1.
new_function_name(M, {F, A}, Names) ->
    Prefixed = atom_to_list(M) ++ "__" ++ atom_to_list(F),
    Candidate = {list_to_atom(Prefixed), A},
    case sets:is_element(Candidate, Names) of
        true ->
            new_function_name(1, A, Prefixed, Names);
        false ->
            Candidate
    end.

%% Returns the first name `{Atom, Arity}' that is not an element of the
%% set `Names', where `Atom' is `Base' followed by an underscore and a
%% counter. The counter starts at `N' and is incremented by one after
%% each clash.
new_function_name(N, Arity, Base, Names) ->
    Suffixed = Base ++ "_" ++ integer_to_list(N),
    Candidate = {list_to_atom(Suffixed), Arity},
    case sets:is_element(Candidate, Names) of
        true ->
            %% Taken: try the next counter value.
            new_function_name(N + 1, Arity, Base, Names);
        false ->
            Candidate
    end.

%% This is pretty much the same as new_function_name, for now.

%% Returns a new record definition `{Atom, Fs}' for record `R' of
%% module `M' whose wrapped form is not an element of the set `Names'.
%% The first candidate is the module name and the record name joined
%% by two underscores; if it is taken, numbered candidates are tried,
%% starting from 1.
new_record_name(M, R, Fs, Names) ->
    Prefixed = atom_to_list(M) ++ "__" ++ atom_to_list(R),
    Candidate = {list_to_atom(Prefixed), Fs},
    case sets:is_element(?record_name(Candidate), Names) of
        true ->
            new_record_name_1(1, Prefixed, Fs, Names);
        false ->
            Candidate
    end.

%% Returns the first record definition `{Atom, Fs}' whose wrapped form
%% is not an element of the set `Names', where `Atom' is `Base'
%% followed by an underscore and a counter. The counter starts at `N'
%% and is incremented by one after each clash.
new_record_name_1(N, Base, Fs, Names) ->
    Suffixed = Base ++ "_" ++ integer_to_list(N),
    Candidate = {list_to_atom(Suffixed), Fs},
    case sets:is_element(?record_name(Candidate), Names) of
        true ->
            %% Taken: try the next counter value.
            new_record_name_1(N + 1, Base, Fs, Names);
        false ->
            Candidate
    end.

%% This returns a *total* function from the set of module names to the
%% set of *total* operators on function names, yielding identity for all
%% functi…