// ClassDependencyIndexCreator.java
//
// /projects/netbeans-7.3/maven.indexer/src/org/netbeans/modules/maven/indexer/ClassDependencyIndexCreator.java
//
// https://gitlab.com/essere.lab.public/qualitas.class-corpus
// Java | 409 lines | 324 code | 19 blank | 66 comment | 70 complexity | ea000e56c65170f4ebd0049faabdcc70 MD5 | raw file

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2011 Oracle and/or its affiliates. All rights reserved.
 *
 * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
 * Other names may be trademarks of their respective owners.
 *
 * The contents of this file are subject to the terms of either the GNU General
 * Public License Version 2 only ("GPL") or the Common Development and
 * Distribution License("CDDL") (collectively, the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy of the
 * License at http://www.netbeans.org/cddl-gplv2.html or
 * nbbuild/licenses/CDDL-GPL-2-CP. See the License for the specific language
 * governing permissions and limitations under the License. When distributing
 * the software, include this License Header Notice in each file and include the
 * License file at nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided by Oracle
 * in the GPL Version 2 section of the License file that accompanied this code.
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * If you wish your version of this file to be governed by only the CDDL or only
 * the GPL Version 2, indicate your decision by adding "[Contributor] elects to
 * include this software in this distribution under the [CDDL or GPL Version 2]
 * license." If you do not indicate a single choice of license, a recipient has
 * the option to distribute your version of this file under either the CDDL, the
 * GPL Version 2 or to extend the choice of license to its licensees as provided
 * above. However, if you add GPL Version 2 code and therefore, elected the GPL
 * Version 2 license, then the option applies only if the new code is made
 * subject to such option by the copyright holder.
 *
 * Contributor(s):
 *
 * Portions Copyrighted 2011 Sun Microsystems, Inc.
 */

package org.netbeans.modules.maven.indexer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.CRC32;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.maven.index.ArtifactContext;
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.Field;
import org.apache.maven.index.IndexerField;
import org.apache.maven.index.IndexerFieldVersion;
import org.apache.maven.index.NexusIndexer;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.creator.AbstractIndexCreator;
import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator;
import org.apache.maven.index.expr.StringSearchExpression;
import org.codehaus.plexus.util.Base64;
import org.netbeans.modules.maven.indexer.api.NBVersionInfo;
import org.netbeans.modules.maven.indexer.api.RepositoryQueries.ClassUsage;
import org.openide.filesystems.FileUtil;
import org.openide.util.Utilities;

/**
 * Scans classes in (local) JARs for their Java dependencies.
 */
class ClassDependencyIndexCreator extends AbstractIndexCreator {

    private static final Logger LOG = Logger.getLogger(ClassDependencyIndexCreator.class.getName());

    private static final String NB_DEPENDENCY_CLASSES = "nbdc";
    private static final IndexerField FLD_NB_DEPENDENCY_CLASS = new IndexerField(new Field(null, "urn:NbClassDependenciesIndexCreator", NB_DEPENDENCY_CLASSES, "Java dependencies"), IndexerFieldVersion.V3, NB_DEPENDENCY_CLASSES, "Java dependencies", Store.YES, Index.ANALYZED);

    ClassDependencyIndexCreator() {
        super(ClassDependencyIndexCreator.class.getName(), Arrays.asList(MinimalArtifactInfoIndexCreator.ID));
    }

    // XXX should rather be Map<ArtifactInfo,...> so we do not rely on interleaving of populateArtifactInfo vs. updateDocument
    /** class/in/this/Jar -> [foreign/Class, other/foreign/Nested$Class] */
    private Map<String,Set<String>> classDeps;

    @Override public void populateArtifactInfo(ArtifactContext context) throws IOException {
        classDeps = null;
        ArtifactInfo ai = context.getArtifactInfo();
        if (ai.classifier != null) {
            return;
        }
        if ("pom".equals(ai.packaging) || ai.fextension.endsWith(".lastUpdated")) {
            return;
        }
        File jar = context.getArtifact();
        if (jar == null || !jar.isFile()) {
            LOG.log(Level.FINER, "no artifact for {0}", ai); // not a big deal, maybe just *.pom (or *.pom + *.nbm) here
            return;
        }
        if (!ai.packaging.equals("jar") && !isArchiveFile(jar)) {
            LOG.log(Level.FINE, "skipping artifact {0} with unrecognized packaging based on {1}", new Object[] {ai, jar});
            return;
        }
        LOG.log(Level.FINER, "reading {0}", jar);
        Map<String, byte[]> classfiles = read(jar);
        classDeps = new HashMap<String, Set<String>>();
        Set<String> classes = classfiles.keySet();
        for (Map.Entry<String, byte[]> entry : classfiles.entrySet()) {
            addDependenciesToMap(entry.getKey(), entry.getValue(), classDeps, classes, jar);
        }
    }

    // adapted from FileUtil, since we do not want to have to use FileObject's here
    private static boolean isArchiveFile(File jar) throws IOException {
        InputStream in = new FileInputStream(jar);
        try {
            byte[] buffer = new byte[4];
            return in.read(buffer, 0, 4) == 4 && (Arrays.equals(ZIP_HEADER_1, buffer) || Arrays.equals(ZIP_HEADER_2, buffer));
        } finally {
            in.close();
        }
    }
    private static byte[] ZIP_HEADER_1 = {80, 75, 3, 4};
    private static byte[] ZIP_HEADER_2 = {80, 75, 5, 6};
    
    @Override public boolean updateArtifactInfo(Document document, ArtifactInfo artifactInfo) {
        return false;
    }
    
    @Override public void updateDocument(ArtifactInfo ai, Document doc) {
        if (classDeps == null || classDeps.isEmpty()) {
            return;
        }
        if (ai.classNames == null) {
            // Might be *.hpi, *.war, etc. - so JarFileContentsIndexCreator ignores it (and our results would anyway be wrong due to WEB-INF/classes/ prefix)
            LOG.log(Level.FINE, "no class names in index for {0}; therefore cannot store class usages", ai);
            return;
        }
        StringBuilder b = new StringBuilder();
        String[] classNamesSplit = ai.classNames.split("\n");
        for (String referrerTopLevel : classNamesSplit) {
            Set<String> referees = classDeps.remove(referrerTopLevel.substring(1));
            if (referees != null) {
                for (String referee : referees) {
                    b.append(crc32base64(referee));
                    b.append(' ');
                }
            }
            b.append(' ');
        }
        if (!classDeps.isEmpty()) {
            // E.g. findbugs-1.2.0.jar has TigerSubstitutes.class, TigerSubstitutesTest$Foo.class, etc., but no TigerSubstitutesTest.class (?)
            // Or guice-3.0-rc2.jar has e.g. $Transformer.class with no source equivalent.
            LOG.log(Level.FINE, "found dependencies for {0} from classes {1} not among {2}", new Object[] {ai, classDeps.keySet(), Arrays.asList(classNamesSplit)});
        }
        LOG.log(Level.FINER, "Class dependencies index field: {0}", b);
        // XXX is it possible to _store_ something more compact (binary) using a custom tokenizer?
        // seems like DefaultIndexingContext hardcodes NexusAnalyzer
        doc.add(FLD_NB_DEPENDENCY_CLASS.toField(b.toString()));
    }

    static void search(String className, NexusIndexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException {
        String searchString = crc32base64(className.replace('.', '/'));
        Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString));
        TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, true);
        for (IndexingContext context : contexts) {
            IndexSearcher searcher = context.acquireIndexSearcher();
            try {
        searcher.search(refClassQuery, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length});
        for (ScoreDoc hit : hits) {
            int docId = hit.doc;
            Document d = searcher.doc(docId);
            String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES);
            LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue});
            Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES));
            if (!refClasses.isEmpty()) {
                ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context);
                if (ai != null) {
                    ai.repository = context.getRepositoryId();
                    List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai));
                    if (!version.isEmpty()) {
                        results.add(new ClassUsage(version.get(0), refClasses));
                    }
                }
            }
        }
        } finally {
            context.releaseIndexSearcher(searcher);
        }
        }
    }
    private static Set<String> parseField(String refereeCRC, String field, String referrersNL) {
        Set<String> referrers = new TreeSet<String>();
        int p = 0;
        for (String referrer : referrersNL.split("\n")) {
            while (true) {
                if (field.charAt(p) == ' ') {
                    p++;
                    break;
                }
                if (field.substring(p, p + 6).equals(refereeCRC)) {
                    referrers.add(referrer.substring(1).replace('/', '.'));
                }
                p += 7;
            }
        }
        return referrers;
    }

    /**
     * @param referrer a referring class, as {@code pkg/Outer$Inner}
     * @param data its bytecode
     * @param depsMap map from referring outer classes (as {@code pkg/Outer}) to referred-to classes (as {@code pkg/Outer$Inner})
     * @param siblings other referring classes in the same artifact (including this one), as {@code pkg/Outer$Inner}
     * @param jar the jar file, for diagnostics
     */
    private static void addDependenciesToMap(String referrer, byte[] data, Map<String, Set<String>> depsMap, Set<String> siblings, File jar) throws IOException {
        ClassLoader jre = ClassLoader.getSystemClassLoader().getParent();
        int shell = referrer.indexOf('$', referrer.lastIndexOf('/') + 1);
        String referrerTopLevel = shell == -1 ? referrer : referrer.substring(0, shell);
        for (String referee : dependencies(data, referrer, jar)) {
            if (siblings.contains(referee)) {
                continue; // in same JAR, not interesting
            }
            try {
                jre.loadClass(referee.replace('/', '.')); // XXX ought to cache this result
                continue; // in JRE, not interesting
            } catch (ClassNotFoundException x) {
            }
            Set<String> referees = depsMap.get(referrerTopLevel);
            if (referees == null) {
                referees = new TreeSet<String>();
                depsMap.put(referrerTopLevel, referees);
            }
            referees.add(referee);
        }
    }

    static Map<String,byte[]> read(File jar) throws IOException {
        JarFile jf = new JarFile(jar, false);
        try {
            Map<String, byte[]> classfiles = new TreeMap<String, byte[]>();
            Enumeration<JarEntry> e = jf.entries();
            while (e.hasMoreElements()) {
                JarEntry entry = e.nextElement();
                String name = entry.getName();
                if (!name.endsWith(".class")) {
                    continue;
                }
                String clazz = name.substring(0, name.length() - 6);
                ByteArrayOutputStream baos = new ByteArrayOutputStream(Math.max((int) entry.getSize(), 0));
                InputStream is = jf.getInputStream(entry);
                try {
                    FileUtil.copy(is, baos);
                } finally {
                    is.close();
                }
                classfiles.put(clazz, baos.toByteArray());
            }
            return classfiles;
        } catch (SecurityException x) {
            throw new IOException(x);
        } finally {
            jf.close();
        }
    }

    // adapted from org.netbeans.nbbuild.VerifyClassLinkage
    private static Set<String> dependencies(byte[] data, String clazz, File jar) throws IOException {
        Set<String> result = new TreeSet<String>();
        DataInput input = new DataInputStream(new ByteArrayInputStream(data));
        skip(input, 8); // magic, minor_version, major_version
        int size = input.readUnsignedShort() - 1; // constantPoolCount
        String[] utf8Strings = new String[size];
        boolean[] isClassName = new boolean[size];
        boolean[] isDescriptor = new boolean[size];
        for (int i = 0; i < size; i++) {
            byte tag = input.readByte();
            switch (tag) {
            case 1: // CONSTANT_Utf8
                utf8Strings[i] = input.readUTF();
                break;
            case 7: // CONSTANT_Class
                int index = input.readUnsignedShort() - 1;
                if (index >= size) {
                    throw new IOException("@" + i + ": CONSTANT_Class_info.name_index " + index + " too big for size of pool " + size);
                }
                //LOG.finest("Class reference at " + index);
                isClassName[index] = true;
                break;
            case 3: // CONSTANT_Integer
            case 4: // CONSTANT_Float
            case 9: // CONSTANT_Fieldref
            case 10: // CONSTANT_Methodref
            case 11: // CONSTANT_InterfaceMethodref
                skip(input, 4);
                break;
            case 12: // CONSTANT_NameAndType
                skip(input, 2);
                index = input.readUnsignedShort() - 1;
                if (index >= size || index < 0) {
                    throw new IOException("@" + i + ": CONSTANT_NameAndType_info.descriptor_index " + index + " too big for size of pool " + size);
                }
                isDescriptor[index] = true;
                break;
            case 8: // CONSTANT_String
                skip(input, 2);
                break;
            case 5: // CONSTANT_Long
            case 6: // CONSTANT_Double
                skip(input, 8);
                i++; // weirdness in spec
                break;
            default:
                // E.g. com/ibm/icu/icu4j/2.6.1/icu4j-2.6.1.jar!/com/ibm/icu/impl/data/LocaleElements_zh__PINYIN.class is corrupt even acc. to javap.
                LOG.log(Level.FINE, "jar:{4}!/{3}.class: Unrecognized constant pool tag {0} at index {1}; running UTF-8 strings: {2}", new Object[] {tag, i, Arrays.asList(utf8Strings), clazz, Utilities.toURI(jar)});
                continue;
            }
        }
        //LOG.finest("UTF-8 strings: " + Arrays.asList(utf8Strings));
        for (int i = 0; i < size; i++) {
            String s = utf8Strings[i];
            if (s != null) {
                if (isClassName[i]) {
                    while (s.charAt(0) == '[') {
                        // array type
                        s = s.substring(1);
                    }
                    if (s.length() == 1) {
                        // primitive
                        continue;
                    }
                    String c;
                    if (s.charAt(s.length() - 1) == ';' && s.charAt(0) == 'L') {
                        // Uncommon but seems sometimes this happens.
                        c = s.substring(1, s.length() - 1);
                    } else {
                        c = s;
                    }
                    result.add(c);
                } else if (isDescriptor[i]) {
                    int idx = 0;
                    while ((idx = s.indexOf('L', idx)) != -1) {
                        int semi = s.indexOf(';', idx);
                        if (semi == -1) {
                            throw new IOException("Invalid type or descriptor: " + s);
                        }
                        result.add(s.substring(idx + 1, semi));
                        idx = semi;
                    }
                }
            }
        }
        return result;
    }

    private static void skip(DataInput input, int bytes) throws IOException {
        int skipped = input.skipBytes(bytes);
        if (skipped != bytes) {
            throw new IOException("Truncated class file");
        }
    }

    @Override public Collection<IndexerField> getIndexerFields() {
        return Arrays.asList(FLD_NB_DEPENDENCY_CLASS);
    }

    /**
     * @param s a string, such as a class name
     * @return the CRC-32 of its UTF-8 representation, as big-endian Base-64 without padding (so six chars), with _ for + (safer for Lucene)
     */
    static String crc32base64(String s) {
        crc.reset();
        crc.update(s.getBytes(UTF8));
        long v = crc.getValue();
        byte[] b64 = Base64.encodeBase64(new byte[] {(byte) (v >> 24 & 0xFF), (byte) (v >> 16 & 0xFF), (byte) (v >> 8 & 0xFF), (byte) (v & 0xFF)});
        assert b64.length == 8;
        assert b64[6] == '=';
        assert b64[7] == '=';
        return new String(b64, 0, 6, LATIN1).replace('+', '_');
    }
    private static final CRC32 crc = new CRC32();
    private static final Charset UTF8 = Charset.forName("UTF-8");
    private static final Charset LATIN1 = Charset.forName("ISO-8859-1");

}