PageRenderTime 42ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/netbeans-7.3/maven.indexer/src/org/netbeans/modules/maven/indexer/ClassDependencyIndexCreator.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Java | 409 lines | 324 code | 19 blank | 66 comment | 70 complexity | ea000e56c65170f4ebd0049faabdcc70 MD5 | raw file
  1. /*
  2. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  3. *
  4. * Copyright 2011 Oracle and/or its affiliates. All rights reserved.
  5. *
  6. * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
  7. * Other names may be trademarks of their respective owners.
  8. *
  9. * The contents of this file are subject to the terms of either the GNU General
  10. * Public License Version 2 only ("GPL") or the Common Development and
  11. * Distribution License("CDDL") (collectively, the "License"). You may not use
  12. * this file except in compliance with the License. You can obtain a copy of the
  13. * License at http://www.netbeans.org/cddl-gplv2.html or
  14. * nbbuild/licenses/CDDL-GPL-2-CP. See the License for the specific language
  15. * governing permissions and limitations under the License. When distributing
  16. * the software, include this License Header Notice in each file and include the
  17. * License file at nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
  18. * particular file as subject to the "Classpath" exception as provided by Oracle
  19. * in the GPL Version 2 section of the License file that accompanied this code.
  20. * If applicable, add the following below the License Header, with the fields
  21. * enclosed by brackets [] replaced by your own identifying information:
  22. * "Portions Copyrighted [year] [name of copyright owner]"
  23. *
  24. * If you wish your version of this file to be governed by only the CDDL or only
  25. * the GPL Version 2, indicate your decision by adding "[Contributor] elects to
  26. * include this software in this distribution under the [CDDL or GPL Version 2]
  27. * license." If you do not indicate a single choice of license, a recipient has
  28. * the option to distribute your version of this file under either the CDDL, the
  29. * GPL Version 2 or to extend the choice of license to its licensees as provided
  30. * above. However, if you add GPL Version 2 code and therefore, elected the GPL
  31. * Version 2 license, then the option applies only if the new code is made
  32. * subject to such option by the copyright holder.
  33. *
  34. * Contributor(s):
  35. *
  36. * Portions Copyrighted 2011 Sun Microsystems, Inc.
  37. */
  38. package org.netbeans.modules.maven.indexer;
  39. import java.io.ByteArrayInputStream;
  40. import java.io.ByteArrayOutputStream;
  41. import java.io.DataInput;
  42. import java.io.DataInputStream;
  43. import java.io.File;
  44. import java.io.FileInputStream;
  45. import java.io.IOException;
  46. import java.io.InputStream;
  47. import java.nio.charset.Charset;
  48. import java.util.Arrays;
  49. import java.util.Collection;
  50. import java.util.Collections;
  51. import java.util.Enumeration;
  52. import java.util.HashMap;
  53. import java.util.List;
  54. import java.util.Map;
  55. import java.util.Set;
  56. import java.util.TreeMap;
  57. import java.util.TreeSet;
  58. import java.util.jar.JarEntry;
  59. import java.util.jar.JarFile;
  60. import java.util.logging.Level;
  61. import java.util.logging.Logger;
  62. import java.util.zip.CRC32;
  63. import org.apache.lucene.document.Document;
  64. import org.apache.lucene.document.Field.Index;
  65. import org.apache.lucene.document.Field.Store;
  66. import org.apache.lucene.search.IndexSearcher;
  67. import org.apache.lucene.search.Query;
  68. import org.apache.lucene.search.ScoreDoc;
  69. import org.apache.lucene.search.TopScoreDocCollector;
  70. import org.apache.maven.index.ArtifactContext;
  71. import org.apache.maven.index.ArtifactInfo;
  72. import org.apache.maven.index.Field;
  73. import org.apache.maven.index.IndexerField;
  74. import org.apache.maven.index.IndexerFieldVersion;
  75. import org.apache.maven.index.NexusIndexer;
  76. import org.apache.maven.index.context.IndexUtils;
  77. import org.apache.maven.index.context.IndexingContext;
  78. import org.apache.maven.index.creator.AbstractIndexCreator;
  79. import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator;
  80. import org.apache.maven.index.expr.StringSearchExpression;
  81. import org.codehaus.plexus.util.Base64;
  82. import org.netbeans.modules.maven.indexer.api.NBVersionInfo;
  83. import org.netbeans.modules.maven.indexer.api.RepositoryQueries.ClassUsage;
  84. import org.openide.filesystems.FileUtil;
  85. import org.openide.util.Utilities;
  86. /**
  87. * Scans classes in (local) JARs for their Java dependencies.
  88. */
  89. class ClassDependencyIndexCreator extends AbstractIndexCreator {
  90. private static final Logger LOG = Logger.getLogger(ClassDependencyIndexCreator.class.getName());
  91. private static final String NB_DEPENDENCY_CLASSES = "nbdc";
  92. private static final IndexerField FLD_NB_DEPENDENCY_CLASS = new IndexerField(new Field(null, "urn:NbClassDependenciesIndexCreator", NB_DEPENDENCY_CLASSES, "Java dependencies"), IndexerFieldVersion.V3, NB_DEPENDENCY_CLASSES, "Java dependencies", Store.YES, Index.ANALYZED);
  /**
   * Creates the index creator, passing its own class name and the id of
   * {@link MinimalArtifactInfoIndexCreator} to the superclass
   * (presumably as this creator's id and the creators it depends on; confirm against
   * {@code AbstractIndexCreator}).
   */
  ClassDependencyIndexCreator() {
    super(
        ClassDependencyIndexCreator.class.getName(),
        Arrays.asList(MinimalArtifactInfoIndexCreator.ID));
  }
  96. // XXX should rather be Map<ArtifactInfo,...> so we do not rely on interleaving of populateArtifactInfo vs. updateDocument
  97. /** class/in/this/Jar -> [foreign/Class, other/foreign/Nested$Class] */
  98. private Map<String,Set<String>> classDeps;
  99. @Override public void populateArtifactInfo(ArtifactContext context) throws IOException {
  100. classDeps = null;
  101. ArtifactInfo ai = context.getArtifactInfo();
  102. if (ai.classifier != null) {
  103. return;
  104. }
  105. if ("pom".equals(ai.packaging) || ai.fextension.endsWith(".lastUpdated")) {
  106. return;
  107. }
  108. File jar = context.getArtifact();
  109. if (jar == null || !jar.isFile()) {
  110. LOG.log(Level.FINER, "no artifact for {0}", ai); // not a big deal, maybe just *.pom (or *.pom + *.nbm) here
  111. return;
  112. }
  113. if (!ai.packaging.equals("jar") && !isArchiveFile(jar)) {
  114. LOG.log(Level.FINE, "skipping artifact {0} with unrecognized packaging based on {1}", new Object[] {ai, jar});
  115. return;
  116. }
  117. LOG.log(Level.FINER, "reading {0}", jar);
  118. Map<String, byte[]> classfiles = read(jar);
  119. classDeps = new HashMap<String, Set<String>>();
  120. Set<String> classes = classfiles.keySet();
  121. for (Map.Entry<String, byte[]> entry : classfiles.entrySet()) {
  122. addDependenciesToMap(entry.getKey(), entry.getValue(), classDeps, classes, jar);
  123. }
  124. }
  125. // adapted from FileUtil, since we do not want to have to use FileObject's here
  126. private static boolean isArchiveFile(File jar) throws IOException {
  127. InputStream in = new FileInputStream(jar);
  128. try {
  129. byte[] buffer = new byte[4];
  130. return in.read(buffer, 0, 4) == 4 && (Arrays.equals(ZIP_HEADER_1, buffer) || Arrays.equals(ZIP_HEADER_2, buffer));
  131. } finally {
  132. in.close();
  133. }
  134. }
  135. private static byte[] ZIP_HEADER_1 = {80, 75, 3, 4};
  136. private static byte[] ZIP_HEADER_2 = {80, 75, 5, 6};
  137. @Override public boolean updateArtifactInfo(Document document, ArtifactInfo artifactInfo) {
  138. return false;
  139. }
  140. @Override public void updateDocument(ArtifactInfo ai, Document doc) {
  141. if (classDeps == null || classDeps.isEmpty()) {
  142. return;
  143. }
  144. if (ai.classNames == null) {
  145. // Might be *.hpi, *.war, etc. - so JarFileContentsIndexCreator ignores it (and our results would anyway be wrong due to WEB-INF/classes/ prefix)
  146. LOG.log(Level.FINE, "no class names in index for {0}; therefore cannot store class usages", ai);
  147. return;
  148. }
  149. StringBuilder b = new StringBuilder();
  150. String[] classNamesSplit = ai.classNames.split("\n");
  151. for (String referrerTopLevel : classNamesSplit) {
  152. Set<String> referees = classDeps.remove(referrerTopLevel.substring(1));
  153. if (referees != null) {
  154. for (String referee : referees) {
  155. b.append(crc32base64(referee));
  156. b.append(' ');
  157. }
  158. }
  159. b.append(' ');
  160. }
  161. if (!classDeps.isEmpty()) {
  162. // E.g. findbugs-1.2.0.jar has TigerSubstitutes.class, TigerSubstitutesTest$Foo.class, etc., but no TigerSubstitutesTest.class (?)
  163. // Or guice-3.0-rc2.jar has e.g. $Transformer.class with no source equivalent.
  164. LOG.log(Level.FINE, "found dependencies for {0} from classes {1} not among {2}", new Object[] {ai, classDeps.keySet(), Arrays.asList(classNamesSplit)});
  165. }
  166. LOG.log(Level.FINER, "Class dependencies index field: {0}", b);
  167. // XXX is it possible to _store_ something more compact (binary) using a custom tokenizer?
  168. // seems like DefaultIndexingContext hardcodes NexusAnalyzer
  169. doc.add(FLD_NB_DEPENDENCY_CLASS.toField(b.toString()));
  170. }
  171. static void search(String className, NexusIndexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException {
  172. String searchString = crc32base64(className.replace('.', '/'));
  173. Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString));
  174. TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, true);
  175. for (IndexingContext context : contexts) {
  176. IndexSearcher searcher = context.acquireIndexSearcher();
  177. try {
  178. searcher.search(refClassQuery, collector);
  179. ScoreDoc[] hits = collector.topDocs().scoreDocs;
  180. LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length});
  181. for (ScoreDoc hit : hits) {
  182. int docId = hit.doc;
  183. Document d = searcher.doc(docId);
  184. String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES);
  185. LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue});
  186. Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES));
  187. if (!refClasses.isEmpty()) {
  188. ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context);
  189. if (ai != null) {
  190. ai.repository = context.getRepositoryId();
  191. List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai));
  192. if (!version.isEmpty()) {
  193. results.add(new ClassUsage(version.get(0), refClasses));
  194. }
  195. }
  196. }
  197. }
  198. } finally {
  199. context.releaseIndexSearcher(searcher);
  200. }
  201. }
  202. }
  203. private static Set<String> parseField(String refereeCRC, String field, String referrersNL) {
  204. Set<String> referrers = new TreeSet<String>();
  205. int p = 0;
  206. for (String referrer : referrersNL.split("\n")) {
  207. while (true) {
  208. if (field.charAt(p) == ' ') {
  209. p++;
  210. break;
  211. }
  212. if (field.substring(p, p + 6).equals(refereeCRC)) {
  213. referrers.add(referrer.substring(1).replace('/', '.'));
  214. }
  215. p += 7;
  216. }
  217. }
  218. return referrers;
  219. }
  220. /**
  221. * @param referrer a referring class, as {@code pkg/Outer$Inner}
  222. * @param data its bytecode
  223. * @param depsMap map from referring outer classes (as {@code pkg/Outer}) to referred-to classes (as {@code pkg/Outer$Inner})
  224. * @param siblings other referring classes in the same artifact (including this one), as {@code pkg/Outer$Inner}
  225. * @param jar the jar file, for diagnostics
  226. */
  227. private static void addDependenciesToMap(String referrer, byte[] data, Map<String, Set<String>> depsMap, Set<String> siblings, File jar) throws IOException {
  228. ClassLoader jre = ClassLoader.getSystemClassLoader().getParent();
  229. int shell = referrer.indexOf('$', referrer.lastIndexOf('/') + 1);
  230. String referrerTopLevel = shell == -1 ? referrer : referrer.substring(0, shell);
  231. for (String referee : dependencies(data, referrer, jar)) {
  232. if (siblings.contains(referee)) {
  233. continue; // in same JAR, not interesting
  234. }
  235. try {
  236. jre.loadClass(referee.replace('/', '.')); // XXX ought to cache this result
  237. continue; // in JRE, not interesting
  238. } catch (ClassNotFoundException x) {
  239. }
  240. Set<String> referees = depsMap.get(referrerTopLevel);
  241. if (referees == null) {
  242. referees = new TreeSet<String>();
  243. depsMap.put(referrerTopLevel, referees);
  244. }
  245. referees.add(referee);
  246. }
  247. }
  248. static Map<String,byte[]> read(File jar) throws IOException {
  249. JarFile jf = new JarFile(jar, false);
  250. try {
  251. Map<String, byte[]> classfiles = new TreeMap<String, byte[]>();
  252. Enumeration<JarEntry> e = jf.entries();
  253. while (e.hasMoreElements()) {
  254. JarEntry entry = e.nextElement();
  255. String name = entry.getName();
  256. if (!name.endsWith(".class")) {
  257. continue;
  258. }
  259. String clazz = name.substring(0, name.length() - 6);
  260. ByteArrayOutputStream baos = new ByteArrayOutputStream(Math.max((int) entry.getSize(), 0));
  261. InputStream is = jf.getInputStream(entry);
  262. try {
  263. FileUtil.copy(is, baos);
  264. } finally {
  265. is.close();
  266. }
  267. classfiles.put(clazz, baos.toByteArray());
  268. }
  269. return classfiles;
  270. } catch (SecurityException x) {
  271. throw new IOException(x);
  272. } finally {
  273. jf.close();
  274. }
  275. }
  276. // adapted from org.netbeans.nbbuild.VerifyClassLinkage
  277. private static Set<String> dependencies(byte[] data, String clazz, File jar) throws IOException {
  278. Set<String> result = new TreeSet<String>();
  279. DataInput input = new DataInputStream(new ByteArrayInputStream(data));
  280. skip(input, 8); // magic, minor_version, major_version
  281. int size = input.readUnsignedShort() - 1; // constantPoolCount
  282. String[] utf8Strings = new String[size];
  283. boolean[] isClassName = new boolean[size];
  284. boolean[] isDescriptor = new boolean[size];
  285. for (int i = 0; i < size; i++) {
  286. byte tag = input.readByte();
  287. switch (tag) {
  288. case 1: // CONSTANT_Utf8
  289. utf8Strings[i] = input.readUTF();
  290. break;
  291. case 7: // CONSTANT_Class
  292. int index = input.readUnsignedShort() - 1;
  293. if (index >= size) {
  294. throw new IOException("@" + i + ": CONSTANT_Class_info.name_index " + index + " too big for size of pool " + size);
  295. }
  296. //LOG.finest("Class reference at " + index);
  297. isClassName[index] = true;
  298. break;
  299. case 3: // CONSTANT_Integer
  300. case 4: // CONSTANT_Float
  301. case 9: // CONSTANT_Fieldref
  302. case 10: // CONSTANT_Methodref
  303. case 11: // CONSTANT_InterfaceMethodref
  304. skip(input, 4);
  305. break;
  306. case 12: // CONSTANT_NameAndType
  307. skip(input, 2);
  308. index = input.readUnsignedShort() - 1;
  309. if (index >= size || index < 0) {
  310. throw new IOException("@" + i + ": CONSTANT_NameAndType_info.descriptor_index " + index + " too big for size of pool " + size);
  311. }
  312. isDescriptor[index] = true;
  313. break;
  314. case 8: // CONSTANT_String
  315. skip(input, 2);
  316. break;
  317. case 5: // CONSTANT_Long
  318. case 6: // CONSTANT_Double
  319. skip(input, 8);
  320. i++; // weirdness in spec
  321. break;
  322. default:
  323. // E.g. com/ibm/icu/icu4j/2.6.1/icu4j-2.6.1.jar!/com/ibm/icu/impl/data/LocaleElements_zh__PINYIN.class is corrupt even acc. to javap.
  324. LOG.log(Level.FINE, "jar:{4}!/{3}.class: Unrecognized constant pool tag {0} at index {1}; running UTF-8 strings: {2}", new Object[] {tag, i, Arrays.asList(utf8Strings), clazz, Utilities.toURI(jar)});
  325. continue;
  326. }
  327. }
  328. //LOG.finest("UTF-8 strings: " + Arrays.asList(utf8Strings));
  329. for (int i = 0; i < size; i++) {
  330. String s = utf8Strings[i];
  331. if (s != null) {
  332. if (isClassName[i]) {
  333. while (s.charAt(0) == '[') {
  334. // array type
  335. s = s.substring(1);
  336. }
  337. if (s.length() == 1) {
  338. // primitive
  339. continue;
  340. }
  341. String c;
  342. if (s.charAt(s.length() - 1) == ';' && s.charAt(0) == 'L') {
  343. // Uncommon but seems sometimes this happens.
  344. c = s.substring(1, s.length() - 1);
  345. } else {
  346. c = s;
  347. }
  348. result.add(c);
  349. } else if (isDescriptor[i]) {
  350. int idx = 0;
  351. while ((idx = s.indexOf('L', idx)) != -1) {
  352. int semi = s.indexOf(';', idx);
  353. if (semi == -1) {
  354. throw new IOException("Invalid type or descriptor: " + s);
  355. }
  356. result.add(s.substring(idx + 1, semi));
  357. idx = semi;
  358. }
  359. }
  360. }
  361. }
  362. return result;
  363. }
  364. private static void skip(DataInput input, int bytes) throws IOException {
  365. int skipped = input.skipBytes(bytes);
  366. if (skipped != bytes) {
  367. throw new IOException("Truncated class file");
  368. }
  369. }
  370. @Override public Collection<IndexerField> getIndexerFields() {
  371. return Arrays.asList(FLD_NB_DEPENDENCY_CLASS);
  372. }
  373. /**
  374. * @param s a string, such as a class name
  375. * @return the CRC-32 of its UTF-8 representation, as big-endian Base-64 without padding (so six chars), with _ for + (safer for Lucene)
  376. */
  377. static String crc32base64(String s) {
  378. crc.reset();
  379. crc.update(s.getBytes(UTF8));
  380. long v = crc.getValue();
  381. byte[] b64 = Base64.encodeBase64(new byte[] {(byte) (v >> 24 & 0xFF), (byte) (v >> 16 & 0xFF), (byte) (v >> 8 & 0xFF), (byte) (v & 0xFF)});
  382. assert b64.length == 8;
  383. assert b64[6] == '=';
  384. assert b64[7] == '=';
  385. return new String(b64, 0, 6, LATIN1).replace('+', '_');
  386. }
  387. private static final CRC32 crc = new CRC32();
  388. private static final Charset UTF8 = Charset.forName("UTF-8");
  389. private static final Charset LATIN1 = Charset.forName("ISO-8859-1");
  390. }