/branches/branch-0.9.9/src/org/apache/lucene/index/IndexGate.java

http://luke.googlecode.com/ · Java · 212 lines · 187 code · 17 blank · 8 comment · 22 complexity · 298a4a45fdd7e09c093f7c5d3991f8f8 MD5 · raw file

  1. package org.apache.lucene.index;
  2. import java.io.IOException;
  3. import java.io.PrintStream;
  4. import java.lang.reflect.Field;
  5. import java.util.ArrayList;
  6. import java.util.HashMap;
  7. import java.util.List;
  8. import org.apache.lucene.store.Directory;
  9. import org.apache.lucene.store.IndexInput;
  10. import org.getopt.luke.KeepAllIndexDeletionPolicy;
  11. /**
  12. * This class allows us to peek at various Lucene internals, not available
  13. * through public APIs (for good reasons, but inquiring minds want to know ...).
  14. *
  15. * @author ab
  16. *
  17. */
  18. public class IndexGate {
  19. static Field deletable = null;
  20. static Field hasChanges = null;
  21. static PrintStream infoStream = IndexWriter.getDefaultInfoStream();
  22. static HashMap<String, String> knownExtensions = new HashMap<String, String>();
  23. static {
  24. knownExtensions.put(IndexFileNames.COMPOUND_FILE_EXTENSION, "compound file with various index data");
  25. knownExtensions.put(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, "compound shared doc store file");
  26. knownExtensions.put(IndexFileNames.DELETABLE, "list of deletable files (pre-lockless index)");
  27. knownExtensions.put(IndexFileNames.DELETES_EXTENSION, "list of deleted documents");
  28. knownExtensions.put(IndexFileNames.FIELD_INFOS_EXTENSION, "field names / infos");
  29. knownExtensions.put(IndexFileNames.FIELDS_EXTENSION, "stored fields data");
  30. knownExtensions.put(IndexFileNames.FIELDS_INDEX_EXTENSION, "stored fields index data");
  31. knownExtensions.put(IndexFileNames.FREQ_EXTENSION, "term frequency postings data");
  32. knownExtensions.put(IndexFileNames.GEN_EXTENSION, "generation number - global file");
  33. knownExtensions.put(IndexFileNames.NORMS_EXTENSION, "norms data for all fields");
  34. knownExtensions.put(IndexFileNames.PLAIN_NORMS_EXTENSION, "per-field norms data");
  35. knownExtensions.put(IndexFileNames.PROX_EXTENSION, "term position postings data");
  36. knownExtensions.put(IndexFileNames.SEGMENTS, "per-commit list of segments");
  37. knownExtensions.put(IndexFileNames.SEPARATE_NORMS_EXTENSION, "separate per-field norms data");
  38. knownExtensions.put(IndexFileNames.TERMS_EXTENSION, "terms dictionary");
  39. knownExtensions.put(IndexFileNames.TERMS_INDEX_EXTENSION, "terms dictionary index");
  40. knownExtensions.put(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, "term vectors document data");
  41. knownExtensions.put(IndexFileNames.VECTORS_FIELDS_EXTENSION, "term vector field data");
  42. knownExtensions.put(IndexFileNames.VECTORS_INDEX_EXTENSION, "term vectors index");
  43. try {
  44. deletable = IndexFileDeleter.class.getDeclaredField("deletable");
  45. deletable.setAccessible(true);
  46. hasChanges = IndexReader.class.getDeclaredField("hasChanges");
  47. hasChanges.setAccessible(true);
  48. } catch (Exception e) {
  49. e.printStackTrace();
  50. }
  51. }
  52. public static String getFileFunction(String file) {
  53. if (file == null || file.trim().length() == 0) return file;
  54. String res = null;
  55. file = file.trim();
  56. int idx = file.indexOf('.');
  57. String suffix = null;
  58. if (idx != -1) {
  59. suffix = file.substring(idx + 1);
  60. }
  61. if (suffix == null) {
  62. if (file.startsWith("segments_")) {
  63. return knownExtensions.get(IndexFileNames.SEGMENTS);
  64. }
  65. } else {
  66. res = knownExtensions.get(suffix);
  67. if (res != null) {
  68. return res;
  69. }
  70. // perhaps per-field norms?
  71. if (suffix.length() == 2) {
  72. res = knownExtensions.get(suffix.substring(0, 1));
  73. }
  74. }
  75. return res;
  76. }
  77. public static int getIndexFormat(final Directory dir) throws Exception {
  78. SegmentInfos.FindSegmentsFile fsf = new SegmentInfos.FindSegmentsFile(dir) {
  79. protected Object doBody(String segmentsFile) throws CorruptIndexException,
  80. IOException {
  81. IndexInput in = dir.openInput(segmentsFile);
  82. Integer indexFormat = new Integer(in.readInt());
  83. in.close();
  84. return indexFormat;
  85. }
  86. };
  87. Integer indexFormat = (Integer)fsf.run();
  88. return indexFormat.intValue();
  89. }
  90. public static int getCurrentIndexFormat() {
  91. return SegmentInfos.CURRENT_FORMAT;
  92. }
  93. public static FormatDetails getFormatDetails(int format) {
  94. FormatDetails res = new FormatDetails();
  95. switch (format) {
  96. case SegmentInfos.FORMAT:
  97. res.capabilities = "old plain";
  98. res.genericName = "Lucene Pre-2.1";
  99. break;
  100. case SegmentInfos.FORMAT_LOCKLESS:
  101. res.capabilities = "lock-less";
  102. res.genericName = "Lucene 2.1";
  103. break;
  104. case SegmentInfos.FORMAT_SINGLE_NORM_FILE:
  105. res.capabilities = "lock-less, single norms file";
  106. res.genericName = "Lucene 2.2";
  107. break;
  108. case SegmentInfos.FORMAT_SHARED_DOC_STORE:
  109. res.capabilities = "lock-less, single norms file, shared doc store";
  110. res.genericName = "Lucene 2.3";
  111. break;
  112. case SegmentInfos.FORMAT_CHECKSUM:
  113. res.capabilities = "lock-less, single norms, shared doc store, checksum";
  114. res.genericName = "Lucene 2.4";
  115. break;
  116. case SegmentInfos.FORMAT_DEL_COUNT:
  117. res.capabilities = "lock-less, single norms, shared doc store, checksum, del count";
  118. res.genericName = "Lucene 2.4";
  119. break;
  120. case SegmentInfos.FORMAT_HAS_PROX:
  121. res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf";
  122. res.genericName = "Lucene 2.4";
  123. break;
  124. case SegmentInfos.FORMAT_USER_DATA:
  125. res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf, user data";
  126. res.genericName = "Lucene 2.9-dev";
  127. break;
  128. case SegmentInfos.FORMAT_DIAGNOSTICS:
  129. res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf, user data, diagnostics";
  130. res.genericName = "Lucene 2.9";
  131. break;
  132. default:
  133. res.capabilities = "unknown";
  134. res.genericName = "Lucene 1.3 or prior";
  135. break;
  136. }
  137. if (SegmentInfos.CURRENT_FORMAT > format) {
  138. res.capabilities = "(WARNING: newer version of Lucene that this tool)";
  139. res.genericName = "UNKNOWN";
  140. }
  141. return res;
  142. }
  143. public static boolean preferCompoundFormat(Directory dir) throws Exception {
  144. SegmentInfos infos = new SegmentInfos();
  145. infos.read(dir);
  146. int compound = 0, nonCompound = 0;
  147. for (int i = 0; i < infos.size(); i++) {
  148. if (((SegmentInfo)infos.get(i)).getUseCompoundFile()) {
  149. compound++;
  150. } else {
  151. nonCompound++;
  152. }
  153. }
  154. return compound > nonCompound;
  155. }
  156. public static void deletePendingFiles(Directory dir, IndexDeletionPolicy policy) throws Exception {
  157. SegmentInfos infos = new SegmentInfos();
  158. infos.read(dir);
  159. IndexFileDeleter deleter = new IndexFileDeleter(dir, policy, infos, infoStream, null);
  160. deleter.close();
  161. }
  162. public static List<String> getDeletableFiles(Directory dir) throws Exception {
  163. SegmentInfos infos = new SegmentInfos();
  164. infos.read(dir);
  165. IndexFileDeleter deleter = new IndexFileDeleter(dir, new KeepAllIndexDeletionPolicy(), infos, infoStream, null);
  166. return (List<String>)deletable.get(deleter);
  167. }
  168. public static List<String> getIndexFiles(Directory dir) throws Exception {
  169. SegmentInfos infos = new SegmentInfos();
  170. infos.read(dir);
  171. ArrayList<String> names = new ArrayList<String>();
  172. for (int i = 0; i < infos.size(); i++) {
  173. SegmentInfo info = (SegmentInfo)infos.info(i);
  174. names.addAll(info.files());
  175. names.add(info.getDelFileName());
  176. }
  177. names.add(infos.getCurrentSegmentFileName());
  178. names.add(IndexFileNames.SEGMENTS_GEN);
  179. return names;
  180. }
  181. public static class FormatDetails {
  182. public String genericName = "N/A";
  183. public String capabilities = "N/A";
  184. }
  185. public static boolean hasChanges(IndexReader ir) {
  186. if (ir == null) {
  187. return false;
  188. }
  189. try {
  190. return hasChanges.getBoolean(ir);
  191. } catch (Exception e) {
  192. return false;
  193. }
  194. }
  195. }