PageRenderTime 48ms CodeModel.GetById 38ms app.highlight 9ms RepoModel.GetById 0ms app.codeStats 0ms

/branches/branch-0.9.9/src/org/apache/lucene/index/IndexGate.java

http://luke.googlecode.com/
Java | 212 lines | 187 code | 17 blank | 8 comment | 22 complexity | 298a4a45fdd7e09c093f7c5d3991f8f8 MD5 | raw file
  1package org.apache.lucene.index;
  2
  3import java.io.IOException;
  4import java.io.PrintStream;
  5import java.lang.reflect.Field;
  6import java.util.ArrayList;
  7import java.util.HashMap;
  8import java.util.List;
  9
 10import org.apache.lucene.store.Directory;
 11import org.apache.lucene.store.IndexInput;
 12import org.getopt.luke.KeepAllIndexDeletionPolicy;
 13
 14/**
 15 * This class allows us to peek at various Lucene internals, not available
 16 * through public APIs (for good reasons, but inquiring minds want to know ...).
 17 * 
 18 * @author ab
 19 *
 20 */
 21public class IndexGate {
 22  static Field deletable = null;
 23  static Field hasChanges = null;
 24  static PrintStream infoStream = IndexWriter.getDefaultInfoStream();
 25  static HashMap<String, String> knownExtensions = new HashMap<String, String>();
 26  
 27  static {
 28    knownExtensions.put(IndexFileNames.COMPOUND_FILE_EXTENSION, "compound file with various index data");
 29    knownExtensions.put(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, "compound shared doc store file");
 30    knownExtensions.put(IndexFileNames.DELETABLE, "list of deletable files (pre-lockless index)");
 31    knownExtensions.put(IndexFileNames.DELETES_EXTENSION, "list of deleted documents");
 32    knownExtensions.put(IndexFileNames.FIELD_INFOS_EXTENSION, "field names / infos");
 33    knownExtensions.put(IndexFileNames.FIELDS_EXTENSION, "stored fields data");
 34    knownExtensions.put(IndexFileNames.FIELDS_INDEX_EXTENSION, "stored fields index data");
 35    knownExtensions.put(IndexFileNames.FREQ_EXTENSION, "term frequency postings data");
 36    knownExtensions.put(IndexFileNames.GEN_EXTENSION, "generation number - global file");
 37    knownExtensions.put(IndexFileNames.NORMS_EXTENSION, "norms data for all fields");
 38    knownExtensions.put(IndexFileNames.PLAIN_NORMS_EXTENSION, "per-field norms data");
 39    knownExtensions.put(IndexFileNames.PROX_EXTENSION, "term position postings data");
 40    knownExtensions.put(IndexFileNames.SEGMENTS, "per-commit list of segments");
 41    knownExtensions.put(IndexFileNames.SEPARATE_NORMS_EXTENSION, "separate per-field norms data");
 42    knownExtensions.put(IndexFileNames.TERMS_EXTENSION, "terms dictionary");
 43    knownExtensions.put(IndexFileNames.TERMS_INDEX_EXTENSION, "terms dictionary index");
 44    knownExtensions.put(IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, "term vectors document data");
 45    knownExtensions.put(IndexFileNames.VECTORS_FIELDS_EXTENSION, "term vector field data");
 46    knownExtensions.put(IndexFileNames.VECTORS_INDEX_EXTENSION, "term vectors index");
 47
 48    try {
 49      deletable = IndexFileDeleter.class.getDeclaredField("deletable");
 50      deletable.setAccessible(true);
 51      hasChanges = IndexReader.class.getDeclaredField("hasChanges");
 52      hasChanges.setAccessible(true);
 53    } catch (Exception e) {
 54      e.printStackTrace();
 55    }
 56  }
 57  
 58  public static String getFileFunction(String file) {
 59    if (file == null || file.trim().length() == 0) return file;
 60    String res = null;
 61    file = file.trim();
 62    int idx = file.indexOf('.');
 63    String suffix = null;
 64    if (idx != -1) {
 65      suffix = file.substring(idx + 1);
 66    }
 67    if (suffix == null) {
 68      if (file.startsWith("segments_")) {
 69        return knownExtensions.get(IndexFileNames.SEGMENTS);
 70      }
 71    } else {
 72      res = knownExtensions.get(suffix);
 73      if (res != null) {
 74        return res;
 75      }
 76      // perhaps per-field norms?
 77      if (suffix.length() == 2) {
 78        res = knownExtensions.get(suffix.substring(0, 1));
 79      }
 80    }
 81    return res;
 82  }
 83  
 84  public static int getIndexFormat(final Directory dir) throws Exception {
 85    SegmentInfos.FindSegmentsFile fsf = new SegmentInfos.FindSegmentsFile(dir) {
 86
 87      protected Object doBody(String segmentsFile) throws CorruptIndexException,
 88          IOException {
 89        IndexInput in = dir.openInput(segmentsFile);
 90        Integer indexFormat = new Integer(in.readInt());
 91        in.close();
 92        return indexFormat;
 93      }
 94    };
 95    Integer indexFormat = (Integer)fsf.run();
 96    return indexFormat.intValue();
 97  }
 98  
 99  public static int getCurrentIndexFormat() {
100    return SegmentInfos.CURRENT_FORMAT;
101  }
102  
103  public static FormatDetails getFormatDetails(int format) {
104    FormatDetails res = new FormatDetails();
105    switch (format) {
106    case SegmentInfos.FORMAT:
107      res.capabilities = "old plain";
108      res.genericName = "Lucene Pre-2.1";
109      break;
110    case SegmentInfos.FORMAT_LOCKLESS:
111      res.capabilities = "lock-less";
112      res.genericName = "Lucene 2.1";
113      break;
114    case SegmentInfos.FORMAT_SINGLE_NORM_FILE:
115      res.capabilities = "lock-less, single norms file";
116      res.genericName = "Lucene 2.2";
117      break;
118    case SegmentInfos.FORMAT_SHARED_DOC_STORE:
119      res.capabilities = "lock-less, single norms file, shared doc store";
120      res.genericName = "Lucene 2.3";
121      break;
122    case SegmentInfos.FORMAT_CHECKSUM:
123      res.capabilities = "lock-less, single norms, shared doc store, checksum";
124      res.genericName = "Lucene 2.4";
125      break;
126    case SegmentInfos.FORMAT_DEL_COUNT:
127      res.capabilities = "lock-less, single norms, shared doc store, checksum, del count";
128      res.genericName = "Lucene 2.4";
129      break;
130    case SegmentInfos.FORMAT_HAS_PROX:
131      res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf";
132      res.genericName = "Lucene 2.4";
133      break;
134    case SegmentInfos.FORMAT_USER_DATA:
135      res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf, user data";
136      res.genericName = "Lucene 2.9-dev";
137      break;
138    case SegmentInfos.FORMAT_DIAGNOSTICS:
139      res.capabilities = "lock-less, single norms, shared doc store, checksum, del count, omitTf, user data, diagnostics";
140      res.genericName = "Lucene 2.9";
141      break;
142    default:
143      res.capabilities = "unknown";
144      res.genericName = "Lucene 1.3 or prior";
145      break;
146    }
147    if (SegmentInfos.CURRENT_FORMAT > format) {
148      res.capabilities = "(WARNING: newer version of Lucene that this tool)";
149      res.genericName = "UNKNOWN";
150    }
151    return res;
152  }
153  
154  public static boolean preferCompoundFormat(Directory dir) throws Exception {
155    SegmentInfos infos = new SegmentInfos();
156    infos.read(dir);
157    int compound = 0, nonCompound = 0;
158    for (int i = 0; i < infos.size(); i++) {
159      if (((SegmentInfo)infos.get(i)).getUseCompoundFile()) {
160        compound++;
161      } else {
162        nonCompound++;
163      }
164    }
165    return compound > nonCompound;
166  }
167  
168  public static void deletePendingFiles(Directory dir, IndexDeletionPolicy policy) throws Exception {
169    SegmentInfos infos = new SegmentInfos();
170    infos.read(dir);
171    IndexFileDeleter deleter = new IndexFileDeleter(dir, policy, infos, infoStream, null);
172    deleter.close();
173  }
174  
175  public static List<String> getDeletableFiles(Directory dir) throws Exception {
176    SegmentInfos infos = new SegmentInfos();
177    infos.read(dir);
178    IndexFileDeleter deleter = new IndexFileDeleter(dir, new KeepAllIndexDeletionPolicy(), infos, infoStream, null);
179    return (List<String>)deletable.get(deleter);
180  }
181  
182  public static List<String> getIndexFiles(Directory dir) throws Exception {
183    SegmentInfos infos = new SegmentInfos();
184    infos.read(dir);
185    ArrayList<String> names = new ArrayList<String>();
186    for (int i = 0; i < infos.size(); i++) {
187      SegmentInfo info = (SegmentInfo)infos.info(i);
188      names.addAll(info.files());
189      names.add(info.getDelFileName());
190    }
191    names.add(infos.getCurrentSegmentFileName());
192    names.add(IndexFileNames.SEGMENTS_GEN);
193    return names;
194  }
195  
196  public static class FormatDetails {
197    public String genericName = "N/A";
198    public String capabilities = "N/A";
199  }
200  
201  public static boolean hasChanges(IndexReader ir) {
202    if (ir == null) {
203      return false;
204    }
205    try {
206      return hasChanges.getBoolean(ir);
207    } catch (Exception e) {
208      return false;
209    }
210  }
211
212}