PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/core/Index/CompoundFileWriter.cs

https://bitbucket.org/jmblair/lucene.net
C# | 275 lines | 150 code | 36 blank | 89 comment | 22 complexity | 44ebcd2106d182be772447ba8e60ecf6 MD5 | raw file
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. using System;
  18. using System.Collections.Generic;
  19. using Directory = Lucene.Net.Store.Directory;
  20. using IndexInput = Lucene.Net.Store.IndexInput;
  21. using IndexOutput = Lucene.Net.Store.IndexOutput;
  22. namespace Lucene.Net.Index
  23. {
  /// <summary>
  /// Combines multiple files into a single compound file.
  /// <para>The file format, in the order it is written:</para>
  /// <list type="bullet">
  /// <item>VInt fileCount</item>
  /// <item>{Directory}: fileCount entries, each consisting of a
  /// <c>long dataOffset</c> followed by a <c>String fileName</c></item>
  /// <item>{File Data}: fileCount entries with the raw data of the
  /// corresponding file</item>
  /// </list>
  /// <para>
  /// The fileCount integer indicates how many files are contained in this
  /// compound file. The {Directory} that follows has that many entries. Each
  /// directory entry contains a long pointer to the start of that file's data
  /// section, and a String with that file's name.
  /// </para>
  /// <para>
  /// Usage: register every source file with <c>AddFile</c>, then call
  /// <c>Dispose</c> exactly once to perform the merge.
  /// </para>
  /// </summary>
  public sealed class CompoundFileWriter : IDisposable
  {
      /// <summary>Bookkeeping record for one source file added to the writer.</summary>
      private sealed class FileEntry
      {
          /// <summary>Name of the source file.</summary>
          internal System.String file;

          /// <summary>Temporary holder for the position of this file's directory entry in the output.</summary>
          internal long directoryOffset;

          /// <summary>Temporary holder for the position where this file's data section starts in the output.</summary>
          internal long dataOffset;
      }

      private readonly Directory directory;
      private readonly String fileName;

      // Names already registered; used to reject duplicates in AddFile.
      private readonly HashSet<string> ids;

      // Registered files, in the order they were added (and will be written).
      private readonly LinkedList<FileEntry> entries;

      // Set once Dispose() has started the merge; the writer is single-use.
      private bool merged = false;

      // Optional progress/abort hook; null when created via the public constructor.
      private readonly SegmentMerger.CheckAbort checkAbort;

      /// <summary>
      /// Creates the compound stream in the specified file. The file name is
      /// the entire name (no extensions are added).
      /// </summary>
      /// <param name="dir">Directory in which the compound file is created and the source files are read.</param>
      /// <param name="name">Full name of the compound file.</param>
      /// <exception cref="ArgumentNullException">If <paramref name="dir"/> or <paramref name="name"/> is null.</exception>
      public CompoundFileWriter(Directory dir, System.String name) : this(dir, name, null)
      {
      }

      /// <summary>
      /// Creates the compound stream in the specified file, optionally
      /// reporting copy progress to <paramref name="checkAbort"/>.
      /// </summary>
      /// <param name="dir">Directory in which the compound file is created and the source files are read.</param>
      /// <param name="name">Full name of the compound file.</param>
      /// <param name="checkAbort">Hook invoked while copying data; may be null.</param>
      /// <exception cref="ArgumentNullException">If <paramref name="dir"/> or <paramref name="name"/> is null.</exception>
      internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
      {
          if (dir == null)
          {
              throw new ArgumentNullException("dir");
          }
          if (name == null)
          {
              throw new ArgumentNullException("name");
          }

          this.checkAbort = checkAbort;
          directory = dir;
          fileName = name;
          ids = new HashSet<string>();
          entries = new LinkedList<FileEntry>();
      }

      /// <summary>Returns the directory of the compound file.</summary>
      public Directory Directory
      {
          get { return directory; }
      }

      /// <summary>Returns the name of the compound file.</summary>
      public string Name
      {
          get { return fileName; }
      }

      /// <summary>
      /// Adds a source file. <paramref name="file"/> is the string by which
      /// the sub-stream will be known in the compound stream.
      /// </summary>
      /// <param name="file">Name of the source file inside the writer's directory.</param>
      /// <exception cref="InvalidOperationException">If the merge has already been performed.</exception>
      /// <exception cref="ArgumentNullException">If <paramref name="file"/> is null.</exception>
      /// <exception cref="ArgumentException">If a file with the same name has been added already.</exception>
      public void AddFile(String file)
      {
          if (merged)
          {
              throw new InvalidOperationException("Can't add extensions after merge has been called");
          }
          if (file == null)
          {
              throw new ArgumentNullException("file");
          }

          // HashSet<T>.Add signals a duplicate by returning false; it never
          // throws, so the return value must be checked explicitly.
          if (!ids.Add(file))
          {
              throw new ArgumentException("File " + file + " already added");
          }

          entries.AddLast(new FileEntry { file = file });
      }

      /// <summary>Performs the merge; equivalent to calling <c>Dispose()</c>.</summary>
      [Obsolete("Use Dispose() instead")]
      public void Close()
      {
          Dispose();
      }

      /// <summary>
      /// Merges the files added up to now. All added files are copied
      /// sequentially into the compound stream, after which the directory
      /// entries are patched with the real data offsets. The source files
      /// themselves are not removed by this method.
      /// </summary>
      /// <exception cref="InvalidOperationException">
      /// If the merge has already been performed, or if no file has been
      /// added to this writer.
      /// </exception>
      public void Dispose()
      {
          // Extract into protected method if class ever becomes unsealed

          // TODO: Dispose shouldn't throw exceptions!
          // InvalidOperationException derives from SystemException, so callers
          // that catch SystemException keep working.
          if (merged)
          {
              throw new InvalidOperationException("Merge already performed");
          }
          if (entries.Count == 0)
          {
              throw new InvalidOperationException("No entries to merge have been defined");
          }

          merged = true;

          // open the compound stream
          IndexOutput os = null;
          try
          {
              os = directory.CreateOutput(fileName);

              // Write the number of entries
              os.WriteVInt(entries.Count);

              // Write the directory with all offsets at 0.
              // Remember the positions of directory entries so that we can
              // adjust the offsets later
              long totalSize = 0;
              foreach (FileEntry fe in entries)
              {
                  fe.directoryOffset = os.FilePointer;
                  os.WriteLong(0); // placeholder, patched below
                  os.WriteString(fe.file);
                  totalSize += directory.FileLength(fe.file);
              }

              // Pre-allocate size of file as optimization --
              // this can potentially help IO performance as
              // we write the file and also later during
              // searching. It also uncovers a disk-full
              // situation earlier and hopefully without
              // actually filling disk to 100%:
              long finalLength = totalSize + os.FilePointer;
              os.SetLength(finalLength);

              // Open the files and copy their data into the stream.
              // Remember the locations of each file's data section.
              var buffer = new byte[16384];
              foreach (FileEntry fe in entries)
              {
                  fe.dataOffset = os.FilePointer;
                  CopyFile(fe, os, buffer);
              }

              // Write the data offsets into the directory of the compound stream
              foreach (FileEntry fe in entries)
              {
                  os.Seek(fe.directoryOffset);
                  os.WriteLong(fe.dataOffset);
              }

              System.Diagnostics.Debug.Assert(finalLength == os.Length);

              // Close the output stream. Set the os to null before trying to
              // close so that if an exception occurs during the close, the
              // finally clause below will not attempt to close the stream
              // the second time.
              IndexOutput tmp = os;
              os = null;
              tmp.Close();
          }
          finally
          {
              if (os != null)
              {
                  try
                  {
                      os.Close();
                  }
                  catch (System.IO.IOException)
                  {
                      // Deliberately ignored: we only get here when the try
                      // block already failed, and that original exception is
                      // the one that should reach the caller.
                  }
              }
          }
      }

      /// <summary>
      /// Copies the contents of the given source file into the provided
      /// output stream, using the provided buffer for moving data to reduce
      /// memory allocation.
      /// </summary>
      /// <param name="source">Entry naming the file to copy.</param>
      /// <param name="os">Output stream positioned where the file's data should start.</param>
      /// <param name="buffer">Scratch buffer; its length is the copy chunk size.</param>
      /// <exception cref="System.IO.IOException">
      /// If the number of bytes written does not match the source file length.
      /// </exception>
      private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
      {
          IndexInput isRenamed = null;
          try
          {
              long startPtr = os.FilePointer;

              isRenamed = directory.OpenInput(source.file);
              long length = isRenamed.Length();
              long remainder = length;
              int chunk = buffer.Length;

              while (remainder > 0)
              {
                  var len = (int) Math.Min(chunk, remainder);
                  isRenamed.ReadBytes(buffer, 0, len, false);
                  os.WriteBytes(buffer, len);
                  remainder -= len;
                  if (checkAbort != null)
                  {
                      // Roughly every 2 MB we will check if
                      // it's time to abort
                      checkAbort.Work(80);
                  }
              }

              // Verify that remainder is 0
              if (remainder != 0)
              {
                  throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
              }

              // Verify that the output length diff is equal to original file
              long endPtr = os.FilePointer;
              long diff = endPtr - startPtr;
              if (diff != length)
              {
                  throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
              }
          }
          finally
          {
              // Always release the input, whether or not the copy succeeded.
              if (isRenamed != null)
              {
                  isRenamed.Close();
              }
          }
      }
  }
  239. }