/src/core/Index/CompoundFileWriter.cs
C# | 275 lines | 150 code | 36 blank | 89 comment | 22 complexity | 44ebcd2106d182be772447ba8e60ecf6 MD5 | raw file
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using System.Collections.Generic;
- using Directory = Lucene.Net.Store.Directory;
- using IndexInput = Lucene.Net.Store.IndexInput;
- using IndexOutput = Lucene.Net.Store.IndexOutput;
- namespace Lucene.Net.Index
- {
-
-
- /// <summary> Combines multiple files into a single compound file.
- /// The file format:<br/>
- /// <list type="bullet">
- /// <item>VInt fileCount</item>
- /// <item>{Directory}
- /// fileCount entries with the following structure:</item>
- /// <list type="bullet">
- /// <item>long dataOffset</item>
- /// <item>String fileName</item>
- /// </list>
- /// <item>{File Data}
- /// fileCount entries with the raw data of the corresponding file</item>
- /// </list>
- ///
- /// The fileCount integer indicates how many files are contained in this compound
- /// file. The {directory} that follows has that many entries. Each directory entry
- /// contains a long pointer to the start of this file's data section, and a String
- /// with that file's name.
- /// </summary>
public sealed class CompoundFileWriter : IDisposable
{
    /// <summary>Bookkeeping record for one source file that is copied into the compound file.</summary>
    private sealed class FileEntry
    {
        /// <summary>Name of the source file, as passed to <c>AddFile</c>.</summary>
        internal System.String file;

        /// <summary>Position in the compound stream of this entry's (initially zero) offset slot.</summary>
        internal long directoryOffset;

        /// <summary>Position in the compound stream where this file's data section starts.</summary>
        internal long dataOffset;
    }

    private readonly Directory directory;
    private readonly String fileName;

    // Names registered so far; used only to reject duplicates in AddFile.
    private readonly HashSet<string> ids;

    // Registered files in insertion order; this is the order they are written in.
    private readonly LinkedList<FileEntry> entries;

    // Set once Dispose() has started the merge; no files may be added afterwards.
    private bool merged = false;

    // Optional (may be null); notified after every copied chunk in CopyFile.
    private readonly SegmentMerger.CheckAbort checkAbort;

    /// <summary>Create the compound stream in the specified file. The file name is the
    /// entire name (no extensions are added).
    /// </summary>
    /// <param name="dir">Directory that holds the source files and receives the compound file.</param>
    /// <param name="name">Full name of the compound file to create.</param>
    /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
    public CompoundFileWriter(Directory dir, System.String name) : this(dir, name, null)
    {
    }

    /// <summary>Create the compound stream in the specified file, optionally reporting
    /// copy progress to <c>checkAbort</c>.
    /// </summary>
    /// <param name="dir">Directory that holds the source files and receives the compound file.</param>
    /// <param name="name">Full name of the compound file to create.</param>
    /// <param name="checkAbort">Progress/abort hook invoked while copying; may be null.</param>
    /// <exception cref="ArgumentNullException">if <c>dir</c> or <c>name</c> is null</exception>
    internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
    {
        if (dir == null)
        {
            throw new ArgumentNullException("dir");
        }
        if (name == null)
        {
            throw new ArgumentNullException("name");
        }

        this.checkAbort = checkAbort;
        directory = dir;
        fileName = name;
        ids = new HashSet<string>();
        entries = new LinkedList<FileEntry>();
    }

    /// <summary>Returns the directory of the compound file. </summary>
    public Directory Directory
    {
        get { return directory; }
    }

    /// <summary>Returns the name of the compound file. </summary>
    public string Name
    {
        get { return fileName; }
    }

    /// <summary>Add a source stream. <c>file</c> is the string by which the
    /// sub-stream will be known in the compound stream.
    /// </summary>
    /// <param name="file">Name of the source file to include.</param>
    /// <exception cref="InvalidOperationException">if the merge has already been performed</exception>
    /// <exception cref="ArgumentNullException">if <c>file</c> is null</exception>
    /// <exception cref="ArgumentException">if a file with the same name has been added already</exception>
    public void AddFile(String file)
    {
        if (merged)
        {
            throw new InvalidOperationException("Can't add extensions after merge has been called");
        }

        if (file == null)
        {
            throw new ArgumentNullException("file");
        }

        // HashSet<T>.Add does not throw on a duplicate; it returns false.
        // The return value must be checked, otherwise the same file would be
        // silently registered (and later written) twice.
        if (!ids.Add(file))
        {
            throw new ArgumentException("File " + file + " already added");
        }

        var entry = new FileEntry {file = file};
        entries.AddLast(entry);
    }

    /// <summary>Obsolete alias for <see cref="Dispose"/>.</summary>
    [Obsolete("Use Dispose() instead")]
    public void Close()
    {
        Dispose();
    }

    /// <summary>Merge the files added up to now.
    /// All added files are combined sequentially into the compound stream:
    /// first the entry count, then a directory of (offset, name) pairs, then the
    /// raw data of every file. The directory offsets are written as zero first
    /// and patched once the data positions are known.
    /// </summary>
    /// <exception cref="SystemException">if the merge was already performed, or
    /// if no file has been added to this object</exception>
    public void Dispose()
    {
        // Extract into protected method if class ever becomes unsealed
        // TODO: Dispose shouldn't throw exceptions!
        if (merged)
        {
            throw new SystemException("Merge already performed");
        }

        if (entries.Count == 0)
        {
            throw new SystemException("No entries to merge have been defined");
        }

        merged = true;

        // open the compound stream
        IndexOutput os = null;
        try
        {
            os = directory.CreateOutput(fileName);

            // Write the number of entries
            os.WriteVInt(entries.Count);

            // Write the directory with all offsets at 0.
            // Remember the positions of directory entries so that we can
            // adjust the offsets later
            long totalSize = 0;
            foreach (FileEntry fe in entries)
            {
                fe.directoryOffset = os.FilePointer;
                os.WriteLong(0); // for now
                os.WriteString(fe.file);
                totalSize += directory.FileLength(fe.file);
            }

            // Pre-allocate size of file as optimization --
            // this can potentially help IO performance as
            // we write the file and also later during
            // searching. It also uncovers a disk-full
            // situation earlier and hopefully without
            // actually filling disk to 100%:
            long finalLength = totalSize + os.FilePointer;
            os.SetLength(finalLength);

            // Open the files and copy their data into the stream.
            // Remember the locations of each file's data section.
            var buffer = new byte[16384];
            foreach (FileEntry fe in entries)
            {
                fe.dataOffset = os.FilePointer;
                CopyFile(fe, os, buffer);
            }

            // Write the data offsets into the directory of the compound stream
            foreach (FileEntry fe in entries)
            {
                os.Seek(fe.directoryOffset);
                os.WriteLong(fe.dataOffset);
            }

            System.Diagnostics.Debug.Assert(finalLength == os.Length);

            // Close the output stream. Set the os to null before trying to
            // close so that if an exception occurs during the close, the
            // finally clause below will not attempt to close the stream
            // the second time.
            IndexOutput tmp = os;
            os = null;
            tmp.Close();
        }
        finally
        {
            // Only reached with a non-null stream when something above threw.
            if (os != null)
            {
                try
                {
                    os.Close();
                }
                catch (System.IO.IOException)
                {
                    // Deliberately ignored: a failure while closing must not
                    // replace the exception that is already propagating.
                }
            }
        }
    }

    /// <summary>Copy the contents of the given source file into the
    /// provided output stream. Use the provided buffer for moving data
    /// to reduce memory allocation.
    /// </summary>
    /// <param name="source">Entry naming the file to copy.</param>
    /// <param name="os">Compound output stream, positioned where the data should go.</param>
    /// <param name="buffer">Scratch buffer; its length is the copy chunk size.</param>
    /// <exception cref="System.IO.IOException">if the number of bytes written does not
    /// match the length of the source file</exception>
    private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
    {
        IndexInput isRenamed = null;
        try
        {
            long startPtr = os.FilePointer;

            isRenamed = directory.OpenInput(source.file);
            long length = isRenamed.Length();
            long remainder = length;
            int chunk = buffer.Length;

            while (remainder > 0)
            {
                var len = (int) Math.Min(chunk, remainder);
                isRenamed.ReadBytes(buffer, 0, len, false);
                os.WriteBytes(buffer, len);
                remainder -= len;
                if (checkAbort != null)
                {
                    // Roughly every 2 MB we will check if
                    // it's time to abort
                    checkAbort.Work(80);
                }
            }

            // Verify that remainder is 0
            if (remainder != 0)
            {
                throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
            }

            // Verify that the output length diff is equal to original file
            long endPtr = os.FilePointer;
            long diff = endPtr - startPtr;
            if (diff != length)
            {
                throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
            }
        }
        finally
        {
            // Always release the input, whether the copy succeeded or not.
            if (isRenamed != null)
            {
                isRenamed.Close();
            }
        }
    }
}
- }