
/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsPackCompactor.java

https://bitbucket.org/albfan/jgit
/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.storage.dfs;

import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.COMPACT;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;
/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, since new delta compression is disabled.
 * <p>
 * This compactor is suitable for quickly combining several packs together
 * after receiving a number of small fetch or push operations into a
 * repository, allowing the system to maintain reasonable read performance
 * without expending a lot of time repacking the entire repository.
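 * <p>
 * A minimal usage sketch; how the DFS-backed {@link DfsRepository} instance
 * is obtained depends on the concrete DFS implementation and is assumed here:
 *
 * <pre>
 * DfsRepository repo = ...; // obtain a DFS-backed repository
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // select the small packs automatically
 * compactor.compact(NullProgressMonitor.INSTANCE);
 * List&lt;DfsPackDescription&gt; created = compactor.getNewPacks();
 * </pre>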
 */
public class DfsPackCompactor {
	private final DfsRepository repo;

	private final List<DfsPackFile> srcPacks;

	private final List<DfsPackDescription> newPacks;

	private final List<PackWriter.Statistics> newStats;

	/** Packs below this size (in bytes) are selected by {@link #autoAdd()}. */
	private int autoAddSize;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<DfsPackFile>();
		newPacks = new ArrayList<DfsPackDescription>(1);
		newStats = new ArrayList<PackWriter.Statistics>(1);
	}
	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack will order objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}
	/**
	 * Automatically select packs to be included, and add them.
	 * <p>
	 * Packs are selected based on size; packs smaller than the threshold are
	 * included while bigger ones are omitted.
	 *
	 * @return {@code this}
	 * @throws IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getPackSize() < autoAddSize)
				add(pack);
		}
		return this;
	}
	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates on as packing may take a
	 *            while, depending on the size of the repository.
	 * @throws IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;

		DfsObjDatabase objdb = repo.getObjectDatabase();
		DfsReader ctx = (DfsReader) objdb.newReader();
		try {
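			// New delta compression is deliberately skipped: existing
			// deltas and whole objects are reused from the source packs,
			// trading pack size for compaction speed (see class javadoc).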
			PackConfig pc = new PackConfig(repo);
			pc.setIndexVersion(2);
			pc.setDeltaCompress(false);
			pc.setReuseDeltas(true);
			pc.setReuseObjects(true);

			PackWriter pw = new PackWriter(pc, ctx);
			try {
				pw.setDeltaBaseAsOffset(true);
				pw.setReuseDeltaCommits(false);

				addObjectsToPack(pw, ctx, pm);
				if (pw.getObjectCount() == 0)
					return;
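				// If anything fails before commitPack() completes, the
				// partially written pack is rolled back so no garbage is
				// left behind in the object database.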
				boolean rollback = true;
				DfsPackDescription pack = objdb.newPack(COMPACT);
				try {
					writePack(objdb, pack, pw, pm);
					writeIndex(objdb, pack, pw);

					PackWriter.Statistics stats = pw.getStatistics();
					pw.release();
					pw = null;

					pack.setPackStats(stats);
					objdb.commitPack(Collections.singletonList(pack), toPrune());
					newPacks.add(pack);
					newStats.add(stats);
					rollback = false;
				} finally {
					if (rollback)
						objdb.rollbackPack(Collections.singletonList(pack));
				}
			} finally {
				if (pw != null)
					pw.release();
			}
		} finally {
			ctx.release();
		}
	}
	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPacks;
	}

	/** @return statistics corresponding to the {@link #getNewPacks()}. */
	public List<PackWriter.Statistics> getNewPackStatistics() {
		return newStats;
	}
	private List<DfsPackDescription> toPrune() {
		int cnt = srcPacks.size();
		List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
		for (DfsPackFile pack : srcPacks)
			all.add(pack.getPackDescription());
		return all;
	}
	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering; this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(srcPacks, new Comparator<DfsPackFile>() {
			public int compare(DfsPackFile a, DfsPackFile b) {
				return a.getPackDescription().compareTo(b.getPackDescription());
			}
		});

		RevWalk rw = new RevWalk(ctx);
		RevFlag added = rw.newFlag("ADDED");
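		// The ADDED flag marks objects already handed to the PackWriter,
		// so an object appearing in more than one source pack is only
		// packed the first (newest) time it is seen.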
		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>();
			for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) {
				ObjectId id = ent.toObjectId();
				RevObject obj = rw.lookupOrNull(id);
				if (obj == null || !obj.has(added))
					want.add(new ObjectIdWithOffset(id, ent.getOffset()));
			}

			// Sort objects by the order they appear in the pack file, for
			// two benefits. Scanning object type information is faster when
			// the pack is traversed in order, and this allows the PackWriter
			// to be given the new objects in a relatively sane newest-first
			// ordering without additional logic, like unpacking commits and
			// walking a commit queue.
			Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
				public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
					return Long.signum(a.offset - b.offset);
				}
			});

			// Only pack each object at most once into the output file. The
			// PackWriter will later select a representation to reuse, which
			// may be the version in this pack, or may be from another pack if
			// the object was copied here to complete a thin pack and is larger
			// than a delta from another pack. This is actually somewhat common
			// if an object is modified frequently, such as the top level tree.
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (!obj.has(added)) {
					pm.update(1);
					pw.addObject(obj);
					obj.add(added);
				}
			}
		}
		pm.endTask();
	}
	private void writePack(DfsObjDatabase objdb, DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		DfsOutputStream out = objdb.writePackFile(pack);
		try {
			// Count the bytes as the pack streams out, so the pack
			// description records the exact size written.
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writePack(pm, pm, cnt);
			pack.setObjectCount(pw.getObjectCount());
			pack.setPackSize(cnt.getCount());
		} finally {
			out.close();
		}
	}
	private void writeIndex(DfsObjDatabase objdb, DfsPackDescription pack,
			PackWriter pw) throws IOException {
		DfsOutputStream out = objdb.writePackIndex(pack);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.setIndexSize(cnt.getCount());
		} finally {
			out.close();
		}
	}
	/** An ObjectId that also remembers its offset within the source pack. */
	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}