/platform/util/src/com/intellij/util/io/IntToIntBtree.java

https://bitbucket.org/nbargnesi/idea · Java · 1113 lines · 883 code · 186 blank · 44 comment · 233 complexity · d7656dfa7131fc1579b8749b24c7a9a4 MD5 · raw file

  1. package com.intellij.util.io;
  2. import com.intellij.openapi.util.io.FileUtil;
  3. import gnu.trove.TIntIntHashMap;
  4. import org.jetbrains.annotations.NotNull;
  5. import java.io.File;
  6. import java.io.IOException;
  7. import java.nio.ByteBuffer;
  8. import java.util.Arrays;
  9. /**
  10. * Created by IntelliJ IDEA.
  11. * User: maximmossienko
  12. * Date: 7/12/11
  13. * Time: 1:34 PM
  14. */
  15. class IntToIntBtree {
  16. public static int version() {
  17. return 3;
  18. }
  // Bits OR-ed into the int that persistVars stores at offset 0 to record hasZeroKey;
  // the remaining bits of that int carry the tree height.
  19. private static final int HAS_ZERO_KEY_MASK = 0xFF000000;
  // Compile-time switch guarding the myAssert(...) consistency checks.
  20. static final boolean doSanityCheck = false;
  // Compile-time switch guarding BtreeIndexNodeView.dump(...) output.
  21. static final boolean doDump = false;
  // Page size in bytes, as passed to the constructor.
  22. final int pageSize;
  // Per-page record limits, computed in the constructor from pageSize.
  23. private final short maxInteriorNodes;
  24. private final short maxLeafNodes;
  // Record limit for a hashed leaf: about 80% of hashPageCapacity, rounded up to an even number.
  25. private final short maxLeafNodesInHash;
  // View positioned on the root page.
  26. final BtreeIndexNodeView root;
  // Counters below are saved/restored by persistVars.
  27. private int height;
  28. private int maxStepsSearchedInHash;
  29. private int totalHashStepsSearched;
  30. private int hashSearchRequests;
  31. private int pagesCount;
  32. private int hashedPagesCount;
  33. private int count;
  34. private int movedMembersCount;
  // Key 0 is kept outside of the tree (see get/put): 0 doubles as the HASH_FREE marker in hashed leaves.
  35. private boolean hasZeroKey;
  36. private int zeroKeyValue;
  // When true, record ranges are moved with bulk ByteBuffer copies instead of per-record loops.
  37. private boolean isLarge = true;
  38. private final ResizeableMappedFile storage;
  // When true, splitNode first tries to move records to a sibling instead of splitting.
  39. private final boolean offloadToSiblingsBeforeSplit = false;
  // When true, an empty leaf that receives its first record becomes a hashed leaf (see insert).
  40. private boolean indexNodeIsHashTable = true;
  // Header length used by indexToOffset for hashed leaves.
  41. final int metaDataLeafPageLength;
  // Number of slots in a hashed leaf (a prime), or -1 when hashing is disabled.
  42. final int hashPageCapacity;
  // Optional write-behind cache of mappings; disabled by this constant.
  43. private static final boolean hasCachedMappings = false;
  44. private TIntIntHashMap myCachedMappings;
  45. private final int myCachedMappingsSize;
  46. public IntToIntBtree(int _pageSize, File file, PagedFileStorage.StorageLockContext storageLockContext, boolean initial) throws IOException {
  47. pageSize = _pageSize;
  48. if (initial) {
  49. FileUtil.delete(file);
  50. }
  51. storage = new ResizeableMappedFile(file, pageSize, storageLockContext, 1024 * 1024, true);
  52. root = new BtreeIndexNodeView(this);
  53. if (initial) {
  54. nextPage(); // allocate root
  55. root.setAddress(0);
  56. root.setIndexLeaf(true);
  57. }
  58. int i = (pageSize - BtreePage.RESERVED_META_PAGE_LEN) / BtreeIndexNodeView.INTERIOR_SIZE - 1;
  59. assert i < Short.MAX_VALUE && i % 2 == 0;
  60. maxInteriorNodes = (short)i;
  61. maxLeafNodes = (short)i;
  62. int metaPageLen = BtreePage.RESERVED_META_PAGE_LEN;
  63. if (indexNodeIsHashTable) {
  64. ++i;
  65. while(!isPrime(i)) i -= 2;
  66. hashPageCapacity = i;
  67. metaPageLen = BtreePage.RESERVED_META_PAGE_LEN;
  68. i = (int)(hashPageCapacity * 0.8);
  69. if ((i & 1) == 1) ++i;
  70. } else {
  71. hashPageCapacity = -1;
  72. }
  73. metaDataLeafPageLength = metaPageLen;
  74. assert i > 0 && i % 2 == 0;
  75. maxLeafNodesInHash = (short) i;
  76. if (hasCachedMappings) {
  77. myCachedMappings = new TIntIntHashMap(myCachedMappingsSize = 4 * maxLeafNodes);
  78. } else {
  79. myCachedMappings = null;
  80. myCachedMappingsSize = -1;
  81. }
  82. }
  83. public void persistVars(BtreeDataStorage storage, boolean toDisk) {
  84. if (toDisk) {
  85. storage.persistInt(0, height | (hasZeroKey ? HAS_ZERO_KEY_MASK :0), true);
  86. } else {
  87. int i = storage.persistInt(0, 0, false);
  88. hasZeroKey = (i & HAS_ZERO_KEY_MASK) != 0;
  89. height = i & ~HAS_ZERO_KEY_MASK;
  90. }
  91. pagesCount = storage.persistInt(4, pagesCount, toDisk);
  92. movedMembersCount = storage.persistInt(8, movedMembersCount, toDisk);
  93. maxStepsSearchedInHash = storage.persistInt(12, maxStepsSearchedInHash, toDisk);
  94. count = storage.persistInt(16, count, toDisk);
  95. hashSearchRequests = storage.persistInt(20, hashSearchRequests, toDisk);
  96. totalHashStepsSearched = storage.persistInt(24, totalHashStepsSearched, toDisk);
  97. hashedPagesCount = storage.persistInt(28, hashedPagesCount, toDisk);
  98. root.setAddress(storage.persistInt(32, root.address, toDisk));
  99. zeroKeyValue = storage.persistInt(36, zeroKeyValue, toDisk);
  100. }
  /**
   * Storage for the int variables that {@code persistVars} saves and restores.
   */
  interface BtreeDataStorage {
    /**
     * Stores or loads one int slot.
     *
     * @param offset byte offset of the slot
     * @param value  value to store when {@code toDisk} is true
     * @param toDisk true to store {@code value}, false to load
     * @return the value callers should keep; {@code persistVars} assigns it back to the corresponding field
     */
    int persistInt(int offset, int value, boolean toDisk);
  }
  104. private static boolean isPrime(int val) {
  105. if (val % 2 == 0) return false;
  106. int maxDivisor = (int)Math.sqrt(val);
  107. for(int i = 3; i <= maxDivisor; i+=2) {
  108. if (val % i == 0) return false;
  109. }
  110. return true;
  111. }
  112. private int nextPage() {
  113. int pageStart = (int)storage.length();
  114. storage.putInt(pageStart + pageSize - 4, 0);
  115. ++pagesCount;
  116. return pageStart;
  117. }
  // Lazily created view reused by get/put/doPut for tree traversal.
  118. private BtreeIndexNodeView myAccessNodeView;
  // Key of the last failed get(), and the number of puts that reused that lookup's position.
  119. private int myLastGetKey, myOptimizedInserts;
  // True right after a get() that missed; lets put() insert without locating the leaf again.
  120. private boolean myCanUseLastKey;
  121. public boolean get(int key, int[] result) {
  122. if (key == 0) {
  123. if (hasZeroKey) {
  124. result[0] = zeroKeyValue;
  125. return true;
  126. }
  127. return false;
  128. }
  129. if (hasCachedMappings) {
  130. if (myCachedMappings.containsKey(key)) {
  131. result[0] = myCachedMappings.get(key);
  132. return true;
  133. }
  134. }
  135. if (myAccessNodeView == null) myAccessNodeView = new BtreeIndexNodeView(this);
  136. myAccessNodeView.initTraversal(root.address);
  137. int index = myAccessNodeView.locate(key, false);
  138. if (index < 0) {
  139. myCanUseLastKey = true;
  140. myLastGetKey = key;
  141. return false;
  142. } else {
  143. myCanUseLastKey = false;
  144. }
  145. result[0] = myAccessNodeView.addressAt(index);
  146. return true;
  147. }
  148. public void put(int key, int value) {
  149. if (key == 0) {
  150. hasZeroKey = true;
  151. zeroKeyValue = value;
  152. return;
  153. }
  154. if (hasCachedMappings) {
  155. myCachedMappings.put(key, value);
  156. if (myCachedMappings.size() == myCachedMappingsSize) flushCachedMappings();
  157. } else {
  158. boolean canUseLastKey = myCanUseLastKey;
  159. if (canUseLastKey) {
  160. myCanUseLastKey = false;
  161. if (key == myLastGetKey && !myAccessNodeView.myHasFullPagesAlongPath) {
  162. ++myOptimizedInserts;
  163. ++count;
  164. myAccessNodeView.insert(key, value);
  165. return;
  166. }
  167. }
  168. doPut(key, value);
  169. }
  170. }
  171. private void doPut(int key, int value) {
  172. if (myAccessNodeView == null) myAccessNodeView = new BtreeIndexNodeView(this);
  173. myAccessNodeView.initTraversal(root.address);
  174. int index = myAccessNodeView.locate(key, true);
  175. if (index < 0) {
  176. ++count;
  177. myAccessNodeView.insert(key, value);
  178. } else {
  179. myAccessNodeView.setAddressAt(index, value);
  180. if (!myAccessNodeView.myIsDirty) myAccessNodeView.markDirty();
  181. }
  182. }
  183. void dumpStatistics() {
  184. int leafPages = height == 3 ? pagesCount - (1 + root.getChildrenCount() + 1):height == 2 ? pagesCount - 1:1;
  185. long leafNodesCapacity = hashedPagesCount * maxLeafNodesInHash + (leafPages - hashedPagesCount)* maxLeafNodes;
  186. long leafNodesCapacity2 = leafPages * maxLeafNodes;
  187. int usedPercent = (int)((count * 100L) / leafNodesCapacity);
  188. int usedPercent2 = (int)((count * 100L) / leafNodesCapacity2);
  189. IOStatistics.dump("pagecount:" + pagesCount +
  190. ", height:" + height +
  191. ", movedMembers:"+movedMembersCount +
  192. ", optimized inserts:"+myOptimizedInserts +
  193. ", hash steps:" + maxStepsSearchedInHash +
  194. ", avg search in hash:" + (hashSearchRequests != 0 ? totalHashStepsSearched / hashSearchRequests:0) +
  195. ", leaf pages used:" + usedPercent +
  196. "%, leaf pages used if sorted: " +
  197. usedPercent2 + "%, size:"+storage.length()
  198. );
  199. }
  200. private void flushCachedMappings() {
  201. if (hasCachedMappings) {
  202. int[] keys = myCachedMappings.keys();
  203. Arrays.sort(keys);
  204. for(int key:keys) doPut(key, myCachedMappings.get(key));
  205. myCachedMappings.clear();
  206. myCanUseLastKey = false;
  207. }
  208. }
  209. void doClose() throws IOException {
  210. myCachedMappings = null;
  211. storage.close();
  212. }
  213. void doFlush() {
  214. flushCachedMappings();
  215. storage.force();
  216. }
  217. static void myAssert(boolean b) {
  218. if (!b) {
  219. myAssert("breakpoint place" != "do not remove");
  220. }
  221. assert b;
  222. }
  223. static class BtreePage {
  224. static final int RESERVED_META_PAGE_LEN = 8;
  225. protected final IntToIntBtree btree;
  226. protected int address = -1;
  227. private short myChildrenCount;
  228. protected int myAddressInBuffer;
  229. protected ByteBuffer myBuffer;
  230. protected boolean myHasFullPagesAlongPath;
  231. protected boolean myIsDirty;
  232. public BtreePage(IntToIntBtree btree) {
  233. this.btree = btree;
  234. myChildrenCount = -1;
  235. }
  236. void setAddress(int _address) {
  237. if (doSanityCheck) myAssert(_address % btree.pageSize == 0);
  238. address = _address;
  239. syncWithStore();
  240. }
  241. protected void syncWithStore() {
  242. PagedFileStorage pagedFileStorage = btree.storage.getPagedFileStorage();
  243. myAddressInBuffer = pagedFileStorage.getOffsetInPage(address);
  244. myBuffer = pagedFileStorage.getByteBuffer(address, false);
  245. myIsDirty = false; // we will mark dirty on child count change, attrs change or existing key put
  246. doInitFlags(myBuffer.getInt(myAddressInBuffer));
  247. }
  248. protected void doInitFlags(int anInt) {
  249. myChildrenCount = (short)((anInt >>> 8) & 0xFFFF);
  250. }
  251. protected final void setFlag(int mask, boolean flag) {
  252. byte b = myBuffer.get(myAddressInBuffer);
  253. if (flag) b |= mask;
  254. else b &= ~mask;
  255. myBuffer.put(myAddressInBuffer, b);
  256. if (!myIsDirty) markDirty();
  257. }
  258. void markDirty() {
  259. btree.storage.getPagedFileStorage().getByteBuffer(address, true);
  260. myIsDirty = true;
  261. }
  262. protected final short getChildrenCount() {
  263. return myChildrenCount;
  264. }
  265. protected final void setChildrenCount(short value) {
  266. myChildrenCount = value;
  267. myBuffer.putShort(myAddressInBuffer + 1, value);
  268. if (!myIsDirty) markDirty();
  269. }
  270. protected final void setNextPage(int nextPage) {
  271. putInt(3, nextPage);
  272. }
  273. // TODO: use it
  274. protected final int getNextPage() {
  275. return getInt(3);
  276. }
  277. protected final int getInt(int address) {
  278. return myBuffer.getInt(myAddressInBuffer + address);
  279. }
  280. protected final void putInt(int offset, int value) {
  281. myBuffer.putInt(myAddressInBuffer + offset, value);
  282. }
  283. protected final ByteBuffer getBytes(int address, int length) {
  284. ByteBuffer duplicate = myBuffer.duplicate();
  285. int newPosition = address + myAddressInBuffer;
  286. duplicate.position(newPosition);
  287. duplicate.limit(newPosition + length);
  288. return duplicate;
  289. }
  290. protected final void putBytes(int address, ByteBuffer buffer) {
  291. myBuffer.position(address + myAddressInBuffer);
  292. myBuffer.put(buffer);
  293. }
  294. }
  295. // Leaf index node
  296. // (value_address {<0 if address in duplicates segment}, hash key) {getChildrenCount()}
  297. // (|next_node {<0} , hash key|) {getChildrenCount()} , next_node {<0}
  298. // next_node[i] is pointer to all less than hash_key[i] except for the last
  299. private static class BtreeIndexNodeView extends BtreePage {
  // Size in bytes of one record slot: a 4-byte address/value followed by a 4-byte key.
  300. static final int INTERIOR_SIZE = 8;
  // Byte offset of the key inside a record slot.
  301. static final int KEY_OFFSET = 4;
  // Minimum number of records worth moving to a hashed sibling instead of splitting.
  302. static final int MIN_ITEMS_TO_SHARE = 20;
  // Cached copies of the page flag bits, refreshed by doInitFlags.
  303. private boolean isIndexLeaf;
  304. private boolean isHashedLeaf;
  // insert() switches from a per-record loop to a bulk buffer copy above this many records.
  305. private static final int LARGE_MOVE_THRESHOLD = 5;
  // Creates a view that is not yet positioned on any page.
  306. BtreeIndexNodeView(IntToIntBtree btree) {
  307. super(btree);
  308. }
  // Key value marking an unused slot in a hashed leaf.
  309. private static final int HASH_FREE = 0;
  310. private int search(int value) {
  311. if (isIndexLeaf() && isHashedLeaf()) {
  312. return hashIndex(value);
  313. }
  314. else {
  315. int hi = getChildrenCount() - 1;
  316. int lo = 0;
  317. while(lo <= hi) {
  318. int mid = lo + (hi - lo) / 2;
  319. int keyAtMid = keyAt(mid);
  320. if (value > keyAtMid) {
  321. lo = mid + 1;
  322. } else if (value < keyAtMid) {
  323. hi = mid - 1;
  324. } else {
  325. return mid;
  326. }
  327. }
  328. return -(lo + 1);
  329. }
  330. }
  331. final int addressAt(int i) {
  332. if (doSanityCheck) {
  333. short childrenCount = getChildrenCount();
  334. if (isHashedLeaf()) myAssert(i < btree.hashPageCapacity);
  335. else myAssert(i < childrenCount || (!isIndexLeaf() && i == childrenCount));
  336. }
  337. return getInt(indexToOffset(i));
  338. }
  339. private void setAddressAt(int i, int value) {
  340. int offset = indexToOffset(i);
  341. if (doSanityCheck) {
  342. short childrenCount = getChildrenCount();
  343. final int metaPageLen;
  344. if (isHashedLeaf()) {
  345. myAssert(i < btree.hashPageCapacity);
  346. metaPageLen = btree.metaDataLeafPageLength;
  347. }
  348. else {
  349. myAssert(i < childrenCount || (!isIndexLeaf() && i == childrenCount));
  350. metaPageLen = RESERVED_META_PAGE_LEN;
  351. }
  352. myAssert(offset + 4 <= btree.pageSize);
  353. myAssert(offset >= metaPageLen);
  354. }
  355. putInt(offset, value);
  356. }
  357. private final int indexToOffset(int i) {
  358. return i * INTERIOR_SIZE + (isHashedLeaf() ? btree.metaDataLeafPageLength:RESERVED_META_PAGE_LEN);
  359. }
  360. private final int keyAt(int i) {
  361. if (doSanityCheck) {
  362. if (isHashedLeaf()) myAssert(i < btree.hashPageCapacity);
  363. else myAssert(i < getChildrenCount());
  364. }
  365. return getInt(indexToOffset(i) + KEY_OFFSET);
  366. }
  367. private void setKeyAt(int i, int value) {
  368. final int offset = indexToOffset(i) + KEY_OFFSET;
  369. if (doSanityCheck) {
  370. final int metaPageLen;
  371. if (isHashedLeaf()) {
  372. myAssert(i < btree.hashPageCapacity);
  373. metaPageLen = btree.metaDataLeafPageLength;
  374. }
  375. else {
  376. myAssert(i < getChildrenCount());
  377. metaPageLen = RESERVED_META_PAGE_LEN;
  378. }
  379. myAssert(offset + 4 <= btree.pageSize);
  380. myAssert(offset >= metaPageLen);
  381. }
  382. putInt(offset, value);
  383. }
  // Bits of the page flag byte: the node is a leaf / the leaf is organized as a hash table.
  384. static final int INDEX_LEAF_MASK = 0x1;
  385. static final int HASHED_LEAF_MASK = 0x2;
  386. final boolean isIndexLeaf() {
  387. return isIndexLeaf;
  388. }
  389. protected void doInitFlags(int flags) {
  390. super.doInitFlags(flags);
  391. flags = (flags >> 24) & 0xFF;
  392. isHashedLeaf = (flags & HASHED_LEAF_MASK) == HASHED_LEAF_MASK;
  393. isIndexLeaf = (flags & INDEX_LEAF_MASK) == INDEX_LEAF_MASK;
  394. }
  395. void setIndexLeaf(boolean value) {
  396. isIndexLeaf = value;
  397. setFlag(INDEX_LEAF_MASK, value);
  398. }
  399. private final boolean isHashedLeaf() {
  400. return isHashedLeaf;
  401. }
  402. void setHashedLeaf(boolean value) {
  403. isHashedLeaf = value;
  404. setFlag(HASHED_LEAF_MASK, value);
  405. }
  406. final short getMaxChildrenCount() {
  407. return isIndexLeaf() ? isHashedLeaf() ? btree.maxLeafNodesInHash:btree.maxLeafNodes:btree.maxInteriorNodes;
  408. }
  409. final boolean isFull() {
  410. short childrenCount = getChildrenCount();
  411. if (!isIndexLeaf()) {
  412. ++childrenCount;
  413. }
  414. return childrenCount == getMaxChildrenCount();
  415. }
  416. boolean processMappings(KeyValueProcessor processor) throws IOException {
  417. assert isIndexLeaf();
  418. if (isHashedLeaf()) {
  419. int offset = myAddressInBuffer + indexToOffset(0);
  420. for(int i = 0; i < btree.hashPageCapacity; ++i) {
  421. int key = myBuffer.getInt(offset + KEY_OFFSET);
  422. if (key != HASH_FREE) {
  423. if(!processor.process(key, myBuffer.getInt(offset))) return false;
  424. }
  425. offset += INTERIOR_SIZE;
  426. }
  427. } else {
  428. final int childrenCount = getChildrenCount();
  429. for(int i = 0; i < childrenCount; ++i) {
  430. if (!processor.process(keyAt(i), addressAt(i))) return false;
  431. }
  432. }
  433. return true;
  434. }
  435. public void initTraversal(int address) {
  436. myHasFullPagesAlongPath = false;
  437. setAddress(address);
  438. }
  439. private static class HashLeafData {
  440. final BtreeIndexNodeView nodeView;
  441. final int[] keys;
  442. final TIntIntHashMap values;
  443. HashLeafData(BtreeIndexNodeView _nodeView, int recordCount) {
  444. nodeView = _nodeView;
  445. final IntToIntBtree btree = _nodeView.btree;
  446. int offset = nodeView.myAddressInBuffer + nodeView.indexToOffset(0);
  447. final ByteBuffer buffer = nodeView.myBuffer;
  448. keys = new int[recordCount];
  449. values = new TIntIntHashMap(recordCount);
  450. int keyNumber = 0;
  451. for(int i = 0; i < btree.hashPageCapacity; ++i) {
  452. int key = buffer.getInt(offset + KEY_OFFSET);
  453. if (key != HASH_FREE) {
  454. int value = buffer.getInt(offset);
  455. keys[keyNumber++] = key;
  456. values.put(key, value);
  457. }
  458. offset += INTERIOR_SIZE;
  459. }
  460. Arrays.sort(keys);
  461. }
  462. private void clean() {
  463. final IntToIntBtree btree = nodeView.btree;
  464. for(int i = 0; i < btree.hashPageCapacity; ++i) {
  465. nodeView.setKeyAt(i, HASH_FREE);
  466. }
  467. }
  468. }
  // Splits this full node in two, moving the upper half of its records to a freshly allocated page.
  // parentAddress == 0 means "no parent": a new root page is allocated and receives the median key.
  // Returns the address of the node that now holds the median key (the given parent or the new root).
  469. private int splitNode(int parentAddress) {
  470. final boolean indexLeaf = isIndexLeaf();
  471. if (doSanityCheck) {
  472. myAssert(isFull());
  473. dump("before split:"+indexLeaf);
  474. }
  475. final boolean hashedLeaf = isHashedLeaf();
  476. final short recordCount = getChildrenCount();
  477. BtreeIndexNodeView parent = null;
  478. HashLeafData hashLeafData = null;
  479. if (parentAddress != 0) {
  480. parent = new BtreeIndexNodeView(btree);
  481. parent.setAddress(parentAddress);
  // Optional path: try to move records to a sibling and skip the split altogether.
  482. if (btree.offloadToSiblingsBeforeSplit) {
  483. if (hashedLeaf) {
  484. hashLeafData = new HashLeafData(this, recordCount);
  485. if (doOffloadToSiblingsWhenHashed(parent, hashLeafData)) return parentAddress;
  486. } else {
  487. if (doOffloadToSiblingsSorted(parent)) return parentAddress;
  488. }
  489. }
  490. }
  491. short maxIndex = (short)(getMaxChildrenCount() / 2);
  492. BtreeIndexNodeView newIndexNode = new BtreeIndexNodeView(btree);
  493. newIndexNode.setAddress(btree.nextPage());
  494. syncWithStore(); // next page can cause ByteBuffer to be invalidated!
  495. if (parent != null) parent.syncWithStore();
  496. btree.root.syncWithStore();
  497. newIndexNode.setIndexLeaf(indexLeaf);
  // Link the new page into the next-page chain right after this page.
  498. int nextPage = getNextPage();
  499. setNextPage(newIndexNode.address);
  500. newIndexNode.setNextPage(nextPage);
  501. int medianKey = -1;
  502. if (indexLeaf && hashedLeaf) {
  // Hashed leaf: snapshot the keys in sorted order, wipe the page and re-insert each half.
  503. if (hashLeafData == null) hashLeafData = new HashLeafData(this, recordCount);
  504. final int[] keys = hashLeafData.keys;
  505. boolean defaultSplit = true;
  506. //if (keys[keys.length - 1] < newValue && btree.height <= 3) { // optimization for adding element to last block
  507. // btree.root.syncWithStore();
  508. // if (btree.height == 2 && btree.root.search(keys[0]) == btree.root.getChildrenCount() - 1) {
  509. // defaultSplit = false;
  510. // } else if (btree.height == 3 &&
  511. // btree.root.search(keys[0]) == -btree.root.getChildrenCount() - 1 &&
  512. // parent.search(keys[0]) == parent.getChildrenCount() - 1
  513. // ) {
  514. // defaultSplit = false;
  515. // }
  516. //
  517. // if (!defaultSplit) {
  518. // newIndexNode.setChildrenCount((short)0);
  519. // newIndexNode.insert(newValue, 0);
  520. // ++btree.count;
  521. // medianKey = newValue;
  522. // }
  523. //}
  524. if (defaultSplit) {
  525. hashLeafData.clean();
  526. final TIntIntHashMap map = hashLeafData.values;
  527. final int avg = keys.length / 2;
  528. medianKey = keys[avg];
  529. --btree.hashedPagesCount;
  530. setChildrenCount((short)0);
  531. newIndexNode.setChildrenCount((short)0);
  // NOTE(review): the loop re-inserts 2 * avg keys, so it relies on keys.length being even.
  532. for(int i = 0; i < avg; ++i) {
  533. int key = keys[i];
  534. insert(key, map.get(key));
  535. key = keys[avg + i];
  536. newIndexNode.insert(key, map.get(key));
  537. }
  538. /*setHashedLeaf(false);
  539. setChildrenCount((short)keys.length);
  540. --btree.hashedPagesCount;
  541. btree.movedMembersCount += keys.length;
  542. for(int i = 0; i < keys.length; ++i) {
  543. int key = keys[i];
  544. setKeyAt(i, key);
  545. setAddressAt(i, map.get(key));
  546. }
  547. return parentAddress;*/
  548. }
  549. } else {
  // Sorted node: copy the records from maxIndex upwards into the new node.
  550. short recordCountInNewNode = (short)(recordCount - maxIndex);
  551. newIndexNode.setChildrenCount(recordCountInNewNode);
  552. if (btree.isLarge) {
  553. ByteBuffer buffer = getBytes(indexToOffset(maxIndex), recordCountInNewNode * INTERIOR_SIZE);
  554. newIndexNode.putBytes(newIndexNode.indexToOffset(0), buffer);
  555. } else {
  556. for(int i = 0; i < recordCountInNewNode; ++i) {
  557. newIndexNode.setAddressAt(i, addressAt(i + maxIndex));
  558. newIndexNode.setKeyAt(i, keyAt(i + maxIndex));
  559. }
  560. }
  561. if (indexLeaf) {
  562. medianKey = newIndexNode.keyAt(0);
  563. } else {
  // Interior node: also carry over the trailing child pointer.
  564. newIndexNode.setAddressAt(recordCountInNewNode, addressAt(recordCount));
  565. --maxIndex;
  566. medianKey = keyAt(maxIndex); // key count is odd (since children count is even) and middle key goes to parent
  567. }
  568. setChildrenCount(maxIndex);
  569. }
  570. if (parent != null) {
  571. if (doSanityCheck) {
  572. int medianKeyInParent = parent.search(medianKey);
  573. int ourKey = keyAt(0);
  574. int ourKeyInParent = parent.search(ourKey);
  575. parent.dump("About to insert "+medianKey + "," + newIndexNode.address+"," + medianKeyInParent + " our key " + ourKey + ", " + ourKeyInParent);
  576. myAssert(medianKeyInParent < 0);
  577. myAssert(!parent.isFull());
  578. }
  // Child pointers are stored negated.
  579. parent.insert(medianKey, -newIndexNode.address);
  580. if (doSanityCheck) {
  581. parent.dump("After modifying parent");
  582. int search = parent.search(medianKey);
  583. myAssert(search >= 0);
  584. myAssert(parent.addressAt(search + 1) == -newIndexNode.address);
  585. dump("old node after split:");
  586. newIndexNode.dump("new node after split:");
  587. }
  588. } else {
  // No parent: this node was the root, so build a new root with the two halves as children.
  589. if (doSanityCheck) {
  590. btree.root.dump("Splitting root:"+medianKey);
  591. }
  592. int newRootAddress = btree.nextPage();
  // Allocating a page may invalidate buffers, so both halves are re-synced.
  593. newIndexNode.syncWithStore();
  594. syncWithStore();
  595. if (doSanityCheck) {
  596. System.out.println("Pages:"+btree.pagesCount+", elements:"+btree.count + ", average:" + (btree.height + 1));
  597. }
  598. btree.root.setAddress(newRootAddress);
  599. parentAddress = newRootAddress;
  600. btree.root.setChildrenCount((short)1);
  601. btree.root.setKeyAt(0, medianKey);
  602. btree.root.setAddressAt(0, -address);
  603. btree.root.setAddressAt(1, -newIndexNode.address);
  604. if (doSanityCheck) {
  605. btree.root.dump("New root");
  606. dump("First child");
  607. newIndexNode.dump("Second child");
  608. }
  609. }
  610. return parentAddress;
  611. }
  612. private boolean doOffloadToSiblingsWhenHashed(BtreeIndexNodeView parent, final HashLeafData hashLeafData) {
  613. int indexInParent = parent.search(hashLeafData.keys[0]);
  614. if (indexInParent >= 0) {
  615. BtreeIndexNodeView sibling = new BtreeIndexNodeView(btree);
  616. sibling.setAddress(-parent.addressAt(indexInParent));
  617. int numberOfKeysToMove = (sibling.getMaxChildrenCount() - sibling.getChildrenCount()) / 2;
  618. if (!sibling.isFull() && numberOfKeysToMove > MIN_ITEMS_TO_SHARE) {
  619. if (doSanityCheck) {
  620. sibling.dump("Offloading to left sibling");
  621. parent.dump("parent before");
  622. }
  623. final int childrenCount = getChildrenCount();
  624. final int[] keys = hashLeafData.keys;
  625. final TIntIntHashMap map = hashLeafData.values;
  626. for(int i = 0; i < numberOfKeysToMove; ++i) {
  627. final int key = keys[i];
  628. sibling.insert(key, map.get(key));
  629. }
  630. if (doSanityCheck) {
  631. sibling.dump("Left sibling after");
  632. }
  633. parent.setKeyAt(indexInParent, keys[numberOfKeysToMove]);
  634. setChildrenCount((short)0);
  635. --btree.hashedPagesCount;
  636. hashLeafData.clean();
  637. for(int i = numberOfKeysToMove; i < childrenCount; ++i) {
  638. final int key = keys[i];
  639. insert(key, map.get(key));
  640. }
  641. } else if (indexInParent + 1 < parent.getChildrenCount()) {
  642. insertToRightSiblingWhenHashed(parent, hashLeafData, indexInParent, sibling);
  643. }
  644. } else if (indexInParent == -1) {
  645. insertToRightSiblingWhenHashed(parent, hashLeafData, 0, new BtreeIndexNodeView(btree));
  646. }
  647. if (!isFull()) {
  648. if (doSanityCheck) {
  649. dump("old node after split:");
  650. parent.dump("Parent node after split");
  651. }
  652. return true;
  653. }
  654. return false;
  655. }
  // Moves the largest keys of this hashed leaf to the node stored at parent slot indexInParent + 1,
  // provided that node is not full and more than MIN_ITEMS_TO_SHARE keys can be moved.
  // The parent key at indexInParent is then set to the smallest key moved, and this leaf is rebuilt
  // from the remaining keys. `sibling` is a scratch view that gets re-positioned here.
  656. private void insertToRightSiblingWhenHashed(BtreeIndexNodeView parent,
  657. HashLeafData hashLeafData,
  658. int indexInParent,
  659. BtreeIndexNodeView sibling) {
  660. sibling.setAddress(-parent.addressAt(indexInParent + 1));
  // Move half of the sibling's free capacity.
  661. int numberOfKeysToMove = (sibling.getMaxChildrenCount() - sibling.getChildrenCount()) / 2;
  662. if (!sibling.isFull() && numberOfKeysToMove > MIN_ITEMS_TO_SHARE) {
  663. if (doSanityCheck) {
  664. sibling.dump("Offloading to right sibling");
  665. parent.dump("parent before");
  666. }
  667. final int[] keys = hashLeafData.keys;
  668. final TIntIntHashMap map = hashLeafData.values;
  669. final int childrenCount = getChildrenCount();
  670. final int lastChildIndex = childrenCount - numberOfKeysToMove;
  671. for(int i = lastChildIndex; i < childrenCount; ++i) {
  672. final int key = keys[i];
  673. sibling.insert(key, map.get(key));
  674. }
  675. if (doSanityCheck) {
  676. sibling.dump("Right sibling after");
  677. }
  678. parent.setKeyAt(indexInParent, keys[lastChildIndex]);
  // Wipe this leaf and re-insert the keys that stay.
  679. setChildrenCount((short)0);
  680. --btree.hashedPagesCount;
  681. hashLeafData.clean();
  682. for(int i = 0; i < lastChildIndex; ++i) {
  683. final int key = keys[i];
  684. insert(key, map.get(key));
  685. }
  686. }
  687. }
  // Tries to avoid splitting this full sorted leaf by moving records to the left sibling, or else to
  // the right one. Interior nodes are not handled. Returns true if this node is no longer full.
  688. private boolean doOffloadToSiblingsSorted(BtreeIndexNodeView parent) {
  689. if (!isIndexLeaf()) return false; // TODO
  690. int indexInParent = parent.search(keyAt(0));
  691. if (indexInParent >= 0) {
  // Our first key is a separator in the parent: this node is child indexInParent + 1.
  692. if (doSanityCheck) {
  693. myAssert(parent.keyAt(indexInParent) == keyAt(0));
  694. myAssert(parent.addressAt(indexInParent + 1) == -address);
  695. }
  696. BtreeIndexNodeView sibling = new BtreeIndexNodeView(btree);
  697. sibling.setAddress(-parent.addressAt(indexInParent));
  // Move half of the left sibling's free capacity.
  698. final int toMove = (sibling.getMaxChildrenCount() - sibling.getChildrenCount()) / 2;
  699. if (toMove > 0) {
  700. if (doSanityCheck) {
  701. sibling.dump("Offloading to left sibling");
  702. parent.dump("parent before");
  703. }
  704. for(int i = 0; i < toMove; ++i) sibling.insert(keyAt(i), addressAt(i));
  705. if (doSanityCheck) {
  706. sibling.dump("Left sibling after");
  707. }
  708. parent.setKeyAt(indexInParent, keyAt(toMove));
  // Despite its name, this is the number of records that stay; they are shifted to the front.
  709. int indexOfLastChildToMove = (int)getChildrenCount() - toMove;
  710. btree.movedMembersCount += indexOfLastChildToMove;
  711. if (btree.isLarge) {
  712. ByteBuffer buffer = getBytes(indexToOffset(toMove), indexOfLastChildToMove * INTERIOR_SIZE);
  713. putBytes(indexToOffset(0), buffer);
  714. }
  715. else {
  716. for (int i = 0; i < indexOfLastChildToMove; ++i) {
  717. setAddressAt(i, addressAt(i + toMove));
  718. setKeyAt(i, keyAt(i + toMove));
  719. }
  720. }
  721. setChildrenCount((short)indexOfLastChildToMove);
  722. }
  723. else if (indexInParent + 1 < parent.getChildrenCount()) {
  724. insertToRightSiblingWhenSorted(parent, indexInParent + 1, sibling);
  725. }
  726. } else if (indexInParent == -1) {
  // Our first key is smaller than every parent key: this node is child 0 and has only a right sibling.
  727. insertToRightSiblingWhenSorted(parent, 0, new BtreeIndexNodeView(btree));
  728. }
  729. if (!isFull()) {
  730. if (doSanityCheck) {
  731. dump("old node after split:");
  732. parent.dump("Parent node after split");
  733. }
  734. return true;
  735. }
  736. return false;
  737. }
  // Moves the largest records of this sorted leaf to the node stored at parent slot indexInParent + 1,
  // as many as half of that node's free capacity. The parent key at indexInParent is then set to the
  // smallest key moved, and this node's children count is reduced accordingly.
  738. private void insertToRightSiblingWhenSorted(BtreeIndexNodeView parent, int indexInParent, BtreeIndexNodeView sibling) {
  739. sibling.setAddress(-parent.addressAt(indexInParent + 1));
  740. int toMove = (sibling.getMaxChildrenCount() - sibling.getChildrenCount()) / 2;
  741. if (toMove > 0) {
  742. if (doSanityCheck) {
  743. sibling.dump("Offloading to right sibling");
  744. parent.dump("parent before");
  745. }
  746. int childrenCount = getChildrenCount();
  747. int lastChildIndex = childrenCount - toMove;
  748. for(int i = lastChildIndex; i < childrenCount; ++i) sibling.insert(keyAt(i), addressAt(i));
  749. if (doSanityCheck) {
  750. sibling.dump("Right sibling after");
  751. }
  // Must be read before the children count shrinks.
  752. parent.setKeyAt(indexInParent, keyAt(lastChildIndex));
  753. setChildrenCount((short)lastChildIndex);
  754. }
  755. }
  756. private void dump(String s) {
  757. if (doDump) {
  758. immediateDump(s);
  759. }
  760. }
  761. private void immediateDump(String s) {
  762. short maxIndex = getChildrenCount();
  763. System.out.println(s + " @" + address);
  764. for (int i = 0; i < maxIndex; ++i) {
  765. System.out.print(addressAt(i) + " " + keyAt(i) + " ");
  766. }
  767. if (!isIndexLeaf()) {
  768. System.out.println(addressAt(maxIndex));
  769. }
  770. else {
  771. System.out.println();
  772. }
  773. }
  // Descends from the current page to the leaf responsible for valueHC and searches it there.
  // With split == true every full node met on the way is split first; otherwise full nodes are only
  // recorded in myHasFullPagesAlongPath. Returns the result of search() in the leaf (negative if absent).
  // Throws IllegalStateException if the descent takes more steps than the known tree height plus one.
  774. private int locate(int valueHC, boolean split) {
  775. int searched = 0;
  // 0 means "no parent yet", i.e. the current node is the one the traversal started from.
  776. int parentAddress = 0;
  777. final int maxHeight = btree.height + 1;
  778. while(true) {
  779. if (isFull()) {
  780. if (split) {
  781. parentAddress = splitNode(parentAddress);
  // Continue from the node that received the median key; it is searched again below.
  782. if (parentAddress != 0) setAddress(parentAddress);
  783. --searched;
  784. } else {
  785. myHasFullPagesAlongPath = true;
  786. }
  787. }
  788. int i = search(valueHC);
  789. ++searched;
  790. if (searched > maxHeight) throw new IllegalStateException();
  791. if (isIndexLeaf()) {
  792. btree.height = Math.max(btree.height, searched);
  793. return i;
  794. }
  // Key found at i: follow child i + 1; otherwise follow the child at the insertion point.
  795. int address = i < 0 ? addressAt(-i - 1):addressAt(i + 1);
  796. parentAddress = this.address;
  // Child pointers are stored negated.
  797. setAddress(-address);
  798. }
  799. }
  // Inserts the record (valueHC -> newValueId) into this node, which must not be full.
  // Hashed leaves store the record in the slot chosen by hashIndex; sorted leaves and interior nodes
  // shift the following records one slot to the right to keep the keys ordered.
  800. private void insert(int valueHC, int newValueId) {
  801. if (doSanityCheck) myAssert(!isFull());
  802. short recordCount = getChildrenCount();
  803. if (doSanityCheck) myAssert(recordCount < getMaxChildrenCount());
  804. final boolean indexLeaf = isIndexLeaf();
  805. if (indexLeaf) {
  // An empty leaf becomes a hashed leaf on its first insert when hashing is enabled.
  806. if (recordCount == 0 && btree.indexNodeIsHashTable) {
  807. setHashedLeaf(true);
  808. ++btree.hashedPagesCount;
  809. }
  810. if (isHashedLeaf()) {
  811. int index = hashIndex(valueHC);
  // A negative result encodes the free slot where the key belongs.
  812. if (index < 0) {
  813. index = -index - 1;
  814. }
  815. setKeyAt(index, valueHC);
  816. setAddressAt(index, newValueId);
  // NOTE(review): the count is incremented even when hashIndex found the key already present - confirm callers never do that.
  817. setChildrenCount((short)(recordCount + 1));
  818. return;
  819. }
  820. }
  821. int medianKeyInParent = search(valueHC);
  822. if (doSanityCheck) myAssert(medianKeyInParent < 0);
  823. int index = -medianKeyInParent - 1;
  824. setChildrenCount((short)(recordCount + 1));
  825. final int itemsToMove = recordCount - index;
  826. btree.movedMembersCount += itemsToMove;
  827. if (indexLeaf) {
  // Sorted leaf: shift records [index, recordCount) right by one slot, then write the new record.
  828. if (btree.isLarge && itemsToMove > LARGE_MOVE_THRESHOLD) {
  829. ByteBuffer buffer = getBytes(indexToOffset(index), itemsToMove * INTERIOR_SIZE);
  830. putBytes(indexToOffset(index + 1), buffer);
  831. } else {
  832. for(int i = recordCount - 1; i >= index; --i) {
  833. setKeyAt(i + 1, keyAt(i));
  834. setAddressAt(i + 1, addressAt(i));
  835. }
  836. }
  837. setKeyAt(index, valueHC);
  838. setAddressAt(index, newValueId);
  839. } else {
  840. // <address> (<key><address>) {record_count - 1}
  841. //
  // Interior node: the new child pointer goes to slot index + 1, i.e. to the right of the new key.
  842. setAddressAt(recordCount + 1, addressAt(recordCount));
  843. if (btree.isLarge && itemsToMove > LARGE_MOVE_THRESHOLD) {
  844. int elementsAfterIndex = recordCount - index - 1;
  845. if (elementsAfterIndex > 0) {
  846. ByteBuffer buffer = getBytes(indexToOffset(index + 1), elementsAfterIndex * INTERIOR_SIZE);
  847. putBytes(indexToOffset(index + 2), buffer);
  848. }
  849. } else {
  850. for(int i = recordCount - 1; i > index; --i) {
  851. setKeyAt(i + 1, keyAt(i));
  852. setAddressAt(i + 1, addressAt(i));
  853. }
  854. }
  855. if (index < recordCount) setKeyAt(index + 1, keyAt(index));
  856. setKeyAt(index, valueHC);
  857. setAddressAt(index + 1, newValueId);
  858. }
  859. if (doSanityCheck) {
  860. if (index > 0) myAssert(keyAt(index - 1) < keyAt(index));
  861. if (index < recordCount) myAssert(keyAt(index) < keyAt(index + 1));
  862. }
  863. }
  // Selects double hashing (true) or linear probing (false) in hashIndex.
  864. private static final boolean useDoubleHash = true;
  // Probes the hashed leaf for `value`.
  // Returns the slot index if the key is found, or -(index) - 1 for the free slot where probing stopped.
  // Also updates the hash statistics counters of the tree.
  // Throws IllegalStateException("Index corrupted") if more than hashPageCapacity probes are needed.
  865. private int hashIndex(int value) {
  866. int hash, index;
  867. final int length = btree.hashPageCapacity;
  // Clear the sign bit so that the remainder is non-negative.
  868. hash = value & 0x7fffffff;
  869. index = hash % length;
  870. int keyAtIndex = keyAt(index);
  871. int total = 0;
  872. btree.hashSearchRequests++;
  873. if (useDoubleHash) {
  874. if (keyAtIndex != value && keyAtIndex != HASH_FREE) {
  875. // see Knuth, p. 529
  // Second hash: step size in the range [1, length - 2].
  876. final int probe = 1 + (hash % (length - 2));
  877. do {
  878. index -= probe;
  879. if (index < 0) index += length;
  880. keyAtIndex = keyAt(index);
  881. ++total;
  882. if (total > length) {
  883. throw new IllegalStateException("Index corrupted"); // violation of Euler's theorem
  884. }
  885. }
  886. while (keyAtIndex != value && keyAtIndex != HASH_FREE);
  887. }
  888. } else {
  // Linear probing, walking downwards with wrap-around.
  889. while(keyAtIndex != value && keyAtIndex != HASH_FREE) {
  890. if (index == 0) index = length;
  891. --index;
  892. keyAtIndex = keyAt(index);
  893. ++total;
  894. if (total > length) throw new IllegalStateException("Index corrupted"); // violation of Euler's theorem
  895. }
  896. }
  897. btree.maxStepsSearchedInHash = Math.max(btree.maxStepsSearchedInHash, total);
  898. btree.totalHashStepsSearched += total;
  899. return keyAtIndex == HASH_FREE ? -index - 1 : index;
  900. }
  901. }
  902. public static abstract class KeyValueProcessor {
  903. public abstract boolean process(int key, int value) throws IOException;
  904. }
  905. public boolean processMappings(@NotNull KeyValueProcessor processor) throws IOException {
  906. doFlush();
  907. root.syncWithStore();
  908. if (hasZeroKey) {
  909. if(!processor.process(0, zeroKeyValue)) return false;
  910. }
  911. return processLeafPages(root, processor);
  912. }
  913. private boolean processLeafPages(@NotNull BtreeIndexNodeView node, @NotNull KeyValueProcessor processor) throws IOException {
  914. if (node.isIndexLeaf()) {
  915. return node.processMappings(processor);
  916. }
  917. BtreeIndexNodeView child = null;
  918. for(int i = 0; i <= node.getChildrenCount(); ++i) {
  919. if (child == null) child = new BtreeIndexNodeView(this);
  920. child.setAddress(-node.addressAt(i));
  921. if (!processLeafPages(child, processor)) return false;
  922. }
  923. return true;
  924. }
  925. }