1// ignore-tidy-filelength2/**3 * @import * as stringdex from "./stringdex.d.ts"4 */56const EMPTY_UINT8 = new Uint8Array();78/**9 * @property {Uint8Array} keysAndCardinalities10 * @property {Uint8Array[]} containers11 */12class RoaringBitmap {13 /**14 * @param {Uint8Array|null} u8array15 * @param {number} [startingOffset]16 */17 constructor(u8array, startingOffset) {18 const start = startingOffset ? startingOffset : 0;19 let i = start;20 /** @type {Uint8Array} */21 this.keysAndCardinalities = EMPTY_UINT8;22 /** @type {(RoaringBitmapArray|RoaringBitmapBits|RoaringBitmapRun)[]} */23 this.containers = [];24 /** @type {number} */25 this.consumed_len_bytes = 0;26 if (u8array === null || u8array.length === i || u8array[i] === 0) {27 return this;28 } else if (u8array[i] > 0xf0) {29 // Special representation of tiny sets that are close together30 const lspecial = u8array[i] & 0x0f;31 this.keysAndCardinalities = new Uint8Array(lspecial * 4);32 let pspecial = i + 1;33 let key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);34 let value = u8array[pspecial] | (u8array[pspecial + 1] << 8);35 let entry = (key << 16) | value;36 let container;37 container = new RoaringBitmapArray(1, new Uint8Array(4));38 container.array[0] = value & 0xFF;39 container.array[1] = (value >> 8) & 0xFF;40 this.containers.push(container);41 this.keysAndCardinalities[0] = key;42 this.keysAndCardinalities[1] = key >> 8;43 pspecial += 4;44 for (let ispecial = 1; ispecial < lspecial; ispecial += 1) {45 entry += u8array[pspecial] | (u8array[pspecial + 1] << 8);46 value = entry & 0xffff;47 key = entry >> 16;48 container = this.addToArrayAt(key);49 const cardinalityOld = container.cardinality;50 container.array[cardinalityOld * 2] = value & 0xFF;51 container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF;52 container.cardinality = cardinalityOld + 1;53 pspecial += 2;54 }55 this.consumed_len_bytes = pspecial - i;56 return this;57 } else if (u8array[i] > 0xe0) {58 // Special representation of a node with multiple runs59 const run_count_m1 = (u8array[i] & 0x0f);60 const run_count = run_count_m1 + 1;61 this.keysAndCardinalities = new Uint8Array(run_count * 4);62 // the run keys and values63 let pspecial = i + 1;64 // the run lengths65 let pnspecial = pspecial + (run_count * 4);66 /** @type {number|null} */67 let previous_key = null;68 /** @type {RoaringBitmapRun|null} */69 let previous_container = null;70 for (let j = 0; j < run_count; j += 1) {71 const key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);72 const value = u8array[pspecial] | (u8array[pspecial + 1] << 8);73 const run_length_m1 = j % 2 === 0 ?74 (u8array[pnspecial] >> 4) :75 (u8array[pnspecial] & 0x0f);76 if (j % 2 !== 0) {77 pnspecial += 1;78 }79 pspecial += 4;80 if (key === previous_key && previous_container !== null) {81 const new_container_array = new Uint8Array(82 (previous_container.runcount + 1) * 4,83 );84 new_container_array.set(previous_container.array);85 new_container_array[previous_container.runcount * 4] = value & 0xFF;86 new_container_array[(previous_container.runcount * 4) + 1] =87 (value >> 8) & 0xFF;88 new_container_array[(previous_container.runcount * 4) + 2] =89 run_length_m1;90 previous_container.array = new_container_array;91 previous_container.runcount += 1;92 let cardinalitym1 =93 this.keysAndCardinalities[(this.containers.length * 4) - 2] |94 (this.keysAndCardinalities[(this.containers.length * 4) - 1] << 8);95 cardinalitym1 += run_length_m1 + 1;96 this.keysAndCardinalities[(this.containers.length * 4) - 2] =97 cardinalitym1 & 0xFF;98 this.keysAndCardinalities[(this.containers.length * 4) - 1] =99 (cardinalitym1 >> 8) & 0xFF;100 } else {101 previous_key = key;102 previous_container = new RoaringBitmapRun(1, Uint8Array.of(103 value & 0xFF,104 (value >> 8) & 0xFF,105 run_length_m1,106 0,107 ));108 this.containers.push(previous_container);109 this.keysAndCardinalities[(this.containers.length * 4) - 4] = key & 0xFF;110 this.keysAndCardinalities[(this.containers.length * 4) - 3] = (key >> 8) & 0xFF;111 this.keysAndCardinalities[(this.containers.length * 4) - 2] = run_length_m1;112 }113 }114 if (run_count % 2 !== 0) {115 pnspecial += 1;116 }117 this.consumed_len_bytes = pnspecial - i;118 return this;119 } else if (u8array[i] > 0xd0) {120 // Special representation of tiny sets that are close together121 const lspecial = u8array[i] & 0x0f;122 this.keysAndCardinalities = new Uint8Array(lspecial * 4);123 let pspecial = i + 1;124 let key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);125 let value = u8array[pspecial] | (u8array[pspecial + 1] << 8);126 let entry = (key << 16) | value;127 let container;128 container = new RoaringBitmapArray(1, new Uint8Array(4));129 container.array[0] = value & 0xFF;130 container.array[1] = (value >> 8) & 0xFF;131 this.containers.push(container);132 this.keysAndCardinalities[0] = key;133 this.keysAndCardinalities[1] = key >> 8;134 pspecial += 4;135 for (let ispecial = 1; ispecial < lspecial; ispecial += 1) {136 entry += u8array[pspecial];137 value = entry & 0xffff;138 key = entry >> 16;139 container = this.addToArrayAt(key);140 const cardinalityOld = container.cardinality;141 container.array[cardinalityOld * 2] = value & 0xFF;142 container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF;143 container.cardinality = cardinalityOld + 1;144 pspecial += 1;145 }146 this.consumed_len_bytes = pspecial - i;147 return this;148 } else if (u8array[i] > 0x80) {149 // Special representation of tiny sets that are runs150 const lspecial = u8array[i] & 0x3f;151 const lspecialm1 = lspecial - 1;152 this.keysAndCardinalities = new Uint8Array(4);153 i += 1;154 const key = u8array[i + 2] | (u8array[i + 3] << 8);155 const value = u8array[i] | (u8array[i + 1] << 8);156 const container = new RoaringBitmapRun(1, new Uint8Array(4));157 container.array[0] = value & 0xFF;158 container.array[1] = (value >> 8) & 0xFF;159 container.array[2] = lspecialm1 & 0xFF;160 container.array[3] = lspecialm1 >> 8;161 this.containers.push(container);162 this.keysAndCardinalities[0] = key & 0xFF;163 this.keysAndCardinalities[1] = (key >> 8) & 0xFF;164 this.keysAndCardinalities[2] = lspecialm1 & 0xFF;165 this.keysAndCardinalities[3] = lspecialm1 >> 8;166 this.consumed_len_bytes = 5;167 return this;168 } else if (u8array[i] < 0x3a) {169 // Special representation of tiny sets with arbitrary 32-bit integers170 const lspecial = u8array[i];171 this.keysAndCardinalities = new Uint8Array(lspecial * 4);172 let pspecial = i + 1;173 for (let ispecial = 0; ispecial < lspecial; ispecial += 1) {174 const key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);175 const value = u8array[pspecial] | (u8array[pspecial + 1] << 8);176 const container = this.addToArrayAt(key);177 const cardinalityOld = container.cardinality;178 container.array[cardinalityOld * 2] = value & 0xFF;179 container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF;180 container.cardinality = cardinalityOld + 1;181 pspecial += 4;182 }183 this.consumed_len_bytes = pspecial - i;184 return this;185 }186 // https://github.com/RoaringBitmap/RoaringFormatSpec187 //188 // Roaring bitmaps are used for flags that can be kept in their189 // compressed form, even when loaded into memory. This decoder190 // turns the containers into objects, but uses byte array191 // slices of the original format for the data payload.192 const has_runs = u8array[i] === 0x3b;193 if (u8array[i] !== 0x3a && u8array[i] !== 0x3b) {194 throw new Error("not a roaring bitmap: " + u8array[i]);195 }196 const size = has_runs ?197 ((u8array[i + 2] | (u8array[i + 3] << 8)) + 1) :198 ((u8array[i + 4] | (u8array[i + 5] << 8) |199 (u8array[i + 6] << 16) | (u8array[i + 7] << 24)));200 i += has_runs ? 4 : 8;201 let is_run;202 if (has_runs) {203 const is_run_len = (size + 7) >> 3;204 is_run = new Uint8Array(u8array.buffer, i + u8array.byteOffset, is_run_len);205 i += is_run_len;206 } else {207 is_run = EMPTY_UINT8;208 }209 this.keysAndCardinalities = u8array.subarray(i, i + (size * 4));210 i += size * 4;211 let offsets = null;212 if (!has_runs || size >= 4) {213 offsets = [];214 for (let j = 0; j < size; ++j) {215 offsets.push(u8array[i] | (u8array[i + 1] << 8) | (u8array[i + 2] << 16) |216 (u8array[i + 3] << 24));217 i += 4;218 }219 }220 for (let j = 0; j < size; ++j) {221 if (offsets && offsets[j] !== i - start) {222 throw new Error(`corrupt bitmap ${j}: ${i - start} / ${offsets[j]}`);223 }224 const cardinality = (this.keysAndCardinalities[(j * 4) + 2] |225 (this.keysAndCardinalities[(j * 4) + 3] << 8)) + 1;226 if (is_run[j >> 3] & (1 << (j & 0x7))) {227 const runcount = (u8array[i] | (u8array[i + 1] << 8));228 i += 2;229 this.containers.push(new RoaringBitmapRun(230 runcount,231 new Uint8Array(u8array.buffer, i + u8array.byteOffset, runcount * 4),232 ));233 i += runcount * 4;234 } else if (cardinality >= 4096) {235 this.containers.push(new RoaringBitmapBits(new Uint8Array(236 u8array.buffer,237 i + u8array.byteOffset, 8192,238 )));239 i += 8192;240 } else {241 const end = cardinality * 2;242 this.containers.push(new RoaringBitmapArray(243 cardinality,244 new Uint8Array(u8array.buffer, i + u8array.byteOffset, end),245 ));246 i += end;247 }248 }249 this.consumed_len_bytes = i - start;250 }251 /**252 * @param {number} number253 * @returns {RoaringBitmap}254 */255 static makeSingleton(number) {256 const result = new RoaringBitmap(null, 0);257 result.keysAndCardinalities = Uint8Array.of(258 (number >> 16), (number >> 24),259 0, 0, // keysAndCardinalities stores the true cardinality minus 1260 );261 result.containers.push(new RoaringBitmapArray(262 1,263 Uint8Array.of(number, number >> 8),264 ));265 return result;266 }267 /** @returns {RoaringBitmap} */268 static everything() {269 if (EVERYTHING_BITMAP.isEmpty()) {270 let i = 0;271 const l = 1 << 16;272 const everything_range = new RoaringBitmapRun(1, Uint8Array.of(0, 0, 0xff, 0xff));273 EVERYTHING_BITMAP.keysAndCardinalities = new Uint8Array(l * 4);274 while (i < l) {275 EVERYTHING_BITMAP.containers.push(everything_range);276 // key277 EVERYTHING_BITMAP.keysAndCardinalities[(i * 4) + 0] = i;278 EVERYTHING_BITMAP.keysAndCardinalities[(i * 4) + 1] = i >> 8;279 // cardinality (minus one)280 EVERYTHING_BITMAP.keysAndCardinalities[(i * 4) + 2] = 0xff;281 EVERYTHING_BITMAP.keysAndCardinalities[(i * 4) + 3] = 0xff;282 i += 1;283 }284 }285 return EVERYTHING_BITMAP;286 }287 /** @returns {RoaringBitmap} */288 static empty() {289 return EMPTY_BITMAP;290 }291 /** @returns {boolean} */292 isEmpty() {293 return this.containers.length === 0;294 }295 /**296 * Helper function used when constructing bitmaps from lists.297 * Returns an array container with at least two free byte slots298 * and bumps `this.cardinalities`.299 * @param {number} key300 * @returns {RoaringBitmapArray}301 */302 addToArrayAt(key) {303 let mid = this.getContainerId(key);304 /** @type {RoaringBitmapArray|RoaringBitmapBits|RoaringBitmapRun} */305 let container;306 if (mid === -1) {307 container = new RoaringBitmapArray(0, new Uint8Array(2));308 mid = this.containers.length;309 this.containers.push(container);310 if (mid * 4 > this.keysAndCardinalities.length) {311 const keysAndContainers = new Uint8Array(mid * 8);312 keysAndContainers.set(this.keysAndCardinalities);313 this.keysAndCardinalities = keysAndContainers;314 }315 this.keysAndCardinalities[(mid * 4) + 0] = key;316 this.keysAndCardinalities[(mid * 4) + 1] = key >> 8;317 } else {318 container = this.containers[mid];319 const cardinalityOld =320 this.keysAndCardinalities[(mid * 4) + 2] |321 (this.keysAndCardinalities[(mid * 4) + 3] << 8);322 const cardinality = cardinalityOld + 1;323 this.keysAndCardinalities[(mid * 4) + 2] = cardinality;324 this.keysAndCardinalities[(mid * 4) + 3] = cardinality >> 8;325 }326 // the logic for handing this number is annoying, because keysAndCardinalities stores327 // the cardinality *minus one*, so that it can count up to 65536 with only two bytes328 // (because empty containers are never stored).329 //330 // So, if this is a new container, the stored cardinality contains `0 0`, which is331 // the proper value of the old cardinality (an imaginary empty container existed).332 // If this is adding to an existing container, then the above `else` branch bumps it333 // by one, leaving us with a proper value of `cardinality - 1`.334 const cardinalityOld =335 this.keysAndCardinalities[(mid * 4) + 2] |336 (this.keysAndCardinalities[(mid * 4) + 3] << 8);337 if (!(container instanceof RoaringBitmapArray) ||338 container.array.byteLength < ((cardinalityOld + 1) * 2)339 ) {340 const newBuf = new Uint8Array((cardinalityOld + 1) * 4);341 let idx = 0;342 for (const cvalue of container.values()) {343 newBuf[idx] = cvalue & 0xFF;344 newBuf[idx + 1] = (cvalue >> 8) & 0xFF;345 idx += 2;346 }347 if (container instanceof RoaringBitmapArray) {348 container.cardinality = cardinalityOld;349 container.array = newBuf;350 return container;351 }352 const newcontainer = new RoaringBitmapArray(cardinalityOld, newBuf);353 this.containers[mid] = newcontainer;354 return newcontainer;355 } else {356 return container;357 }358 }359 /**360 * @param {RoaringBitmap} that361 * @returns {RoaringBitmap}362 */363 union(that) {364 if (this.isEmpty()) {365 return that;366 }367 if (that.isEmpty()) {368 return this;369 }370 if (this === RoaringBitmap.everything() || that === RoaringBitmap.everything()) {371 return RoaringBitmap.everything();372 }373 let i = 0;374 const il = this.containers.length;375 let j = 0;376 const jl = that.containers.length;377 const result = new RoaringBitmap(null, 0);378 result.keysAndCardinalities = new Uint8Array((il + jl) * 4);379 while (i < il || j < jl) {380 const ik = i * 4;381 const jk = j * 4;382 const k = result.containers.length * 4;383 if (j >= jl || (i < il && (384 (this.keysAndCardinalities[ik + 1] < that.keysAndCardinalities[jk + 1]) ||385 (this.keysAndCardinalities[ik + 1] === that.keysAndCardinalities[jk + 1] &&386 this.keysAndCardinalities[ik] < that.keysAndCardinalities[jk])387 ))) {388 result.keysAndCardinalities[k + 0] = this.keysAndCardinalities[ik + 0];389 result.keysAndCardinalities[k + 1] = this.keysAndCardinalities[ik + 1];390 result.keysAndCardinalities[k + 2] = this.keysAndCardinalities[ik + 2];391 result.keysAndCardinalities[k + 3] = this.keysAndCardinalities[ik + 3];392 result.containers.push(this.containers[i]);393 i += 1;394 } else if (i >= il || (j < jl && (395 (that.keysAndCardinalities[jk + 1] < this.keysAndCardinalities[ik + 1]) ||396 (that.keysAndCardinalities[jk + 1] === this.keysAndCardinalities[ik + 1] &&397 that.keysAndCardinalities[jk] < this.keysAndCardinalities[ik])398 ))) {399 result.keysAndCardinalities[k + 0] = that.keysAndCardinalities[jk + 0];400 result.keysAndCardinalities[k + 1] = that.keysAndCardinalities[jk + 1];401 result.keysAndCardinalities[k + 2] = that.keysAndCardinalities[jk + 2];402 result.keysAndCardinalities[k + 3] = that.keysAndCardinalities[jk + 3];403 result.containers.push(that.containers[j]);404 j += 1;405 } else {406 // this key is not smaller than that key407 // that key is not smaller than this key408 // they must be equal409 const thisContainer = this.containers[i];410 const thatContainer = that.containers[j];411 let card = 0;412 if (thisContainer instanceof RoaringBitmapBits &&413 thatContainer instanceof RoaringBitmapBits414 ) {415 const resultArray = new Uint8Array(416 thisContainer.array.length > thatContainer.array.length ?417 thisContainer.array.length :418 thatContainer.array.length,419 );420 let k = 0;421 const kl = resultArray.length;422 while (k < kl) {423 const c = thisContainer.array[k] | thatContainer.array[k];424 resultArray[k] = c;425 card += bitCount(c);426 k += 1;427 }428 result.containers.push(new RoaringBitmapBits(resultArray));429 } else {430 const thisValues = thisContainer.values();431 const thatValues = thatContainer.values();432 let thisResult = thisValues.next();433 let thatResult = thatValues.next();434 /** @type {Array<number>} */435 const resultValues = [];436 while (!thatResult.done || !thisResult.done) {437 // generator will definitely implement the iterator protocol correctly438 /** @type {number} */439 const thisValue = thisResult.value;440 /** @type {number} */441 const thatValue = thatResult.value;442 if (thatResult.done || thisValue < thatValue) {443 resultValues.push(thisValue);444 thisResult = thisValues.next();445 } else if (thisResult.done || thatValue < thisValue) {446 resultValues.push(thatValue);447 thatResult = thatValues.next();448 } else {449 // this value is not smaller than that value450 // that value is not smaller than this value451 // they must be equal452 resultValues.push(thisValue);453 thisResult = thisValues.next();454 thatResult = thatValues.next();455 }456 }457 const resultArray = new Uint8Array(resultValues.length * 2);458 let k = 0;459 for (const value of resultValues) {460 // roaring bitmap is little endian461 resultArray[k] = value & 0xFF;462 resultArray[k + 1] = (value >> 8) & 0xFF;463 k += 2;464 }465 result.containers.push(new RoaringBitmapArray(466 resultValues.length,467 resultArray,468 ));469 card = resultValues.length;470 }471 result.keysAndCardinalities[k + 0] = this.keysAndCardinalities[ik + 0];472 result.keysAndCardinalities[k + 1] = this.keysAndCardinalities[ik + 1];473 card -= 1;474 result.keysAndCardinalities[k + 2] = card;475 result.keysAndCardinalities[k + 3] = card >> 8;476 i += 1;477 j += 1;478 }479 }480 return result;481 }482 /**483 * @param {RoaringBitmap} that484 * @returns {RoaringBitmap}485 */486 intersection(that) {487 if (this.isEmpty() || that.isEmpty()) {488 return EMPTY_BITMAP;489 }490 if (this === RoaringBitmap.everything()) {491 return that;492 }493 if (that === RoaringBitmap.everything()) {494 return this;495 }496 let i = 0;497 const il = this.containers.length;498 let j = 0;499 const jl = that.containers.length;500 const result = new RoaringBitmap(null, 0);501 result.keysAndCardinalities = new Uint8Array((il > jl ? il : jl) * 4);502 while (i < il && j < jl) {503 const ik = i * 4;504 const jk = j * 4;505 const k = result.containers.length * 4;506 if (j >= jl || (i < il && (507 (this.keysAndCardinalities[ik + 1] < that.keysAndCardinalities[jk + 1]) ||508 (this.keysAndCardinalities[ik + 1] === that.keysAndCardinalities[jk + 1] &&509 this.keysAndCardinalities[ik] < that.keysAndCardinalities[jk])510 ))) {511 i += 1;512 } else if (i >= il || (j < jl && (513 (that.keysAndCardinalities[jk + 1] < this.keysAndCardinalities[ik + 1]) ||514 (that.keysAndCardinalities[jk + 1] === this.keysAndCardinalities[ik + 1] &&515 that.keysAndCardinalities[jk] < this.keysAndCardinalities[ik])516 ))) {517 j += 1;518 } else {519 // this key is not smaller than that key520 // that key is not smaller than this key521 // they must be equal522 const thisContainer = this.containers[i];523 const thatContainer = that.containers[j];524 let card = 0;525 if (thisContainer instanceof RoaringBitmapBits &&526 thatContainer instanceof RoaringBitmapBits527 ) {528 const resultArray = new Uint8Array(529 thisContainer.array.length > thatContainer.array.length ?530 thisContainer.array.length :531 thatContainer.array.length,532 );533 let k = 0;534 const kl = resultArray.length;535 while (k < kl) {536 const c = thisContainer.array[k] & thatContainer.array[k];537 resultArray[k] = c;538 card += bitCount(c);539 k += 1;540 }541 if (card !== 0) {542 result.containers.push(new RoaringBitmapBits(resultArray));543 }544 } else {545 const thisValues = thisContainer.values();546 const thatValues = thatContainer.values();547 let thisValue = thisValues.next();548 let thatValue = thatValues.next();549 const resultValues = [];550 while (!thatValue.done && !thisValue.done) {551 if (thisValue.value < thatValue.value) {552 thisValue = thisValues.next();553 } else if (thatValue.value < thisValue.value) {554 thatValue = thatValues.next();555 } else {556 // this value is not smaller than that value557 // that value is not smaller than this value558 // they must be equal559 resultValues.push(thisValue.value);560 thisValue = thisValues.next();561 thatValue = thatValues.next();562 }563 }564 card = resultValues.length;565 if (card !== 0) {566 const resultArray = new Uint8Array(resultValues.length * 2);567 let k = 0;568 for (const value of resultValues) {569 // roaring bitmap is little endian570 resultArray[k] = value & 0xFF;571 resultArray[k + 1] = (value >> 8) & 0xFF;572 k += 2;573 }574 result.containers.push(new RoaringBitmapArray(575 resultValues.length,576 resultArray,577 ));578 }579 }580 if (card !== 0) {581 result.keysAndCardinalities[k + 0] = this.keysAndCardinalities[ik + 0];582 result.keysAndCardinalities[k + 1] = this.keysAndCardinalities[ik + 1];583 card -= 1;584 result.keysAndCardinalities[k + 2] = card;585 result.keysAndCardinalities[k + 3] = card >> 8;586 }587 i += 1;588 j += 1;589 }590 }591 return result;592 }593 /** @param {number} keyvalue */594 contains(keyvalue) {595 const key = keyvalue >> 16;596 const value = keyvalue & 0xFFFF;597 const mid = this.getContainerId(key);598 return mid === -1 ? false : this.containers[mid].contains(value);599 }600 /**601 * @param {number} keyvalue602 * @returns {RoaringBitmap}603 */604 remove(keyvalue) {605 const key = keyvalue >> 16;606 const value = keyvalue & 0xFFFF;607 const mid = this.getContainerId(key);608 if (mid === -1) {609 return this;610 }611 const container = this.containers[mid];612 if (!container.contains(value)) {613 return this;614 }615 const newCardinality = (this.keysAndCardinalities[(mid * 4) + 2] |616 (this.keysAndCardinalities[(mid * 4) + 3] << 8));617 const l = this.containers.length;618 const m = l - (newCardinality === 0 ? 1 : 0);619 const result = new RoaringBitmap(null, 0);620 result.keysAndCardinalities = new Uint8Array(m * 4);621 let j = 0;622 for (let i = 0; i < l; i += 1) {623 if (i === mid) {624 if (newCardinality !== 0) {625 result.keysAndCardinalities[(j * 4) + 0] = key;626 result.keysAndCardinalities[(j * 4) + 1] = key >> 8;627 const card = newCardinality - 1;628 result.keysAndCardinalities[(j * 4) + 2] = card;629 result.keysAndCardinalities[(j * 4) + 3] = card >> 8;630 const newContainer = new RoaringBitmapArray(631 newCardinality,632 new Uint8Array(newCardinality * 2),633 );634 let newContainerSlot = 0;635 for (const containerValue of container.values()) {636 if (containerValue !== value) {637 newContainer.array[newContainerSlot] = value & 0xFF;638 newContainerSlot += 1;639 newContainer.array[newContainerSlot] = value >> 8;640 newContainerSlot += 1;641 }642 }643 result.containers.push(newContainer);644 j += 1;645 }646 } else {647 result.keysAndCardinalities[(j * 4) + 0] = this.keysAndCardinalities[(i * 4) + 0];648 result.keysAndCardinalities[(j * 4) + 1] = this.keysAndCardinalities[(i * 4) + 1];649 result.keysAndCardinalities[(j * 4) + 2] = this.keysAndCardinalities[(i * 4) + 2];650 result.keysAndCardinalities[(j * 4) + 3] = this.keysAndCardinalities[(i * 4) + 3];651 result.containers.push(this.containers[i]);652 j += 1;653 }654 }655 return result;656 }657 /**658 * @param {number} key659 * @returns {number}660 */661 getContainerId(key) {662 // Binary search algorithm copied from663 // https://en.wikipedia.org/wiki/Binary_search#Procedure664 //665 // Format is required by specification to be sorted.666 // Because keys are 16 bits and unique, length can't be667 // bigger than 2**16, and because we have 32 bits of safe int,668 // left + right can't overflow.669 let left = 0;670 let right = this.containers.length - 1;671 while (left <= right) {672 const mid = Math.floor((left + right) / 2);673 const x = this.keysAndCardinalities[(mid * 4)] |674 (this.keysAndCardinalities[(mid * 4) + 1] << 8);675 if (x < key) {676 left = mid + 1;677 } else if (x > key) {678 right = mid - 1;679 } else {680 return mid;681 }682 }683 return -1;684 }685 * entries() {686 const l = this.containers.length;687 for (let i = 0; i < l; ++i) {688 const key = this.keysAndCardinalities[i * 4] |689 (this.keysAndCardinalities[(i * 4) + 1] << 8);690 for (const value of this.containers[i].values()) {691 yield (key << 16) | value;692 }693 }694 }695 /**696 * @returns {number|null}697 */698 first() {699 for (const entry of this.entries()) {700 return entry;701 }702 return null;703 }704 /**705 * @returns {number}706 */707 cardinality() {708 let result = 0;709 const l = this.containers.length;710 for (let i = 0; i < l; ++i) {711 const card = this.keysAndCardinalities[(i * 4) + 2] |712 (this.keysAndCardinalities[(i * 4) + 3] << 8);713 result += card + 1;714 }715 return result;716 }717}718719class RoaringBitmapRun {720 /**721 * @param {number} runcount722 * @param {Uint8Array} array723 */724 constructor(runcount, array) {725 this.runcount = runcount;726 this.array = array;727 }728 /** @param {number} value */729 contains(value) {730 // Binary search algorithm copied from731 // https://en.wikipedia.org/wiki/Binary_search#Procedure732 //733 // Since runcount is stored as 16 bits, left + right734 // can't overflow.735 let left = 0;736 let right = this.runcount - 1;737 while (left <= right) {738 const mid = (left + right) >> 1;739 const i = mid * 4;740 const start = this.array[i] | (this.array[i + 1] << 8);741 const lenm1 = this.array[i + 2] | (this.array[i + 3] << 8);742 if ((start + lenm1) < value) {743 left = mid + 1;744 } else if (start > value) {745 right = mid - 1;746 } else {747 return true;748 }749 }750 return false;751 }752 * values() {753 let i = 0;754 while (i < this.runcount) {755 const start = this.array[i * 4] | (this.array[(i * 4) + 1] << 8);756 const lenm1 = this.array[(i * 4) + 2] | (this.array[(i * 4) + 3] << 8);757 let value = start;758 let j = 0;759 while (j <= lenm1) {760 yield value;761 value += 1;762 j += 1;763 }764 i += 1;765 }766 }767}768class RoaringBitmapArray {769 /**770 * @param {number} cardinality771 * @param {Uint8Array} array772 */773 constructor(cardinality, array) {774 this.cardinality = cardinality;775 this.array = array;776 }777 /** @param {number} value */778 contains(value) {779 // Binary search algorithm copied from780 // https://en.wikipedia.org/wiki/Binary_search#Procedure781 //782 // Since cardinality can't be higher than 4096, left + right783 // cannot overflow.784 let left = 0;785 let right = this.cardinality - 1;786 while (left <= right) {787 const mid = (left + right) >> 1;788 const i = mid * 2;789 const x = this.array[i] | (this.array[i + 1] << 8);790 if (x < value) {791 left = mid + 1;792 } else if (x > value) {793 right = mid - 1;794 } else {795 return true;796 }797 }798 return false;799 }800 /** @returns {Generator<number>} */801 * values() {802 let i = 0;803 const l = this.cardinality * 2;804 while (i < l) {805 yield this.array[i] | (this.array[i + 1] << 8);806 i += 2;807 }808 }809}810class RoaringBitmapBits {811 /**812 * @param {Uint8Array} array813 */814 constructor(array) {815 this.array = array;816 }817 /** @param {number} value */818 contains(value) {819 return !!(this.array[value >> 3] & (1 << (value & 7)));820 }821 * values() {822 let i = 0;823 const l = this.array.length << 3;824 while (i < l) {825 if (this.contains(i)) {826 yield i;827 }828 i += 1;829 }830 }831}832833const EMPTY_BITMAP = new RoaringBitmap(null, 0);834EMPTY_BITMAP.consumed_len_bytes = 0;835const EMPTY_BITMAP1 = new RoaringBitmap(null, 0);836EMPTY_BITMAP1.consumed_len_bytes = 1;837const EVERYTHING_BITMAP = new RoaringBitmap(null, 0);838839/**840 * A mapping from six byte nodeids to an arbitrary value.841 * We don't just use `Map` because that requires double hashing.842 * @template T843 * @property {Uint8Array} keys844 * @property {T[]} values845 * @property {number} size846 * @property {number} capacityClass847 */848class HashTable {849 /**850 * Construct an empty hash table.851 */852 constructor() {853 this.keys = EMPTY_UINT8;854 /** @type {(T|undefined)[]} */855 this.values = [];856 this.size = 0;857 this.capacityClass = 0;858 }859 /**860 * @returns {Generator<[Uint8Array, T]>}861 */862 * entries() {863 const keys = this.keys;864 const values = this.values;865 const l = this.values.length;866 for (let i = 0; i < l; i += 1) {867 const value = values[i];868 if (value !== undefined) {869 yield [keys.subarray(i * 5, (i + 1) * 5), value];870 }871 }872 }873 /**874 * Add a value to the hash table.875 * @param {Uint8Array} key876 * @param {T} value877 */878 set(key, value) {879 // 90 % load factor880 if (this.size * 10 >= this.values.length * 9) {881 const keys = this.keys;882 const values = this.values;883 const l = values.length;884 this.capacityClass += 1;885 const capacity = 1 << this.capacityClass;886 this.keys = new Uint8Array(capacity * 5);887 this.values = [];888 for (let i = 0; i < capacity; i += 1) {889 this.values.push(undefined);890 }891 this.size = 0;892 for (let i = 0; i < l; i += 1) {893 const oldValue = values[i];894 if (oldValue !== undefined) {895 this.setNoGrow(keys, i * 5, oldValue);896 }897 }898 }899 this.setNoGrow(key, 0, value);900 }901 /**902 * @param {Uint8Array} key903 * @param {number} start904 * @param {T} value905 */906 setNoGrow(key, start, value) {907 const mask = ~(0xffffffff << this.capacityClass);908 const keys = this.keys;909 const values = this.values;910 const l = 1 << this.capacityClass;911 // because we know that our values are already hashed,912 // just chop off the first byte913 let slot = (914 (key[start + 1] << 24) |915 (key[start + 2] << 16) |916 (key[start + 3] << 8) |917 key[start + 4]918 ) & mask;919 for (let distance = 0; distance < l; ) {920 const j = slot * 5;921 const otherValue = values[slot];922 if (otherValue === undefined) {923 values[slot] = value;924 const keysStart = slot * 5;925 keys[keysStart + 0] = key[start + 0];926 keys[keysStart + 1] = key[start + 1];927 keys[keysStart + 2] = key[start + 2];928 keys[keysStart + 3] = key[start + 3];929 keys[keysStart + 4] = key[start + 4];930 this.size += 1;931 break;932 } else if (933 key[start + 0] === keys[j + 0] &&934 key[start + 1] === keys[j + 1] &&935 key[start + 2] === keys[j + 2] &&936 key[start + 3] === keys[j + 3] &&937 key[start + 4] === keys[j + 4]938 ) {939 values[slot] = value;940 break;941 } else {942 const otherPreferredSlot = (943 (keys[j + 1] << 24) | (keys[j + 2] << 16) |944 (keys[j + 3] << 8) | keys[j + 4]945 ) & mask;946 const otherDistance = otherPreferredSlot <= slot ?947 slot - otherPreferredSlot :948 (l - otherPreferredSlot) + slot;949 if (distance > otherDistance) {950 // if the other key is closer to its preferred slot than this one,951 // then insert our node in its place and swap952 //953 // https://cglab.ca/~abeinges/blah/robinhood-part-1/954 const otherKey = keys.slice(j, j + 5);955 values[slot] = value;956 value = otherValue;957 keys[j + 0] = key[start + 0];958 keys[j + 1] = key[start + 1];959 keys[j + 2] = key[start + 2];960 keys[j + 3] = key[start + 3];961 keys[j + 4] = key[start + 4];962 key = otherKey;963 start = 0;964 distance = otherDistance;965 }966 distance += 1;967 slot = (slot + 1) & mask;968 }969 }970 }971 /**972 * Retrieve a value973 * @param {Uint8Array} key974 * @returns {T|undefined}975 */976 get(key) {977 if (key.length !== 5) {978 throw "invalid key";979 }980 return this.getWithOffsetKey(key, 0);981 }982 /**983 * Retrieve a value984 * @param {Uint8Array} key985 * @param {number} start986 * @returns {T|undefined}987 */988 getWithOffsetKey(key, start) {989 const mask = ~(0xffffffff << this.capacityClass);990 const keys = this.keys;991 const values = this.values;992 const l = 1 << this.capacityClass;993 // because we know that our values are already hashed,994 // just chop off the lower four bytes995 let slot = (996 (key[start + 1] << 24) |997 (key[start + 2] << 16) |998 (key[start + 3] << 8) |999 key[start + 4]1000 ) & mask;1001 for (let distance = 0; distance < l; distance += 1) {1002 const j = slot * 5;1003 const value = values[slot];1004 if (value === undefined) {1005 break;1006 } else if (1007 key[start + 0] === keys[j + 0] &&1008 key[start + 1] === keys[j + 1] &&1009 key[start + 2] === keys[j + 2] &&1010 key[start + 3] === keys[j + 3] &&1011 key[start + 4] === keys[j + 4]1012 ) {1013 return value;1014 } else {1015 const otherPreferredSlot = (1016 (keys[j + 1] << 24) | (keys[j + 2] << 16) |1017 (keys[j + 3] << 8) | keys[j + 4]1018 ) & mask;1019 const otherDistance = otherPreferredSlot <= slot ?1020 slot - otherPreferredSlot :1021 (l - otherPreferredSlot) + slot;1022 if (distance > otherDistance) {1023 break;1024 }1025 }1026 slot = (slot + 1) & mask;1027 }1028 return undefined;1029 }1030}10311032/*eslint-disable */1033// ignore-tidy-linelength1034/** <https://stackoverflow.com/questions/43122082/efficiently-count-the-number-of-bits-in-an-integer-in-javascript>1035 * @param {number} n1036 * @returns {number}1037 */1038function bitCount(n) {1039 n = (~~n) - ((n >> 1) & 0x55555555);1040 n = (n & 0x33333333) + ((n >> 2) & 0x33333333);1041 return ((n + (n >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;1042}1043/*eslint-enable */10441045/**1046 * https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm1047 */1048class Uint8ArraySearchPattern {1049 /** @param {Uint8Array} needle */1050 constructor(needle) {1051 this.needle = needle;1052 this.skipTable = [];1053 const m = needle.length;1054 for (let i = 0; i < 256; i += 1) {1055 this.skipTable.push(m);1056 }1057 for (let i = 0; i < m - 1; i += 1) {1058 this.skipTable[needle[i]] = m - 1 - i;1059 }1060 }1061 /**1062 * @param {Uint8Array} haystack1063 * @returns {boolean}1064 */1065 matches(haystack) {1066 const needle = this.needle;1067 const skipTable = this.skipTable;1068 const m = needle.length;1069 const n = haystack.length;10701071 let skip = 0;1072 search: while (n - skip >= m) {1073 for (let i = m - 1; i >= 0; i -= 1) {1074 if (haystack[skip + i] !== needle[i]) {1075 skip += skipTable[haystack[skip + m - 1]];1076 continue search;1077 }1078 }1079 return true;1080 }1081 return false;1082 }1083}10841085/**1086 * @param {stringdex.Hooks} hooks1087 * @returns {Promise<stringdex.Database>}1088 */1089function loadDatabase(hooks) {1090 /** @type {stringdex.Callbacks} */1091 const callbacks = {1092 rr_: function(data) {1093 const dataObj = JSON.parse(data);1094 for (const colName of Object.keys(dataObj)) {1095 if (Object.hasOwn(dataObj[colName], "N")) {1096 const counts = [];1097 const countsstring = dataObj[colName]["N"];1098 let i = 0;1099 const l = countsstring.length;1100 while (i < l) {1101 let n = 0;1102 let c = countsstring.charCodeAt(i);1103 while (c < 96) { // 96 = "`"1104 n = (n << 4) | (c & 0xF);1105 i += 1;1106 c = countsstring.charCodeAt(i);1107 }1108 n = (n << 4) | (c & 0xF);1109 counts.push(n);1110 i += 1;1111 }1112 registry.dataColumns.set(colName, new DataColumn(1113 counts,1114 makeUint8ArrayFromBase64(dataObj[colName]["H"]),1115 new RoaringBitmap(makeUint8ArrayFromBase64(dataObj[colName]["E"]), 0),1116 colName,1117 Object.hasOwn(dataObj[colName], "I") ?1118 makeSearchTreeFromBase64(dataObj[colName].I)[1] :1119 null,1120 ));1121 }1122 }1123 const cb = registry.searchTreeRootCallback;1124 if (cb) {1125 cb(null, new Database(registry.searchTreeRoots, registry.dataColumns));1126 }1127 },1128 err_rr_: function(err) {1129 const cb = registry.searchTreeRootCallback;1130 if (cb) {1131 cb(err, null);1132 }1133 },1134 rd_: function(dataString) {1135 const l = dataString.length;1136 const data = new Uint8Array(l);1137 for (let i = 0; i < l; ++i) {1138 data[i] = dataString.charCodeAt(i);1139 }1140 loadColumnFromBytes(data);1141 },1142 err_rd_: function(filename, err) {1143 const nodeid = makeUint8ArrayFromHex(filename);1144 const cb = registry.dataColumnLoadPromiseCallbacks.get(nodeid);1145 if (cb) {1146 cb(err, null);1147 }1148 },1149 rb_: function(dataString64) {1150 loadColumnFromBytes(makeUint8ArrayFromBase64(dataString64));1151 },1152 err_rb_: function(filename, err) {1153 const nodeid = makeUint8ArrayFromHex(filename);1154 const cb = registry.dataColumnLoadPromiseCallbacks.get(nodeid);1155 if (cb) {1156 cb(err, null);1157 }1158 },1159 rn_: function(inputBase64) {1160 const [nodeid, tree] = makeSearchTreeFromBase64(inputBase64);1161 const cb = registry.searchTreeLoadPromiseCallbacks.get(nodeid);1162 if (cb) {1163 cb(null, tree);1164 registry.searchTreeLoadPromiseCallbacks.set(nodeid, null);1165 }1166 },1167 err_rn_: function(filename, err) {1168 const nodeid = makeUint8ArrayFromHex(filename);1169 const cb = registry.searchTreeLoadPromiseCallbacks.get(nodeid);1170 if (cb) {1171 cb(err, null);1172 }1173 },1174 };11751176 /**1177 * @type {{1178 * searchTreeRoots: Map<string, SearchTree>;1179 * searchTreeLoadPromiseCallbacks: HashTable<(function(any, SearchTree?): any)|null>;1180 * searchTreePromises: HashTable<Promise<SearchTree>>;1181 * dataColumnLoadPromiseCallbacks: HashTable<function(any, Uint8Array[]?): any>;1182 * dataColumns: Map<string, DataColumn>;1183 * dataColumnsBuckets: HashTable<Promise<Uint8Array[]>>;1184 * searchTreeLoadByNodeID: function(Uint8Array): Promise<SearchTree>;1185 * searchTreeRootCallback?: function(any, Database?): any;1186 * dataLoadByNameAndHash: function(string, Uint8Array): Promise<Uint8Array[]>;1187 * }}1188 */1189 const registry = {1190 searchTreeRoots: new Map(),1191 searchTreeLoadPromiseCallbacks: new HashTable(),1192 searchTreePromises: new HashTable(),1193 dataColumnLoadPromiseCallbacks: new HashTable(),1194 dataColumns: new Map(),1195 dataColumnsBuckets: new HashTable(),1196 searchTreeLoadByNodeID: function(nodeid) {1197 /** @type {Promise<SearchTree>} */1198 let newPromise;1199 if ((nodeid[0] & 0x80) !== 0) {1200 const isSuffixOnly = (nodeid[0] & 0x40) !== 0;1201 const isRun = (nodeid[0] & 0x20) !== 0;1202 const lengthOrData = nodeid[0] & 0x1F;1203 const id = (nodeid[1] << 24) | (nodeid[2] << 16) | (nodeid[3] << 8) | nodeid[4];1204 let bitmap;1205 if (isRun) {1206 bitmap = new RoaringBitmap(null);1207 bitmap.containers.push(new RoaringBitmapRun(1208 1,1209 Uint8Array.of(1210 id & 0xFF,1211 (id >> 8) & 0xFF,1212 lengthOrData,1213 0,1214 ),1215 ));1216 bitmap.keysAndCardinalities = Uint8Array.of(1217 (id >> 16) & 0xff,1218 (id >> 24) & 0xff,1219 lengthOrData,1220 0,1221 );1222 } else {1223 bitmap = RoaringBitmap.makeSingleton(id);1224 }1225 let tree;1226 if (isSuffixOnly) {1227 tree = new SuffixSearchTree(1228 EMPTY_SEARCH_TREE_BRANCHES,1229 isRun ? 0 : (lengthOrData + 1),1230 bitmap,1231 );1232 } else {1233 tree = new PrefixSearchTree(1234 EMPTY_SEARCH_TREE_BRANCHES,1235 EMPTY_SEARCH_TREE_BRANCHES,1236 isRun ? EMPTY_UINT8 : Uint8Array.of(LONG_ALPHABET.chars[lengthOrData]),1237 bitmap,1238 EMPTY_BITMAP,1239 );1240 }1241 newPromise = Promise.resolve(tree);1242 } else {1243 const existingPromise = registry.searchTreePromises.get(nodeid);1244 if (existingPromise) {1245 return existingPromise;1246 }1247 const hashHex = makeHexFromUint8Array(nodeid);1248 newPromise = new Promise((resolve, reject) => {1249 const cb = registry.searchTreeLoadPromiseCallbacks.get(nodeid);1250 if (cb) {1251 registry.searchTreeLoadPromiseCallbacks.set(nodeid, (err, data) => {1252 cb(err, data);1253 if (data) {1254 resolve(data);1255 } else {1256 reject(err);1257 }1258 });1259 } else {1260 registry.searchTreeLoadPromiseCallbacks.set(nodeid, (err, data) => {1261 if (data) {1262 resolve(data);1263 } else {1264 reject(err);1265 }1266 });1267 hooks.loadTreeByHash(hashHex);1268 }1269 });1270 registry.searchTreePromises.set(nodeid, newPromise);1271 }1272 return newPromise;1273 },1274 dataLoadByNameAndHash: function(name, hash) {1275 const existingBucket = registry.dataColumnsBuckets.get(hash);1276 if (existingBucket) {1277 return existingBucket;1278 }1279 const hashHex = makeHexFromUint8Array(hash);1280 /** @type {Promise<Uint8Array[]>} */1281 const newBucket = new Promise((resolve, reject) => {1282 const cb = registry.dataColumnLoadPromiseCallbacks.get(hash);1283 if (cb) {1284 registry.dataColumnLoadPromiseCallbacks.set(hash, (err, data) => {1285 cb(err, data);1286 if (data) {1287 resolve(data);1288 } else {1289 reject(err);1290 }1291 });1292 } else {1293 registry.dataColumnLoadPromiseCallbacks.set(hash, (err, data) => {1294 if (data) {1295 resolve(data);1296 } else {1297 reject(err);1298 }1299 });1300 hooks.loadDataByNameAndHash(name, hashHex);1301 }1302 });1303 registry.dataColumnsBuckets.set(hash, newBucket);1304 return newBucket;1305 },1306 };13071308 /**1309 * The set of child subtrees.1310 * @template ST1311 * @type {{1312 * nodeids: Uint8Array,1313 * subtrees: Array<Promise<ST>|null>,1314 * }}1315 */1316 class SearchTreeBranches {1317 /**1318 * Construct the subtree list with `length` nulls1319 * @param {number} length1320 * @param {Uint8Array} nodeids1321 */1322 constructor(length, nodeids) {1323 this.nodeids = nodeids;1324 this.subtrees = [];1325 for (let i = 0; i < length; ++i) {1326 this.subtrees.push(null);1327 }1328 }1329 /**1330 * @param {number} i1331 * @returns {Uint8Array}1332 */1333 getNodeID(i) {1334 return new Uint8Array(1335 this.nodeids.buffer,1336 this.nodeids.byteOffset + (i * 5),1337 5,1338 );1339 }1340 // https://github.com/microsoft/TypeScript/issues/172271341 /** @returns {Generator<[number, Promise<ST>|null]>} */1342 entries() {1343 throw new Error();1344 }1345 /**1346 * @param {number} _k1347 * @returns {number}1348 */1349 getIndex(_k) {1350 throw new Error();1351 }1352 /**1353 * @param {number} _i1354 * @returns {number}1355 */1356 getKey(_i) {1357 throw new Error();1358 }1359 /**1360 * @returns {Uint8Array}1361 */1362 getKeys() {1363 throw new Error();1364 }1365 }13661367 /**1368 * A sorted array of search tree branches.1369 *1370 * @template ST1371 * @extends SearchTreeBranches<ST>1372 * @type {{1373 * keys: Uint8Array,1374 * nodeids: Uint8Array,1375 * subtrees: Array<Promise<ST>|null>,1376 * }}1377 */1378 class SearchTreeBranchesArray extends SearchTreeBranches {1379 /**1380 * @param {Uint8Array} keys1381 * @param {Uint8Array} nodeids1382 */1383 constructor(keys, nodeids) {1384 super(keys.length, nodeids);1385 this.keys = keys;1386 let i = 1;1387 while (i < this.keys.length) {1388 if (this.keys[i - 1] >= this.keys[i]) {1389 throw new Error("HERE");1390 }1391 i += 1;1392 }1393 }1394 /** @returns {Generator<[number, Promise<ST>|null]>} */1395 * entries() {1396 let i = 0;1397 const l = this.keys.length;1398 while (i < l) {1399 yield [this.keys[i], this.subtrees[i]];1400 i += 1;1401 }1402 }1403 /**1404 * @param {number} k1405 * @returns {number}1406 */1407 getIndex(k) {1408 // Since length can't be bigger than 256,1409 // left + right can't overflow.1410 let left = 0;1411 let right = this.keys.length - 1;1412 while (left <= right) {1413 const mid = (left + right) >> 1;1414 if (this.keys[mid] < k) {1415 left = mid + 1;1416 } else if (this.keys[mid] > k) {1417 right = mid - 1;1418 } else {1419 return mid;1420 }1421 }1422 return -1;1423 }1424 /**1425 * @param {number} i1426 * @returns {number}1427 */1428 getKey(i) {1429 return this.keys[i];1430 }1431 /**1432 * @returns {Uint8Array}1433 */1434 getKeys() {1435 return this.keys;1436 }1437 }14381439 const EMPTY_SEARCH_TREE_BRANCHES = new SearchTreeBranchesArray(1440 EMPTY_UINT8,1441 EMPTY_UINT8,1442 );14431444 class Alphabet {1445 constructor() {1446 /** @type {number[]} */1447 this.chars = [];1448 /** @type {number} */1449 this.len = 0;1450 /** @type {number} */1451 this.bytes = 0;1452 /** @type {number} */1453 this.flag = 0;1454 /** @type {number} */1455 this.bitwidth = 0;1456 }1457 /**1458 * @param {number} c1459 * @returns {boolean}1460 */1461 contains(c) {1462 return this.chars.indexOf(c) !== -1;1463 }1464 /**1465 * @param {number} c1466 * @returns {number}1467 */1468 index(c) {1469 return this.chars.indexOf(c);1470 }1471 }14721473 /** @type {Alphabet} */1474 const VOWELONLY_ALPHABITMAP = Object.assign(1475 new Alphabet(),1476 {1477 chars: [0x61, 0x65, 0x69, 0x6f, 0x75],1478 len: 5,1479 bytes: 0,1480 flag: 0x80,1481 },1482 );14831484 /** @type {Alphabet} */1485 const CONSONANTSONLY_ALPHABET = Object.assign(1486 new Alphabet(),1487 {1488 chars: [],1489 len: 21,1490 bytes: 2,1491 flag: 0xc0,1492 },1493 );1494 for (let i = 0x61; i <= 0x7A; ++i) {1495 if (i === 0x61 || i === 0x65 || i === 0x69 || i === 0x6f || i === 0x75) {1496 // 21 bits, 26 letters, so skip aeiou1497 continue;1498 }1499 CONSONANTSONLY_ALPHABET.chars.push(i);1500 }15011502 /** @type {Alphabet} */1503 const HEX_ALPHABET = Object.assign(1504 new Alphabet(),1505 {1506 chars: [],1507 len: 16,1508 bytes: 2,1509 flag: 0xfc,1510 },1511 );1512 for (let i = 0x30; i <= 0x39; ++i) {1513 HEX_ALPHABET.chars.push(i);1514 }1515 for (let i = 0x61; i <= 0x66; ++i) {1516 HEX_ALPHABET.chars.push(i);1517 }15181519 /** @type {Alphabet} */1520 const SHORT_ALPHABET = Object.assign(1521 new Alphabet(),1522 {1523 chars: [],1524 len: 24,1525 bytes: 3,1526 flag: 0xfd,1527 },1528 );1529 for (let i = 0x61; i <= 0x7A; ++i) {1530 if (i === 0x76 || i === 0x71) {1531 // 24 entries, 26 letters, so we skip q and v1532 continue;1533 }1534 SHORT_ALPHABET.chars.push(i);1535 }15361537 /** @type {Alphabet} */1538 const LONG_ALPHABET = Object.assign(1539 new Alphabet(),1540 {1541 chars: [0x31, 0x32, 0x33, 0x34, 0x35, 0x36],1542 len: 32,1543 bytes: 4,1544 flag: 0xfe,1545 },1546 );1547 for (let i = 0x61; i <= 0x7A; ++i) {1548 LONG_ALPHABET.chars.push(i);1549 }15501551 /** @type {Alphabet} */1552 const ASCII_ALPHABET = Object.assign(1553 new Alphabet(),1554 {1555 chars: [],1556 len: 128,1557 bytes: 16,1558 flag: 0xf0,1559 /**1560 * @param {number} c1561 * @returns {boolean}1562 */1563 contains(c) {1564 return c <= 0x7f;1565 },1566 /**1567 * @param {number} c1568 * @returns {number}1569 */1570 index(c) {1571 return c;1572 },1573 },1574 );1575 for (let i = 0x00; i <= 0x7f; ++i) {1576 ASCII_ALPHABET.chars.push(i);1577 }15781579 /** @type {Alphabet} */1580 const RAWBYTE_ALPHABET = Object.assign(1581 new Alphabet(),1582 {1583 chars: [],1584 len: 256,1585 bytes: 32,1586 flag: 0xff,1587 /**1588 * @param {number} _c1589 * @returns {boolean}1590 */1591 contains(_c) {1592 return true;1593 },1594 /**1595 * @param {number} c1596 * @returns {number}1597 */1598 index(c) {1599 return c;1600 },1601 },1602 );1603 for (let i = 0x00; i <= 0xff; ++i) {1604 RAWBYTE_ALPHABET.chars.push(i);1605 }16061607 /**1608 * Parse an alphabet and buffer where the flag is right at the beginning.1609 * @param {number} start1610 * @param {Uint8Array} buf1611 * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?}1612 */1613 Alphabet.parse = function(start, buf) {1614 const flag = buf[start];1615 const parsed = Alphabet.parseFlag(flag, start + 1, buf);1616 if (!parsed) {1617 return null;1618 }1619 parsed.consumed_len_bytes += 1;1620 return parsed;1621 };16221623 /**1624 * Parse an alphabet and buffer where the flag is not at the beginning.1625 * @param {number} flag1626 * @param {number} i1627 * @param {Uint8Array} buf1628 * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?}1629 */1630 Alphabet.parseFlag = function(flag, i, buf) {1631 if (flag <= 0x80) {1632 return null;1633 }1634 const alphabet = flag === RAWBYTE_ALPHABET.flag ? RAWBYTE_ALPHABET : (1635 flag === ASCII_ALPHABET.flag ? ASCII_ALPHABET : (1636 flag === LONG_ALPHABET.flag ? LONG_ALPHABET : (1637 flag === SHORT_ALPHABET.flag ? SHORT_ALPHABET : (1638 flag === HEX_ALPHABET.flag ? HEX_ALPHABET : (1639 flag >= CONSONANTSONLY_ALPHABET.flag ? CONSONANTSONLY_ALPHABET : VOWELONLY_ALPHABITMAP1640 )))));1641 let len = alphabet === CONSONANTSONLY_ALPHABET || alphabet === VOWELONLY_ALPHABITMAP ?1642 bitCount(flag & 0x1f) : 0;1643 for (let ix = 0; ix < alphabet.bytes; ++ix) {1644 len += bitCount(buf[i]);1645 i += 1;1646 }1647 return {alphabet, consumed_len_bytes: alphabet.bytes, len};1648 };16491650 /**1651 * @template ST1652 * @extends SearchTreeBranches<ST>1653 */1654 class SearchTreeBranchesAlphaBitmap extends SearchTreeBranches {1655 /**1656 * @param {Alphabet} alphabet1657 * @param {Uint8Array} buffer1658 * @param {Uint8Array} nodeids1659 */1660 constructor(alphabet, buffer, nodeids) {1661 let bitmap;1662 if (alphabet === VOWELONLY_ALPHABITMAP) {1663 bitmap = new Uint8Array(1);1664 bitmap[0] = buffer[0] & 0x1f;1665 } else if (alphabet === CONSONANTSONLY_ALPHABET) {1666 bitmap = new Uint8Array(3);1667 bitmap[0] = buffer[1];1668 bitmap[1] = buffer[2];1669 bitmap[2] = buffer[0] & 0x1f;1670 } else {1671 bitmap = buffer.subarray(1);1672 }1673 let cardinality = 0;1674 for (let i = 0; i < bitmap.length; ++i) {1675 cardinality += bitCount(bitmap[i]);1676 }1677 super(cardinality, nodeids);1678 this.bitmap = bitmap;1679 this.alphabet = alphabet;1680 }1681 /**1682 * Yields [character, SearchTree] pairs.1683 * @returns {Generator<[number, Promise<ST>|null]>}1684 */1685 * entries() {1686 let i = 0;1687 let j = 0;1688 while (i < this.alphabet.len) {1689 if (this.bitmap[i >> 3] & (1 << (i & 0x07))) {1690 yield [this.alphabet.chars[i], this.subtrees[j]];1691 j += 1;1692 }1693 i += 1;1694 }1695 }1696 /**1697 * Given a character, returns the numbered index of the search1698 * tree, or -1 if there isn't one.1699 * @param {number} c1700 * @returns {number}1701 */1702 getIndex(c) {1703 //return this.getKeys().indexOf(c);1704 if (!this.alphabet.contains(c)) {1705 return -1;1706 }1707 const k = this.alphabet.index(c);1708 if (this.bitmap[k >> 3] & (1 << (k & 0x07))) {1709 let result = bitCount(~(0xff << (k & 0x07)) & this.bitmap[k >> 3]);1710 for (let ix = 0; ix < (k >> 3); ++ix) {1711 result += bitCount(this.bitmap[ix]);1712 }1713 return result;1714 } else {1715 return -1;1716 }1717 }1718 /**1719 * Given the numbered index of a search tree, returns the key.1720 * This is the exact opposite of getIndex().1721 * @param {number} branch_index1722 * @returns {number}1723 */1724 getKey(branch_index) {1725 return this.getKeys()[branch_index];1726 }1727 /**1728 * Returns a list of one-byte keys.1729 * @returns {Uint8Array}1730 */1731 getKeys() {1732 let length = 0;1733 for (let i = 0; i < this.bitmap.length; ++i) {1734 length += bitCount(this.bitmap[i]);1735 }1736 const result = new Uint8Array(length);1737 let result_index = 0;1738 for (let ix = 0; ix < this.alphabet.len; ++ix) {1739 if (this.bitmap[ix >> 3] & (1 << (ix & 0x07))) {1740 result[result_index] = this.alphabet.chars[ix];1741 result_index += 1;1742 }1743 }1744 return result;1745 }1746 }17471748 /**1749 * @typedef {PrefixSearchTree|SuffixSearchTree|InlineNeighborsTree} SearchTree1750 * @typedef {PrefixTrie|SuffixTrie} Trie1751 */17521753 /**1754 * An interleaved [prefix] and [suffix tree],1755 * used for name-based search.1756 *1757 * This data structure is used to drive prefix matches,1758 * such as matching the query "link" to `LinkedList`,1759 * and Lev-distance matches, such as matching the1760 * query "hahsmap" to `HashMap`.1761 *1762 * [prefix tree]: https://en.wikipedia.org/wiki/Prefix_tree1763 * [suffix tree]: https://en.wikipedia.org/wiki/Suffix_tree1764 *1765 * branches1766 * : A sorted-array map of subtrees.1767 *1768 * data1769 * : The substring represented by this node. The root node1770 * is always empty.1771 *1772 * leaves_suffix1773 * : The IDs of every entry that matches. Levenshtein matches1774 * won't include these.1775 *1776 * leaves_whole1777 * : The IDs of every entry that matches exactly. Levenshtein matches1778 * will include these.1779 *1780 * @type {{1781 * might_have_prefix_branches: SearchTreeBranches<SearchTree>,1782 * branches: SearchTreeBranches<SearchTree>,1783 * data: Uint8Array,1784 * leaves_suffix: RoaringBitmap,1785 * leaves_whole: RoaringBitmap,1786 * }}1787 */1788 class PrefixSearchTree {1789 /**1790 * @param {SearchTreeBranches<SearchTree>} branches1791 * @param {SearchTreeBranches<SearchTree>} might_have_prefix_branches1792 * @param {Uint8Array} data1793 * @param {RoaringBitmap} leaves_whole1794 * @param {RoaringBitmap} leaves_suffix1795 */1796 constructor(1797 branches,1798 might_have_prefix_branches,1799 data,1800 leaves_whole,1801 leaves_suffix,1802 ) {1803 this.might_have_prefix_branches = might_have_prefix_branches;1804 this.branches = branches;1805 this.data = data;1806 this.leaves_suffix = leaves_suffix;1807 this.leaves_whole = leaves_whole;1808 }1809 /**1810 * Returns the Trie for the root node.1811 *1812 * A Trie pointer refers to a single node in a logical decompressed search tree1813 * (the real search tree is compressed).1814 *1815 * @param {DataColumn} dataColumn1816 * @param {Uint8ArraySearchPattern} searchPattern1817 * @return {PrefixTrie}1818 */1819 trie(dataColumn, searchPattern) {1820 return new PrefixTrie(this, 0, dataColumn, searchPattern);1821 }18221823 /**1824 * Return the trie representing `name`1825 * @param {Uint8Array|string} name1826 * @param {DataColumn} dataColumn1827 * @returns {Promise<Trie?>}1828 */1829 async search(name, dataColumn) {1830 if (typeof name === "string") {1831 const utf8encoder = new TextEncoder();1832 name = utf8encoder.encode(name);1833 }1834 const searchPattern = new Uint8ArraySearchPattern(name);1835 /** @type {Trie} */1836 let trie = this.trie(dataColumn, searchPattern);1837 for (const datum of name) {1838 // code point definitely exists1839 /** @type {Promise<Trie>?} */1840 const newTrie = trie.child(datum);1841 if (newTrie) {1842 trie = await newTrie;1843 } else {1844 return null;1845 }1846 }1847 return trie;1848 }18491850 /**1851 * @param {Uint8Array|string} name1852 * @param {DataColumn} dataColumn1853 * @returns {AsyncGenerator<Trie>}1854 */1855 async* searchLev(name, dataColumn) {1856 if (typeof name === "string") {1857 const utf8encoder = new TextEncoder();1858 name = utf8encoder.encode(name);1859 }1860 const w = name.length;1861 if (w < 3) {1862 const trie = await this.search(name, dataColumn);1863 if (trie !== null) {1864 yield trie;1865 }1866 return;1867 }1868 const searchPattern = new Uint8ArraySearchPattern(name);1869 const levParams = w >= 6 ?1870 new Lev2TParametricDescription(w) :1871 new Lev1TParametricDescription(w);1872 /** @type {Array<[Promise<Trie>, number]>} */1873 const stack = [[Promise.resolve(this.trie(dataColumn, searchPattern)), 0]];1874 const n = levParams.n;1875 while (stack.length !== 0) {1876 // It's not empty1877 /** @type {[Promise<Trie>, number]} */1878 //@ts-expect-error1879 const [triePromise, levState] = stack.pop();1880 const trie = await triePromise;1881 for (const byte of trie.keysExcludeSuffixOnly()) {1882 const levPos = levParams.getPosition(levState);1883 const vector = levParams.getVector(1884 name,1885 byte,1886 levPos,1887 Math.min(w, levPos + (2 * n) + 1),1888 );1889 const newLevState = levParams.transition(1890 levState,1891 levPos,1892 vector,1893 );1894 if (newLevState >= 0) {1895 const child = trie.child(byte);1896 if (child) {1897 stack.push([child, newLevState]);1898 if (levParams.isAccept(newLevState)) {1899 yield child;1900 }1901 }1902 }1903 }1904 }1905 }19061907 /** @returns {RoaringBitmap} */1908 getCurrentLeaves() {1909 return this.leaves_whole.union(this.leaves_suffix);1910 }1911 }19121913 /**1914 * A representation of a set of strings in the search index,1915 * as a subset of the entire tree.1916 */1917 class PrefixTrie {1918 /**1919 * @param {PrefixSearchTree} tree1920 * @param {number} offset1921 * @param {DataColumn} dataColumn1922 * @param {Uint8ArraySearchPattern} searchPattern1923 */1924 constructor(tree, offset, dataColumn, searchPattern) {1925 this.tree = tree;1926 this.offset = offset;1927 this.dataColumn = dataColumn;1928 this.searchPattern = searchPattern;1929 }19301931 /**1932 * All exact matches for the string represented by this node.1933 * @returns {RoaringBitmap}1934 */1935 matches() {1936 if (this.offset === this.tree.data.length) {1937 return this.tree.leaves_whole;1938 } else {1939 return EMPTY_BITMAP;1940 }1941 }19421943 /**1944 * All matches for strings that contain the string represented by this node.1945 * @returns {AsyncGenerator<RoaringBitmap>}1946 */1947 async* substringMatches() {1948 /** @type {Promise<SearchTree>[]} */1949 let layer = [Promise.resolve(this.tree)];1950 while (layer.length) {1951 const current_layer = layer;1952 layer = [];1953 for await (const tree of current_layer) {1954 /** @type {number[]?} */1955 let rejected = null;1956 let leaves = tree.getCurrentLeaves();1957 for (const leaf of leaves.entries()) {1958 const haystack = await this.dataColumn.at(leaf);1959 if (haystack === undefined || !this.searchPattern.matches(haystack)) {1960 if (!rejected) {1961 rejected = [];1962 }1963 rejected.push(leaf);1964 }1965 }1966 if (rejected) {1967 if (leaves.cardinality() !== rejected.length) {1968 for (const rej of rejected) {1969 leaves = leaves.remove(rej);1970 }1971 yield leaves;1972 }1973 } else {1974 yield leaves;1975 }1976 }1977 /** @type {HashTable<[number, PrefixSearchTree|SuffixSearchTree][]>} */1978 const subnodes = new HashTable();1979 for await (const nodeEncoded of current_layer) {1980 const node = nodeEncoded instanceof InlineNeighborsTree ?1981 nodeEncoded.decode() :1982 nodeEncoded;1983 const branches = node.branches;1984 const l = branches.subtrees.length;1985 for (let i = 0; i < l; ++i) {1986 const subtree = branches.subtrees[i];1987 if (subtree) {1988 layer.push(subtree);1989 } else if (subtree === null) {1990 const byte = branches.getKey(i);1991 const newnode = branches.getNodeID(i);1992 if (!newnode) {1993 throw new Error(`malformed tree; no node for key ${byte}`);1994 } else {1995 let subnode_list = subnodes.get(newnode);1996 if (!subnode_list) {1997 subnode_list = [[byte, node]];1998 subnodes.set(newnode, subnode_list);1999 } else {2000 subnode_list.push([byte, node]);
Findings
✓ No findings reported for this file.