I have just recently learned about B+trees, after asking How to build a tree array into which / out of which items can be spliced, which only allows arrays of 1, 2, 4, 8, 16, or 32 items?, and seeing a wonderful answer implementing the B+tree according to the constraints. To summarize, basically that question was looking for a tree-like structure that we could treat as an array (find by index, remove at index, insert at index, etc.). In addition, the arrays that implement the tree nodes can only contain powers of 2 number of elements, up to 32 items (1, 2, 4, 8, 16, or 32). Additionally, they should be relatively compacted as the answer showed, essentially making sure that every node was filled to the brim with 32 items before creating new sub arrays (i.e. so you don't end up with a bunch of 16-item arrays).
The answer is kind of complex and I am still picking it apart. But as I master the inner details of it, I would like to see how a "real B+ search tree" would look in comparison. That is, a B+tree that actually has keys and that you treat sort of like a hash table (find by key, remove by key, insert with key, etc.). Whereas the other one was by index, this would be by key. Ideally arbitrary keys (perhaps with a getKey
function), or just string or integer keys.
So I'm wondering what needs to be modified in that original answer, copied here:
class Node {
constructor(capacity) {
// Mimic fixed-size array (avoid accidentally growing it)
this.children = Object.seal(Array(capacity).fill(null));
this.childCount = 0; // Number of used slots in children array
this.treeSize = 0; // Total number of values in this subtree
// Maintain back-link to parent.
this.parent = null;
// Per level in the tree, maintain a doubly linked list
this.prev = this.next = null;
}
setCapacity(capacity) {
if (capacity < 1) return;
// Here we make a new array, and copy the data into it
let children = Object.seal(Array(capacity).fill(null));
for (let i = 0; i < this.childCount; i++) children[i] = this.children[i];
this.children = children;
}
isLeaf() {
return !(this.children[0] instanceof Node);
}
index() {
return this.parent.children.indexOf(this);
}
updateTreeSize(start, end, sign=1) {
let sum = 0;
if (this.isLeaf()) {
sum = end - start;
} else {
for (let i = start; i < end; i++) sum += this.children[i].treeSize;
}
if (!sum) return;
sum *= sign;
// Apply the sum change to this node and all its ancestors
for (let node = this; node; node = node.parent) {
node.treeSize += sum;
}
}
wipe(start, end) {
this.updateTreeSize(start, end, -1);
this.children.copyWithin(start, end, this.childCount);
for (let i = this.childCount - end + start; i < this.childCount; i++) {
this.children[i] = null;
}
this.childCount -= end - start;
// Reduce allocated size if possible
if (this.childCount * 2 <= this.children.length) this.setCapacity(this.children.length / 2);
}
moveFrom(neighbor, target, start, count=1) {
// Note: `start` can have two meanings:
// if neighbor is null, it is the value/Node to move to the target
// if neighbor is a Node, it is the index from where value(s) have to be moved to the target
// Make room in target node
if (this.childCount + count > this.children.length) this.setCapacity(this.children.length * 2);
this.children.copyWithin(target + count, target, Math.max(target + count, this.childCount));
this.childCount += count;
if (neighbor !== null) {
// Copy the children
for (let i = 0; i < count; i++) {
this.children[target + i] = neighbor.children[start + i];
}
// Remove the original references
neighbor.wipe(start, start + count);
} else {
this.children[target] = start; // start is value to insert
}
this.updateTreeSize(target, target + count, 1);
// Set parent link(s)
if (!this.isLeaf()) {
for (let i = 0; i < count; i++) {
this.children[target + i].parent = this;
}
}
}
moveToNext(count) {
this.next.moveFrom(this, 0, this.childCount - count, count);
}
moveFromNext(count) {
this.moveFrom(this.next, this.childCount, 0, count);
}
basicRemove(index) {
if (!this.isLeaf()) {
// Take node out of the level's linked list
let prev = this.children[index].prev;
let next = this.children[index].next;
if (prev) prev.next = next;
if (next) next.prev = prev;
}
this.wipe(index, index + 1);
}
basicInsert(index, value) {
this.moveFrom(null, index, value);
if (value instanceof Node) {
// Insert node in the level's linked list
if (index > 0) {
value.prev = this.children[index-1];
value.next = value.prev.next;
} else if (this.childCount > 1) {
value.next = this.children[1];
value.prev = value.next.prev;
}
if (value.prev) value.prev.next = value;
if (value.next) value.next.prev = value;
}
}
pairWithSmallest() {
return this.prev && (!this.next || this.next.childCount > this.prev.childCount)
? [this.prev, this] : [this, this.next];
}
toString() {
return "[" + this.children.map(v => v??"-").join() + "]";
}
}
class Tree {
constructor(nodeCapacity=32) {
this.nodeCapacity = nodeCapacity;
this.root = new Node(1);
this.first = this.root; // Head of doubly linked list at bottom level
}
locate(offset) {
let node = this.root;
// Normalise argument
offset = offset < 0 ? Math.max(0, node.treeSize + offset) : Math.min(offset, node.treeSize);
while (!node.isLeaf()) {
let index = 0;
let child = node.children[index];
while (offset > child.treeSize || offset === child.treeSize && child.next) {
offset -= child.treeSize;
child = node.children[++index];
}
node = child;
}
return [node, offset];
}
getItemAt(offset) {
let [node, index] = this.locate(offset);
if (index < node.childCount) return node.children[index];
}
setItemAt(offset, value) {
let [node, index] = this.locate(offset);
if (index < node.childCount) node.children[index] = value;
}
removeItemAt(offset) {
let [node, index] = this.locate(offset);
if (index >= node.childCount) return;
while (true) {
console.assert(node.isLeaf() || node.children[index].treeSize === 0);
node.basicRemove(index);
// Exit when node's fill ratio is fine
if (!node.parent || node.childCount * 2 > this.nodeCapacity) return;
// Node has potentially too few children, we should either merge or redistribute
let [left, right] = node.pairWithSmallest();
if (!left || !right) { // A node with no siblings? Must become the root!
this.root = node;
node.parent = null;
return;
}
let sumCount = left.childCount + right.childCount;
let childCount = sumCount >> 1;
// Check whether to merge or to redistribute
if (sumCount > this.nodeCapacity) { // redistribute
// Move some data from the bigger to the smaller node
let shift = childCount - node.childCount;
if (!shift) { // Boundary case: when a redistribution would bring no improvement
console.assert(node.childCount * 2 === this.nodeCapacity && sumCount === this.nodeCapacity + 1);
return;
}
if (node === left) { // move some children from right to left
left.moveFromNext(shift);
} else { // move some children from left to right
left.moveToNext(shift);
}
return;
}
// Merge:
// Move all data from the right to the left
left.moveFromNext(right.childCount);
// Prepare to delete right node
node = right.parent;
index = right.index();
}
}
insertItemAt(offset, value) {
let [node, index] = this.locate(offset);
while (node.childCount === this.nodeCapacity) { // No room here
if (index === 0 && node.prev && node.prev.childCount < this.nodeCapacity) {
return node.prev.basicInsert(node.prev.childCount, value);
}
// Check whether we can redistribute (to avoid a split)
if (node !== this.root) {
let [left, right] = node.pairWithSmallest();
let joinedIndex = left === node ? index : left.childCount + index;
let sumCount = left.childCount + right.childCount + 1;
if (sumCount <= 2 * this.nodeCapacity) { // redistribute
let childCount = sumCount >> 1;
if (node === right) { // redistribute to the left
let insertInLeft = joinedIndex < childCount;
left.moveFromNext(childCount - left.childCount - +insertInLeft);
} else { // redistribute to the right
let insertInRight = index >= sumCount - childCount;
left.moveToNext(childCount - right.childCount - +insertInRight);
}
if (joinedIndex > left.childCount ||
joinedIndex === left.childCount && left.childCount > right.childCount) {
right.basicInsert(joinedIndex - left.childCount, value);
} else {
left.basicInsert(joinedIndex, value);
}
return;
}
}
// Cannot redistribute: split node
let childCount = node.childCount >> 1;
// Create a new node that will later become the right sibling of this node
let sibling = new Node(childCount);
// Move half of node node's data to it
sibling.moveFrom(node, 0, childCount, childCount);
// Insert the value in either the current node or the new one
if (index > node.childCount) {
sibling.basicInsert(index - node.childCount, value);
} else {
node.basicInsert(index, value);
}
// Is this the root?
if (!node.parent) {
// ...then first create a parent, which is the new root
this.root = new Node(2);
this.root.basicInsert(0, node);
}
// Prepare for inserting the sibling node into the tree
index = node.index() + 1;
node = node.parent;
value = sibling;
}
node.basicInsert(index, value);
}
/* Below this point: these methods are optional */
* [Symbol.iterator]() { // Make tree iterable
let i = 0;
for (let node = this.first; node; node = node.next) {
for (let i = 0; i < node.childCount; i++) yield node.children[i];
}
}
print() {
console.log(this.root && this.root.toString());
}
verify() {
// Raise an error when the tree violates one of the required properties
if (!this.root) return; // An empty tree is fine.
if (this.root.parent) throw "root should not have a parent";
// Perform a breadth first traversal
let q = [this.root];
while (q.length) {
if (q[0].isLeaf() && this.first !== q[0]) throw "this.first is not pointing to first leaf";
let level = [];
let last = null;
for (let parent of q) {
if (!(parent instanceof Node)) throw "parent is not instance of Node";
if (parent.children.length > this.nodeCapacity) throw "node's children array is too large";
if (parent.childCount > 0 && parent.childCount * 2 <= parent.children.length) throw "node's fill ratio is too low";
for (let i = parent.childCount; i < parent.children.length; i++) {
if (parent.children[i] !== null) throw "child beyond childCount should be null but is not";
}
let treeSize = parent.treeSize;
if (parent.isLeaf()) {
for (let value of parent.children.slice(0, parent.childCount)) {
if (value === null) throw "leaf has a null as value";
if (value instanceof Node) throw "leaf has a Node as value";
}
if (parent.treeSize !== parent.childCount) throw "leaf has mismatch in treeSize and childCount";
} else {
for (let node of parent.children.slice(0, parent.childCount)) {
if (node === null) throw "internal node has a null as value";
if (!(node instanceof Node)) throw "internal node has a non-Node as value";
if (node.parent !== parent) throw "wrong parent";
if (node.prev !== last) throw "prev link incorrect";
if (last && last.next !== node) throw "next link incorrect";
if (last && last.children.length + node.children.length <= this.nodeCapacity) {
throw "two consecutive siblings have a total number of children that is too small";
}
if (node.childCount * 2 < this.nodeCapacity) {
throw "internal node is too small: " + node;
}
level.push(node);
last = node;
treeSize -= node.treeSize;
}
if (treeSize) throw "internal node treeSize sum mismatches";
}
}
if (last && last.next) throw "last node in level has a next reference";
q = level;
}
}
test(count=100, option=3) {
// option:
// 0 = always insert & delete at left side (offset 0)
// 1 = always insert & delete at right side
// 2 = always insert & delete at middle
// 3 = insert & delete at random offsets
// Create array to perform the same operations on it as on the tree
let arr = [];
// Perform a series of insertions
for (let i = 0; i < count; i++) {
// Choose random insertion index
let index = Array.isArray(option) ? option[i] : [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
// Perform same insertion in array and tree
arr.splice(index, 0, i);
this.insertItemAt(index, i);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw i + ": tree not same as array";
}
// Perform a series of updates
for (let i = 0; false && i < count; i++) {
// Choose random update index
let index = Math.floor(Math.random() * (i+1));
// Perform same insertion in array and tree
arr[index] += count;
this.setItemAt(index, this.getItemAt(index) + count);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw "tree not same as array";
}
// Perform a series of deletions
for (let i = arr.length - 1; i >= 0; i--) {
// Choose random deletion index
let index = [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
// Perform same deletion in array and tree
arr.splice(index, 1);
this.removeItemAt(index);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw "tree not same as array";
}
}
}
// Perform 1000 insertions, 1000 updates, and 1000 deletions on a tree with node capacity of 8
new Tree(32).test(1000);
console.log("all tests completed");
I have started essentially trying to refactor this code into standalone functions rather than object-oriented methods, so I can get it closer to what assembly might require. But I ask in JavaScript because it is most familiar to me and easy to comprehend. In doing so, I have started to look around with where I need to change the support for indexes into support for keys, but am not entirely sure how much of a change this will be.
I would like to see what the implementation looks like so I can better learn what an actual realistic B+ search tree with these sorts of constraints looks like. I have seen every implementation of B+trees in JavaScript on GitHub, but they are all missing the complicated remove
method, vary wildly in how they implement things, and don't have the additional constraints needed for the project I'm working on, so it's hard to tell what exactly the keys should be doing in comparison to the integer indices. Here is one B+tree implementation I have been studying, the only one I found with the remove
method that is also in JavaScript.
Short of that, I am wondering what exactly needs to be changed in this implementation above to add support for keys. And how exactly the keys should work at the internal and leaf levels (i.e. do you store the start and end key for the leafs at each parent node? Or how do the keys work exactly so it's efficient?).
On one hand it seems that just a few places of code need to be changed (like the locate
method), but on the other hand it seems that just adding support for keys will require completely rewriting the whole thing. I am not sure, looking for clarity on that.
const locateNode = (tree, key) => {
let node = tree.root
while (!checkIfLeaf(node)) {
let index = 0
let child = node.children[index]
while (key != child.key) {
child = node.children[++index]
}
node = child
}
return node
}