0

A SkipList is a probabilistic data structure used, at least in part, for implementing an ordered key/value map. It is arranged in levels where higher levels skip nodes (the higher up, the more it skips), and the lowest level contains the nodes. As far as I have read, each level is implemented as a linked list, possibly a doubly linked list. And for some reason, skip lists are better for concurrency because in a multithreaded environment, they can be implemented without locks to enable fast/optimal performance as compared to say red/black trees or B+trees.

Here we have a "proper skip list" implemented in JavaScript, copied here for reference. The link has a nice suite of tests to show how it works.

const DEFAULT_STACK_UP_PROBABILITY = 0.25;

class ProperSkipList {
  constructor(options) {
    options = options || {};
    this.stackUpProbability = options.stackUpProbability || DEFAULT_STACK_UP_PROBABILITY;
    this.updateLength = options.updateLength !== false;
    this.typePriorityMap = {
      'undefined': 0,
      'object': 1,
      'number': 2,
      'bigint': 2,
      'string': 3
    };
    this.clear();
  }

  clear() {
    let headNode = {
      prev: null
    };
    let tailNode = {
      next: null
    };
    this.head = {
      isHead: true,
      key: undefined,
      value: undefined,
      nodes: [headNode]
    };
    this.tail = {
      isTail: true,
      key: undefined,
      value: undefined,
      nodes: [tailNode]
    };
    headNode.next = tailNode;
    tailNode.prev = headNode;
    headNode.group = this.head;
    tailNode.group = this.tail;
    this.length = this.updateLength ? 0 : undefined;
  }

  upsert(key, value) {
    let {matchingNode, prevNode, searchPath} = this._searchAndTrack(key);
    if (matchingNode) {
      let previousValue = matchingNode.group.value;
      matchingNode.group.value = value;
      return previousValue;
    }

    // Insert the entry.
    let newNode = {
      prev: prevNode,
      next: prevNode.next
    };
    let newGroup = {
      key,
      value,
      nodes: [newNode]
    };
    newNode.group = newGroup;
    prevNode.next = newNode;
    newNode.next.prev = newNode;

    // Stack up the entry for fast search.
    let layerIndex = 1;
    while (Math.random() < this.stackUpProbability) {
      let prevLayerNode = searchPath[layerIndex];
      if (!prevLayerNode) {
        let newHeadNode = {
          prev: null,
          group: this.head
        };
        let newTailNode = {
          next: null,
          group: this.tail
        };
        newHeadNode.next = newTailNode;
        this.head.nodes.push(newHeadNode);
        newTailNode.prev = newHeadNode;
        this.tail.nodes.push(newTailNode);
        prevLayerNode = newHeadNode;
      }
      let newNode = {
        prev: prevLayerNode,
        next: prevLayerNode.next,
        group: newGroup
      };
      prevLayerNode.next = newNode;
      newNode.next.prev = newNode;
      newGroup.nodes.push(newNode);
      layerIndex++;
    }
    if (this.updateLength) this.length++;

    return undefined;
  }

  find(key) {
    let {matchingNode} = this._search(key);
    return matchingNode ? matchingNode.group.value : undefined;
  }

  has(key) {
    return !!this.find(key);
  }

  _isAGreaterThanB(a, b) {
    let typeA = typeof a;
    let typeB = typeof b;
    if (typeA === typeB) {
      return a > b;
    }
    let typeAPriority = this.typePriorityMap[typeA];
    let typeBPriority = this.typePriorityMap[typeB];
    if (typeAPriority === typeBPriority) {
      return a > b;
    }
    return typeAPriority > typeBPriority;
  }

  // The two search methods are similar but were separated for performance reasons.
  _searchAndTrack(key) {
    let layerCount = this.head.nodes.length;
    let searchPath = new Array(layerCount);
    let layerIndex = layerCount - 1;
    let currentNode = this.head.nodes[layerIndex];
    let prevNode = currentNode;

    while (true) {
      let currentNodeGroup = currentNode.group;
      let currentKey = currentNodeGroup.key;
      if (!currentNodeGroup.isTail) {
        if (this._isAGreaterThanB(key, currentKey) || currentNodeGroup.isHead) {
          prevNode = currentNode;
          currentNode = currentNode.next;
          continue;
        }
        if (key === currentKey) {
          let matchingNode = currentNodeGroup.nodes[0];
          searchPath[layerIndex] = matchingNode;
          return {matchingNode, prevNode: matchingNode.prev, searchPath};
        }
      }
      searchPath[layerIndex] = prevNode;
      if (--layerIndex < 0) {
        return {matchingNode: undefined, prevNode, searchPath};
      }
      currentNode = prevNode.group.nodes[layerIndex];
    }
  }

  _search(key) {
    let layerIndex = this.head.nodes.length - 1;
    let currentNode = this.head.nodes[layerIndex];
    let prevNode = currentNode;
    while (true) {
      let currentNodeGroup = currentNode.group;
      let currentKey = currentNodeGroup.key;
      if (!currentNodeGroup.isTail) {
        if (this._isAGreaterThanB(key, currentKey) || currentNodeGroup.isHead) {
          prevNode = currentNode;
          currentNode = currentNode.next;
          continue;
        }
        if (key === currentKey) {
          let matchingNode = currentNodeGroup.nodes[0];
          return {matchingNode, prevNode: matchingNode.prev};
        }
      }
      if (--layerIndex < 0) {
        return {matchingNode: undefined, prevNode};
      }
      currentNode = prevNode.group.nodes[layerIndex];
    }
  }

  findEntriesFromMin() {
    return this._createEntriesIteratorAsc(this.head.nodes[0].next);
  }

  findEntriesFromMax() {
    return this._createEntriesIteratorDesc(this.tail.nodes[0].prev);
  }

  minEntry() {
    let [key, value] = this.findEntriesFromMin().next().value;
    return [key, value];
  }

  maxEntry() {
    let [key, value] = this.findEntriesFromMax().next().value;
    return [key, value];
  }

  minKey() {
    return this.minEntry()[0];
  }

  maxKey() {
    return this.maxEntry()[0];
  }

  minValue() {
    return this.minEntry()[1];
  }

  maxValue() {
    return this.maxEntry()[1];
  }

  _extractNode(matchingNode) {
    let nodes = matchingNode.group.nodes;
    for (let layerNode of nodes) {
      let prevNode = layerNode.prev;
      prevNode.next = layerNode.next;
      prevNode.next.prev = prevNode;
    }
    if (this.updateLength) this.length--;
    return matchingNode.group.value;
  }

  extract(key) {
    let {matchingNode} = this._search(key);
    if (matchingNode) {
      return this._extractNode(matchingNode);
    }
    return undefined;
  }

  delete(key) {
    return this.extract(key) !== undefined;
  }

  findEntries(fromKey) {
    let {matchingNode, prevNode} = this._search(fromKey);
    if (matchingNode) {
      return {
        matchingValue: matchingNode.group.value,
        asc: this._createEntriesIteratorAsc(matchingNode),
        desc: this._createEntriesIteratorDesc(matchingNode)
      };
    }
    return {
      matchingValue: undefined,
      asc: this._createEntriesIteratorAsc(prevNode.next),
      desc: this._createEntriesIteratorDesc(prevNode)
    };
  }

  deleteRange(fromKey, toKey, deleteLeft, deleteRight) {
    if (fromKey == null) {
      fromKey = this.minKey();
      deleteLeft = true;
    }
    if (toKey == null) {
      toKey = this.maxKey();
      deleteRight = true;
    }
    if (this._isAGreaterThanB(fromKey, toKey)) {
      return;
    }
    let {prevNode: fromNode, searchPath: leftSearchPath, matchingNode: matchingLeftNode} = this._searchAndTrack(fromKey);
    let {prevNode: toNode, searchPath: rightSearchPath, matchingNode: matchingRightNode} = this._searchAndTrack(toKey);
    let leftNode = matchingLeftNode ? matchingLeftNode : fromNode;
    let rightNode = matchingRightNode ? matchingRightNode : toNode.next;

    if (leftNode === rightNode) {
      if (deleteLeft) {
        this._extractNode(leftNode);
      }
      return;
    }

    if (this.updateLength) {
      let currentNode = leftNode;
      while (currentNode && currentNode.next !== rightNode) {
        this.length--;
        currentNode = currentNode.next;
      }
    }

    let leftGroupNodes = leftNode.group.nodes;
    let rightGroupNodes = rightNode.group.nodes;
    let layerCount = this.head.nodes.length;

    for (let layerIndex = 0; layerIndex < layerCount; layerIndex++) {
      let layerLeftNode = leftGroupNodes[layerIndex];
      let layerRightNode = rightGroupNodes[layerIndex];

      if (layerLeftNode && layerRightNode) {
        layerLeftNode.next = layerRightNode;
        layerRightNode.prev = layerLeftNode;
        continue;
      }
      if (layerLeftNode) {
        let layerRightmostNode = rightSearchPath[layerIndex];
        if (!layerRightmostNode.group.isTail) {
          layerRightmostNode = layerRightmostNode.next;
        }
        layerLeftNode.next = layerRightmostNode;
        layerRightmostNode.prev = layerLeftNode;
        continue;
      }
      if (layerRightNode) {
        let layerLeftmostNode = leftSearchPath[layerIndex];
        layerLeftmostNode.next = layerRightNode;
        layerRightNode.prev = layerLeftmostNode;
        continue;
      }
      // If neither left nor right nodes are present on the layer, connect based
      // on search path to remove in-between entries.
      let layerRightmostNode = rightSearchPath[layerIndex];
      if (!layerRightmostNode.group.isTail) {
        layerRightmostNode = layerRightmostNode.next;
      }
      let layerLeftmostNode = leftSearchPath[layerIndex];
      layerLeftmostNode.next = layerRightmostNode;
      layerRightmostNode.prev = layerLeftmostNode;
    }
    if (deleteLeft && matchingLeftNode) {
      this._extractNode(matchingLeftNode);
    }
    if (deleteRight && matchingRightNode) {
      this._extractNode(matchingRightNode);
    }
  }

  _createEntriesIteratorAsc(currentNode) {
    let i = 0;
    return {
      next: function () {
        let currentGroup = currentNode.group;
        if (currentGroup.isTail) {
          return {
            value: [currentNode.key, currentNode.value, i],
            done: true
          }
        }
        currentNode = currentNode.next;
        return {
          value: [currentGroup.key, currentGroup.value, i++],
          done: currentGroup.isTail
        };
      },
      [Symbol.iterator]: function () { return this; }
    };
  }

  _createEntriesIteratorDesc(currentNode) {
    let i = 0;
    return {
      next: function () {
        let currentGroup = currentNode.group;
        if (currentGroup.isHead) {
          return {
            value: [currentNode.key, currentNode.value, i],
            done: true
          }
        }
        currentNode = currentNode.prev;
        return {
          value: [currentGroup.key, currentGroup.value, i++],
          done: currentGroup.isHead
        };
      },
      [Symbol.iterator]: function () { return this; }
    };
  }
}

Where I am coming from is wondering how the theoretical concepts of a SkipList are applied in this implementation. But that is not my question, my question is very specific as described at the bottom. But take a look at a few things first. For example, the clear function is implemented like this, creating a fresh new SkipList:

clear() {
  let headNode = {
    prev: null
  };
  let tailNode = {
    next: null
  };
  this.head = {
    isHead: true,
    key: undefined,
    value: undefined,
    nodes: [headNode]
  };
  this.tail = {
    isTail: true,
    key: undefined,
    value: undefined,
    nodes: [tailNode]
  };
  headNode.next = tailNode;
  tailNode.prev = headNode;
  headNode.group = this.head;
  tailNode.group = this.tail;
  this.length = this.updateLength ? 0 : undefined;
}

Notice the "group" as in headNode.group = this.head has this structure:

  {
    isTail: boolean,
    isHead: boolean.
    key: undefined,
    value: undefined,
    nodes: [node]
  }

Or inserting a new node:

// Insert the entry.
let newNode = {
  prev: prevNode,
  next: prevNode.next
};
let newGroup = {
  key,
  value,
  nodes: [newNode]
};
newNode.group = newGroup;
prevNode.next = newNode;
newNode.next.prev = newNode;

So a node has a prev/next/group set of properties, while a group has a key/value/nodes. Finally, notice that this.head.nodes[layerIndex] and such is being called in several places, which to me seems like it simply bypasses the linked-list nature of the SkipList and has an internal array that stores all the nodes anyway. Basically I am confused, I have been looking at this code for a day and it looks to me like each level node is storing an array of all the children nodes for that level, or something like that. I am confused with what is happening here.

Can one explain what the general model is for this SkipList implementation? That is:

  • The meaning of group and node objects, and why they have their structure they do.
  • What the nodes array is doing on a group.

For context, I am trying to learn how to implement a SkipList, after reading several papers on it and understanding the general idea. I want to put some constraints on it, so don't want to use any existing implementation. I want it to work in JavaScript and C, so it needs to be single-threaded in one context and multithreaded in the other, so I have a bit to learn. But knowing how to implement it in JavaScript, like what the above code is doing, is a good first step.

Lance
  • 75,200
  • 93
  • 289
  • 503

1 Answers1

1

The summary of your question is:

  • The meaning of group and node objects, and why they have their structure they do.
  • What the nodes array is doing on a group.

I'll use an image taken from brilliant.org:

enter image description here

The linked lists appear as horizontal bands, while the arrays appear as vertical stacks.

A group corresponds to one distinct key in the set -- marked in yellow. The corresponding value is not depicted in the image, but is just payload and is not essential for understanding the data structure. A group has a nodes array -- displayed as a stack on top of the key. Each of these nodes belong to a different linked list in which they represent that same key. Those nodes have backreferences again to the group, so a node knows which key it represents, and which other linked lists have a node for this same key.

So the nodes array actually duplicates a key to different lists. One nodes array does not represent different keys in the collection -- just one. It is like an elevator to jump from one train (linked list) to another, with the same key.

The sizes of these arrays are randomly determined, but are intended to be small on average. This is driven by the stackUpProbability. It represents the probability that an array is extended with one more node (upwards in the image). The probability for having at least one node (for a key) is obviously 1, but the probability that the array will have at least one more node in it (and thus have a size of at least 2), is stackUpProbability. The probability that it will have size 3 is (stackUpProbability)²... etc.

This way, the bottom linked list (at layerIndex 1) will have all keys in the collection (one node per key, in sorted order), but any next layer will have fewer keys, and the top layer might only have a hand full. The lists with fewer nodes provide a way to travel fast along the collection in search of a value. This provides a proximity of where a value could be found. Via the "elevator" a search algorithm can step down to lower (slower) linked lists and continue the search there, until it reaches the lowest level where the key will be found (if present), or where it would have been (if not).

trincot
  • 317,000
  • 35
  • 244
  • 286