1

I have a B-tree and, given an arbitrary parameter key, I'd like to find the greatest data key that is less than or equal to the parameter key. In other words, I want it to look to the left to figure out which key it should use, in O(log n).

I've already modified the implementation of lower_bound in C code.

#define ORDER 3

/** Strict "greater than" test on keys.
 @return 1 if `a` is greater than `b`, otherwise 0. */
static int compare(const int a, const int b) { return a > b; }

/** A node's keys: the first `size` entries of `key` are in use. */
struct node { unsigned size; int key[ORDER - 1]; };
/** A node with children. `base` is the first member, so a `struct node *`
 taken from a tree level above zero can be cast back to `struct branch *`. */
struct branch { struct node base, *child[ORDER]; };
/** A position in the tree: key `idx` of `node`, which sits at `height`. A null
 `node` means "no such element". */
struct ref { struct node *node; unsigned height, idx; };
/** The root `node`, (null for a tree with no nodes,) and its `height`; nodes at
 height zero have no children. */
struct tree { struct node *node; unsigned height; };

/** Finds the smallest key of `tree` that is not less than `x`, (the same
 element `std::lower_bound` would pick,) descending from the root once.
 @return A reference to that key, or a reference whose `node` is null if every
 key is less than `x` or the tree has no keys. */
static struct ref lower(const struct tree tree, const int x) {
    struct ref lo, found = { 0, 0, 0 };
    if(!tree.node) return found;
    for(lo.node = tree.node, lo.height = tree.height; ;
        lo.node = ((const struct branch *)(const void *)lo.node)->child[lo.idx],
        lo.height--) {
        unsigned hi = lo.node->size;
        lo.idx = 0;
        /* Binary search for the first key that is not less than `x`. An empty
         node skips the search but must still reach the height check below;
         otherwise an empty leaf would be read as if it were a branch. */
        while(lo.idx < hi) {
            const unsigned mid = (lo.idx + hi) / 2; /* Will not overflow. */
            if(compare(x, lo.node->key[mid]) > 0) lo.idx = mid + 1;
            else hi = mid;
        }
        if(lo.idx < lo.node->size) { /* Within bounds, record the current. */
            found = lo;
            /* The key here is known to be `>= x`; if it is not greater, it is
             an exact match and nothing further down can be better. */
            if(compare(lo.node->key[lo.idx], x) <= 0) break;
        }
        if(!lo.height) break; /* Reached the bottom. */
    }
    return found;
}

/** @return The smallest key in `tree` that is equal to or greater than `key`,
 or `default_value` if there is no such key. */
static int tree_lower_or(const struct tree tree,
    const int key, const int default_value) {
    const struct ref ref = lower(tree, key);
    return ref.node && ref.idx < ref.node->size
        ? ref.node->key[ref.idx] : default_value;
}

#include <stdio.h>

/** Builds a fixed two-level tree holding the keys 1, 2, 4, 5, 6 and prints, for
 each probe from 0 to 7, the result of `tree_lower_or` with 0 as the default. */
int main(void) {
    struct node leaves[] = { { 2, {1,2} }, { 2, {5, 6} } };
    struct branch root = { { 1, {4} }, {leaves, leaves+1} };
    const struct tree tree = { &root.base, 1 };
    int probe = 0;
    while(probe < 8) {
        /* Comma-separate the pairs; the last one ends the line instead. */
        const char *const separator = probe < 7 ? ", " : "\n";
        printf("%d->%d%s", probe, tree_lower_or(tree, probe, 0), separator);
        probe++;
    }
    return 0;
}

This uses the example in std::lower_bound, data = {1, 2, 4, 5, 5, 6}. (Note that my B-tree's keys are strongly increasing, so I can't have two 5s.) It prints out 0->1, 1->1, 2->2, 3->4, 4->4, 5->5, 6->6, 7->0, which is x->next x in set or 0.

lower_bound

This is not quite what I want. The upper_bound is also not quite what I want, but close.

upper_bound

I want a lower bound from the right instead of the left, x->last x in set or 0.

What I want.

Is there a name for this, and how do I modify the `lower` function above to give this result?

Neil
  • 1,767
  • 2
  • 16
  • 22

2 Answers2

1

The way I would implement this is:

  • get the upper_bound
  • get the previous element (if any)
  • A) if there is a previous element and the element is == the key you are searching for, return it
  • B) otherwise, return the upper bound

In general you either care about the element directly before the upper_bound or about the upper_bound.

Ivaylo Strandjev
  • 69,226
  • 18
  • 123
  • 176
  • This is a solution, however, I'm hoping to lookup a maximum of 1 key. I don't store back-pointers or Morris, so the way I do iteration is to re-descend after it gets to a maximum/minimum index in a leaf node, thus this would be potentially an added `log n` on top of the `log n`. – Neil Jan 27 '23 at 10:43
  • Hm are you ok with changing the implementation of lower/compare? In theory it seems to me changing the `>` in compare to `>=` should do what you want as it will modify the predicate that you do the binary over – Ivaylo Strandjev Jan 27 '23 at 11:24
  • Actually you may have to modify also slightly what you do in the binary, i.e. return `hi`, not `lo` and this will also lead to need to modify the initial value of `hi` and `lo` – Ivaylo Strandjev Jan 27 '23 at 11:30
  • You mean like a normal comparator `return (a > b) - (b > a)`? Yes, I would be willing to do that, but I think it gives the same result, just symmetric. I think you are right about the `hi`; it seems like I am doing it from the other side. – Neil Jan 27 '23 at 17:52
0

Following the upper_bound advice, I was able to get the required behaviour without going down twice by keeping a return variable that I updated as appropriate. I found that I was being a little sloppy. The lower_bound just lines up correctly, but I found upper_bound not really obvious.

Updated example B-tree.

The first thing I did was work out a better example where it would be really obvious what was in the range and what was in the domain. In this case, I thought of the letter keys as the domain and the node-indices as the range, (as in my question.)

Here, key and x are arbitrary elements of the domain of letters. Applying the upper_bound process for each node gives us hi in the range. If hi.idx is non-zero, then found.idx = hi.idx - 1 is an element of the range and a valid data reference. We go down the tree and allow this to be overwritten if appropriate. Finally, in tree_left_or and tree_right_or, we transform the range element found, (it is just an unstable internal pointer-index), to a meaningful corresponding letter domain key in the set of keys.

/* https://github.com/neil-edelman/orcish needed for Graphviz names. */
/*#include "orcish.h"*/
#include <stdio.h>
#include <assert.h>

#define ORDER 3

/** Strict "greater than" test on keys.
 @return 1 if `a` is greater than `b`, otherwise 0. */
static int compare(const char a, const char b) { return a > b; }

/** A node's keys: the first `size` entries of `key` are in use. */
struct node { unsigned size; char key[ORDER - 1]; };
/** A node with children. `base` is the first member, so a `struct node *`
 taken from a tree level above zero can be cast back to `struct branch *`. */
struct branch { struct node base, *child[ORDER]; };
/** A position in the tree: key `idx` of `node`, which sits at `height`. A null
 `node` means "no such element". */
struct ref { struct node *node; unsigned height, idx; };
/** The root `node`, (null for a tree with no nodes,) and its `height`; nodes at
 height zero have no children. */
struct tree { struct node *node; unsigned height; };

/** Finds the smallest key of `tree` that is not less than `x`, (the same
 element `std::lower_bound` would pick,) descending from the root once.
 @return A reference to that key, or a reference whose `node` is null if every
 key is less than `x` or the tree has no keys. */
static struct ref right(const struct tree tree, const char x) {
    struct ref lo, found = { 0, 0, 0 };
    if(!tree.node) return found;
    for(lo.node = tree.node, lo.height = tree.height; ;
        lo.node = ((const struct branch *)(const void *)lo.node)->child[lo.idx],
        lo.height--) {
        unsigned hi = lo.node->size;
        lo.idx = 0;
        /* Binary search for the first key that is not less than `x`. An empty
         node skips the search but must still reach the height check below;
         otherwise an empty leaf would be read as if it were a branch. */
        while(lo.idx < hi) {
            const unsigned mid = (lo.idx + hi) / 2; /* Will not overflow. */
            if(compare(x, lo.node->key[mid]) > 0) lo.idx = mid + 1;
            else hi = mid;
        }
        if(lo.idx < lo.node->size) { /* Within bounds, record the current. */
            found = lo;
            /* The key here is known to be `>= x`; if it is not greater, it is
             an exact match and nothing further down can be better. */
            if(compare(lo.node->key[lo.idx], x) <= 0) break;
        }
        if(!lo.height) break; /* Reached the bottom. */
    }
    return found;
}

/** @return Minimum element equal to or greater than `key` in `tree`, or, if
 the `key` is larger than any in the set, `default_value`. */
static char tree_right_or(const struct tree tree,
    const char key, const char default_value) {
    const struct ref ref = right(tree, key);
    return ref.node && ref.idx < ref.node->size
        ? ref.node->key[ref.idx] : default_value;
}

/** Finds the largest key of `tree` that is not greater than `x`: in each node
 it locates the first key greater than `x`, (as `std::upper_bound` would,) and
 takes the key just before it. Descends from the root once.
 @return A reference to that key, or a reference whose `node` is null if every
 key is greater than `x` or the tree has no keys. */
static struct ref left(const struct tree tree, const char x) {
    struct ref hi, found = { 0, 0, 0 };
    if(!tree.node) return found;
    for(hi.node = tree.node, hi.height = tree.height; ;
        hi.node = ((const struct branch *)(const void *)hi.node)->child[hi.idx],
        hi.height--) {
        unsigned lo = 0;
        hi.idx = hi.node->size;
        /* Upper-bound. An empty node skips the search but must still reach
         the height check below; otherwise an empty leaf would be read as if
         it were a branch. */
        while(lo < hi.idx) {
            const unsigned mid = (lo + hi.idx) / 2; /* Will not overflow. */
            if(compare(hi.node->key[mid], x) <= 0) lo = mid + 1;
            else hi.idx = mid;
        }
        if(hi.idx) { /* There is a key to the left; it is the best so far. */
            found = hi, found.idx--;
            /* That key is known to be `<= x`; if `x` is not greater, it is
             an exact match and nothing further down can be better. */
            if(compare(x, found.node->key[found.idx]) <= 0) break;
        }
        if(!hi.height) break; /* Reached the bottom. */
    }
    return found;
}

/** @return Maximum element equal to or smaller than `key` in `tree`, or, if
 the `key` is smaller than any in the set, `default_value`. */
static char tree_left_or(const struct tree tree,
    const char key, const char default_value) {
    const struct ref ref = left(tree, key);
    return ref.node ? ref.node->key[ref.idx] : default_value;
}

#if 0
/** Writes the node at the root of `sub`, and recursively every node below it,
 to `fp` as Graphviz statements. Each node becomes a `trunk<address>` vertex
 whose label is an HTML-like table: a header row, then one row per key. When
 `sub->height` is non-zero the node is treated as a `struct branch` and an edge
 is written to each of its `size + 1` children.
 @param sub Sub-tree to draw; `sub->node` must not be null.
 @param fp Open output stream; must not be null. */
static void subgraph(const struct tree *const sub, FILE *fp) {
    const struct branch *branch;
    unsigned i;
    assert(sub->node && fp);
    /* Header row, port 0. `orcify` comes from the external "orcish.h";
     presumably it turns the node's address into a readable name -- confirm
     against that library. */
    fprintf(fp, "\ttrunk%p [label = <\n"
        "<table border=\"0\" cellspacing=\"0\">\n"
        "\t<tr><td border=\"0\" port=\"0\">"
        "<font color=\"Gray75\">%s</font></td></tr>\n",
        (const void *)sub->node, orcify(sub->node));
    if(sub->node->size) fprintf(fp, "\t<hr/>\n");
    /* One row per key, ports 1 to `size`; odd-indexed rows are shaded. */
    for(i = 0; i < sub->node->size; i++) {
        const char *const bgc = i & 1 ? " bgcolor=\"Gray95\"" : "";
        fprintf(fp, "\t<tr><td border=\"0\" align=\"left\""
            " port=\"%u\"%s>%c</td></tr>\n", i + 1, bgc, sub->node->key[i]);
    }
    fprintf(fp, "\t<hr/>\n"
        "\t<tr><td></td></tr>\n"
        "</table>>];\n");
    /* Height zero has no children to draw. */
    if(!sub->height) return;
    /* Draw the lines between trees: child `i` hangs off port `i`. */
    branch = (struct branch *)(void *)sub->node;
    for(i = 0; i <= branch->base.size; i++)
        fprintf(fp, "\ttrunk%p:%u:se -> trunk%p;\n",
        (const void *)sub->node, i, (const void *)branch->child[i]);
    /* Recurse into each child, one level lower. */
    for(i = 0; i <= branch->base.size; i++) {
        struct tree child;
        child.node = branch->child[i], child.height = sub->height - 1;
        subgraph(&child, fp);
    }
}
/** Writes `tree` to the file named `fn` as a Graphviz digraph,
 <https://graphviz.org/>. A tree with a null root is drawn as a single `idle`
 vertex. If the file cannot be opened, or closing it fails, the error is
 reported with `perror` on `fn`.
 @param tree Tree to draw; must not be null.
 @param fn Name of the file to create or overwrite; must not be null. */
static void graph(const struct tree *const tree,
    const char *const fn) {
    FILE *fp;
    assert(tree && fn);
    if(!(fp = fopen(fn, "w"))) { perror(fn); return; }
    fprintf(fp, "digraph {\n"
        "\tgraph [rankdir=LR, truecolor=true, bgcolor=transparent,"
        " fontname=modern, splines=false];\n"
        "\tnode [shape=none, fontname=modern];\n");
    if(!tree->node)
        fprintf(fp, "\tidle [shape=plaintext];\n");
    else subgraph(tree, fp);
    fprintf(fp, "\tnode [color=\"Red\"];\n"
        "}\n");
    /* Closing flushes whatever is still buffered; report a failure rather
     than silently leaving an incomplete file behind. */
    if(fclose(fp) == EOF) perror(fn);
}
#endif

/** Self-test on a fixed two-level tree holding the keys b, d, f, h, j. Prints
 the results of `tree_right_or` and `tree_left_or` for every letter from 'a' to
 'l', using 'z' as the default, then compares the `tree_left_or` results with
 the expected table.
 @return Zero if every `tree_left_or` result matched, non-zero otherwise, so
 that a failure is visible to whatever ran the program. */
int main(void) {
    struct node node[] = { { 2, {'b','d'} }, { 2, {'h','j'} } };
    struct branch root = { { 1, {'f'} }, {node, node+1} };
    const struct tree tree = { &root.base, 1 };
    const int expected[] = { 'z', 'b', 'b', 'd', 'd', 'f',
        'f', 'h', 'h', 'j', 'j', 'j' };
    /* Named `got` so as not to hide the function `left`. */
    char got[sizeof expected / sizeof *expected];
    char i;
    int passed;
    /*graph(&tree, "graph.gv");
    printf("nodes in B-tree:\n"
        "%s:(b,d), %s:(f), %s:(h,j)\n\n",
        orcify(&node[0]), orcify(&root), orcify(&node[1]));*/
    printf("right or z\n");
    for(i = 'a'; i < 'm'; i++)
        printf("%c\t%c\n", i, tree_right_or(tree, i, 'z'));
    printf("\n"
        "left or z\n");
    for(i = 'a'; i < 'm'; i++)
        printf("%c\t%c\n", i, got[i-'a'] = tree_left_or(tree, i, 'z'));
    printf("\n"
        "supposed to be...\n");
    for(passed = 1, i = 'a'; i < 'm'; i++) {
        printf("%c\t%c\n", i, expected[i-'a']);
        if(got[i-'a'] != expected[i-'a']) passed = 0;
    }
    printf("\n"
        "%s.\n", passed ? "PASSED" : "failed");
    return passed ? 0 : 1;
}
Neil
  • 1,767
  • 2
  • 16
  • 22