Say you have a bunch of actions for creating/inserting records into a bunch of different database tables. You have some records which can be inserted without any dependency on the output of any other insert. You have some which need to wait for one other thing to finish. And you have others that need to wait for many things to finish, which might finish at different times in the flow.
How can you write an algorithm which would sort and chunk the actions in the dependency tree so the inserts / database actions can be optimally batched? By optimally batched, I mean if you can insert 10 records into the same table at once, then do that. Any time you can batch insert, you should, to minimize the number of database calls/inserts.
Here is a snippet from the example code I whipped together with a fake sequence of actions using a simple data structure to capture all the required information.
...
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
{ action: 'create', table: 'tb4', set: 'key13' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
...
Note that there are 4 possible properties on our "dependency node item":
action
: This is always "create" in our situation, but potetntially could be other things in the future.table
: The table name to insert into.set
: The name of the variable to add to the shared global "scope" in the action dependency tree, so other actions can read this as input.input
: Inputs to the action, which in our case are all "binding" inputs (but could just as well be literal values, but that would be too easy). For binding inputs, it reads some property/column value from a record stored in the shared scope of the dependency tree.
Given that, it should be possible somehow to construct a simple algorithm that chunks the actions into subsets which can be run in parallel AND batched. For example, our code below would end up something like this structure (I manually created this output, so there could be mistakes though I think I got it right. Oh and note, while the tables are numbered, it doesn't imply an order to them necessarily, just were simple names to pick):
// this is "close to" the desired result, because
// there might be a slightly different sort applied
// to this when implemented, but the chunking pattern
// and grouping of elements should be exactly like This
// (I am pretty sure, I manually did this
// so there could be small mistakes, but I doubt it)
const closeToDesiredResult = [
[
[
{ action: 'create', table: 'tb1', set: 'key1' },
{ action: 'create', table: 'tb1', set: 'key21' },
],
[
{ action: 'create', table: 'tb2', set: 'key2' },
{ action: 'create', table: 'tb2', set: 'key3' },
{ action: 'create', table: 'tb2', set: 'key23' },
],
[
{ action: 'create', table: 'tb4', set: 'key6' },
{ action: 'create', table: 'tb4', set: 'key8' },
{ action: 'create', table: 'tb4', set: 'key13' },
],
[
{ action: 'create', table: 'tb3', set: 'key5' },
{ action: 'create', table: 'tb3', set: 'key7' },
{ action: 'create', table: 'tb3', set: 'key9' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb3', set: 'key24' },
],
[
{ action: 'create', table: 'tb6', set: 'key17' },
],
[
{ action: 'create', table: 'tb5', set: 'key16' },
]
],
[
[
{ action: 'create', table: 'tb1', set: 'key4', input: {
x: { type: 'binding', path: ['key2', 'baz'] }
} },
],
[
{ action: 'create', table: 'tb3', set: 'key10', input: {
y: { type: 'binding', path: ['key6', 'foo'] },
z: { type: 'binding', path: ['key1', 'bar'] }
} },
],
[
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
]
],
[
[
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
],
[
{ action: 'create', table: 'tb4', set: 'key11', input: {
a: { type: 'binding', path: ['key10', 'z'] },
b: { type: 'binding', path: ['key1', 'bar'] }
} },
],
[
{ action: 'create', table: 'tb6', set: 'key18', input: {
m: { type: 'binding', path: ['key4', 'x'] },
} },
{ action: 'create', table: 'tb6', set: 'key19', input: {
m: { type: 'binding', path: ['key4', 'x'] },
n: { type: 'binding', path: ['key13', 'a'] },
} },
]
],
[
[
{ action: 'create', table: 'tb2', set: 'key22', input: {
w: { type: 'binding', path: ['key18', 'm'] },
x: { type: 'binding', path: ['key17', 'm'] },
} },
],
[
{ action: 'create', table: 'tb6', set: 'key20', input: {
m: { type: 'binding', path: ['key18', 'm'] },
n: { type: 'binding', path: ['key17', 'm'] },
} },
]
]
]
Notice how there are 4 top-level chunks in the resulting array. These are the main steps. Then within each step, everything is grouped by table, so they can all be run in parallel, and within each table group, they can all be batch inserted. Boom.
How would you implement this, it seems quite tricky for my mind to grasp?
const actionTree = generateActionTree()
const chunkedActionTree = chunkDependencyTree(actionTree)
function chunkDependencyTree(list) {
const independentOnesMapByTableName = {}
list.forEach(node => {
// easy case
if (!node.input) {
const group = independentOnesMapByTableName[node.table]
= independentOnesMapByTableName[node.table] ?? []
group.push(node)
} else {
// I am at a loss for words...
}
})
}
function generateActionTree() {
// this would be constructed through a bunch of real-world
// functions, queuing up all the actions
// and pointing outputs to inputs.
return [
{ action: 'create', table: 'tb1', set: 'key1' },
{ action: 'create', table: 'tb2', set: 'key2' },
{ action: 'create', table: 'tb2', set: 'key3' },
{ action: 'create', table: 'tb3', set: 'key5' },
{ action: 'create', table: 'tb4', set: 'key6' },
{ action: 'create', table: 'tb3', set: 'key7' },
{ action: 'create', table: 'tb4', set: 'key8' },
{ action: 'create', table: 'tb3', set: 'key9' },
{ action: 'create', table: 'tb3', set: 'key10', input: {
y: { type: 'binding', path: ['key6', 'foo'] },
z: { type: 'binding', path: ['key1', 'bar'] }
} },
{ action: 'create', table: 'tb1', set: 'key4', input: {
x: { type: 'binding', path: ['key2', 'baz'] }
} },
{ action: 'create', table: 'tb4', set: 'key11', input: {
a: { type: 'binding', path: ['key10', 'z'] },
b: { type: 'binding', path: ['key1', 'bar'] }
} },
{ action: 'create', table: 'tb1', set: 'key12', input: {
p: { type: 'binding', path: ['key10', 'z'] },
q: { type: 'binding', path: ['key11', 'a'] }
} },
{ action: 'create', table: 'tb4', set: 'key13' },
{ action: 'create', table: 'tb3', set: 'key14' },
{ action: 'create', table: 'tb4', set: 'key15', input: {
a: { type: 'binding', path: ['key8', 'z'] },
} },
{ action: 'create', table: 'tb5', set: 'key16' },
{ action: 'create', table: 'tb6', set: 'key17' },
{ action: 'create', table: 'tb6', set: 'key18', input: {
m: { type: 'binding', path: ['key4', 'x'] },
} },
{ action: 'create', table: 'tb6', set: 'key19', input: {
m: { type: 'binding', path: ['key4', 'x'] },
n: { type: 'binding', path: ['key13', 'a'] },
} },
{ action: 'create', table: 'tb6', set: 'key20', input: {
m: { type: 'binding', path: ['key18', 'm'] },
n: { type: 'binding', path: ['key17', 'm'] },
} },
{ action: 'create', table: 'tb1', set: 'key21' },
{ action: 'create', table: 'tb2', set: 'key22', input: {
w: { type: 'binding', path: ['key18', 'm'] },
x: { type: 'binding', path: ['key17', 'm'] },
} },
{ action: 'create', table: 'tb2', set: 'key23' },
{ action: 'create', table: 'tb3', set: 'key24' },
]
}
I think this is roughly topological sorting, but not quite sure how to apply it to this specific situation.