I have 2 tables and I am trying to perform a left join using google query language,or any formula that could output the result set.
Table1
Table2
Result set
How can I accomplish that ?
Regards
I have 2 tables and I am trying to perform a left join using google query language,or any formula that could output the result set.
Table1
Table2
Result set
How can I accomplish that ?
Regards
OK well here is an inner join to start with:
=ArrayFormula(query(iferror(split(flatten(if(transpose(filter(Table2!B2:B,Table2!B2:B<>""))=filter(Table1!A2:A,Table1!A2:A<>""),filter(Table1!A2:A,Table1!A2:A<>"")&"|"&transpose(filter(Table2!A2:A,Table2!A2:A<>"")),)),"|")),"select Col1,Col2 where Col1 is not null label Col1 '',Col2 ''"))
which builds up a 2D array and fills in the positions where the two sets of data match, then flattens it back into a 1D array and splits it back into two columns.
I think you just have to add the non-matching rows to get a left outer join:
=ArrayFormula({query(iferror(split(flatten(if(transpose(filter(Table2!B2:B,Table2!B2:B<>""))=filter(Table1!A2:A,Table1!A2:A<>""),
filter(Table1!A2:A,Table1!A2:A<>"")&"|"&transpose(filter(Table2!A2:A,Table2!A2:A<>"")),)),"|")),"select Col1,Col2 where Col1 is not null label Col1 '',Col2 ''");
filter(Table1!A2:B,isna(vlookup(Table1!A2:A,Table2!B2:B,1,false)))})
Note
This is a special case where the first table just consists of keys (ID), and you want just the key plus the other column from the second table for rows where the IDs match. It would be straightforward to add more columns separated by a pipe symbol (or any other character of choice), but these would have to be hard-coded: I don't know of any way with this approach to automatically include all columns from both tables.
This is in contrast to the answers here which do automatically combine columns from both tables but don't allow for a one-to-many relationship.
I have also seen many solutions that have complicated formulas using VLOOKUP
, INDEX
, MATCH
, etc.
I decided to write a user function to combine tables, or as I refer to it, de-normalize the database. I wrote the function DENORMALIZE()
to support INNER
, LEFT
, RIGHT
and FULL
joins. By nesting function calls one can join unlimited tables in theory.
DENORMALIZE(range1, range2, primaryKey, foreignKey, [joinType])
Parameters:
range1
, the main table as a named range, a1Notation or an arrayrange2
, the related table as a named range, a1Notation or an arrayprimaryKey
, the unique identifier for the main table, columns start with "1"foreignKey
, the key in the related table to join to the main table, columns start with "1"joinType
, type of join, "Inner", "Left", "Right", "Full", optional and defaults to "Inner", case insensitiveReturns: results as a two dimensional array
Result Set Example:
=QUERY(denormalize("Employees","Orders",1,3), "SELECT * WHERE Col2 = 'Davolio' AND Col8=2", FALSE)
EmpID | LastName | FirstName | OrderID | CustomerID | EmpID | OrderDate | ShipperID |
---|---|---|---|---|---|---|---|
1 | Davolio | Nancy | 10285 | 63 | 1 | 8/20/1996 | 2 |
1 | Davolio | Nancy | 10292 | 81 | 1 | 8/28/1996 | 2 |
1 | Davolio | Nancy | 10304 | 80 | 1 | 9/12/1996 | 2 |
Other Examples:
=denormalize("Employees","Orders",1,3)
=denormalize("Employees","Orders",1,3,"full")
=QUERY(denormalize("Employees","Orders",1,3,"left"), "SELECT * ", FALSE)
=QUERY(denormalize("Employees","Orders",1,3), "SELECT * WHERE Col2 = 'Davolio'", FALSE)
=QUERY(denormalize("Employees","Orders",1,3), "SELECT * WHERE Col2 = 'Davolio' AND Col8=2", FALSE)
=denormalize("Orders","OrderDetails",1,2)
// multiple joins
=denormalize("Employees",denormalize("Orders","OrderDetails",1,2),1,3)
=QUERY(denormalize("Employees",denormalize("Orders","OrderDetails",1,2),1,3), "SELECT *", FALSE)
=denormalize(denormalize("Employees","Orders",1,3),"OrderDetails",1,2)
=QUERY(denormalize("Employees",denormalize("Orders","OrderDetails",1,2),1,3), "SELECT *", FALSE)
=QUERY(denormalize(denormalize("Employees","Orders",1,3),"OrderDetails",4,2), "SELECT *", FALSE)
function denormalize(range1, range2, primaryKey, foreignKey, joinType) {
var i = 0;
var j = 0;
var index = -1;
var lFound = false;
var aDenorm = [];
var hashtable = [];
var aRange1 = "";
var aRange2 = "";
joinType = DefaultTo(joinType, "INNER").toUpperCase();
// the 6 lines below are used for debugging
//range1 = "Employees";
//range1 = "Employees!A2:C12";
//range2 = "Orders";
//primaryKey = 1;
//foreignKey = 3;
//joinType = "LEFT";
// Sheets starts numbering columns starting with "1", arrays are zero-based
primaryKey -= 1;
foreignKey -= 1;
// check if range is not an array
if (typeof range1 !== 'object') {
// Determine if range is a1Notation and load data into an array
if (range1.indexOf(":") !== -1) {
aRange1 = ss.getRange(range1).getValues();
} else {
aRange1 = ss.getRangeByName(range1).getValues();
}
} else {
aRange1 = range1;
}
if (typeof range2 !== 'object') {
if (range2.indexOf(":") !== -1) {
aRange2 = ss.getRange(range2).getValues();
} else {
aRange2 = ss.getRangeByName(range2).getValues();
}
} else {
aRange2 = range2;
}
// make similar structured temp arrays with NULL elements
var tArray1 = MakeArray(aRange1[0].length);
var tArray2 = MakeArray(aRange2[0].length);
var lenRange1 = aRange1.length;
var lenRange2 = aRange2.length;
hashtable = getHT(aRange1, lenRange1, primaryKey);
for(i = 0; i < lenRange2; i++) {
index = hashtable.indexOf(aRange2[i][foreignKey]);
if (index !== -1) {
aDenorm.push(aRange1[index].concat(aRange2[i]));
}
}
// add left and full no matches
if (joinType == "LEFT" || joinType == "FULL") {
for(i = 0; i < lenRange1; i++) {
//index = aDenorm.indexOf(aRange1[i][primaryKey]);
index = aScan(aDenorm, aRange1[i][primaryKey], primaryKey)
if (index == -1) {
aDenorm.push(aRange1[i].concat(tArray2));
}
}
}
// add right and full no matches
if (joinType == "RIGHT" || joinType == "FULL") {
for(i = 0; i < lenRange2; i++) {
index = aScan(aDenorm, aRange2[i][foreignKey], primaryKey)
if (index == -1) {
aDenorm.push(tArray1.concat(aRange2[i]));
}
}
}
return aDenorm;
}
function getHT(aRange, lenRange, key){
var aHashtable = [];
var i = 0;
for (i=0; i < lenRange; i++ ) {
//aHashtable.push([aRange[i][key], i]);
aHashtable.push(aRange[i][key]);
}
return aHashtable;
}
function MakeArray(length) {
var i = 0;
var retArray = [];
for (i=0; i < length; i++) {
retArray.push("");
}
return retArray;
}
function DefaultTo(valueToCheck, valueToDefault) {
return typeof valueToCheck === "undefined" ? valueToDefault : valueToCheck;
}
// Search a multi-dimensional array for a value
function aScan(aValues, searchStr, searchCol) {
var retval = -1;
var i = 0;
var aLen = aValues.length;
for (i = 0; i < aLen; i++) {
if (aValues[i][searchCol] == searchStr) {
retval = i;
break;
}
}
return retval;
}
You can make a copy of the google sheet with data and examples here:
https://docs.google.com/spreadsheets/d/1vziuF8gQcsOxTLEtlcU2cgTAYL1eIaaMTAoIrAS7mnE/edit?usp=sharing
use in B2:
=ARRAYFORMULA(IFNA(VLOOKUP(Table1!A2:A, {Table2!B:B, Table2!A:A}, 2, 0)))