Sometimes, my lists of emails include duplicate addresses for the same person. For example, Jane's addresses are both "jane.doe@email.com" and "doe.jane@email". Her variants include replacing the "." with "-" or "_". At the moment, my duplicates script—upgraded ever so kindly by @Jordan Running and Ed Nelson—takes care of 'strict' duplicates, yet cannot detect that "doe.jane@email.com" is a 'complicated' duplicate of "jane.doe@email.com". Is there a way to delete even these duplicates such that I do not email more than one of Jane's addresses? All of them point to the same inbox, so I need only include one of her addresses.
Here is my current code:
function removeDuplicates() {
const startTime = new Date();
const newData = [];
const sheet = SpreadsheetApp.getActiveSheet();
const data = sheet.getDataRange().getValues();
const numRows = data.length;
const seen = {};
for (var i = 0, row, key; i < numRows && (row = data[i]); i++) {
key = JSON.stringify(row);
if (key in seen) {
continue;
}
seen[key] = true;
newData.push(row);
};
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
// Show summary
const secs = (new Date() - startTime) / 1000;
SpreadsheetApp.getActiveSpreadsheet().toast(
Utilities.formatString('Processed %d rows in %.2f seconds (%.1f rows/sec); %d deleted',
numRows, secs, numRows / secs, numRows - newData.length),
'Remove duplicates', -1);
}