2

This question is exactly the same as Show all rows that have certain columns duplicated but for Sequelize.

Here's a minimal "fill in the missing query reproducer" with the same data as that question:

main.js

#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
  sequelize = new Sequelize('tmp', undefined, undefined, {
    dialect: 'postgres',
    host: '/var/run/postgresql',
  })
} else {
  sequelize = new Sequelize({
    dialect: 'sqlite',
    storage: 'tmp.sqlite',
  })
}
;(async () => {
const User = sequelize.define('User', {
  firstName: {
    type: DataTypes.STRING,
  },
  lastName: {
    type: DataTypes.STRING,
  },
  active: {
    type: DataTypes.BOOLEAN,
  },
}, {})
await User.sync({force: true})
await User.bulkCreate([
  { id: 1, firstName: 'test0', lastName: 'test0', active: false, },
  { id: 2, firstName: 'test0', lastName: 'test0', active: true, },
  { id: 3, firstName: 'test1', lastName: 'test1', active: true, },
  { id: 4, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 5, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
const rows = await User.findAll({ TODO FILL YOUR QUERY HERE })
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })

package.json

{
  "name": "tmp",
  "private": true,
  "version": "1.0.0",
  "dependencies": {
    "pg": "8.5.1",
    "pg-hstore": "2.3.3",
    "sequelize": "6.14.0",
    "sql-formatter": "4.0.2",
    "sqlite3": "5.0.2"
  }
}
Ciro Santilli OurBigBook.com
  • 347,512
  • 102
  • 1,199
  • 985

1 Answers1

1

Because Sequelize doesn't support subqueries without literal string queries, which defeats the point of the ORM, I tried to stay away from GROUP BY COUNT(*) >= 2 solutions, and went for the JOIN solution from: Show all rows that have certain columns duplicated instead:

const rows = await User.findAll({
  include: {
    model: User,
    as: 'duplicate',
    on: {
      '$User.firstName$': { [Op.col]: 'duplicate.firstName' },
      '$User.lastName$': { [Op.col]: 'duplicate.lastName' },
      '$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
    },
    required: true,
    orderBy: [['id', 'ASC']],
  }
})

Unfortunately that also requires setting up a dummy association, even though we are using a custom on:, otherwise sequelize complains:

User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });

Full working example:

main.js

#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
  sequelize = new Sequelize('tmp', undefined, undefined, {
    dialect: 'postgres',
    host: '/var/run/postgresql',
  })
} else {
  sequelize = new Sequelize({
    dialect: 'sqlite',
    storage: 'tmp.sqlite',
  })
}
;(async () => {
const User = sequelize.define('User', {
  firstName: {
    type: DataTypes.STRING,
  },
  lastName: {
    type: DataTypes.STRING,
  },
  active: {
    type: DataTypes.BOOLEAN,
  },
}, {})
// We need this dummy association here.
User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });
await User.sync({force: true})
await User.bulkCreate([
  { id: 1, firstName: 'test0', lastName: 'test0', active: false, },
  { id: 2, firstName: 'test0', lastName: 'test0', active: true, },
  { id: 3, firstName: 'test1', lastName: 'test1', active: true, },
  { id: 4, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 5, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
// Since Sequelize can't handle subqueries, we can do the JOIN approach from:
// https://stackoverflow.com/questions/10324107/show-all-rows-that-have-certain-columns-duplicated/10324160#10324160
const rows = await User.findAll({
  include: {
    model: User,
    as: 'duplicate',
    on: {
      '$User.firstName$': { [Op.col]: 'duplicate.firstName' },
      '$User.lastName$': { [Op.col]: 'duplicate.lastName' },
      '$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
    },
    required: true,
    orderBy: [['id', 'ASC']],
  }
})
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })

GitHub upstream.

Queries generated:

Executing (default): DROP TABLE IF EXISTS `Users`;
Executing (default): CREATE TABLE IF NOT EXISTS `Users` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, `firstName` VARCHAR(255), `lastName` VARCHAR(255), `active` TINYINT(1), `createdAt` DATETIME NOT NULL, `updatedAt` DATETIME NOT NULL);
Executing (default): PRAGMA INDEX_LIST(`Users`)
Executing (default): INSERT INTO `Users` (`id`,`firstName`,`lastName`,`active`,`createdAt`,`updatedAt`) VALUES (1,'test0','test0',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(2,'test0','test0',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(3,'test1','test1',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(4,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(5,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(6,'test3','test3',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00');
Executing (default): SELECT `User`.`id`, `User`.`firstName`, `User`.`lastName`, `User`.`active`, `User`.`createdAt`, `User`.`updatedAt`, `duplicate`.`id` AS `duplicate.id`, `duplicate`.`firstName` AS `duplicate.firstName`, `duplicate`.`lastName` AS `duplicate.lastName`, `duplicate`.`active` AS `duplicate.active`, `duplicate`.`createdAt` AS `duplicate.createdAt`, `duplicate`.`updatedAt` AS `duplicate.updatedAt` FROM `Users` AS `User` INNER JOIN `Users` AS `duplicate` ON `User`.`firstName` = `duplicate`.`firstName` AND `User`.`lastName` = `duplicate`.`lastName` AND `User`.`id` != `duplicate`.`id`;

Tested on Ubuntu 21.10.

Ciro Santilli OurBigBook.com
  • 347,512
  • 102
  • 1,199
  • 985