So, I decided to do tad of experimentation. The result on a data set 1/10th the size of the OP's showed a sub-select performed better than a left join. Although, I would expect the left join to work the best in larger data sets. UPDATE: I set up a dataset like the OP and the LEFT JOIN still didn't out perform the sub-select, but it was very close.
Ensuring there are indexes on the two tables is critical for speed. So make sure your tables are indexed appropriately.
/*
DROP TABLE Cookie
CREATE TABLE Cookie ([CookieID] [bigint] NOT NULL IDENTITY(1,1) PRIMARY KEY,Dummy bit)
DROP TABLE CookiePurpose
CREATE TABLE CookiePurpose (CookiePurposeID [bigint] NOT NULL IDENTITY(1,1) PRIMARY KEY, [CookieID] [bigint] NOT NULL)
CREATE NONCLUSTERED INDEX IX_CookiePurpose_CookieID ON dbo.CookiePurpose (CookieID)
SET NOCOUNT ON
DECLARE @CookieCounter BIGINT=(SELECT COUNT(CookieID) FROM Cookie)
WHILE @CookieCounter<500000 BEGIN INSERT INTO Cookie(Dummy) SELECT 1; SET @CookieCounter=@CookieCounter+1; END
DECLARE @CookiePurposeCounter BIGINT=(SELECT COUNT(CookieID) FROM CookiePurpose)
WHILE @CookiePurposeCounter<1000000 BEGIN INSERT INTO CookiePurpose(CookieID) SELECT Round(( ( 1000000000 - 1 - 1 ) * Rand() + 1 ), 0); SET @CookiePurposeCounter=@CookiePurposeCounter+1; END
*/
--EXCEPT: 300ms @ 1/10 data; 2917ms for full data
DECLARE @start DATETIME2,@end DATETIME2; SET @start=GETDATE()
DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL)
INSERT INTO @KnownCookieIDs
SELECT CookieID FROM CookiePurpose
EXCEPT
SELECT CookieID from Cookie
SET @end=GETDATE(); SELECT DATEDIFF(ms,@start,@end) AS [ms elapsed];
GO
--LEFT JOIN: 123ms @ 1/10 data; 1093ms for full data
DECLARE @start DATETIME2,@end DATETIME2; SET @start=GETDATE()
DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL)
INSERT INTO @KnownCookieIDs
SELECT c.CookieID
FROM Cookie c
LEFT JOIN CookiePurpose cp ON cp.CookieID=c.CookieID
WHERE cp.CookieID IS NULL
SET @end=GETDATE(); SELECT DATEDIFF(ms,@start,@end) AS [ms elapsed];
GO
--sub-select: 113 ms @ 1/10 data; 1046ms for full data
DECLARE @start DATETIME2,@end DATETIME2; SET @start=GETDATE()
DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL)
INSERT INTO @KnownCookieIDs
SELECT c.CookieID
FROM Cookie c
WHERe c.CookieID NOT IN (
SELECT CookieID FROM CookiePurpose
)
SET @end=GETDATE(); SELECT DATEDIFF(ms,@start,@end) AS [ms elapsed];
GO
--Delete: 767ms @ 1/10 data; 8450ms for full data
DECLARE @start DATETIME2,@end DATETIME2; SET @start=GETDATE()
DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL)
INSERT INTO @KnownCookieIDs
SELECT c.CookieID
FROM Cookie c
DELETE c
FROM @KnownCookieIDs c
INNER JOIN CookiePurpose cp ON cp.CookieID=c.CookieID
SET @end=GETDATE(); SELECT DATEDIFF(ms,@start,@end) AS [ms elapsed];
GO