It is fairly difficult to do this, but you can get some mileage out of checksums. One approach is to split the key range into several subranges that can be verified a) in parallel and/or b) at different scheduled intervals. E.g.:
use master;
go
set nocount on;
go
if db_id('test') is not null
begin
alter database test set single_user with rollback immediate;
drop database test;
end
go
create database test;
go
use test;
go
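-- the table whose contents will be verified against the copy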
create table data (id int identity(1,1) not null primary key,
data1 varchar(38),
data2 bigint,
created_at datetime not null default getdate());
go
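-- populate with 1,000,000 rows, committing every 1,000 inserts to keep transactions (and log growth) small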
declare @i int = 0;
begin transaction
while @i < 1000000
begin
insert into data (data1, data2) values (newid(), @i);
set @i += 1;
if @i % 1000 = 0
begin
commit;
raiserror (N'Inserted %d', 0, 0, @i) with nowait; -- nowait flushes the progress message immediately
begin tran;
end
end
commit
raiserror (N'Inserted %d', 0, 0, @i) with nowait;
go
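-- create the second database via backup/restore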
backup database test to disk='c:\temp\test.bak' with init;
go
if db_id('copy') is not null
begin
alter database copy set single_user with rollback immediate;
drop database copy;
end
go
restore database copy from disk='c:\temp\test.bak'
with move 'test' to 'c:\temp\copy.mdf', move 'test_log' to 'c:\temp\copy_log.ldf';
go
-- create some differences
--
-- rand()*1000000 truncates to 0..999999; +1 keeps the id inside the populated range 1..1000000
update test..data set data1 = newid() where id = 1 + cast(rand()*1000000 as int);
update copy..data set data1 = newid() where id = 1 + cast(rand()*1000000 as int);
delete from test..data where id = 1 + cast(rand()*1000000 as int);
insert into copy..data (data1, data2) values (newid(), -1);
-- do the check
--
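-- compare checksums of 10,000-row id ranges; the bound of 1,010,000 leaves headroom
-- for rows inserted after the original 1,000,000 identity values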
declare @id int = 0;
while @id < 1010000
begin
declare @chk1 int, @chk2 int;
select @chk1 = checksum_agg(binary_checksum(*)) from test..data where id >= @id and id < @id + 10000;
select @chk2 = checksum_agg(binary_checksum(*)) from copy..data where id >= @id and id < @id + 10000;
-- null-safe comparison: a range that is empty on one side only also counts as a difference
if @chk1 != @chk2
    or (@chk1 is null and @chk2 is not null)
    or (@chk1 is not null and @chk2 is null)
begin
-- locate the different row(s)
--
-- rows present or different in test but not in copy
select t.id, binary_checksum(*) as chk
from test..data t
where t.id >= @id and t.id < @id + 10000
except
select c.id, binary_checksum(*) as chk
from copy..data c
where c.id >= @id and c.id < @id + 10000;
-- rows present or different in copy but not in test
select c.id, binary_checksum(*) as chk
from copy..data c
where c.id >= @id and c.id < @id + 10000
except
select t.id, binary_checksum(*) as chk
from test..data t
where t.id >= @id and t.id < @id + 10000;
end
else
begin
raiserror (N'Range %d is OK', 0,0, @id);
end
set @id += 10000;
end
The main issue is that locating the differing rows requires scanning all the rows in a range, which is expensive. By splitting the key space into ranges, you can submit individual ranges for verification on a rotating schedule, so the whole table gets covered over time without ever scanning it all at once.
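As a minimal sketch of such a rotating schedule (the check_ranges table and the 10,000-row stride are assumptions of this sketch, not part of the script above), a scheduled job can keep one row per subrange with the time it was last verified and always pick the stalest range:

-- hypothetical bookkeeping table: one row per subrange, with last verification time
create table check_ranges (
range_start int not null primary key,
range_end int not null,
last_checked datetime null);
go
-- seed it with 10,000-row subranges covering the key space
declare @r int = 0;
while @r < 1010000
begin
insert into check_ranges (range_start, range_end) values (@r, @r + 10000);
set @r += 10000;
end
go
-- each scheduled run verifies only the stalest range (never-checked ranges sort first on ascending order)
declare @from int, @to int;
select top (1) @from = range_start, @to = range_end
from check_ranges
order by last_checked asc;
declare @chk1 int, @chk2 int;
select @chk1 = checksum_agg(binary_checksum(*)) from test..data where id >= @from and id < @to;
select @chk2 = checksum_agg(binary_checksum(*)) from copy..data where id >= @from and id < @to;
-- ... compare @chk1 and @chk2 and locate differences exactly as above ...
update check_ranges set last_checked = getdate() where range_start = @from;
go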
The usual CHECKSUM_AGG and BINARY_CHECKSUM(*) restrictions apply, of course:
BINARY_CHECKSUM ignores columns of noncomparable data types in its computation. Noncomparable data types include text, ntext, image, cursor, xml, and noncomparable common language runtime (CLR) user-defined types.
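Since BINARY_CHECKSUM also accepts an explicit expression list instead of *, one workaround when such columns exist is to checksum only the comparable columns and compare the noncomparable ones separately. A sketch, assuming a hypothetical xml column named payload had been added to the table (note that HASHBYTES input is limited to 8,000 bytes before SQL Server 2016):

-- checksum only the comparable columns, listed explicitly
select checksum_agg(binary_checksum(id, data1, data2, created_at))
from test..data
where id >= @id and id < @id + 10000;
-- compare the hypothetical noncomparable column via a cryptographic hash instead
select id, hashbytes('SHA2_256', cast(payload as nvarchar(max))) as payload_hash
from test..data
where id >= @id and id < @id + 10000;

Also keep in mind that checksums can collide, so a matching range checksum is strong but not absolute evidence that the rows are identical.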