I have a question about entity framework query building.
Schema
I have a table structure like this:
CREATE TABLE [dbo].[DataLogger](
[ID] [bigint] IDENTITY(1,1) NOT NULL,
[ProjectID] [bigint] NULL,
CONSTRAINT [PrimaryKey1] PRIMARY KEY CLUSTERED ( [ID] ASC )
)
CREATE TABLE [dbo].[DCDistributionBox](
[ID] [bigint] IDENTITY(1,1) NOT NULL,
[DataLoggerID] [bigint] NOT NULL,
CONSTRAINT [PrimaryKey2] PRIMARY KEY CLUSTERED ( [ID] ASC )
)
ALTER TABLE [dbo].[DCDistributionBox]
ADD CONSTRAINT [FK_DCDistributionBox_DataLogger] FOREIGN KEY([DataLoggerID])
REFERENCES [dbo].[DataLogger] ([ID])
CREATE TABLE [dbo].[DCString] (
[ID] [bigint] IDENTITY(1,1) NOT NULL,
[DCDistributionBoxID] [bigint] NOT NULL,
[CurrentMPP] [decimal](18, 2) NULL,
CONSTRAINT [PrimaryKey3] PRIMARY KEY CLUSTERED ( [ID] ASC )
)
ALTER TABLE [dbo].[DCString]
ADD CONSTRAINT [FK_DCString_DCDistributionBox] FOREIGN KEY([DCDistributionBoxID])
REFERENCES [dbo].[DCDistributionBox] ([ID])
CREATE TABLE [dbo].[StringData](
[DCStringID] [bigint] NOT NULL,
[TimeStamp] [datetime] NOT NULL,
[DCCurrent] [decimal](18, 2) NULL,
CONSTRAINT [PrimaryKey4] PRIMARY KEY CLUSTERED ( [TimeStamp] DESC, [DCStringID] ASC)
)
The [StringData]
table as has following storage stats:
- Data space: 26,901.86 MB
- Row count: 131,827,749
- Partitioned: true
- Partition count: 62
Usage
I now want to group the data in the [StringData]
table and do some aggregation.
In pure SQL it would look like this:
declare @projectID bigint = 20686;
declare @from datetime = '06.02.2016';
declare @till datetime = '07.02.2016';
declare @interval int = 15;
SELECT
DATEADD(MINUTE, DATEDIFF(MINUTE, 0, [StringData].[TimeStamp] ) / @interval * @interval, 0) AS [TimeStamp]
, AVG([StringData].[DCCurrent] / [DCString].[CurrentMPP]) AS [DCCurrentAvg]
, MIN([StringData].[DCCurrent] / [DCString].[CurrentMPP]) AS [DCCurrentMin]
, MAX([StringData].[DCCurrent] / [DCString].[CurrentMPP]) AS [DCCurrentMax]
, STDEV([StringData].[DCCurrent] / [DCString].[CurrentMPP]) AS [DCCurrentStDev]
, COUNT(*) AS [Count]
FROM [StringData]
JOIN [DCString] ON [DCString].[ID] = [StringData].[DCStringID]
JOIN [DCDistributionBox] ON [DCDistributionBox].[ID] = [DCString].[DCDistributionBoxID]
JOIN [DataLogger] ON [DataLogger].[ID] = [DCDistributionBox].[DataLoggerID]
WHERE [DataLogger].[ProjectID] = @projectID
AND [StringData].[TimeStamp] >= @from
AND [StringData].[TimeStamp] < @till
GROUP BY DATEADD(MINUTE, DATEDIFF(MINUTE, 0, [StringData].[TimeStamp] ) / @interval * @interval, 0)
Excecution timespan: 653ms
Now I created a Entity Framework equivalent (I thought):
var compareData = model.StringDatas
AsNoTracking()
.Where(p => p.DCString.DCDistributionBox.DataLogger.ProjectID == projectID && p.TimeStamp >= from && p.TimeStamp < till)
.Select(d => new
{
TimeStamp = d.Key,
DCCurrentMin = d.Min(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentMax = d.Max(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentAvg = d.Average(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentStDev = DbFunctions.StandardDeviation(d.Select(v => v.DCCurrent / v.DCString.CurrentMPP))
})
.ToList();
And the result of excecution was a timeout (longer the 30 seconds)!?
Attempts
I now took a look into the Entity Framework generated SQL query and looks like this:
SELECT
1 AS [C1],
[Project10].[C1] AS [C2],
[Project10].[C2] AS [C3],
[Project10].[C3] AS [C4],
[Project10].[C4] AS [C5],
[Project10].[C5] AS [C6]
FROM ( SELECT
[Project8].[C1] AS [C1],
[Project8].[C2] AS [C2],
[Project8].[C3] AS [C3],
[Project8].[C4] AS [C4],
(SELECT
STDEV([Project9].[A1]) AS [A1]
FROM ( SELECT
[Project9].[DCCurrent] / [Project9].[CurrentMPP] AS [A1]
FROM ( SELECT
[Extent17].[DCStringID] AS [DCStringID],
[Extent17].[DCCurrent] AS [DCCurrent],
[Extent18].[ID] AS [ID],
[Extent18].[CurrentMPP] AS [CurrentMPP]
FROM [dbo].[StringData] AS [Extent17]
INNER JOIN [dbo].[DCString] AS [Extent18] ON [Extent17].[DCStringID] = [Extent18].[ID]
INNER JOIN [dbo].[DCDistributionBox] AS [Extent19] ON [Extent18].[DCDistributionBoxID] = [Extent19].[ID]
INNER JOIN [dbo].[DataLogger] AS [Extent20] ON [Extent19].[DataLoggerID] = [Extent20].[ID]
WHERE (([Extent20].[ProjectID] = @p__linq__0) OR (([Extent20].[ProjectID] IS NULL) AND (@p__linq__0 IS NULL))) AND ([Extent17].[TimeStamp] >= @p__linq__1) AND ([Extent17].[TimeStamp] < @p__linq__2) AND (([Project8].[C1] = (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent17].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3))) OR (([Project8].[C1] IS NULL) AND (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent17].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3) IS NULL)))
) AS [Project9]
) AS [Project9]) AS [C5]
FROM ( SELECT
[Project6].[C1] AS [C1],
[Project6].[C2] AS [C2],
[Project6].[C3] AS [C3],
(SELECT
AVG([Project7].[A1]) AS [A1]
FROM ( SELECT
[Project7].[DCCurrent] / [Project7].[CurrentMPP] AS [A1]
FROM ( SELECT
[Extent13].[DCStringID] AS [DCStringID],
[Extent13].[DCCurrent] AS [DCCurrent],
[Extent14].[ID] AS [ID],
[Extent14].[CurrentMPP] AS [CurrentMPP]
FROM [dbo].[StringData] AS [Extent13]
INNER JOIN [dbo].[DCString] AS [Extent14] ON [Extent13].[DCStringID] = [Extent14].[ID]
INNER JOIN [dbo].[DCDistributionBox] AS [Extent15] ON [Extent14].[DCDistributionBoxID] = [Extent15].[ID]
INNER JOIN [dbo].[DataLogger] AS [Extent16] ON [Extent15].[DataLoggerID] = [Extent16].[ID]
WHERE (([Extent16].[ProjectID] = @p__linq__0) OR (([Extent16].[ProjectID] IS NULL) AND (@p__linq__0 IS NULL))) AND ([Extent13].[TimeStamp] >= @p__linq__1) AND ([Extent13].[TimeStamp] < @p__linq__2) AND (([Project6].[C1] = (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent13].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3))) OR (([Project6].[C1] IS NULL) AND (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent13].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3) IS NULL)))
) AS [Project7]
) AS [Project7]) AS [C4]
FROM ( SELECT
[Project4].[C1] AS [C1],
[Project4].[C2] AS [C2],
(SELECT
MAX([Project5].[A1]) AS [A1]
FROM ( SELECT
[Project5].[DCCurrent] / [Project5].[CurrentMPP] AS [A1]
FROM ( SELECT
[Extent9].[DCStringID] AS [DCStringID],
[Extent9].[DCCurrent] AS [DCCurrent],
[Extent10].[ID] AS [ID],
[Extent10].[CurrentMPP] AS [CurrentMPP]
FROM [dbo].[StringData] AS [Extent9]
INNER JOIN [dbo].[DCString] AS [Extent10] ON [Extent9].[DCStringID] = [Extent10].[ID]
INNER JOIN [dbo].[DCDistributionBox] AS [Extent11] ON [Extent10].[DCDistributionBoxID] = [Extent11].[ID]
INNER JOIN [dbo].[DataLogger] AS [Extent12] ON [Extent11].[DataLoggerID] = [Extent12].[ID]
WHERE (([Extent12].[ProjectID] = @p__linq__0) OR (([Extent12].[ProjectID] IS NULL) AND (@p__linq__0 IS NULL))) AND ([Extent9].[TimeStamp] >= @p__linq__1) AND ([Extent9].[TimeStamp] < @p__linq__2) AND (([Project4].[C1] = (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent9].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3))) OR (([Project4].[C1] IS NULL) AND (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent9].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3) IS NULL)))
) AS [Project5]
) AS [Project5]) AS [C3]
FROM ( SELECT
[Project2].[C1] AS [C1],
(SELECT
MIN([Project3].[A1]) AS [A1]
FROM ( SELECT
[Project3].[DCCurrent] / [Project3].[CurrentMPP] AS [A1]
FROM ( SELECT
[Extent5].[DCStringID] AS [DCStringID],
[Extent5].[DCCurrent] AS [DCCurrent],
[Extent6].[ID] AS [ID],
[Extent6].[CurrentMPP] AS [CurrentMPP]
FROM [dbo].[StringData] AS [Extent5]
INNER JOIN [dbo].[DCString] AS [Extent6] ON [Extent5].[DCStringID] = [Extent6].[ID]
INNER JOIN [dbo].[DCDistributionBox] AS [Extent7] ON [Extent6].[DCDistributionBoxID] = [Extent7].[ID]
INNER JOIN [dbo].[DataLogger] AS [Extent8] ON [Extent7].[DataLoggerID] = [Extent8].[ID]
WHERE (([Extent8].[ProjectID] = @p__linq__0) OR (([Extent8].[ProjectID] IS NULL) AND (@p__linq__0 IS NULL))) AND ([Extent5].[TimeStamp] >= @p__linq__1) AND ([Extent5].[TimeStamp] < @p__linq__2) AND (([Project2].[C1] = (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent5].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3))) OR (([Project2].[C1] IS NULL) AND (DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent5].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3) IS NULL)))
) AS [Project3]
) AS [Project3]) AS [C2]
FROM ( SELECT
[Distinct1].[C1] AS [C1]
FROM ( SELECT DISTINCT
DATEADD (minute, ((DATEDIFF (minute, @p__linq__4, [Extent1].[TimeStamp])) / @p__linq__5) * @p__linq__6, @p__linq__3) AS [C1]
FROM [dbo].[StringData] AS [Extent1]
INNER JOIN [dbo].[DCString] AS [Extent2] ON [Extent1].[DCStringID] = [Extent2].[ID]
INNER JOIN [dbo].[DCDistributionBox] AS [Extent3] ON [Extent2].[DCDistributionBoxID] = [Extent3].[ID]
INNER JOIN [dbo].[DataLogger] AS [Extent4] ON [Extent3].[DataLoggerID] = [Extent4].[ID]
WHERE (([Extent4].[ProjectID] = @p__linq__0) OR (([Extent4].[ProjectID] IS NULL) AND (@p__linq__0 IS NULL))) AND ([Extent1].[TimeStamp] >= @p__linq__1) AND ([Extent1].[TimeStamp] < @p__linq__2)
) AS [Distinct1]
) AS [Project2]
) AS [Project4]
) AS [Project6]
) AS [Project8]
) AS [Project10]
Question
Why does Entity Framework seperates every aggregation into a sngle subselect and how can I avoid this to get a performance near to the raw SQL query?
Update 1
This has exact the same SQL query output and timeout result:
var query = from d in model.StringDatas
where d.DCString.DCDistributionBox.DataLogger.ProjectID == projectID
where d.TimeStamp >= fromDate
where d.TimeStamp < tillDate
group d by DbFunctions.AddMinutes(DateTime.MinValue, DbFunctions.DiffMinutes(DateTime.MinValue, d.TimeStamp) / minuteInterval * minuteInterval) into g
select new
{
TimeStamp = g.Key,
DCCurrentMin = g.Min(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentMax = g.Max(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentAvg = g.Average(v => v.DCCurrent / v.DCString.CurrentMPP),
DCCurrentStDev = DbFunctions.StandardDeviation(g.Select(v => v.DCCurrent / v.DCString.CurrentMPP))
};
var queryResult= query.ToList();