1

I am trying to find a way to cut out repeated code in an application that center around LINQ select statements. Lets say we have existing table rows that need to be aggregated and grouped for different reporting requirements and all original data is just grouped by day and needs to be grouped by week / month and another property.

DataRow is an example object that needs to be grouped and converted into an object ReportTableRow (please note this is a much reduced object but the actual objects have far more properties and therefore become much more drawn out).

    public class DataRow
    {
        public DateTime Date { get; set; }
        public string AccountNumber { get; set; }
        public string MachineNumber { get; set; }
        public int TEST { get; set; }
    }


    public class ReportTableRow
    {
        public int WeekNumber { get; set; }
        public int Month { get; set; }
       
        public string AccountNumber{ get; set; }
        public string MachineNumber { get; set; }
        
        public int TEST { get; set; }
        public string TEST_TRAFFICLIGHT { get; set; }       
    }

And we create a list of DataRows:

List<DataRow> reportTable = new List<DataRow>()
{
    new DataRow()
    {
        Date = new DateTime(2021, 06, 14),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 15),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 1
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 15),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 6
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 16),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 4
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 17),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 18),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 7
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 19),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 20),
        AccountNumber = "11111111",
        MachineNumber = "00AB2021",
        TEST = 11
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 14),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 15),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 1
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 15),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 6
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 16),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 4
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 17),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 18),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 7
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 19),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 2
    },
    new DataRow()
    {
        Date = new DateTime(2021, 06, 20),
        AccountNumber = "22222222",
        MachineNumber = "11BC2021",
        TEST = 11
    }
};

So if we either need the data grouped "BY WEEK" or "BY MONTH" then we need to actually return in the report the WeekNumber or Month number respectively and the grouping would look something like this where GetTrafficLight method returns a string value based on the value of the sum of TEST:

switch (aggregate.ToUpper())
{
    case "BY WEEK":
        reportTable = reportTable
                                .GroupBy(x => new { x.AccountNumber, WeekNumber = CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(x.Date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday) })
                                .Select(x => new ReportTableRow
                                {
                                    WeekNumber = x.Key.WeekNumber,
                                    Month = x.Max(y => y.Date).Month,
                                    MachineNumber = x.FirstOrDefault().MachineNumber,
                                    TEST = x.Sum(y => y.TEST),
                                    TEST_TRAFFICLIGHT = GetTrafficLight(x.Sum(y => y.TEST)
                                })
                                .ToList();
        break;
    case "BY MONTH":
        reportTable = reportTable
                                .GroupBy(x => new { x.AccountNumber, x.Date.Month })
                                .Select(x => new ReportTableRow
                                {
                                    WeekNumber = CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(x.Max(y => y.Date), CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday),
                                    Month = x.Key.Month,
                                    
                                    MachineNumber = x.FirstOrDefault().MachineNumber,
                                    TEST = x.Sum(y => y.TEST),
                                    TEST_TRAFFICLIGHT = GetTrafficLight(x.Sum(y => y.TEST)
                                })
                                .ToList();
        break;
}

The question is, is there anyway to remove the "Select" code and pass it into either a method or object that would accept an anonymous grouping so that it can be reused multiple times. Changing the anonymous grouping to a compile time object that contains two properties means duplicates are then returned in the dataset, could be that finding a way to remove the duplicates in compile time grouped members might help to resolve?

Please note, the code has been created to pose this question.

cullimorer
  • 755
  • 1
  • 5
  • 23
  • I've done similar to this before, and it was very similar to this SO post answer: https://stackoverflow.com/a/16517768/1188197 – EspressoBeans Jun 22 '21 at 15:16
  • I'll take a look, thanks! – cullimorer Jun 22 '21 at 15:18
  • Looks like it's along those lines but seems to be simple selecting of a string value into another string value, what about calculations like "Sum" and also passing in an outside method such as "GetTrafficLight"? – cullimorer Jun 22 '21 at 15:21

1 Answers1

2

This is optional, but the first thing I would do is put that "aggregate" into a boolean variable:

bool byWeek = aggregate.ToUpper() == "BY WEEK";

The next thing I would do is capture and tame that "GetWeekOfYear" logic:

int getFirstMonday (DateTime date) => 
    CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(
        date, 
        CalendarWeekRule.FirstFourDayWeek, 
        DayOfWeek.Monday
    ); 

Doing these things makes it more feasible to put the conditional logic inside the GroupBy and Select methods, making it so that you don't have to do the whole thing twice:

List<ReportTableRow> results = 
    reportTable
    .GroupBy(x => new { 
        x.AccountNumber, 
        TimeBin = byWeek ? getFirstMonday(x.Date): x.Date.Month
    })
    .Select(x => new ReportTableRow {
        WeekNumber = byWeek ? x.Key.TimeBin : getFirstMonday(x.Max(y => y.Date)),
        Month = byWeek ? x.Max(y => y.Date).Month : x.Key.TimeBin,
        MachineNumber = x.FirstOrDefault().MachineNumber,
        TEST = x.Sum(y => y.TEST),
        TEST_TRAFFICLIGHT = GetTrafficLight(x.Sum(y => y.TEST))
    })
    .ToList();

I should note that your sample data makes it so that the results are the same regardless of 'aggregate' setting. But if you change it a bit, such as changing the days for some of the entries, you'll get different results in the aggregation. And at least for the changes I made my code repeats the behavior of your code when toggling 'aggregate'.

pwilcox
  • 5,542
  • 1
  • 19
  • 31