3

I need to run a LINQ query against a DataTable to get distinct values using multiple columns. If I know the columns I need to select beforehand, I can use the answer from David Hoarser at Select distinct rows from datatable in Linq:

var distinctValues = dsValues.AsEnumerable()
                    .Select(row => new {
                        attribute1_name = row.Field<string>("attribute1_name"),
                        attribute2_name = row.Field<string>("attribute2_name")
                     })
                    .Distinct();

However, in my case, I have the column names I need to select in a list of objects:

public class PC{
    public string Name{get;set;} 
    public string NumFormat{get;set}
}
List<PC>cols=new List<PC>();
cols.AddRange(new PC[]{new PC{Name="FullName",NumFormat=""},
                      {new PC{Name="Salary",NumFormat="$ #,##0"}});

I could use David's example from above to build the select statement as:

var distinctValues = dsValues.AsEnumerable()
                .Select(row => new {
                    attribute1_name = row.Field<string>(cols[0].Name),
                    attribute2_name = row.Field<string>(cols[1].Name)
                 })
                .Distinct();

But this won't work since I don't know how many columns I need to select Distinct over since the cols list can vary. How can I loop over the list of columns to build my Select statement to ultimately get my Distinct values?

Community
  • 1
  • 1
ccampj
  • 665
  • 1
  • 8
  • 18

2 Answers2

4

You can implement custom IEqualityComparer for that task

class DataRowComparer : IEqualityComparer<DataRow>
{
    private readonly List<string> _columns = new List<string>();
    public DataRowComparer(DataColumnCollection cols)
    {
        foreach (DataColumn col in cols)
        {
            _columns.Add(col.ColumnName);
        }
    }

    public bool Equals(DataRow x, DataRow y)
    {
        foreach (var column in _columns)
        {
            if (!IsEqual(x, y, column))
                return false;
        }

        return true;
    }


    public int GetHashCode(DataRow obj)
    {
        unchecked
        {
            var hash = 19;
            foreach (var column in _columns)
            {
                hash = hash*31 + (obj[column] == null ? 0 : obj[column].GetHashCode());
            }

            return hash;
        }
    }

    private static bool IsEqual(DataRow x, DataRow y, string column)
    {
        if (x[column] == null && y[column] == null)
            return true;

        if (x[column] == null || y[column] == null)
            return false;

        return x[column].Equals(y[column]);
    }
}

Usage

var distinctValues = dsValues
                 .AsEnumerable()
                 .Distinct(new DataRowComparer(dsValues.Columns))
                 .ToList();

You can also create extension method for DataTable

static class DataTableExtension
{
    public static IEnumerable<DataRow> DistinctRow(this DataTable table)
    {
        return table
            .AsEnumerable()
            .Distinct(new DataRowComparer(table.Columns));
    }
}

Usage

var distinctValues = dsValues
                    .DistinctRow()
                    .ToList();
Niyoko
  • 7,512
  • 4
  • 32
  • 59
  • I added one more constructor: public DataRowComparer(List cols) { _columns = cols; } I needed to run Distinct against a sub set of table columns. Your answer worked perfectly. – ccampj Oct 28 '16 at 15:26
  • @ccampj Of course you can modify the class if you need to. It's just simple example. – Niyoko Oct 28 '16 at 15:26
0
    public static DataTable GetDistinctRows(DataTable dataTable, string[][] fields, string newName)
    {
        DataTable result =
            new DataTable
            {
                TableName = newName,
                Locale = *
            };

        if (dataTable != null)
        {
            fields = fields
                .Where(a => dataTable.Columns.Contains(a[0]))
                .Select(a => a)
                .ToArray();

            foreach (string[] field in fields)
            {
                result.Columns.Add(dataTable.Columns[field[0]].ColumnName, dataTable.Columns[field[0]].DataType);
                result.Columns[field[0]].Caption = field[1];
            }

            List<int> hashCodes = new List<int>();

            DataRowComparer<DataRow> comparer = DataRowComparer.Default;

            for (int i = 0; i < dataTable.Rows.Count; i++)
            {
                DataRow row = result.NewRow();

                foreach (string[] field in fields)
                {
                    row[field[0]] = dataTable.Rows[i][field[0]];
                }

                int hashCode = comparer.GetHashCode(row);
                if (hashCodes.All(a => a != hashCode))
                {
                    hashCodes.Add(hashCode);
                    result.Rows.Add(row);
                }
            }
        }

        return result;
    }
koolsand
  • 11
  • 1