0

How do I delete the duplicate rows from the datatable where there are combinations of same name and dept combinations? I need to keep one entry.

DataTable dt = new DataTable();
dt.Columns.Add("id");
dt.Columns.Add("Name");
dt.Columns.Add("Dept");

dt.Rows.Add(1, "Test1", "Sample1");
dt.Rows.Add(2, "Test2", "Sample2");
dt.Rows.Add(3, "Test3", "Sample3");
dt.Rows.Add(4, "Test4", "Sample4");  // Duplicate 
dt.Rows.Add(5, "Test4", "Sample4");  // Duplicate 
dt.Rows.Add(6, "Test4", "Sample4");  // Duplicate 
dt.Rows.Add(7, "Test4", "Sample5");  

Result data table should be,

dt.Rows.Add(1, "Test1", "Sample1");
dt.Rows.Add(2, "Test2", "Sample2");
dt.Rows.Add(3, "Test3", "Sample3");
dt.Rows.Add(4, "Test4", "Sample4");  
dt.Rows.Add(6, "Test4", "Sample5");  

How can I do this in c#

blue
  • 833
  • 2
  • 12
  • 39
  • A "silly" algorithm would be to loop over each row, create a copy, store it in a list if not stored yet. Before storing you do your comparison. Your list will have no duplicates. The bigger the table, the slower this algorithm will be. – xmashallax Apr 07 '20 at 22:37
  • 1
    I think the answer to this question may be what you need: https://stackoverflow.com/questions/3242892/select-distinct-rows-from-datatable-in-linq – Mark Arend Apr 07 '20 at 23:12

2 Answers2

2

Simple

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;

namespace ConsoleApplication1
{
    class Program
    {
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("id", typeof(int));
            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Dept", typeof(string));

            dt.Rows.Add(1, "Test1", "Sample1");
            dt.Rows.Add(2, "Test2", "Sample2");
            dt.Rows.Add(3, "Test3", "Sample3");
            dt.Rows.Add(4, "Test4", "Sample4");  // Duplicate 
            dt.Rows.Add(5, "Test4", "Sample4");  // Duplicate 
            dt.Rows.Add(6, "Test4", "Sample4");  // Duplicate 
            dt.Rows.Add(7, "Test4", "Sample5");

            DataTable dt2 = dt.AsEnumerable()
                .OrderBy(x => x.Field<int>("id"))
                .GroupBy(x => new { name = x.Field<string>("Name"), dept = x.Field<string>("Dept") })
                .Select(x => x.First())
                .CopyToDataTable();

        }
    }
}
jdweng
  • 33,250
  • 2
  • 15
  • 20
2

Here is a function that I got from someone, somewhere:

Usage:

List<string> columnName = new List<string> { "ID", "coulmn1", "coulmn_2", "Another", "however_many_columns_you_want_really" };
dataGrid = RemoveDuplicatesFromDataTable(dataGrid, columnName);

Function:

static DataTable RemoveDuplicatesFromDataTable(DataTable table, List<string> keyColumns)
{
    Dictionary<string, string> uniquenessDict = new Dictionary<string, string>(table.Rows.Count);
    StringBuilder stringBuilder = null;
    int rowIndex = 0;
    DataRow row;
    DataRowCollection rows = table.Rows;
    while (rowIndex < rows.Count - 1)
    {
        row = rows[rowIndex];
        stringBuilder = new StringBuilder();
        foreach (string colname in keyColumns)
        {
            //stringBuilder.Append(((double)row[colname]));
            stringBuilder.Append(row[colname]);
        }
        if (uniquenessDict.ContainsKey(stringBuilder.ToString()))
        {
            rows.Remove(row);
        }
        else
        {
            uniquenessDict.Add(stringBuilder.ToString(), string.Empty);
            rowIndex++;
        }
    }

    return table;
}
DarkPh03n1X
  • 600
  • 1
  • 7
  • 17
  • 1
    Very good solution, I just add my two cents. `while (rowIndex < rows.Count - 1)` will return 2 lines with same values in the DT , while if you want to have only one (unique) row, correct code is `while (rowIndex < rows.Count )` – Lorenzo Bassetti Oct 29 '21 at 20:38