0

I have a data structure in my app that looks a bit like this (this is just a small example).

/// <summary>
/// A top-level category that owns a list of subcategories.
/// </summary>
public class Category
{
    /// <summary>Name of the category (the sample data uses "E-platform").</summary>
    public string Name { get; set; }
    /// <summary>
    /// Subcategories belonging to this category. There is no initializer,
    /// so this is null until a list is assigned.
    /// </summary>
    public List<SubCategory> subCategories { get; set; }
}

/// <summary>
/// One level in a category's chain of subcategories.
/// </summary>
public class SubCategory
{
    /// <summary>Position of this subcategory within its chain; the sample data numbers them from 1.</summary>
    public int order { get; set; }
    /// <summary>Name of the subcategory (the sample data uses e.g. "App", "Layout").</summary>
    public string Name { get; set; }
}

And with this structure I create a list like this:

class Program
{
    /// <summary>
    /// Creates a category whose subcategories carry the given names and are
    /// numbered 1, 2, 3, ... in the order the names are supplied.
    /// </summary>
    private static Category BuildCategory(string name, params string[] subCategoryNames)
    {
        var subs = new List<SubCategory>();
        for (int i = 0; i < subCategoryNames.Length; i++)
        {
            subs.Add(new SubCategory { Name = subCategoryNames[i], order = i + 1 });
        }

        return new Category { Name = name, subCategories = subs };
    }

    /// <summary>
    /// Builds the sample list: each "duplicate" instance is added twice (same
    /// reference), followed by a separate instance with a different chain length.
    /// </summary>
    static void Main(string[] args)
    {
        // Shared instances; each one appears twice in the list below.
        Category duplicateCategory = BuildCategory("E-platform", "App", "Layout", "Sidepanel");
        Category duplicateCategory2 = BuildCategory("E-platform", "Website", "Layout");

        var categories = new List<Category>();

        //APP
        categories.Add(duplicateCategory);
        categories.Add(duplicateCategory);
        categories.Add(BuildCategory("E-platform", "App", "Layout"));

        //WEBSITE
        categories.Add(duplicateCategory2);
        categories.Add(duplicateCategory2);
        categories.Add(BuildCategory("E-platform", "Website", "Layout", "Sidepanel"));
    }
}

Now I need to remove the duplicates from this list; this I have already achieved using LINQ's "Distinct".

An extra requirement however is that I need to get the longest chain of the same subcategories out of this list. So given the list above, I would like to ONLY return these 2 objects:

// Expected result 1 of 2: the longest "App" chain (App -> Layout -> Sidepanel).
new Category
        {
            Name = "E-platform",
            subCategories = new List<SubCategory> {
                    new SubCategory
                    {
                        Name = "App",
                        order = 1
                    },
                    new SubCategory
                    {
                        Name = "Layout",
                        order = 2
                    },
                    new SubCategory
                    {
                        Name = "Sidepanel",
                        order = 3
                    }
                }
        };

And

// Expected result 2 of 2: the longest "Website" chain (Website -> Layout -> Sidepanel).
new Category
        {
            Name = "E-platform",
            subCategories = new List<SubCategory> {
                    new SubCategory
                    {
                        Name = "Website",
                        order = 1
                    },
                    new SubCategory
                    {
                        Name = "Layout",
                        order = 2
                    },
                    new SubCategory
                    {
                        Name = "Sidepanel",
                        order = 3
                    }
                }
        }

So as you can see, it removed one of the two "E-platform -> 1. App -> 2. Layout -> 3. Sidepanel" entries from the list to avoid duplicates (this I already figured out).

It also removed "E-platform -> 1. App -> 2. Layout" because "E-platform -> 1. App -> 2. Layout -> 3. Sidepanel" contains the same chain plus one extra level (this is the part I can't figure out).

Do keep in mind that one chain can be more than one level deeper than another. I just want the deepest version of each chain, without throwing away other distinct categories.

Any help in getting this list would be very helpful! Let me know if you need more information.

DjKillerMemeStar
  • 425
  • 5
  • 18

2 Answers

1

It's not completely clear which rules you are trying to implement, but hopefully the following will help:

// Keeps, per category name, only those categories whose set of subcategory
// names is not contained in the set of another (already kept) category.
// Note: only the subcategory *names* are compared; their order is ignored.
var cleanCategoryList = categories
    // Distinct() relies on Category's Equals/GetHashCode; unless those are
    // overridden it only removes repeated references to the same instance.
    // Value-equal duplicates are still dropped by the subset check below,
    // because a set is a subset of an equal set.
    .Distinct()
    // I'm guessing you only want to compare subcategories if the top-category
    // Names are the same. So... group them:
    .GroupBy(c => c.Name)
    .SelectMany(group =>
    {
        var candidates = group
            // Pair each category with the set of names of its subcategories.
            .Select(c =>
            (
                Category: c,
                SubCategoryNames: c.subCategories
                    .Select(sc => sc.Name)
                    .ToHashSet()
            ))
            // Important: largest sets first - see the loop below.
            .OrderByDescending(candidate => candidate.SubCategoryNames.Count);

        var catsWithUniqSubCats = new List<(Category Category, HashSet<string> SubCategoryNames)>();
        foreach (var candidate in candidates)
        {
            // Because candidates arrive in descending set size, a candidate can
            // only be a subset of something that has already been picked, so
            // comparing against the picked list is sufficient.
            bool coveredByPicked = catsWithUniqSubCats.Any(
                picked => candidate.SubCategoryNames.IsSubsetOf(picked.SubCategoryNames));

            if (!coveredByPicked)
            {
                // Not a subset of any kept category's subcategories: keep it.
                catsWithUniqSubCats.Add(candidate);
            }
        }

        return catsWithUniqSubCats.Select(picked => picked.Category);
    });

See the .NET Fiddle to test it out.

Xerillio
  • 4,855
  • 1
  • 17
  • 28
0

When you use LINQ's Distinct with custom objects, you need to "teach" it how to compare these objects.

The way you achieve that is by overriding both Equals and GetHashCode(). Both are needed: Distinct, like any hash-based collection, first compares hash codes and only calls Equals when they match.

To calculate the hash you should use the Name and all subCategories objects. More info here: Good GetHashCode() override for List of Foo objects respecting the order

Example:

/// <summary>
/// A category with value equality: two categories are equal when they have
/// the same <see cref="Name"/> and the same subcategories (same order value
/// and name) in the same sequence.
/// </summary>
/// <remarks>
/// The hash code is derived from mutable properties, so do not change a
/// category while it is stored in a hash-based collection.
/// </remarks>
public class Category
{
    public string Name { get; set; }
    public List<SubCategory> subCategories { get; set; }

    /// <summary>
    /// Returns true when <paramref name="obj"/> is a <see cref="Category"/>
    /// with the same name and an element-wise identical subcategory list.
    /// A null list is treated the same as an empty list.
    /// </summary>
    public override bool Equals(object obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        Category other = obj as Category;
        if (other == null || other.Name != Name)
        {
            return false;
        }

        return SubCategoriesMatch(subCategories, other.subCategories);
    }

    /// <summary>
    /// Combines the hash of the name with the order and name of every
    /// subcategory, in list order, so that equal categories hash equally.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: overflow while mixing hash values is expected and harmless.
        unchecked
        {
            int hash = 17;
            hash = hash * 31 + (Name == null ? 0 : Name.GetHashCode());

            if (subCategories != null)
            {
                foreach (SubCategory sub in subCategories)
                {
                    hash = hash * 31 + (sub == null ? 0 : sub.order);
                    hash = hash * 31 + (sub == null || sub.Name == null ? 0 : sub.Name.GetHashCode());
                }
            }

            return hash;
        }
    }

    // Compares two subcategory lists position by position on order and Name.
    private static bool SubCategoriesMatch(List<SubCategory> left, List<SubCategory> right)
    {
        int leftCount = left == null ? 0 : left.Count;
        int rightCount = right == null ? 0 : right.Count;
        if (leftCount != rightCount)
        {
            return false;
        }

        for (int i = 0; i < leftCount; i++)
        {
            SubCategory a = left[i];
            SubCategory b = right[i];

            // Same instance (or both null) - nothing more to compare.
            if (ReferenceEquals(a, b))
            {
                continue;
            }

            if (a == null || b == null || a.order != b.order || a.Name != b.Name)
            {
                return false;
            }
        }

        return true;
    }
}
LucaSC
  • 722
  • 1
  • 10
  • 19