-1

I have several files, each consisting of folder paths that look like this:

//Fs1/userA/
//Fs1/userA/documents/
//Fs1/userB/
//Fs2/userC/documents

I want to analyse which folders have x direct subfolders. E.g. userD has 120 subfolders directly under it, not counting deeper folders.

I have a huge collection of such files, some of them containing a few hundred thousand rows.

I've thought of using a radix tree, but it doesn't perform well if I have to traverse down all of a node's children to find the number of direct subfolders.

Also considered putting it into a database server to query, which might be easier to implement, but I wanted to try using .Net for analysis without sql servers. Any help?

Bakudan
  • 19,134
  • 9
  • 53
  • 73
Lee Gary
  • 2,357
  • 2
  • 22
  • 38
  • You can use an xml file as a database. I did a project last week that may help. The application reads a folder (and subfolders), creates an xml, and then puts the xml into a treeview for display. See: https://stackoverflow.com/questions/44133736/how-to-create-and-save-an-xml-file-containing-complete-hierarchy-of-files-and-fo/44136611#44136611 – jdweng Jun 06 '17 at 12:45
  • That will not work in my case as I'm exporting file servers backed by SAN with 50+TB of content – Lee Gary Jun 06 '17 at 23:15
  • From reading your post, it sounded like you didn't want a database and were looking for an alternative. But now that you say you have 50+TB of data, there isn't any real alternative. If you do store it in a database, you should use stored procedures for the queries and use .NET only for the final analysis. – jdweng Jun 07 '17 at 00:00
  • @jdweng the 50TB is actually the total file size; the CSVs are only megabytes. I've tried using a Patricia tree, and it loads really fast, like 2 million items in seconds. Using a dictionary takes a couple of minutes for a few hundred thousand rows, and doing the analysis is a pain – Lee Gary Jun 07 '17 at 00:15
  • I update my code to now include a treeview. The code checks folder and subfolder for all files and when size or number of files exceed a limit puts the results into the treeview. – jdweng Jun 07 '17 at 14:08

1 Answers1

0

I took my xml project and removed xml. Now I just get totals. Maybe this will work for you. The code checks folder and subfolder for all files and when size or number of files exceed a limit puts the results into the treeview.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace CheckFileSizes
{
    /// <summary>
    /// Form that scans a folder tree, totals file counts and sizes per folder, and
    /// lists in a tree view every folder whose totals reach the configured minimums.
    /// The controls referenced here (text boxes, tree view, folder dialog) are
    /// presumably declared in the designer half of this partial class.
    /// </summary>
    public partial class Form1 : Form
    {
        // Default thresholds shown in the two "minimum" text boxes at start-up.
        const int FILE_SIZE_CHECK = 10000000;
        const int FILE_COUNT_CHECK = 1000;

        // Thresholds for the scan in progress. They are parsed once per scan in
        // buttonMakeTree_Click instead of once per visited folder.
        private long minNumberOfFiles = FILE_COUNT_CHECK;
        private double minTotalFolderSize = FILE_SIZE_CHECK;

        /// <summary>
        /// Initializes the form and pre-fills the start folder and both thresholds.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            folderBrowserDialog1.SelectedPath = @"c:\temp";
            textBoxTotalFolderSizeMin.Text = FILE_SIZE_CHECK.ToString();
            textBoxNumberOfFilesMin.Text = FILE_COUNT_CHECK.ToString();
        }

        /// <summary>
        /// Lets the user pick the folder to scan; the folder text box is only
        /// updated when the dialog is confirmed.
        /// </summary>
        private void buttonBrowseForFolder_Click(object sender, EventArgs e)
        {
            if (folderBrowserDialog1.ShowDialog() == DialogResult.OK)
            {
                textBoxFolderName.Text = folderBrowserDialog1.SelectedPath;
            }
        }

        /// <summary>
        /// Scans the folder named in the folder text box and shows the result.
        /// Does nothing when the folder does not exist; reports invalid threshold
        /// input instead of throwing in the middle of the scan.
        /// </summary>
        private void buttonMakeTree_Click(object sender, EventArgs e)
        {
            if (!Directory.Exists(textBoxFolderName.Text))
            {
                return;
            }

            if (!long.TryParse(textBoxNumberOfFilesMin.Text, out minNumberOfFiles) ||
                !double.TryParse(textBoxTotalFolderSizeMin.Text, out minTotalFolderSize))
            {
                MessageBox.Show("Minimum number of files and minimum folder size must be numbers.");
                return;
            }

            MyDirectory root = MyDirectory.root;

            TreeNode rootNode = MakeTreeRecursive(textBoxFolderName.Text, root);
            textBoxTotalNumberOfFiles.Text = root.totalNumberOfFiles.ToString();
            textBoxTotalSize.Text = ((long)root.totalSize).ToString();

            if (rootNode == null)
            {
                // Nothing reached a threshold: still show one summary line for the root.
                string rootNodeText = string.Format("Folder: '{0}', Number of Files: '{1}', File Size: '{2}'",
                    textBoxFolderName.Text,
                    textBoxTotalNumberOfFiles.Text,
                    textBoxTotalSize.Text
                );
                rootNode = new TreeNode(rootNodeText);
            }

            // Replace the previous result rather than appending another tree to it.
            treeView1.BeginUpdate();
            treeView1.Nodes.Clear();
            treeView1.Nodes.Add(rootNode);
            treeView1.ExpandAll();
            treeView1.EndUpdate();
        }

        /// <summary>
        /// Fills <paramref name="myDirectory"/> with the file count and size of
        /// <paramref name="folder"/> (own and cumulative over all subfolders) and
        /// builds the matching tree node.
        /// </summary>
        /// <param name="folder">Path of the folder to scan.</param>
        /// <param name="myDirectory">Record to populate; any earlier content is reset.</param>
        /// <returns>
        /// A node for this folder when it, or any folder below it, reaches one of the
        /// thresholds; otherwise <c>null</c>.
        /// </returns>
        private TreeNode MakeTreeRecursive(string folder, MyDirectory myDirectory)
        {
            TreeNode node = null;

            DirectoryInfo dInfo = new DirectoryInfo(folder);

            myDirectory.name = dInfo.Name;
            myDirectory.numberOfFiles = 0;
            myDirectory.size = 0;
            // The static root record is reused between scans, so drop stale children.
            myDirectory.children = null;

            foreach (FileInfo fInfo in GetFilesOrEmpty(dInfo))
            {
                long fileSize;
                if (TryGetFileSize(fInfo, out fileSize))
                {
                    myDirectory.size += fileSize;
                    myDirectory.numberOfFiles += 1;
                }
            }
            myDirectory.totalSize = myDirectory.size;
            myDirectory.totalNumberOfFiles = myDirectory.numberOfFiles;

            foreach (string subFolder in GetSubFoldersOrEmpty(folder))
            {
                if (myDirectory.children == null)
                {
                    myDirectory.children = new List<MyDirectory>();
                }
                MyDirectory childDirectory = new MyDirectory();
                myDirectory.children.Add(childDirectory);

                TreeNode childNode = MakeTreeRecursive(subFolder, childDirectory);
                if (childNode != null)
                {
                    if (node == null)
                    {
                        node = new TreeNode();
                    }
                    node.Nodes.Add(childNode);
                }

                myDirectory.totalSize += childDirectory.totalSize;
                myDirectory.totalNumberOfFiles += childDirectory.totalNumberOfFiles;
            }

            if (myDirectory.totalNumberOfFiles >= minNumberOfFiles || myDirectory.totalSize >= minTotalFolderSize)
            {
                if (node == null)
                {
                    node = new TreeNode();
                }
                string childNodeText = string.Format("Folder: '{0}', Number of Files: '{1}', File Size: '{2}'",
                    folder,
                    myDirectory.totalNumberOfFiles,
                    myDirectory.totalSize
                );
                node.Text = childNodeText;
            }
            return node;
        }

        // Lists the files directly inside a folder; an unreadable folder counts as empty.
        private static FileInfo[] GetFilesOrEmpty(DirectoryInfo directory)
        {
            try
            {
                return directory.GetFiles();
            }
            catch (UnauthorizedAccessException)
            {
                Console.WriteLine("Error : Cannot Access Folder '{0}'", directory.FullName);
            }
            catch (IOException)
            {
                Console.WriteLine("Error : Cannot Access Folder '{0}'", directory.FullName);
            }
            return new FileInfo[0];
        }

        // Lists the direct subfolders of a folder; an unreadable folder has none.
        private static string[] GetSubFoldersOrEmpty(string folder)
        {
            try
            {
                return Directory.GetDirectories(folder);
            }
            catch (UnauthorizedAccessException)
            {
                Console.WriteLine("Error : Cannot Access Folder '{0}'", folder);
            }
            catch (IOException)
            {
                Console.WriteLine("Error : Cannot Access Folder '{0}'", folder);
            }
            return new string[0];
        }

        // Reads a file's length; returns false (and logs) when the file cannot be read.
        private static bool TryGetFileSize(FileInfo file, out long size)
        {
            try
            {
                size = file.Length;
                return true;
            }
            catch (IOException)
            {
                Console.WriteLine("Error : CAnnot Access File '{0}'", file.Name);
            }
            catch (UnauthorizedAccessException)
            {
                Console.WriteLine("Error : CAnnot Access File '{0}'", file.Name);
            }
            size = 0;
            return false;
        }

    }
    /// <summary>
    /// Per-folder totals gathered during a scan: file count and byte size for the
    /// folder itself and cumulatively for everything beneath it.
    /// </summary>
    public class MyDirectory
    {
        /// <summary>Shared record used for the top-level folder of a scan.</summary>
        public static MyDirectory root = new MyDirectory();

        /// <summary>Records for the direct subfolders; stays null until one is added.</summary>
        public List<MyDirectory> children { get; set; }

        /// <summary>Folder name (last path component).</summary>
        public string name { get; set; }

        /// <summary>Number of files in this folder plus all of its subfolders.</summary>
        public long totalNumberOfFiles = 0;

        /// <summary>Number of files directly inside this folder.</summary>
        public int numberOfFiles = 0;

        // Sizes are double rather than float: a float carries only 24 significant
        // bits, so a byte total above 16 MiB can no longer absorb small files.
        // A double stays exact for whole numbers up to 2^53.

        /// <summary>Combined size in bytes of the files directly inside this folder.</summary>
        public double size = 0.0;

        /// <summary>Combined size in bytes of this folder plus all of its subfolders.</summary>
        public double totalSize = 0.0;
    }
}
jdweng
  • 33,250
  • 2
  • 15
  • 20