1

I'm trying to get all the files located in a URL. When you access the URL in the browser, all files are being listed so I thought I could also print these files in my console program.

Apparently, my code below doesn't work and throws a System.ArgumentException: "URI formats are not supported." Is it actually possible to achieve this in a console application using C#?

class Program
{
    // Console entry point: asks Directory.GetFiles for every entry matching "*.*"
    // at the given location, echoes each one, then waits for a key press.
    // NOTE: the location passed here is an HTTP URL, not a file-system path; this
    // is the call reported to fail with "URI formats are not supported."
    public static void Main(string[] args)
    {
        string[] entries = Directory.GetFiles(@"http://mywebsite.files/", "*.*");
        for (int i = 0; i < entries.Length; i++)
        {
            Console.WriteLine(entries[i]);
        }

        Console.Write("Press any key to continue . . . ");
        Console.ReadKey(true);
    }
}
yonan2236
  • 13,371
  • 33
  • 95
  • 141
  • You need to parse the HTML returned by your web server (you can use [HTML Agility Pack](http://htmlagilitypack.codeplex.com/) for that) – Nasreddine Jun 11 '13 at 10:13

4 Answers4

2

You cannot use the Directory class to list the files of a web directory. In addition, the server has to be configured to allow directory/file listing.

What you should do is make a web request that returns the file list.

Have a look here for more info

Community
  • 1
  • 1
Massimiliano Peluso
  • 26,379
  • 6
  • 61
  • 70
0

Maybe you should read up on the FTP protocol. The way you are trying to solve your problem will most probably not work.

b_meyer
  • 594
  • 2
  • 7
0

If you have FTP access you can use the FtpWebRequest class. Here's an example from http://msdn.microsoft.com/en-us/library/ms229716.aspx

public class WebRequestGetExample
{
    /// <summary>
    /// Requests a detailed directory listing from an FTP server and writes the
    /// listing, followed by the server's status description, to the console.
    /// </summary>
    public static void Main ()
    {
        // Get the object used to communicate with the server.
        FtpWebRequest request = (FtpWebRequest)WebRequest.Create("ftp://www.contoso.com/");
        request.Method = WebRequestMethods.Ftp.ListDirectoryDetails;

        // This example assumes the FTP site uses anonymous logon.
        request.Credentials = new NetworkCredential ("anonymous","janeDoe@contoso.com");

        // The using blocks release the response, its stream and the reader even
        // when reading or writing throws, so the connection is never leaked.
        using (FtpWebResponse response = (FtpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream))
        {
            Console.WriteLine(reader.ReadToEnd());

            Console.WriteLine("Directory List Complete, status {0}", response.StatusDescription);
        }
    }
}
nerdybeardo
  • 4,655
  • 23
  • 32
0

I have written some code that can get all the path info under an IIS HTTP site, provided it allows directory listing. You can use it like this:

// Collect the whole tree below the site root into a list, then print it.
var pathInfos = new List<PathInfo>();
HttpHelper.GetAllFilePathAndSubDirectory("http://localhost:33333/", pathInfos);
HttpHelper.PrintAllPathInfo(pathInfos);

The helper code (you can customize the regexes yourself, or change it to use an HTML parser):

/// <summary>
/// Helpers for crawling an HTTP directory listing page and collecting the
/// file and sub-directory links it contains.
/// </summary>
public static class HttpHelper
{
    // Built once and reused: the patterns are the same for every page crawled,
    // so there is no reason to re-create them on each recursive call.
    private static readonly Regex FileLinkRegex =
        new Regex("[0-9] <a href=\"(http:)?(?<file>.*?)\"", RegexOptions.IgnoreCase);
    private static readonly Regex DirLinkRegex =
        new Regex("dir.*?<a href=\"(http:)?(?<dir>.*?)\"", RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns its body as text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The full response body.</returns>
    public static string ReadHtmlContentFromUrl(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Recursively collects the files and sub-directories linked from the listing
    /// page at <paramref name="baseUrl"/>.
    /// </summary>
    /// <param name="baseUrl">Absolute URL of the directory listing to crawl.</param>
    /// <param name="pathInfos">
    /// List that receives one entry per file and per sub-directory found; each
    /// sub-directory's own content is stored in its <c>Childs</c> list.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baseUrl"/> or <paramref name="pathInfos"/> is null.
    /// </exception>
    public static void GetAllFilePathAndSubDirectory(string baseUrl, List<PathInfo> pathInfos)
    {
        // Fail fast, before any network request is made.
        if (baseUrl == null)
        {
            throw new ArgumentNullException("baseUrl");
        }
        if (pathInfos == null)
        {
            throw new ArgumentNullException("pathInfos");
        }

        CollectPathInfos(baseUrl, pathInfos, new HashSet<string>());
    }

    // Crawls one listing page; visitedUrls remembers every directory already
    // crawled so a listing that links back to one of them cannot recurse forever.
    private static void CollectPathInfos(string baseUrl, List<PathInfo> pathInfos, HashSet<string> visitedUrls)
    {
        Uri baseUri = new Uri(baseUrl.TrimEnd('/'));
        if (!visitedUrls.Add(baseUri.AbsoluteUri))
        {
            // Already crawled: the caller still lists the directory itself,
            // but its content is not fetched a second time.
            return;
        }

        // Captured hrefs are appended to scheme + host (+ port) of the page.
        string rootUrl = baseUri.GetLeftPart(UriPartial.Authority);
        string html = ReadHtmlContentFromUrl(baseUrl);

        // Files. Regex.Matches yields successful matches only, so no extra checks.
        foreach (Match fileMatch in FileLinkRegex.Matches(html))
        {
            pathInfos.Add(new PathInfo(rootUrl + fileMatch.Groups["file"].Value, false));
        }

        // Directories: descend first, then attach the populated node.
        foreach (Match dirMatch in DirLinkRegex.Matches(html))
        {
            PathInfo dirInfo = new PathInfo(rootUrl + dirMatch.Groups["dir"].Value, true);
            CollectPathInfos(dirInfo.AbsoluteUrlStr, dirInfo.Childs, visitedUrls);
            pathInfos.Add(dirInfo);
        }
    }

    /// <summary>
    /// Writes the absolute URL of every entry to the console, depth-first, each
    /// entry followed by its children.
    /// </summary>
    /// <param name="pathInfos">Entries to print.</param>
    public static void PrintAllPathInfo(List<PathInfo> pathInfos)
    {
        foreach (PathInfo entry in pathInfos)
        {
            Console.WriteLine(entry.AbsoluteUrlStr);
            PrintAllPathInfo(entry.Childs);
        }
    }

}



/// <summary>
/// One node of a crawled tree: an absolute URL, a flag telling whether it is a
/// directory, and the list of entries found beneath it.
/// </summary>
public class PathInfo
{
    /// <summary>Creates a node for the given absolute URL with an empty child list.</summary>
    /// <param name="absoluteUri">Absolute URL of the entry, parsed into a <see cref="Uri"/>.</param>
    /// <param name="isDir">True when the entry is a directory.</param>
    public PathInfo(string absoluteUri, bool isDir)
    {
        this.Childs = new List<PathInfo>();
        this.IsDir = isDir;
        this.AbsoluteUrl = new Uri(absoluteUri);
    }

    /// <summary>True when the entry is a directory.</summary>
    public bool IsDir { get; set; }

    /// <summary>Entries located under this one.</summary>
    public List<PathInfo> Childs { get; set; }

    /// <summary>The entry's absolute URL.</summary>
    public Uri AbsoluteUrl { get; set; }

    /// <summary>String form of <see cref="AbsoluteUrl"/>.</summary>
    public string AbsoluteUrlStr
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.ToString();
        }
    }

    /// <summary>Scheme and authority part of <see cref="AbsoluteUrl"/>.</summary>
    public string RootUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.GetLeftPart(UriPartial.Authority);
        }
    }

    /// <summary>Path and query part of <see cref="AbsoluteUrl"/>.</summary>
    public string RelativeUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.PathAndQuery;
        }
    }

    /// <summary>Query part of <see cref="AbsoluteUrl"/>.</summary>
    public string Query
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.Query;
        }
    }

    /// <summary>Returns a one-line summary of the node for diagnostics.</summary>
    public override string ToString()
    {
        object[] parts = { this.RelativeUrl, this.IsDir, this.Childs.Count, this.AbsoluteUrlStr };
        return String.Format("{0} IsDir {1} ChildCount {2} AbsUrl {3}", parts);
    }
}
yu yang Jian
  • 6,680
  • 7
  • 55
  • 80