So, the HTML data I'm looking at is:
<A HREF="/data/client/Action.log">Action.log</A><br> 6/8/2015 3:45 PM
From this I need to extract either instance of Action.log.
My problem is that I've been through a ton of regex tutorials and I still can't come up with a pattern to extract it. I guess I'm lacking some fundamental understanding of regex, but any help would be appreciated.
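To make the goal concrete, something along these lines is roughly what I was aiming for. This is only a sketch; the pattern and the variable names are illustrative, not code I actually have working:

using System;
using System.Text.RegularExpressions;

class Example
{
    static void Main()
    {
        // Illustrative sample line from the directory listing.
        string listingLine =
            "<A HREF=\"/data/client/Action.log\">Action.log</A><br> 6/8/2015 3:45 PM";

        // Capture the link text between the <A HREF="..."> tag and </A>.
        Match match = Regex.Match(
            listingLine,
            @"<A\s+HREF=""[^""]*"">([^<]+)</A>",
            RegexOptions.IgnoreCase);

        if (match.Success)
            Console.WriteLine(match.Groups[1].Value); // Prints: Action.log
    }
}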
Edit:
internal string[] ParseFolderIndex_Alpha(string url, WebDirectory directory)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 3 * 60 * 1000;
        request.KeepAlive = true;
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        if (response.StatusCode == HttpStatusCode.OK)
        {
            List<string> fileLocations = new List<string>(); string line;
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                while ((line = reader.ReadLine()) != null)
                {
                    int index = line.IndexOf("<a href=");
                    if (index >= 0)
                    {
                        string[] segments = line.Substring(index).Split('\"');
                        // Can parse file size here: add TODO
                        if (!segments[1].Contains("/"))
                        {
                            fileLocations.Add(segments[1]);
                            UI.UpdatePatchNotes("Web File Found: " + segments[1]);
                            UI.UpdateProgressBar();
                        }
                        else
                        {
                            if (segments[1] != @"../")
                            {
                                directory.SubDirectories.Add(new WebDirectory(url + segments[1], this));
                                UI.UpdatePatchNotes("Web Directory Found: " + segments[1].Replace("/", string.Empty));
                            }
                        }
                    }
                    else if (line.Contains("</pre")) break;
                }
            }
            response.Dispose(); // After ((line = reader.ReadLine()) != null)
            return fileLocations.ToArray<string>();
        }
        else return new string[0]; // !(HttpStatusCode.OK)
    }
    catch (Exception e)
    {
        LogHandler.LogErrors(e.ToString(), this);
        LogHandler.LogErrors(url, this);
        return null;
    }
}
That's what I was doing; the problem is I changed servers, and the HTML that IIS displays is different, so I have to write new logic.
Edit / Conclusion:
First of all, I'm sorry I even mentioned regex :P Secondly, each platform will have to be handled individually depending on the environment.
This is how I'm currently gathering the file names.
internal string[] ParseFolderIndex(string url, WebDirectory directory)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 3 * 60 * 1000;
        request.KeepAlive = true;
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        bool endMet = false;
        if (response.StatusCode == HttpStatusCode.OK)
        {
            List<string> fileLocations = new List<string>(); string line;
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                while (!endMet)
                {
                    line = reader.ReadLine();
                    if (line == null) break; // End of stream: stop even if </html> never appears on an </A> line.
                    if (line != "" && line.IndexOf("</A>") >= 0)
                    {
                        if (line.Contains("</html>")) endMet = true;
                        string[] segments = line.Replace("\\", "").Split('\"');
                        List<string> paths = new List<string>();
                        List<string> files = new List<string>();
                        for (int i = 0; i < segments.Length; i++)
                        {
                            if (!segments[i].Contains('<'))
                                paths.Add(segments[i]);
                        }
                        paths.RemoveAt(0);
                        foreach (String s in paths)
                        {
                            string[] secondarySegments = s.Split('/');
                            if (s.Contains(".") || s.Contains("Verinfo"))
                                files.Add(secondarySegments[secondarySegments.Length - 1]);
                            else
                            {
                                directory.SubDirectories.Add(new WebDirectory
                                    (url + "/" + secondarySegments[secondarySegments.Length - 2], this));
                                UI.UpdatePatchNotes("Web Directory Found: " + secondarySegments[secondarySegments.Length - 2]);
                            }
                        }
                        foreach (String s in files)
                        {
                            if (!String.IsNullOrEmpty(s) && !s.Contains('%'))
                            {
                                fileLocations.Add(s);
                                UI.UpdatePatchNotes("Web File Found: " + s);
                                UI.UpdateProgressBar();
                            }
                        }
                        if (line.Contains("</pre")) break;
                    }
                }
            }
            response.Dispose(); // Dispose after the read loop completes
            return fileLocations.ToArray<string>();
        }
        else return new string[0]; // !(HttpStatusCode.OK)
    }
    catch (Exception e)
    {
        LogHandler.LogErrors(e.ToString(), this);
        LogHandler.LogErrors(url, this);
        return null;
    }
}
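For anyone curious how this gets used, the call site looks roughly like this. It's only a sketch: the URL is made up, and the WebDirectory constructor arguments are assumed from how it's constructed above:

// Rough usage sketch from inside the same class (the URL is illustrative,
// and WebDirectory(string url, object owner) is assumed from the
// constructor calls in ParseFolderIndex).
WebDirectory root = new WebDirectory("http://myserver/data/client/", this);
string[] files = ParseFolderIndex("http://myserver/data/client/", root);

if (files != null) // null means an exception occurred and was already logged
{
    foreach (string file in files)
        UI.UpdatePatchNotes("Queued: " + file);
}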