I'm currently learning C# and I've been working on an XML parser for the last two days. It works fine; my issue is the amount of time it takes to parse more than 10k pages. This is my code:
/// <summary>
/// Splits the inclusive id range [id_min, id_max] into contiguous chunks, parses each chunk
/// on its own thread with <c>ParseWH.parse</c>, and writes the concatenated output to
/// <c>.\result.txt</c> in ascending id order. Beeps on the console when finished.
/// </summary>
/// <param name="id_min">First id to parse (inclusive).</param>
/// <param name="id_max">Last id to parse (inclusive).</param>
/// <param name="numberofthreads">Maximum number of worker threads; must be at least 1.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="numberofthreads"/> is zero or negative.
/// </exception>
public static void startParse(int id_min, int id_max, int numberofthreads)
{
    if (numberofthreads <= 0)
    {
        throw new ArgumentOutOfRangeException("numberofthreads", "At least one worker thread is required.");
    }

    // NOTE(review): if this runs on .NET Framework, concurrent HTTP requests to the same host
    // are capped by System.Net.ServicePointManager.DefaultConnectionLimit (documented default
    // of 2 for client applications). Extra threads then just queue behind that cap; raise the
    // limit at application start-up if real parallel downloads are wanted. Confirm the runtime.

    // Computed in long so that extreme bounds cannot overflow int.
    long total = (long)id_max - id_min + 1;

    // Never start more workers than there are ids; an empty range starts none.
    int workerCount = total > 0 ? (int)Math.Min(numberofthreads, total) : 0;
    long baseSize = workerCount > 0 ? total / workerCount : 0;
    long remainder = workerCount > 0 ? total % workerCount : 0;

    // One slot per worker keeps the output in id order regardless of which thread finishes
    // first. Each thread writes only its own slot, and Join() below publishes the writes.
    string[] results = new string[workerCount];
    List<Thread> workerThreads = new List<Thread>(workerCount);

    long next = id_min;
    for (int i = 0; i < workerCount; i++)
    {
        // The first `remainder` workers take one extra id so the chunks cover the range exactly.
        long size = baseSize + (i < remainder ? 1 : 0);

        // Copy loop state into locals so each closure captures its own values.
        int index = i;
        int rangeStart = (int)next;
        int rangeEnd = (int)(next + size - 1);
        next += size;

        Thread worker = new Thread(() =>
        {
            Console.WriteLine("i = " + index);
            Console.WriteLine("start =" + rangeStart);
            Console.WriteLine("end =" + rangeEnd + "\r\n");
            results[index] = new ParseWH().parse(rangeStart, rangeEnd);
        });
        workerThreads.Add(worker);
        worker.Start();
    }

    foreach (Thread worker in workerThreads)
    {
        worker.Join();
    }

    File.WriteAllText(".\\result.txt", String.Join("", results));
    Console.Beep();
}
What I'm actually doing is splitting the range of elements that need to be parsed across several threads, so that each thread handles X elements.
Parsing 100 elements takes approximately 20 seconds; however, it took me 17 minutes to parse 10 000 elements.
What I need is for each thread to work simultaneously on 100 of those 10 000 elements so that the whole job can be done in about 20 seconds. Is there a solution for that?
Parse code:
/// <summary>
/// Downloads the Wowhead XML page of every item id in the inclusive range
/// [id_min, id_max] and builds one SQL <c>INSERT INTO item_wh</c> statement per item found.
/// Ids for which the response contains a <c>/wowhead/error</c> node are logged to the
/// console and skipped.
/// </summary>
/// <param name="id_min">First item id to fetch (inclusive).</param>
/// <param name="id_max">Last item id to fetch (inclusive).</param>
/// <returns>The generated statements, each terminated by <c>;</c> and a CRLF line break.</returns>
/// <exception cref="WebException">The download of an item page failed.</exception>
/// <exception cref="XmlException">A response was not well-formed XML.</exception>
public string parse(int id_min, int id_max)
{
    // StringBuilder avoids re-copying the whole accumulated script on every append.
    StringBuilder sql = new StringBuilder();
    XmlDocument xml = new XmlDocument();

    // One client serves all sequential requests of this call and is disposed at the end.
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.UTF8;

        for (int id = id_min; id <= id_max; id++)
        {
            string address = "http://fr.wowhead.com/item=" + id + "?xml";
            xml.LoadXml(client.DownloadString(new Uri(address)));

            if (xml.SelectSingleNode("/wowhead/error") != null)
            {
                Console.WriteLine("error " + id);
                continue;
            }

            // The statement prefix is written per node, so a document without a <wowhead>
            // root yields nothing instead of an unterminated INSERT.
            foreach (XmlNode node in xml.SelectNodes("/wowhead"))
            {
                // NOTE(review): the indexers return null for a missing element/attribute, so a
                // page lacking any of the fields below raises NullReferenceException.
                XmlElement item = node["item"];

                sql.Append("INSERT INTO item_wh (entry, class, subclass, displayId, quality, name, level) VALUES (");
                // entry
                sql.Append(item.Attributes["id"].InnerText).Append(", ");
                // class
                sql.Append(item["class"].Attributes["id"].InnerText).Append(", ");
                // subclass
                sql.Append(item["subclass"].Attributes["id"].InnerText).Append(", ");
                // displayId
                sql.Append(item["icon"].Attributes["displayId"].InnerText).Append(", ");
                // quality
                sql.Append(item["quality"].Attributes["id"].InnerText).Append(", \"");
                // name: comes from remote XML, so embedded double quotes are doubled to keep the
                // string literal closed. NOTE(review): assumes a dialect that accepts "" inside a
                // double-quoted string (e.g. MySQL); backslashes are not escaped - confirm target DB.
                sql.Append(item["name"].InnerText.Replace("\"", "\"\"")).Append("\", ");
                // level
                sql.Append(item["level"].InnerText).Append(");");
                // line break between statements
                sql.Append("\r\n");
            }
        }
    }

    return sql.ToString();
}