Is there anyway I can optimize this function to take less times for strings with half a million characters?
for (int i = start; i <= end; i++) {
doc.body.push_back(html[i]);
if (html[i] == '<') {
if (html[i + 1] != '/') {
Tag newTag;
int start1 = html[i];
int end1;
for (int y = i; y <= end; y++) {
if (html[y] == '>') {
end1 = y;
break;
}
}
for (int y = start1 + 1; y <= end1; y++) {
if (html[y] == ' ' || html[y] == '>') {
break;
}
newTag.tagType.push_back(html[y]);
}
for (int y = start1; y <= end1; y++) {
newTag.openTag.push_back(html[y]);
}
if (newTag.tagType.find("area") != std::string::npos || newTag.tagType.find("base") != std::string::npos || newTag.tagType.find("br") != std::string::npos || newTag.tagType.find("col") != std::string::npos || newTag.tagType.find("command") != std::string::npos || newTag.tagType.find("embed") != std::string::npos || newTag.tagType.find("hr") != std::string::npos || newTag.tagType.find("img") != std::string::npos || newTag.tagType.find("input") != std::string::npos || newTag.tagType.find("keygen") != std::string::npos || newTag.tagType.find("link") != std::string::npos || newTag.tagType.find("meta") != std::string::npos || newTag.tagType.find("param") != std::string::npos || newTag.tagType.find("source") != std::string::npos || newTag.tagType.find("track") != std::string::npos || newTag.tagType.find("wbr") != std::string::npos) {
newTag.closeTag = "Null";
}
if (newTag.openTag.find("class=") != std::string::npos) {
int start1 = newTag.openTag.find("\"", newTag.openTag.find("class=")) + 1;
int end1 = newTag.openTag.find("\"", start1);
for (int y = start1; y < end1; y++) {
if (html[y] != ' ') {
newTag.tagClass.back().push_back(html[y]);
}
else {
newTag.tagClass.push_back("");
}
}
}
if (newTag.openTag.find("id=") != std::string::npos) {
int start1 = newTag.openTag.find("\"", newTag.openTag.find("id=")) + 1;
int end1 = newTag.openTag.find("\"", start1);
for (int y = start1; y < end1; y++) {
if (html[y] != ' ') {
newTag.tagID.back().push_back(html[y]);
}
else {
newTag.tagID.push_back("");
}
}
}
page.tags.push_back(newTag);
}
else {
int end1;
for (int y = i; y <= stringSize; y++) {
if (html[y] == '>') {
end1 = y;
break;
}
}
//gets everything within the closing tag
std::string storeClose;
for (int y = i; y <= end1; y++) {
storeClose.push_back(html[y]);
}
for (int y = page.tags.size() - 1; y >= 0; y--) {
if (storeClose.find(page.tags[y].tagType) != std::string::npos) {
page.tags[y].closeTag = storeClose;
}
}
}
}
}
I timed how long it took with the chrono library and it took 16 minutes for a string with the length of 300000 characters! This is supposed to be parsing a html document downloaded from the web and its mostly functional. For shorter pages its almost instant but as soon as I reach the higher number the time it takes its exponentially higher!