I have been working with some code I found here to help me with converting relative URLs to absolute URLs in HTML page source.
I want to work with RegEx, and not HTML Agility pack for this particular problem.
I've modified the code slightly, which is working well except that relative urls with a preceeding "/" are replaced, but it seems, so far as I can tell, relative URL's that don't include a preceeding slash are not.
I'm pretty sure the issue is in the intitial regEx string, as no replaces are attempted. This is beyond my regular expression knowledge.
Can anyone help me identify what is causing this not to match the types of URL I have described?
const string htmlPattern = "(?<attrib>\\shref|\\ssrc|\\sbackground)\\s*?=\\s*?"
+ "(?<delim1>[\"'\\\\]{0,2})(?!#|http|ftp|mailto|javascript)"
+ "/(?<url>[^\"'>\\\\]+)(?<delim2>[\"'\\\\]{0,2})";
// Wrapper Code
public static string GetRelativePathReplacedHtml(string source, Uri uri)
{
source = source.HtmlAppRelativeUrlsToAbsoluteUrls( uri );
return source;
}
// RegEx Match Code
public static string HtmlAppRelativeUrlsToAbsoluteUrls(this string html, Uri rootUrl)
{
if (string.IsNullOrEmpty(html))
return html;
const string htmlPattern = "(?<attrib>\\shref|\\ssrc|\\sbackground)\\s*?=\\s*?"
+ "(?<delim1>[\"'\\\\]{0,2})(?!#|http|ftp|mailto|javascript)"
+ "/(?<url>[^\"'>\\\\]+)(?<delim2>[\"'\\\\]{0,2})";
var htmlRegex = new Regex(htmlPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
html = htmlRegex.Replace(html, m => htmlRegex.Replace(m.Value, "${attrib}=${delim1}" + ("~/" + m.Groups["url"].Value).ToAbsoluteUrl(rootUrl) + "${delim2}"));
const string cssPattern = "@import\\s+?(url)*['\"(]{1,2}"
+ "(?!http)\\s*/(?<url>[^\"')]+)['\")]{1,2}";
var cssRegex = new Regex(cssPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline);
html = cssRegex.Replace(html, m => cssRegex.Replace(m.Value, "@import url(" + ("~/" + m.Groups["url"].Value).ToAbsoluteUrl(rootUrl) + ")"));
return html;
}
// Url Conversion
public static string ToAbsoluteUrl(this string relativeUrl, Uri rootUrl)
{
if (string.IsNullOrEmpty(relativeUrl))
return relativeUrl;
if (relativeUrl.StartsWith("/"))
relativeUrl = relativeUrl.Insert(0, "~");
if (!relativeUrl.StartsWith("~/"))
relativeUrl = relativeUrl.Insert(0, "~/");
var url = rootUrl;
var port = url.Port != 80 ? (":" + url.Port) : String.Empty;
// return string.Format("{0}://{1}{2}{3}", url.Scheme, url.Host, port, VirtualPathUtility.ToAbsolute(relativeUrl));
return string.Format("{0}://{1}{2}{3}", url.Scheme, url.Host, port, relativeUrl.Replace("~/", "/"));
}