Trying to extract the urls contained within all img, link, and script tags in a source code string. Using Sublime Text regex (img|link|script).+?(href|src)="(.+?)"
finds the correct results:
=>
link rel="stylesheet" type="text/css" href="assets/css/dynbm.css"
img src="assets/img/sponsor-image.png"
img class="logo" src="assets/img/sponsor-logo.png"
but using the following code via JS returns a random number of urls (even some but not all a tag urls):
var res,
lnks = [],
lnk_exp = new RegExp('(img|link|script).+?(href|src)="(.+?)"', 'gi');
while (res = lnk_exp.exec(src)) {
lnks.push(res[3]);
}
console.log(lnks);
=>
[
"assets/css/dynbm.css",
"/click?url=http://www.website.com/imglink",
"assets/img/sponsor-image.png",
"/click?url=http://www.website.com/link1",
"/click?url=http://www.website.com/link3",
"/click?url=http://www.website.com/cta",
"assets/img/sponsor-logo.png"
]
Full string for those interested:
<link rel="stylesheet" type="text/css" href="assets/css/dynbm.css"><div class="dynbm_wrap rrwidth" id="dbm-name"><div id="dynbm_screens"><div class="screen" id="slide1"><div class="dynbm_body"><div class="img_right"><a href="/click?url=http://www.website.com/imglink" onclick="return sl(this,'nw','dbm-name_i1-1');"><img src="assets/img/sponsor-image.png" alt="sponsor-image"></a></div><h3><a href="/click?url=http://www.website.com/link" onclick="return sl(this,'nw','dbm-name_h1-1');">Heading</a></h3><div class="body_content"><ul><li><a href="/click?url=http://www.website.com/link1" onclick="return sl(this,'nw','dbm-name_l1-1');">Bullet 1</a></li><li><a href="/click?url=http://www.website.com/link2" onclick="return sl(this,'nw','dbm-name_l1-2');">Bullet 2</a></li><li><a href="/click?url=http://www.website.com/link3" onclick="return sl(this,'nw','dbm-name_l1-3');">Bullet 3</a></li></ul></div><p class="action_link"><a target="_parent" href="/click?url=http://www.website.com/cta" onclick="return sl(this,'nw','dbm-name_a1-1');">Learn More</a></p></div></div></div><div class="dynbm_base"><div id="sponsored_footer"><p class="sponsored_text"><a href="/www/sponsored-by" id="sponsorlnk" target="_parent">From Our Sponsor</a></p><a target="_parent" href="/click?url=http://www.website.com" onclick="return sl(this,'nw','dbm-name_logo');"><img class="logo" src="assets/img/sponsor-logo.png" alt="Logo"></a></div><div class="disclosure"><a href="#" class="close" title="Close this message" target="_parent">close</a><h4>From Our Sponsor</h4><p>Content under this heading is from or created on behalf of the named sponsor. This content is not subject to the WebMD Editorial Policy and is not reviewed by the WebMD Editorial department for accuracy, objectivity or balance.</p></div></div></div><style type="text/css">#dbm-name { background: #fff; color: #000; }</style><script type="text/javascript">(function(){var e=$('dbm-name');e.find('p.sponsored_text a, .disclosure a.close, .disclosure').click(function(){e.find('.disclosure').toggleClass('visible').css('z-index',99);return false})})()</script>