1

I need a regex to get the src value from each of the following strings, whether the attribute value uses double quotes, single quotes, or no quotes at all.

With Double Quotes:

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

With Single Quotes:

var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

Without Quotes:

var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

Expected match/return:

anyFile.js

I have this poor looking solution with split:

// Split-based extraction of the src value from the string `a`.
// The three spellings of the attribute (double-quoted, single-quoted,
// unquoted) are tried in that order; the first one present in `a` wins.
var match;
var file = (function () {
  // Each entry pairs the text that introduces the value with the text
  // that is expected to end it.
  var forms = [
    ['src="', '.js"'],
    ["src='", ".js'"],
    ["src=", '.js']
  ];
  for (var n = 0; n < forms.length; n++) {
    match = forms[n][0];
    if (a.indexOf(match) >= 0) {
      // Take what follows the first marker, cut it at the terminator,
      // then put back the ".js" that the second split consumed.
      return a.split(match)[1].split(forms[n][1])[0] + ".js";
    }
  }
  return "no match";
})();

But it can match wrong src attributes, so I need a regex.

Any help would be greatly appreciated.

Solution (special thanks to Rajesh)

// Sample inputs: four tag layouts for each of the three quoting styles.

// Double-quoted attribute values.
var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

// Single-quoted attribute values.
var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

// Unquoted attribute values.
var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

/**
 * Extracts the value of the `src` attribute from a tag string and logs it.
 *
 * Handles double-quoted, single-quoted and unquoted values. A missing or
 * empty `src` is reported as "no match".
 *
 * @param {string} str - Markup of a single tag, e.g. '<script src="a.js">'.
 * @returns {string} The src value, or "no match" when none is found.
 */
function getSrc(str){
  // (?:^|\s) requires whitespace (or the start of the string) in front of
  // "src", so look-alike attributes such as data-src are not picked up.
  // Each quoting style has its own alternative, which lets a quoted value
  // contain spaces while an unquoted value still ends at whitespace or ">".
  const srcPattern = /(?:^|\s)src\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i;
  const found = str.match(srcPattern);

  // Only one of the three groups can participate in a given match.
  const result = (found && (found[1] || found[2] || found[3])) || "no match";

  console.log(result);
  return result;
}

[a,b,c,d,e,f,g,h,i,j,k,l].forEach(getSrc)
Eduardo SR
  • 59
  • 5
  • 2
    I don't see any attempt made – Rajesh Apr 07 '17 at 13:43
  • Did you try it yourself ? if yes provide your code to fix/improve it. Also this question may help you http://stackoverflow.com/questions/317053/regular-expression-for-extracting-tag-attributes – Mohamed Abbas Apr 07 '17 at 13:44
  • I can link this solution; maybe it will be of some use to you. [It shows how to retrieve a substring that is between two specified characters](http://stackoverflow.com/questions/14867835/get-substring-between-two-characters-using-javascript) – user3004449 Apr 07 '17 at 13:46
  • @Rajesh I've updated the question – Eduardo SR Apr 07 '17 at 14:08

3 Answers

1

Here's what I came up with to achieve this. Any improvements are welcome:

script src=["']?(\w+\.js)

https://regex101.com/r/6NCj94/2

or for cases where your script's name is something like jquery.min.js:

script src=["']?(.*\.js)

https://regex101.com/r/6NCj94/3

ThePerplexedOne
  • 2,920
  • 15
  • 30
  • Though for the mentioned cases, it works fine, it will fail for cases where you have multiple `.` like `fileName.min.js` or has hash in it like `fileName.askcf123.js` – Rajesh Apr 07 '17 at 14:01
  • Then you could simply use `.*` instead of `\w+`. – ThePerplexedOne Apr 07 '17 at 14:03
  • Hi ThePerplexedOne, thanks for the help. On regex101.com, when I choose JavaScript as the flavor, it matches script src=anyFile.js, not only anyFile.js. But your solution works with PHP as the flavor. What do I need to modify? – Eduardo SR Apr 07 '17 at 14:42
  • I see what you mean, but rest assured nothing is different. It still matches, look to the right and you will see `Match Information` along with the `Groups`. The groups are what we want, it's what we're capturing. This regex will work fine. @EduardoSR – ThePerplexedOne Apr 07 '17 at 14:46
  • Thanks ThePerplexedOne, sorry my mistake. I found it is working too. – Eduardo SR Apr 07 '17 at 15:35
1

You can try something like this:

Logic:

  • Search for the pattern src= followed by optional single/double quotes, followed by one or more characters that are not a quote, a space or >.
  • This will yield something like src="anyfile.js.
  • Now strip the leading part, i.e. src= followed by the optional quote, so that only the file name remains.

Regex version

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';
var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

/**
 * Logs and returns the value of the first `src=` attribute found in `str`.
 *
 * Works in two steps: first match `src=` plus the value (including any
 * opening quote), then strip the `src=` prefix and the quote.
 *
 * @param {string} str - Markup of a single tag.
 * @returns {string} The src value, or "no match" when `str` has no src.
 */
function getSrc(str){
  var regex = /src=["']*[^"' >]+/;
  var found = str.match(regex);

  // match() returns null when nothing matches; indexing it would throw.
  if (found === null) {
    console.log("no match");
    return "no match";
  }

  var replaceRegex = /src=["' ]*/;
  var src = found[0].replace(replaceRegex, "");
  console.log(src);
  return src;
}

[a,b,c,d,i,j,k,l].forEach(getSrc)
Community
  • 1
  • 1
Rajesh
  • 24,354
  • 5
  • 48
  • 79
1

I prefer going the right way, using DOM:

// Print the src of every <script> element in the current document.
var scripts = document.getElementsByTagName('script');
Array.prototype.forEach.call(scripts, (script => {
  // Inline scripts have no src attribute; skip them instead of logging blanks.
  if (script.hasAttribute('src')) {
    // getAttribute() returns the value as written in the markup, whereas
    // the .src property returns the fully resolved absolute URL.
    console.log(script.getAttribute('src'));
  }
}));

Full javascript code:

// One sample tag per quoting style: double-quoted, single-quoted, unquoted.
var a = '<script src="anyFile1.js"><\/script>';
var b = '<script src=\'anyFile2.js\'><\/script>';
var c = '<script src=anyFile3.js><\/script>';

// Let the browser's HTML parser deal with the quoting variants by loading
// the markup into a detached container element.
var html = document.createElement('div');
html.innerHTML = a + b + c;

var scripts = html.getElementsByTagName('script');
// getAttribute() returns the value as written in the markup ("anyFile1.js");
// the .src property would return the fully resolved absolute URL instead.
[].forEach.call(scripts, (script => console.log(script.getAttribute('src'))));

The Regex way

<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?

Live demo

// replace() serves only as a way to visit every match here: its return
// value is not used, and the callback collects capture group 2 (the src
// value) into `array`.
str.replace(/<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?/g, function (match, quote, src) {
    if (src) {
        return array.push(src);
    }
    return '';
})

Full JS code:

// Sample markup: four tag layouts for each quoting style (double-quoted,
// single-quoted, unquoted), one tag per line.
const str = `<script class="anyClass" src="anyFile.js" id="anyID">
<script class="anyClass" src="anyFile.js">
<script src="anyFile.js" id="anyID">
<script src="anyFile.js">


<script class='anyClass' src='anyFile.js' id='anyID'>
<script class='anyClass' src='anyFile.js'>
<script src='anyFile.js' id='anyID'>
<script src='anyFile.js'>

<script class=anyClass src=anyFile.js id=anyID>
<script class=anyClass src=anyFile.js>
<script src=anyFile.js id=anyID>
<script src=anyFile.js >`;

var array = [];

// Group 1 is the optional opening quote (re-matched by \1 as the closing
// quote); group 2 is the src value itself.
const scriptSrcPattern = /<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?/g;

// Walk the matches with exec() rather than calling replace() purely for
// its side effects. The g flag makes each call resume after the last match.
let found;
while ((found = scriptSrcPattern.exec(str)) !== null) {
  array.push(found[2]);
}

console.log(array)
Community
  • 1
  • 1
revo
  • 47,783
  • 14
  • 74
  • 117