I recommend using a RegExp to parse user-supplied HTML instead of creating a DOM object, because loading external content (iframe, script, link, style, object, ...) is undesirable when performing a "simple" task such as getting the attribute values from an HTML string.
Using methods similar to those in my previous answer, I've created a function to match attribute values. Both quoted and non-quoted attribute values are matched.
The code currently returns an object with attributes from the first tag, but it's easily extensible to retrieve all HTML elements (see bottom of answer).
Fiddle: http://jsfiddle.net/BP4nF/1/
// Example: extract the attributes of the first tag found in an HTML string.
// The sample tag mixes double-quoted values with one non-quoted value.
var htmlString = '<iframe src="http://www.stackoverflow.com/" width="123" height="123" frameborder="1" non-quoted=test></iframe>';
var arr = parseHTMLTag(htmlString);
// arr is the object returned by parseHTMLTag (attribute name -> value).
// An easy way to verify it is to display its JSON serialization:
alert(JSON.stringify(arr));
/**
 * Parses the attributes of the FIRST tag found in an HTML string, using
 * regular expressions only (no DOM is created, so nothing is loaded).
 *
 * @param {string} htmlString - HTML source to scan.
 * @returns {Object} Map of attribute name -> attribute value (string).
 *     Empty object when no opening tag is found. Attributes without a
 *     value (e.g. `disabled`) are not included.
 */
function parseHTMLTag(htmlString){
    // Opening tag: "<", a name that stops at whitespace, "<", ">" or "/",
    // then greedily as many `text "quoted"` / `text 'quoted'` runs as possible
    // (so a ">" inside a quoted value does not end the tag), then any leftover
    // non-bracket text, and finally ">" or - for an unterminated tag - the
    // position just before the next "<".
    var tagPattern = /<[a-z][^\s<>\/]*(?:[^<>"']*(?:"[^"]*"|'[^']*'))*[^<>]*(?:>|(?=<))/i;
    // 1 = attribute, 2 = quote, 3 = quoted value, 4 = non-quoted value (either 3 or 4)
    // The quoted value is "any character that is not the opening quote",
    // written as a single-step loop to avoid exponential backtracking when the
    // closing quote is missing.
    var attPattern = /([-a-z0-9:._]+)\s*=(?:\s*(["'])((?:(?!\2)[\s\S])*)\2|([^><\s]+))/ig;
    var attributes = {};
    var tag = htmlString.match(tagPattern);
    if(tag){ // If there's a tag match
        tag = tag[0]; // Text of the whole tag
        var match;
        while((match = attPattern.exec(tag)) !== null){
            // Group 3 is "" for an empty quoted value (alt=""), which must be
            // kept; only fall back to group 4 when group 3 did not participate.
            attributes[match[1]] = match[3] !== undefined ? match[3] : match[4];
        }
    }
    return attributes;
}
The output of the example is equivalent to:
// Expected result for the iframe example: each attribute name found in the
// tag maps to its value as a string, whether or not it was quoted in the HTML.
var arr = {
"src": "http://www.stackoverflow.com/",
"width": "123",
"height": "123",
"frameborder": "1",
"non-quoted": "test"
};
Extra: Modifying the function to get multiple matches (only showing code to update)
/**
 * Parses every opening tag found in an HTML string, using regular
 * expressions only (no DOM is created, so nothing is loaded).
 *
 * @param {string} htmlString - HTML source to scan.
 * @returns {Array<{tagName: string, attributes: Object}>} One entry per
 *     opening tag, in document order. `attributes` maps attribute name ->
 *     value (string). Closing tags (`</p>`) are skipped.
 */
function parseHTMLTags(htmlString){
    // 1 = tag name. The name stops at whitespace, "<", ">" or "/", so adjacent
    // tags ("<br><p>") and self-closing tags ("<br/>") yield a clean name.
    // The quoted-value runs are matched greedily so a ">" inside a quoted
    // value does not end the tag; the tag ends at ">" or just before the
    // next "<" when it is unterminated.
    var tagPattern = /<([a-z][^\s<>\/]*)(?:[^<>"']*(?:"[^"]*"|'[^']*'))*[^<>]*(?:>|(?=<))/ig;
    // 1 = attribute, 2 = quote, 3 = quoted value, 4 = non-quoted value (either 3 or 4)
    // The quoted value is "any character that is not the opening quote",
    // written as a single-step loop to avoid exponential backtracking when the
    // closing quote is missing.
    var attPattern = /([-a-z0-9:._]+)\s*=(?:\s*(["'])((?:(?!\2)[\s\S])*)\2|([^><\s]+))/ig;
    var htmlObject = [];
    var tag, match, attributes;
    while((tag = tagPattern.exec(htmlString)) !== null){
        attributes = {};
        // attPattern is global and reused for every tag: start each scan at 0.
        attPattern.lastIndex = 0;
        // Scan the matched tag text (tag[0]), not the match array itself.
        while((match = attPattern.exec(tag[0])) !== null){
            // Group 3 is "" for an empty quoted value (alt=""), which must be
            // kept; only fall back to group 4 when group 3 did not participate.
            attributes[match[1]] = match[3] !== undefined ? match[3] : match[4];
        }
        htmlObject.push({
            tagName: tag[1],
            attributes: attributes
        });
    }
    return htmlObject; // Array of all HTML elements
}