0

I want to obtain the text contained within a web page in an iframe.

According to an existing question on SO, the JavaScript code to obtain the content of a web page in an iframe is given below:

var myIFrame = document.getElementById(iFrameName); // Look up the iframe element by the id held in iFrameName
var content = myIFrame.contentWindow.document.body.innerHTML; // Read the HTML markup of the framed document's body into a variable

The problem is that I want to obtain only the text content of the web page in the iframe. I don't want to do this by obtaining the entire content and then parsing through it to remove images, links, etc. The code above returns the HTML markup of the page's body. Is there some way to obtain only the text content of a web page in an iframe?

user893664
  • 319
  • 1
  • 9
  • 23
  • 1
    You could traverse the entire DOM and ignore everything that isn't text nodes – Martin Jespersen Sep 25 '11 at 10:46
  • before you go down this road, if you will be working with content *outside* your domain, see this: http://stackoverflow.com/questions/6170925/get-dom-content-of-cross-domain-iframe, and this: http://stackoverflow.com/questions/1036144/how-do-i-get-the-contents-of-an-iframe-containing-a-different-site. The suggestions below will work in local development, but not on webpages on a server. – ampersand Sep 25 '11 at 12:22

1 Answers1

0
var myIFrame = document.getElementById(iFrameName); // Look up the iframe element by the id held in iFrameName
var myIFrameBody = myIFrame.contentWindow.document.body; // Keep a reference to the <body> node of the document loaded in the iframe

/**
 Collect the trimmed, non-empty text of every text node found at or
 below node n, appending each piece to the array s in traversal order.

 Only text nodes (nodeType 3) contribute text and only element nodes
 (nodeType 1) are descended into; every other node type is ignored.

 @param {Node} n - node at which the walk starts.
 @param {string[]} s - accumulator array; it is mutated in place.
 */
function getStrings(n, s) {
   var txt, children, max, i;

   if (n.nodeType === 3) {
      // Use the built-in String.prototype.trim: there is no global
      // trim() function, so calling one would throw a ReferenceError.
      txt = n.data.trim();

      // Whitespace-only text nodes (indentation, line breaks) are skipped.
      if (txt.length > 0) {
         s.push(txt);
      }
   }
   else if (n.nodeType === 1) {
      children = n.childNodes;

      for (i = 0, max = children.length; i < max; i++) {
         getStrings(children[i], s);
      }
   }
}

/**
 Gather the text pieces that getStrings finds starting at node n and
 return them as one string, with a single space between pieces.
 */
function getText(n) {
   var pieces = [];

   // getStrings fills the array in place.
   getStrings(n, pieces);

   return pieces.join(" ");
}

// Text of the iframe's body, as produced by getText.
var myIFrameText = getText(myIFrameBody);
user278064
  • 9,982
  • 1
  • 33
  • 46