0

I have read the posts here and here, but can't get the encoding right.

In javascript, I am writing a link to an <input> which is then read with document.getElementById('downloadlink').value and placed into an iframe src to be executed.

The code works fine on my localhost but not live server ,where the file is never loaded due to an invalid name (note: the http 302 Moved Temporarily is due to an .htaccess redirect due to an invalid file request). I am sure this is an issue with encoding ampersands. My understanding is that the input needs the ampersands (and other html entities) encoded. I am using php.js equivalents of htmlspecialchars() and htmlspecialchars_decode() for this (again this works on localhost).

For the img.src, my understanding is that I want ampersands encoded to &amp; However on my localhost, it works without them but not with them encoded. On my live site, it doesn't work either way.

To encode them, I have tried:

url = url.replace(/&/g, "&amp;"); 

It's time to stop tearing my hair out and ask for help. Anyone?

The iframe (with encoding from js htmlspecialchars) NOTE: I can't get the &amp; to stay encoded for the display - they get replaced on save.

<iframe src="www.waldorfteacherresources.com/getfile.php?file=g2-saints-martin-009.jpg&amp;mode=download&amp;hv=4443f86959bf104e1df0eac204b8aaf226ae533b&amp;wtrpath=docs " id="iframe" height="0" width="0" hidden=""></iframe>

The code

function downloadfile() {
    if (document.getElementById("downloadlink")) {
    var div = document.getElementById('datadiv');
    var url = hx(document.getElementById('downloadlink').value);
    var ifrm = document.createElement("iframe");

    ifrm.setAttribute("src", url);
    ifrm.setAttribute("id", "iframe");
    ifrm.height = 0;
    ifrm.width = 0;
    ifrm.hidden = true;
    div.appendChild(ifrm);
    }
}

// support functions to encode / decode 

// encodes output - equivalent of php hx function
// hx notation is a shortcut for htmlspecialchars() with all options set
function hx( string,  flags,  charsetEncoding,  double_encode) {
    if (typeof flags == "undefined"){
        flags = 0;
    }
    if (typeof charsetEncoding == "undefined" ){
    charsetEncoding = "UTF-8";
    }    
    if (typeof double_encode == "undefined"){
        double_encode = true;
    }

    // constants not valid until php v 5.4
    var ENT_HTML401 = 0;
    var ENT_HTML5 = (16 | 32);
    var ENT_COMPAT = 2;
    if ( flags == 0) {
     flags = ENT_COMPAT |  ENT_HTML401;
    }
     string = htmlspecialchars( string,  flags,  charsetEncoding,  double_encode);
    return  string;
}

// decodes output of hx() / htmlspecialchars() - shortcut notation for htmlspecialchars_decode()
function hdx(string) {
    return htmlspecialchars_decode(string);
}

function htmlspecialchars(string, quote_style, charset, double_encode) {
  //       discuss at: http://phpjs.org/functions/htmlspecialchars/
  var optTemp = 0,
    i = 0,
    noquotes = false;
  if (typeof quote_style === 'undefined' || quote_style === null) {
    quote_style = 2;
  }
  string = string.toString();
  if (double_encode !== false) { // Put this first to avoid double-encoding
    string = string.replace(/&/g, '&amp;');
  }
  string = string.replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  var OPTS = {
    'ENT_NOQUOTES': 0,
    'ENT_HTML_QUOTE_SINGLE': 1,
    'ENT_HTML_QUOTE_DOUBLE': 2,
    'ENT_COMPAT': 2,
    'ENT_QUOTES': 3,
    'ENT_IGNORE': 4
  };
  if (quote_style === 0) {
    noquotes = true;
  }
  if (typeof quote_style !== 'number') { // Allow for a single string or an array of string flags
    quote_style = [].concat(quote_style);
    for (i = 0; i < quote_style.length; i++) {
      // Resolve string input to bitwise e.g. 'ENT_IGNORE' becomes 4
      if (OPTS[quote_style[i]] === 0) {
        noquotes = true;
      } else if (OPTS[quote_style[i]]) {
        optTemp = optTemp | OPTS[quote_style[i]];
      }
    }
    quote_style = optTemp;
  }
  if (quote_style & OPTS.ENT_HTML_QUOTE_SINGLE) {
    string = string.replace(/'/g, '&#039;');
  }
  if (!noquotes) {
    string = string.replace(/"/g, '&quot;');
  }

  return string;
}

function htmlspecialchars_decode(string, quote_style) {
    //       discuss at: http://phpjs.org/functions/htmlspecialchars_decode/
    var optTemp = 0,
        i = 0,
        noquotes = false;
    if (typeof quote_style === 'undefined') {
    quote_style = 2;
    }
    string = string.toString().replace(/&lt;/g, '<').replace(/&gt;/g, '>');
    var OPTS = {
    'ENT_NOQUOTES': 0,
    'ENT_HTML_QUOTE_SINGLE': 1,
    'ENT_HTML_QUOTE_DOUBLE': 2,
    'ENT_COMPAT': 2,
    'ENT_QUOTES': 3,
    'ENT_IGNORE': 4
    };
    if (quote_style === 0) {
    noquotes = true;
    }
    if (typeof quote_style !== 'number') { // Allow for a single string or an array of string flags
    quote_style = [].concat(quote_style);
    for (i = 0; i < quote_style.length; i++) {
        // Resolve string input to bitwise e.g. 'PATHINFO_EXTENSION' becomes 4
        if (OPTS[quote_style[i]] === 0) {
        noquotes = true;
        } else if (OPTS[quote_style[i]]) {
        optTemp = optTemp | OPTS[quote_style[i]];
        }
    }
    quote_style = optTemp;
    }
    if (quote_style & OPTS.ENT_HTML_QUOTE_SINGLE) {
    string = string.replace(/&#039;/g, "'");
    }
    if (!noquotes) {
    string = string.replace(/&quot;/g, '"');
    }
    // Put this in last place to avoid escape being double-decoded
    string = string.replace(/&amp;/g, '&');

    return string;
}


The request headers

    GET /www.example.com/getfile.php?file=myfile.jpg&mode=download&hv=939afca0cdaafd55a1e1471da7463be9acbf5478&wtrpath=docs HTTP/1.1
    Host: www.example.com
    Connection: keep-alive
    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
    Upgrade-Insecure-Requests: 1
    User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36
    Referer: http://www.waldorfteacherresources.com/index.php?grade=2&page=Saints
    Accept-Encoding: gzip, deflate, sdch
    Accept-Language: en-US,en;q=0.8
    Cookie: id=XXX;
    PHPSESSID=.... session info here

The response

    HTTP/1.1 302 Moved Temporarily
    Date: Sun, 21 Feb 2016 21:16:05 GMT
    Server: Apache
    X-Powered-By: PHP/5.5.32
    **** this is an .htaccess redirect due to an invalid file request
    Location: /index.php
    Cache-Control: max-age=86400
    Expires: Thu, 01 Jan 1970 00:00:00 GMT
    Vary: Accept-Encoding
    Content-Encoding: gzip
    Content-Length: 767
    Keep-Alive: timeout=3, max=100
    Connection: Keep-Alive
    Content-Type: text/html; charset=UTF-8
Community
  • 1
  • 1
mseifert
  • 5,390
  • 9
  • 38
  • 100
  • 1
    There's no need to HTML-encode the URL. You *do* need to worry about URI encoding for any parameter names and values in the URL, and that's a completely different sort of encoding. – Pointy Feb 21 '16 at 23:20
  • Also, you can set the `.src` and `.id` attributes of your iframe element as properties, just like you're setting `.height` and `.width` - no need to call `.setAttribute()`. – Pointy Feb 21 '16 at 23:23
  • @Pointy I tried encoding the url with encodeURIComponent for the src but this failed for both localhost and web. Why would localhost be working and web not? – mseifert Feb 21 '16 at 23:24
  • 1
    The `encodeURIComponent()` is for **components** of a URL. You don't pass the whole URL to it, you pass parameter names and parameter values while you're building the URL. – Pointy Feb 21 '16 at 23:25
  • You also don't need to hand-encode `&` characters inside `href` or `src` attribute values in your HTML markup. Just use plain `&`. – Pointy Feb 21 '16 at 23:29
  • @Pointy - When I go into the page source and copy the link directly from `src` and paste it into the browser, it works fine. Any ideas how this can be? AND it works on the localhost as is. – mseifert Feb 21 '16 at 23:31
  • 1
    I don't have enough information about what you're doing to know. All I know is that (1) there is no need to HTML-encode anything that's never going to be processed by an HTML parser, and (2) there's no need to use `&` in attribute value strings. – Pointy Feb 21 '16 at 23:34
  • @Pointy I found the answer. The src apparently needs `http://` to prefix the link where it isn't require elsewhere. Never heard of this. Thanks for helping me look elsewhere. – mseifert Feb 21 '16 at 23:43
  • OK, glad you're making progress!! – Pointy Feb 21 '16 at 23:45

1 Answers1

0

It turns out that it was not ampersands at all but the url format. www.example.com did not work from src - whereas http://www. did. www. only is valid for my site - don't know why or how the communication to the server is different from src than from the browser command bar. But it was.

mseifert
  • 5,390
  • 9
  • 38
  • 100