0

Consider the two test files below, test_load_utf8.php which is a webpage to inspect -- and test_load_utf8.js, which is a script doing the inspection of this page using casperjs with engine slimerjs (version 0.10.1-pre).

You can run php -S localhost:8080 in the same directory as these files, and the webpage will be available at http://127.0.0.1:8080/test_load_utf8.php. When the button is clicked, a JSON object is retrieved via an AJAX call and shown in the div:

ffoxutf8

As visible from the image, there is no problem with UTF-8 characters.

With the test_load_utf8.js, I would like to capture the AJAX response which carries the UTF-8 text in it. However, when I run the script from the command prompt/terminal (bash on Ubuntu 14.04), I get this:

$ /home/USERNAME/.nvm/versions/node/v4.0.0/lib/node_modules/casperjs/bin/casperjs --engine=slimerjs test_load_utf8.js
website opened 
 resp.body: {"one":"Hello \u2013 \ufeff\u0915\u093e\u091a\u0902 \u03bc\u03b5","two":"\u00e7a \u00f8y je\u015b\u0107 \u044f\u0441\u0442\u044c"}

Note that the printout does not contain the actual UTF-8 characters; they appear as \uXXXX escape sequences instead. In fact, in my actual problem (beyond this test), I don't even get these escape sequences; I get, say, â in place of the expected character, which I guess is some ASCII encoding getting mixed up somewhere.

So, my question is - how can I get proper UTF-8 characters in the console.log output in terminal? That is, the output from the script should be:

 resp.body: {"one":"Hello – काचं με","two":"ça øy jeść ясть"}

EDIT: forgot to paste in the files :)

test_load_utf8.php

<?php
// AJAX endpoint: when this page is requested with the exact query string
// "getjson", answer with a small JSON document and stop, so that none of
// the HTML below is sent.
$isJsonRequest = isset($_SERVER["QUERY_STRING"])
  && $_SERVER["QUERY_STRING"] == "getjson";
if ($isJsonRequest) {
  // Sample payload containing non-ASCII text in several scripts.
  $message = new stdClass();
  $message->one = "Hello – ﻿काचं με";
  $message->two = "ça øy jeść ясть";
  header('Content-Type: application/json; charset=utf-8', true, 200);
  // json_encode() is called without flags, so non-ASCII characters are
  // written as \uXXXX escapes (the JSON_UNESCAPED_UNICODE flag disables that).
  echo json_encode($message);
  exit;
}
?>
<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
  <style type="text/css">
#dataholder {
  border: 2px solid gray;
  width: 70%; height: 150px;
}
.my_btn { background-color:yellow; }
  </style>
  <script src="http://code.jquery.com/jquery-1.12.4.min.js"></script>
  <script type="text/javascript">
// URL of this page without query string or fragment; AJAX URLs are built by
// appending "?<query>" to it. It is assembled from the location parts instead
// of slicing href at "?", so a page opened with a query string does not yield
// a base ending in "?" (which would produce "...php??getjson").
var thishref = window.location.protocol + "//" + window.location.host + window.location.pathname;
// Query string of the current page without the leading "?" ("" when absent).
var qstr = window.location.search.slice(1);

/**
 * Requests this page's "?getjson" endpoint via AJAX and shows the received
 * data, serialized with JSON.stringify, inside the #dataholder element.
 * Failures are only logged to the console.
 *
 * @param {Element} inbtn - the button that triggered the request (not used).
 */
function OnGetdata(inbtn) {
  console.log("OnGetdata; loading ?getjson via AJAX call");

  $.ajax({
    url: thishref + "?getjson",
    async: true,
    success: function(data) {
      console.log("got getjson data " , data); // object here
      // Insert as plain text rather than markup: the serialized data comes
      // from the server and must not be interpreted as HTML.
      $("#dataholder").text(JSON.stringify(data));
    },
    error: function(xhr, ajaxOptions, thrownError) {
      console.log("getjson error " + thishref + " : " + xhr.status + " / " + thrownError);
    }
  });

}

// Document-ready handler: makes a click on the "Get Data!" button start the
// AJAX request. Declared with var so the name is not created as an implicit
// global, which strict-mode code rejects with a ReferenceError.
var ondocready = function() {
  $("#getdatabtn").click(function(){
    OnGetdata(this);
  });
};
$(document).ready(ondocready);
  </script>
</head>


<body>
  <h1>Hello World!</h1>

  <button type="button" id="getdatabtn" class="my_btn">Get Data!</button>
  <div id="dataholder"></div>

</body>
</html>

test_load_utf8.js

// run with:
// /home/USERNAME/.nvm/versions/node/v4.0.0/lib/node_modules/casperjs/bin/casperjs --engine=slimerjs test_load_utf8.js

var utf8 = require('utf8');

// CasperJS instance driving the steps below; the user agent string and the
// viewport present the session as a desktop Firefox 43 on Linux.
var casper = require("casper").create({
  viewportSize: { width: 1024, height: 768 },
  userAgent: "Mozilla/5.0 (X11; Linux i686; rv:43.0) Gecko/20100101 Firefox/43.0",
  pageSettings: {
    loadPlugins: false,
    // Not loading images makes the script run much faster.
    loadImages: false
  }
  // For debug output, add these options:
  //~ verbose: true,
  //~ logLevel: 'debug',
});

// On every newly initialized page, install a resource listener that prints
// the body of the response whose URL contains "getjson".
casper.on("page.initialized", function(page) {
  page.captureContent = [ /json/ ]; // slimerJS only (0.10.1-pre)
  page.onResourceReceived = function(response) {
    var isJsonEndpoint = response.url.indexOf("getjson") > -1;
    // Only the final ("end") stage of the getjson response is of interest.
    if (!isJsonEndpoint || response.stage != "end") {
      return;
    }
    // The body is printed exactly as received, so any \uXXXX escapes sent
    // by the server show up unchanged in the terminal.
    if (response.body.length > 0) {
      console.log(" resp.body: " + response.body);
    }
  };
});

// Step 1: start Casper and load the test page.
casper.start();
casper.thenOpen("http://127.0.0.1:8080/test_load_utf8.php", function() {
  console.log("website opened");
});

// Step 2: click the button from inside the page context, which fires the
// AJAX request observed by the resource listener.
// NOTE(review): no step waits for the AJAX response before run() finishes;
// presumably it arrives in time -- confirm, or add an explicit wait step.
casper.then(function() {
  this.evaluate(function() {
    var button = document.getElementById("getdatabtn");
    button.click();
  });
});

casper.run();
sdbbs
  • 4,270
  • 5
  • 32
  • 87
  • 1
    Does this work? [How do I decode a string with escaped unicode?](http://stackoverflow.com/questions/7885096/how-do-i-decode-a-string-with-escaped-unicode) – Artjom B. Jun 09 '16 at 15:26
  • Thanks @ArtjomB. - indeed, this: "`.replace(/\\u([\d\w]{4})/gi, function (match, grp) { return String.fromCharCode(parseInt(grp, 16)); } );`" did work; although I'm still puzzled so as to why this happens at all (I had forgotten to paste in the source files when I wrote this post first; did that now) – sdbbs Jun 10 '16 at 10:38

0 Answers0