0

I have an HTML file, in which the parameters of interest occur between square brackets [ ], but then this range keeps repeating with identical names inside subsequent square brackets like this :

var jArray= {"2":["<span style='color:#c7b699'><b>May 1<\/b><\/span>","Percentage: <span>57%\n<\/span>","Interest Rate: <span>0.53","Amount Exchange: <span>150,<\/span>777,<\/span>695.16","Monthly Exchange: <span>370,<\/span>352.08","Interest Differentiation: <span>8.07","Interest RatePer: <span>0.54","Second Quarter","<b>Friday, May 1, 2020<\/b>","May","Phase: <span>Second Quarter<\/span>","May 1"],"0":["<b>April 29<\/b>","36%\n","Interest Rate: 0.52922522949925","Amount Exchange: 150701713.86852","Monthly Exchange: 378921.00811486","Interest Differentiation: 6.0176806272646","Interest RatePer: 0.52559281178633","Quarter Pre","April 29"],"1":["<b>April 30<\/b>","46%\n","Interest Rate: 0.52909124921306","Amount Exchange: 150739875.63889","Monthly Exchange: 374391.59013105","Interest Differentiation: 7.028645917145","Interest RatePer: 0.53195147367036","Quarter Pre","April 30"],"3":["<b>May 2<\/b>","68%\n","Interest Rate: 0.52882712965299","Amount Exchange: 150815161.77193","Monthly Exchange: 367050.8265867","Interest Differentiation: 9.1346986834112","Interest RatePer: 0.54259013649969","Second Quarter","May 2"],"4":["<b>May 3<\/b>","78%\n","Interest Rate: 0.52869706105934","Amount Exchange: 150852264.90232","Monthly Exchange: 364701.85696453","Interest Differentiation: 10.23252765695","Interest RatePer: 0.54608484792928","Second Quarter","May 3"]};

The parameters of interest are the first occurrences of 'Percentage', 'Interest Rate', ..., 'Date' inside the first pair of square brackets [ ]. How can I export only the contents of the first pair of square brackets into a separate text file while ignoring all the remaining entries inside subsequent square brackets? Also, the 'Amount Exchange' is actually 150777695.16 and the 'Monthly Exchange' is 370352.08. The entry inside the curly brackets { } does not contain a new-line character. I am using Matlab.

  • In what kind of format do you want it in that .txt file? Besides should that file be a one time download or do you want the file to be stored on the server? – Chiel May 20 '20 at 11:34
  • Yes please. Text file output. Actually, I have a bunch of these html files in one directory. The files are named bankData1.html, bankData2.html,...,bankData10000.html. The objective is to get all these parameters of all the html files in one text file like this: Percentage, Interest Rate, Amount Exchange, ... so that row #1 has parameters from bankData1, row#2 has parameters from bankData2, ..., row #10000 has parameters from bankData10000. Nothing to be stored on a server. Thanks. – user5461722 May 20 '20 at 14:03

1 Answer

0

I have managed to get the given bank data into the right format and then write it out as columns in a .txt file. However, it is not possible to loop through the files in a directory with browser JavaScript. For that you would have to use a server-side language like PHP or Python. I'm not sure if you have access to that, which is why I did it in plain JavaScript. The comments in the code should suffice to describe it. I used the download function from this Stack Overflow post.

// Sample data as embedded in the bank HTML page: an object keyed by the string
// indices "0".."4", each mapping to an array of strings that mix field labels,
// values and HTML fragments (e.g. <span>, <b>, escaped "<\/span>").
// Entry "2" holds 12 items, most of them prefixed with a "Label: " text;
// the other entries shown here hold 9 items each.
// loadData() reads only entry "2".
var jArray = {
  "2": ["<span style='color:#c7b699'><b>May 1<\/b><\/span>", "Percentage: <span>57%\n<\/span>", "Interest Rate: <span>0.53", "Amount Exchange: <span>150,<\/span>777,<\/span>695.16", "Monthly Exchange: <span>370,<\/span>352.08",
    "Interest Differentiation: <span>8.07", "Interest RatePer: <span>0.54", "Second Quarter", "<b>Friday, May 1, 2020<\/b>", "May", "Phase: <span>Second Quarter<\/span>", "May 1"
  ],
  "0": ["<b>April 29<\/b>", "36%\n", "Interest Rate: 0.52922522949925", "Amount Exchange: 150701713.86852", "Monthly Exchange: 378921.00811486", "Interest Differentiation: 6.0176806272646", "Interest RatePer: 0.52559281178633", "Quarter Pre",
    "April 29"
  ],
  "1": ["<b>April 30<\/b>", "46%\n", "Interest Rate: 0.52909124921306", "Amount Exchange: 150739875.63889", "Monthly Exchange: 374391.59013105", "Interest Differentiation: 7.028645917145", "Interest RatePer: 0.53195147367036", "Quarter Pre",
    "April 30"
  ],
  "3": ["<b>May 2<\/b>", "68%\n", "Interest Rate: 0.52882712965299", "Amount Exchange: 150815161.77193", "Monthly Exchange: 367050.8265867", "Interest Differentiation: 9.1346986834112", "Interest RatePer: 0.54259013649969", "Second Quarter",
    "May 2"
  ],
  "4": ["<b>May 3<\/b>", "78%\n", "Interest Rate: 0.52869706105934", "Amount Exchange: 150852264.90232", "Monthly Exchange: 364701.85696453", "Interest Differentiation: 10.23252765695", "Interest RatePer: 0.54608484792928", "Second Quarter",
    "May 3"
  ]
};

// Field labels to extract from the record, listed in output column order.
// Each label is also the "Label: " prefix that gets stripped from its value.
let toLoad = [
  "Percentage",
  "Interest Rate",
  "Amount Exchange",
  "Monthly Exchange",
  "Interest Differentiation",
  "Interest RatePer",
  "Phase",
];



/**
 * Extracts the labelled fields from record "2" of the global `jArray`,
 * cleans them up and offers the result as a two-line CSV download
 * ("bankData.txt"): a header row followed by one data row.
 *
 * @param {string[]} array - Field labels, in column order. Label i is the
 *   "Label: " prefix stripped from the i-th kept item; a trailing "Date"
 *   column (which has no label) is appended automatically.
 * @returns {false|undefined} `false` when `array` is empty (nothing is
 *   downloaded); otherwise `undefined` after the download was triggered.
 */
function loadData(array) {
  if (array.length === 0) {
    return false;
  }

  // Work on a copy: splicing jArray["2"] itself would corrupt the source
  // data, so a second click on the button would export the wrong items.
  const data = [...jArray["2"]];

  // Remove the unnecessary items: the leading date banner, then the
  // quarter name, long date and month name that sit before "Phase".
  data.splice(0, 1);
  data.splice(6, 3);

  const cleaned = data.map((item, i) => {
    // Strip HTML tags, thousands separators and line breaks.
    let text = item
      .replace(/<\/?[^>]+(>|$)/g, "")
      .replace(/,/g, "")
      .replace(/\n/g, "");

    // Strip the "Interest Rate: " etc. prefix; the final date item has no
    // matching label and is left as is.
    const label = array[i];
    if (label !== undefined) {
      text = text.replace(`${label}: `, "");
    }
    return text;
  });

  // Build the header from the labels actually passed in, not a global.
  const dataToFile = `${array.join(",")},Date\n${cleaned.join(",")}\n`;

  download("bankData.txt", dataToFile);
}


/**
 * Offers `text` to the user as a plain-text file download.
 *
 * A hidden <a> element carrying a `data:` URL and a `download` attribute is
 * attached to the page, clicked programmatically and removed again.
 *
 * @param {string} filename - Name suggested for the downloaded file.
 * @param {string} text - File contents; URI-encoded into the data URL.
 */
function download(filename, text) {
  const link = document.createElement('a');
  const href = `data:text/plain;charset=utf-8,${encodeURIComponent(text)}`;

  link.setAttribute('href', href);
  link.setAttribute('download', filename);
  link.style.display = 'none';

  // The link is only in the document for the duration of the click.
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);
}
<!DOCTYPE html>
<html lang="en" dir="ltr">

<head>
  <meta charset="utf-8">
  <!-- A document needs a <title> to be valid HTML. -->
  <title>Bank data export</title>
</head>

<body>
  <!-- Calls loadData() from the accompanying script with the toLoad label
       list, which triggers the bankData.txt download. type="button" keeps
       the button from submitting if it is ever placed inside a form. -->
  <button type="button" onclick="loadData(toLoad);">Download bankdata</button>


</body>

</html>

Hope this helps!

Edit: OK, I've managed to convert the for loop to Matlab code. I've tested it and it should work. However, you will have to do the rest yourself because I am not yet familiar enough with Matlab.


% Field labels, in output column order. Each one is the "Label: " prefix
% stripped from the matching item; the final date item has no label.
toLoad = ["Percentage", "Interest Rate", "Amount Exchange", "Monthly Exchange", "Interest Differentiation", "Interest RatePer", "Phase"];

% Raw contents of the first bracketed entry. MATLAB does not process escapes
% in string literals, so "\n" and "\/" are literal backslash sequences here.
data = ["<span style='color:#c7b699'><b>May 1<\/b><\/span>", "Percentage: <span>57%\n<\/span>", "Interest Rate: <span>0.53</span>", "Amount Exchange: <span>150,<\/span>777,<\/span>695.16", "Monthly Exchange: <span>370,<\/span>352.08", "Interest Differentiation: <span>8.07", "Interest RatePer: <span>0.54", "Second Quarter", "<b>Friday, May 1, 2020<\/b>", "May", "Phase: <span>Second Quarter<\/span>", "May 1"];

% Keep the seven labelled fields plus the trailing date; drop the date
% banner (item 1) and the quarter / long date / month items (8 to 10).
data = data([2:7, 11, 12]);

% regexprep and strrep operate element-wise on string arrays.
data = regexprep(data, '<.*?>', '');   % strip HTML tags
data = regexprep(data, '\\n', '');     % strip the literal "\n" sequences
data = strrep(data, ',', '');          % drop thousands separators

% Strip each "Label: " prefix as literal text (not as a regex pattern).
for a = 1:numel(toLoad)
    data(a) = strrep(data(a), toLoad(a) + ": ", "");
end


Chiel
  • 1,324
  • 1
  • 11
  • 30
  • Thank you very much for taking time out to do this. But the requirement is specific to Matlab. For some reason, Javascript will not be acceptable. – user5461722 May 20 '20 at 14:59
  • Oke, sorry, that requirement wasn't clear to me. Unfortunately MatLab is out of my scope, so I won't be able to help you with that. – Chiel May 20 '20 at 15:03
  • No worries. Atleast thank you for taking this effort. – user5461722 May 21 '20 at 01:04
  • This code works very very nicely and gives the desired output. Will it be possible for you to please give a regex conversion for the inside 'for loop' ? – user5461722 May 22 '20 at 02:53
  • @user5461722 Not sure what you mean with “regex conversion”. Could you clarify? – Chiel May 22 '20 at 13:44
  • Matlab has a command called regexp which is exactly the same as regexp in other languages: https://in.mathworks.com/help/matlab/ref/regexp.html In the 'for loop' portion, they seem to be the same. Just trying to equate it to Matlab regexp so that I can convert this code to matlab. – user5461722 May 22 '20 at 14:13
  • You have gone out of the way to help me. I don't know how I can thank you. The code is good enough for my purposes. And the previous javascript code also works completely if anybody else faces the same issue. – user5461722 May 23 '20 at 13:51