0

I'm developing a JavaScript uploader because I need to upload files bigger than PHP's upload size limit (`upload_max_filesize`), and I can't modify that global PHP parameter. So I thought I could take a file, split it into parts and upload the parts via HTTP to my web server.

This is the uploader code:

subir.php

<html>
<head></head>
<script type="text/javascript">
// Number of portions the server has acknowledged with "OK" for the current
// upload; incremented in sendPortion and reset to 0 in archivoCompleto.
var procesadas = 0;

/**
 * Change handler for the file input: reads the selected file and passes its
 * contents, together with the input's value, to sendHTTPRequest.
 *
 * NOTE: the file is read with readAsText, which decodes the file's bytes as
 * text. Byte sequences that are not valid text are replaced during decoding,
 * so binary files (such as PDFs) do not survive this step unchanged. Fixing
 * that requires changing the upload format on both the client and the server
 * (for example base64 via readAsDataURL plus decoding on the server), so it
 * is intentionally not changed here.
 *
 * @param {Event} e Change event fired by an <input type="file"> element.
 */
function readSingleFile(e) {
  var file = e.target.files[0];
  // The raw input value is forwarded as the file name.
  var name = e.target.value;
  if (!file) {
    return;
  }

  var reader = new FileReader();
  reader.onload = function(loadEvent) {
    sendHTTPRequest(loadEvent.target.result, name);
  };
  // Report read failures instead of leaving the page silently idle.
  reader.onerror = function() {
    var registro = document.getElementById('registro_eventos');
    if (registro) {
      registro.textContent += 'Error al leer el archivo: ' + name + '\n';
    }
  };
  reader.readAsText(file);
}

/**
 * Shows the given text inside the #file-content element.
 *
 * The text is assigned through textContent so that contents containing markup
 * characters (<, >, &) are displayed literally instead of being parsed as HTML.
 *
 * @param {string} contents Text to display.
 */
function displayContents(contents) {
  var element = document.getElementById('file-content');
  element.textContent = contents;
}
/*
 * Receives the contents of the file and uploads them in portions of n
 * characters (UTF-16 code units of the string, not bytes), calling
 * sendPortion once per portion.
 *
 * Each portion is sent with its zero-based index. The third argument given to
 * sendPortion is the index of the LAST portion (total - 1), because
 * sendPortion displays "porciones + 1" as the total and finishes the upload
 * once more than "porciones" portions have been acknowledged. Deriving it
 * from the rounded-up portion count keeps that true when the length is an
 * exact multiple of n.
 *
 * Empty contents produce no portions, so nothing is sent.
 *
 * contents: string with the file contents.
 * nombre:   file name forwarded unchanged to sendPortion.
 */
function sendHTTPRequest(contents, nombre) {
    var n = 1024;
    var total = Math.ceil(contents.length / n);
    var porciones = total - 1;
    var indice;

    for (indice = 0; indice < total; indice++) {
        sendPortion(contents.slice(indice * n, (indice + 1) * n), indice, porciones, nombre);
    }
}

/**
 * Uploads one portion of the file to procesar.php and tracks progress.
 *
 * When the server answers "OK" the global counter procesadas is incremented,
 * the progress text in #file-content is refreshed and, once more than
 * `porciones` portions have been acknowledged, archivoCompleto is called.
 * Any other answer (including an empty one) is reported in #registro_eventos
 * instead of being silently ignored.
 *
 * @param {string} envio     Contents of this portion.
 * @param {number} orden     Position of the portion; its integer part is sent.
 * @param {number} porciones Index of the last portion (total portions - 1).
 * @param {string} nombre    File name sent along with the portion.
 */
function sendPortion(envio, orden, porciones, nombre) {
    var xmlhttp = new XMLHttpRequest();
    var formData = new FormData();
    var element = document.getElementById('file-content');
    var registro = document.getElementById('registro_eventos');
    var numero = parseInt(orden, 10);

    formData.append('archivo', envio);
    formData.append('orden', numero);
    formData.append('porciones', porciones);
    formData.append('nombre', nombre);

    xmlhttp.open("POST","procesar.php",true);
    // Attach the handler before sending so no state change can be missed.
    xmlhttp.onreadystatechange = function() {
        if (xmlhttp.readyState !== 4) {
            return;
        }
        if (xmlhttp.responseText.trim() !== "OK") {
            if (registro) {
                registro.textContent += "Error al subir la porcion " + numero + "\n";
            }
            return;
        }
        procesadas = procesadas + 1;
        element.innerHTML = "<hr>Estado " + procesadas + " de " + (porciones + 1);

        if (procesadas > porciones) {
            archivoCompleto(nombre);
        }
    };
    xmlhttp.send(formData);
}

/**
 * Tells the server that every portion has been uploaded.
 *
 * Resets the global procesadas counter, posts the file name to generar.php
 * and shows the server's response text in an alert once the request has
 * finished.
 *
 * @param {string} nombre File name sent in the 'nombre' field.
 */
function archivoCompleto(nombre) {
    // Start the next upload from a clean counter.
    procesadas = 0;

    var peticion = new XMLHttpRequest();
    var datos = new FormData();
    datos.append('nombre', nombre);

    peticion.open("POST", "generar.php", true);
    peticion.send(datos);

    peticion.onreadystatechange = function mostrarRespuesta() {
        // Only the final state (4) carries the complete response.
        if (peticion.readyState != 4) {
            return;
        }
        alert(peticion.responseText);
    };
}


/**
 * Attaches readSingleFile as the 'change' listener of the #file-input element.
 */
function registrareventos() {
    var selector = document.getElementById('file-input');
    selector.addEventListener('change', readSingleFile, false);
}
</script>
<!-- registrareventos() runs when the page has loaded and attaches the change listener to #file-input. -->
<body onload="registrareventos();">
<!-- File picker; its change event is handled by readSingleFile. -->
<input type="file" id="file-input" />
<h3>Contents of the file:</h3>
<!-- sendPortion writes the upload progress text into this element. -->
<pre id="file-content"></pre>
<!-- Element that sendPortion looks up under the id 'registro_eventos'. -->
<pre id="registro_eventos"></pre>
</body>
</html>

This is the code that processes every individual upload and stores it in a database table:

procesar.php

<?php
/**
 * procesar.php - stores one uploaded portion of a file in `buffer_upload`.
 *
 * Expects the POST fields:
 *   nombre  - file name as sent by the browser (a leading "C:\fakepath\" is removed)
 *   orden   - portion index (digits only)
 *   archivo - contents of the portion
 *
 * Answers "OK" when the row was inserted and "NOK" otherwise. Every attempt
 * is recorded in arch\log.txt; arch\log2.txt receives the portion index
 * before the insert and the database error after a failed one.
 */
include("include.php");

// Reject incomplete or malformed requests before touching the database.
if (!isset($_POST['nombre'], $_POST['orden'], $_POST['archivo'])
    || !is_string($_POST['nombre'])
    || !is_string($_POST['archivo'])
    || !is_string($_POST['orden'])
    || !ctype_digit($_POST['orden'])) {
    echo("NOK");
    exit;
}

$db = new MysqliDb(BD_HOST, BD_USER, BD_PASSWORD, BD_NAME);

$insertDatos = array(
    'nombrefisico' => str_replace('C:\fakepath\\', "", $_POST['nombre']),
    'porcion' => $_POST['orden'],
    'contenido' => $_POST['archivo'],
);

$log = fopen("arch\log.txt", "ab");
$log2 = fopen("arch\log2.txt", "ab");
fwrite($log2, "Para escribir;".$_POST['orden']."\n");
$res = $db->insert("buffer_upload", $insertDatos);
if ($res !== false) {
    fwrite($log, $_POST['orden'].";".$res."\n");
    echo("OK");
} else {
    // Log the error only after the insert has run, so it describes this failure.
    fwrite($log2, $db->getLastError()."\n");
    fwrite($log, $_POST['orden'].";error\n");
    echo("NOK");
}
fclose($log);
fclose($log2);
?>

This is the definition of the database table where I store the parts:

    -- Staging table: one row per uploaded portion of a file
    -- (rows are inserted by procesar.php and read back by generar.php).
    CREATE TABLE IF NOT EXISTS `buffer_upload` (
    `id` int(11) NOT NULL, -- NOTE(review): no PRIMARY KEY / AUTO_INCREMENT on the column although the table sets AUTO_INCREMENT=3; presumably added by a separate ALTER TABLE - confirm
    `nombrefisico` varchar(255) COLLATE latin1_general_ci NOT NULL, -- file name the portion belongs to
    `porcion` int(11) NOT NULL, -- portion index; generar.php orders by it when rebuilding the file
    `contenido` blob NOT NULL -- contents of the portion
    ) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;

And when all the parts have been uploaded, I invoke generar.php. This code reads all the parts stored in the database and generates the file in the web server with the original physical name:

generar.php

<?php
/**
 * generar.php - rebuilds an uploaded file from its portions in `buffer_upload`.
 *
 * Expects the POST field `nombre` (file name as sent by the browser; a leading
 * "C:\fakepath\" is removed, as procesar.php does when storing the portions).
 * The portions are read in ascending `porcion` order and written to a file
 * named after the base name of `nombre`. Answers "OK" on success and "NOK"
 * otherwise.
 */
include("include.php");

if (!isset($_POST['nombre']) || !is_string($_POST['nombre'])) {
    echo("NOK");
    exit;
}

// Key under which procesar.php stored the portions.
$nombre = str_replace('C:\fakepath\\', "", $_POST['nombre']);

// The name comes from the client: keep only its last path component so the
// request cannot choose a directory to write into.
$destino = basename($nombre);
if ($destino === '' || $destino === '.' || $destino === '..') {
    echo("NOK");
    exit;
}

$db = new MysqliDb(BD_HOST, BD_USER, BD_PASSWORD, BD_NAME);
$db->where('nombrefisico', $nombre);
$db->orderBy('porcion', 'asc');
$result = $db->get("buffer_upload");

if (empty($result)) {
    // Nothing was uploaded under this name; do not create or touch any file.
    echo("NOK");
    exit;
}

// "wb" replaces an existing file; appending would duplicate its contents
// every time the same name is generated again.
$archivo = fopen($destino, "wb");
if ($archivo === false) {
    echo("NOK");
    exit;
}

foreach ($result as $r) {
    fwrite($archivo, $r['contenido']);
}
fclose($archivo);

// Remove the consumed portions so a later upload with the same name does not
// get mixed with stale rows.
$db->where('nombrefisico', $nombre);
$db->delete("buffer_upload");

echo("OK");
?>

This works fine with plain text or CSV files, but when I attempt to upload a PDF, this is what I get:

Original PDF

%PDF-1.3
3 0 obj
<</Type /Page
/Parent 1 0 R
/Resources 2 0 R
/Contents 4 0 R>>
endobj
4 0 obj
<</Filter /FlateDecode /Length 383>>
stream
xœ}TAnA¼ï+ú‡-ìv·íæ Žh>0"›(ˆ$«ß§'°bÃbŸFêšrWÕxª–Ï;B·òs÷n)o>rq•å¶|X¶“QÑZ1p/    ËMyõþt¸9<~½_¿—ãzZË—õx\ïž^—åÛ“Cµ˜u?“>=ŸN?o‹‹v?¿ì4§w…÷Â^öÕ·ÇéPnwn =£µ—è 3ZAõºéÖpòhh#F
žpg#TÅÄ’™:jÆ6‡¦˜    Ä1›ç}ú—môÜ`-a+,¶Íì°8o®,®‚ž«ŠÊ±ò:Ð=–&M2JÛE¨ÿǶš±Æ0‰µ¹i’°ç/â  ìh‰ï΋zÍî
=Y‡n‰´>P=6¦sÏå*‰ïVÝŽb¶_„zíÛÖciÖÀ{ž'¾ÍÁ‰ogô³]ÒM…§‰o÷‹P¯Ù³Ô$ñ=f›&Ò†   ¢Q8ü¹·×"môÖÛ2i/TIÄÆ^εý<I
endstream
endobj
1 0 obj
<</Type /Pages
/Kids [3 0 R ]
/Count 1
/MediaBox [0 0 595.28 841.89]
>>
endobj
5 0 obj
<</Filter /FlateDecode /Length 364>>
stream
xœ]RËnƒ0¼ó>¦‡L‚%„DI8ô¡Ò~ %E*ràï»»vÒªHXã±gvVk?/¥îá¿™±©`]¯[óx5
ˆ3\zíÉP´}³¸¯ÍPOžâjJÝ^’øïx6/f›¬Ïðàù¯¦Óë‹Ø|æî«ë4}à z—¦¢…}žëé¥@ø,Û–- ž÷˺EÍïuò^Ú,ÍØÂ<Õ
˜Z_ÀK‚ IQ¤èößYdçîïÕCK€_ê%q„8>à!J"V!2&bGÄ£%r"H¢¬DîÈ\}2EL1n¥ºh¾jƒå–  eä»"a*H¬‹ØÕ:ÞÛdá˜î„9cÅüŽ[ÈX1~²¼"œ3¿gÏãÑò;Oâ•õ<r_ì)-ï<%açI9•󤶕󤜪  °8vÝs·4z0÷97WcpÄüªx¶4Õ^ÃýáMãD*ú 
¶©
endstream
endobj
6 0 obj
<</Type /Font
/BaseFont /Helvetica
/Subtype /Type1
/Encoding /WinAnsiEncoding
/ToUnicode 5 0 R
>>
endobj
2 0 obj
<<
/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
/Font <<
/F1 6 0 R
>>
/XObject <<
>>
>>
endobj
7 0 obj
<<
/Producer (FPDF 1.81)
/CreationDate (D:20160115163041)
>>
endobj
8 0 obj
<<
/Type /Catalog
/Pages 1 0 R
>>
endobj
xref
0 9
0000000000 65535 f 
0000000540 00000 n 
0000001174 00000 n 
0000000009 00000 n 
0000000087 00000 n 
0000000627 00000 n 
0000001061 00000 n 
0000001278 00000 n 
0000001354 00000 n 
trailer
<<
/Size 9
/Root 8 0 R
/Info 7 0 R
>>
startxref
1403
%%EOF

This is the uploaded file:

%PDF-1.3
3 0 obj
<</Type /Page
/Parent 1 0 R
/Resources 2 0 R
/Contents 4 0 R>>
endobj
4 0 obj
<</Filter /FlateDecode /Length 383>>
stream
x�}TAnA��+��-�v��� �h>0"�(�$�ߧ'�b�b�F��rW�x���;B��s�n)o>rq���|X��Q�Z1p/�My��t�9<~�_���zZ˗�x\��^����C��u?�>=�N?o��v?��4�w��^�շ��Pnwn =���� �3ZA����p�hh#F
�pg#T����:j�6���    �1��}��m��`-a+,����8o�,�����ʱ�:�=�&M2�J�E���Ƕš��0����i���/� �h����z��
=Y�n��>P=6�s��*��Vݎb�_�z���ci��{�'����og��]�M����o��P�ٳ�$�=f�&҆�Q8����"�m���2i/TI��^ε�<I
endstream
endobj
1 0 obj
<</Type /Pages
/Kids [3 0 R ]
/Count 1
/MediaBox [0 0 595.28 841.89]
>>
endobj
5 0 obj
<</Filter /FlateDecode /Length 364>>
stream
x�]R�n�0��>��L�%�DI�8���~ �%E*r�ﻻvҪHX��gvVk?/���Ῑ��`]�[�x5
�3\z��P�}����PO���j�Jݍ^���x6/f������������|����4}� z�����}����@�,ۖ-��˺E�u�^�,���<�
�Z_�K� IQ����Yd����C�K�_�%q�8>�!J"V!2&bGģ%r"H��D���\}2EL1n��h�j���e��"a*H����:��d����9c���[�X1~��"�3�g��Ñ�;O���<r_�)-�<%a�I9��8v�s�4z0�97Wcp���x�4�^���M�D*� 
��
endstream
endobj
6 0 obj
<</Type /Font
/BaseFont /Helvetica
/Subtype /Type1
/Encoding /WinAnsiEncoding
/ToUnicode 5 0 R
>>
endobj
2 0 obj
<<
/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
/Font <<
/F1 6 0 R
>>
/XObject <<
>>
>>
endobj
7 0 obj
<<
/Producer (FPDF 1.81)
/CreationDate (D:20160115163041)
>>
endobj
8 0 obj
<<
/Type /Catalog
/Pages 1 0 R
>>
endobj
xref
0 9
0000000000 65535 f 
0000000540 00000 n 
0000001174 00000 n 
0000000009 00000 n 
0000000087 00000 n 
0000000627 00000 n 
0000001061 00000 n 
0000001278 00000 n 
0000001354 00000 n 
trailer
<<
/Size 9
/Root 8 0 R
/Info 7 0 R
>>
startxref
1403
%%EOF

As you can see, there seems to be a problem with how the binary data is handled, but I really can't figure out where I'm messing up the character sets.

Daniel Coturel
  • 106
  • 1
  • 1
  • Are you sure you're not merely *interpreting* the file in the wrong encoding when looking at it?! I.e., there's absolutely nothing wrong here, you're merely trying to view a binary file as UTF-8, which doesn't make sense and produces garbage (different garbage than when you try to view it as Latin-1, but that's pretty irrelevant). – deceze Jan 26 '16 at 12:11
  • I think that's the key to the problem. What I don't know is which encoding is the correct one for transferring binary data through the database, and how to ensure that the POST is sent using that encoding. – Daniel Coturel Jan 26 '16 at 15:06
  • It's binary. It has no encoding. As long as you don't try to convert it using some encoding and always only treat it as a binary blob, there should be no issue. Your basic issue is that you try to look at the PDF contents as text, which it's not. Save it to disk and use a file checksum or hex editor to confirm whether it's identical to the original file. If that's so, then it doesn't matter what it looks like when viewed as text. – deceze Jan 26 '16 at 15:09
  • Thanks for both of your answers. It got me thinking and researching in documentation and this site. I tried to send the contents of each part as a Blob but it wasn't working fine. Finally I tried to encode it to base64 using the readAsDataURL method of the FileReader and it worked. This post was helpful: http://stackoverflow.com/questions/7431365/filereader-readasbinarystring-to-upload-files – Daniel Coturel Jan 26 '16 at 16:43

0 Answers0