11

I'm trying to extract IPTC photo caption information from a JPEG file using Javascript. (I know I can do this server-side, but I'm looking specifically for a Javascript solution.)

I found this script, which extracts EXIF information ... but I'm not sure how to adapt it to grab IPTC data.

Are there any existing scripts that offer such functionality? If not, how would you modify the EXIF script to also parse IPTC data?

UPDATE

I've modified the EXIF script I linked above. It sorta does what I want, but it's not grabbing the right data 100 percent of the time.

After line 401, I added:

else if (iMarker == 237) {
        // 0xED = Application-specific 13 (Photoshop IPTC)
        if (bDebug) log("Found 0xFFED marker");
        // The payload handed to readIPTCData starts at iOffset + 4 and its length is
        // the short read at iOffset + 2 minus 2.
        // NOTE(review): presumably this skips the 2-byte marker plus the 2-byte length
        // field, and `true` selects big-endian in getShortAt — confirm against the
        // surrounding EXIF script.
        return readIPTCData(oFile, iOffset + 4, getShortAt(oFile, iOffset+2, true)-2);
}

And then elsewhere in the script, I added this function:

/**
 * Extracts the IPTC caption and by-line from a Photoshop APP13 segment.
 *
 * The segment is scanned for IPTC IIM datasets, each laid out as:
 *   0x1C | record number | dataset number | 2-byte big-endian length | value
 * Only application-record (record 2) datasets 120 (Caption/Abstract) and
 * 80 (By-line) are read. The value is taken using the length stored in the
 * dataset header rather than a fixed-size guess, so neighbouring datasets do
 * not bleed into the result.
 *
 * @param {*} oFile - Binary file handle accepted by getByteAt / getStringAt.
 * @param {number} iStart - Offset of the first payload byte of the segment.
 * @param {number} iLength - Payload length in bytes.
 * @returns {Object|boolean} Object with `ImageDescription` and `Artist`
 *   (each undefined when the dataset is absent), or false when the payload
 *   does not start with the "Photoshop" signature.
 */
function readIPTCData(oFile, iStart, iLength) {
    var IPTC_TAG_MARKER = 0x1C;
    var APPLICATION_RECORD = 2;
    var DATASET_HEADER_SIZE = 5; // marker + record + dataset + 2 length bytes
    var CAPTION_DATASET = 120;
    var BYLINE_DATASET = 80;

    var signature = getStringAt(oFile, iStart, 9);
    if (signature != "Photoshop") {
        if (bDebug) log("Not valid Photoshop data! " + signature);
        return false;
    }

    var caption;
    var credit;
    var segmentEnd = iStart + iLength;
    var pos = iStart;

    while (pos + DATASET_HEADER_SIZE <= segmentEnd) {
        var dataset = getByteAt(oFile, pos + 2);
        var isWantedDataset =
            getByteAt(oFile, pos) === IPTC_TAG_MARKER &&
            getByteAt(oFile, pos + 1) === APPLICATION_RECORD &&
            (dataset === CAPTION_DATASET || dataset === BYLINE_DATASET);

        if (!isWantedDataset) {
            pos++;
            continue;
        }

        var valueLength = (getByteAt(oFile, pos + 3) << 8) | getByteAt(oFile, pos + 4);
        var valueStart = pos + DATASET_HEADER_SIZE;

        // A set high bit means an extended-length dataset, which is not handled here;
        // a length running past the segment means this was not a real dataset header.
        if ((valueLength & 0x8000) !== 0 || valueStart + valueLength > segmentEnd) {
            pos++;
            continue;
        }

        var value = getStringAt(oFile, valueStart, valueLength);
        if (dataset === CAPTION_DATASET) {
            caption = value;
        } else {
            credit = value;
        }

        // Jump past the value so its bytes are never mistaken for a dataset header.
        pos = valueStart + valueLength;
    }

    var exif = {};
    exif['ImageDescription'] = caption;
    exif['Artist'] = credit;

    return exif;
}

So let me now modify my question slightly. How can the function above be improved?

jawns317
  • 1,726
  • 2
  • 17
  • 26
  • This will only be possible in browsers that support the new-ish HTML5 file APIs. That script you linked *appears* to get image data by fetching it from the server, which is probably not what you're wanting to do. – Pointy Apr 29 '11 at 14:05
  • Yes, that's perfectly fine -- I'm developing this for internal use only, and we've all got the latest browsers, so it's not a problem. – jawns317 Apr 29 '11 at 14:08
  • This isn't working for me. Do you have a jsfiddle example? – Mark Robson Jun 16 '14 at 16:53
  • For anyone else who DOES need cross-browser support, [jDataView](https://github.com/jDataView/jDataView) is a nice way to work with binary data in JavaScript. – abettermap Dec 02 '14 at 01:09

3 Answers

6

For what it's worth, I extrapolated on this a bit... I haven't done a whole lot of testing, but the few test images I have seem to work.

    // Set to true to log diagnostic messages while parsing.
    var bDebug = false;

    // Maps IPTC IIM application-record (record 2) dataset numbers to the property
    // names used in the parsed result. Dataset 80 is By-line and 85 is By-line Title.
    var fieldMap = {
        15 : 'category',
        25 : 'keywords',
        80 : 'byline',
        85 : 'bylineTitle',
        105 : 'headline',
        110 : 'credit',
        116 : 'copyright',
        120 : 'caption',
        122 : 'captionWriter'
    };

    /**
     * Parses IPTC fields out of a Photoshop APP13 segment.
     *
     * The segment is scanned for IPTC IIM datasets, each laid out as:
     *   0x1C | record number | dataset number | 2-byte big-endian length | value
     * Only application-record (record 2) datasets whose number is a key of
     * `fieldMap` are collected. Values are read using the length stored in the
     * dataset header, so the length bytes never leak into the value and a value
     * may contain any byte. Empty datasets are skipped. When a dataset number
     * occurs more than once, the last occurrence wins.
     *
     * @param {Object} oFile - Binary file wrapper exposing getByteAt(offset),
     *   getStringAt(offset, length) and getLength().
     * @param {number} iStart - Offset of the first payload byte of the segment.
     * @param {number} iLength - Payload length in bytes.
     * @returns {Object|boolean} Map of field name to string value, or false
     *   when the payload does not start with the "Photoshop" signature.
     */
    function readIPTCData(oFile, iStart, iLength) {
        var IPTC_TAG_MARKER = 0x1C;
        var APPLICATION_RECORD = 2;
        var DATASET_HEADER_SIZE = 5; // marker + record + dataset + 2 length bytes

        var signature = oFile.getStringAt(iStart, 9);
        if (signature != "Photoshop") {
            if (bDebug) console.log("Not valid Photoshop data! " + signature);
            return false;
        }

        var data = {};
        // Never trust the declared segment length beyond the end of the file.
        var segmentEnd = Math.min(iStart + iLength, oFile.getLength());
        var pos = iStart;

        while (pos + DATASET_HEADER_SIZE <= segmentEnd) {
            var datasetNumber = oFile.getByteAt(pos + 2);
            var isMappedDataset =
                oFile.getByteAt(pos) === IPTC_TAG_MARKER &&
                oFile.getByteAt(pos + 1) === APPLICATION_RECORD &&
                Object.prototype.hasOwnProperty.call(fieldMap, datasetNumber);

            if (!isMappedDataset) {
                pos++;
                continue;
            }

            var valueLength = (oFile.getByteAt(pos + 3) << 8) | oFile.getByteAt(pos + 4);
            var valueStart = pos + DATASET_HEADER_SIZE;

            // A set high bit means an extended-length dataset, which is not handled here;
            // a length running past the segment means this was not a real dataset header.
            if ((valueLength & 0x8000) !== 0 || valueStart + valueLength > segmentEnd) {
                pos++;
                continue;
            }

            if (valueLength > 0) {
                var value = oFile.getStringAt(valueStart, valueLength) || '';
                data[fieldMap[datasetNumber]] = value.replace(/\u0000/g, '').trim();
            }

            // Jump past the value so its bytes are never mistaken for a dataset header.
            pos = valueStart + valueLength;
        }

        return data;
    }

    /**
     * Walks the marker segments of a JPEG and parses the first APP13 (0xFFED)
     * segment with readIPTCData.
     *
     * @param {Object} oFile - Binary file wrapper exposing getByteAt(offset),
     *   getShortAt(offset, bigEndian) and getLength().
     * @returns {Object|boolean} Whatever readIPTCData returns for the APP13
     *   payload, or false when the file is not a JPEG, a marker is malformed,
     *   or no APP13 segment precedes the image data.
     */
    function findIPTCinJPEG(oFile) {
        var MARKER_PREFIX = 0xFF;
        var SOI_MARKER = 0xD8;   // start of image
        var EOI_MARKER = 0xD9;   // end of image (carries no length field)
        var SOS_MARKER = 0xDA;   // start of scan: entropy-coded data follows
        var APP13_MARKER = 0xED; // Photoshop / IPTC
        var SEGMENT_HEADER_SIZE = 4; // 0xFF + marker byte + 2 length bytes

        if (oFile.getByteAt(0) != MARKER_PREFIX || oFile.getByteAt(1) != SOI_MARKER) {
            return false; // not a valid jpeg
        }

        var iOffset = 2;
        var iLength = oFile.getLength();

        // Require a full segment header so no read lands past the end of the file.
        while (iOffset + SEGMENT_HEADER_SIZE <= iLength) {
            if (oFile.getByteAt(iOffset) != MARKER_PREFIX) {
                if (bDebug) console.log("Not a valid marker at offset " + iOffset + ", found: " + oFile.getByteAt(iOffset));
                return false; // not a valid marker, something is wrong
            }

            var iMarker = oFile.getByteAt(iOffset + 1);

            // Past SOS the bytes are compressed image data, not length-prefixed segments.
            if (iMarker == SOS_MARKER || iMarker == EOI_MARKER) {
                return false;
            }

            // The stored length counts its own two bytes but not the marker.
            var iSegmentLength = oFile.getShortAt(iOffset + 2, true);
            if (iSegmentLength < 2) {
                return false; // malformed length
            }

            if (iMarker == APP13_MARKER) {
                if (bDebug) console.log("Found 0xFFED marker");
                return readIPTCData(oFile, iOffset + SEGMENT_HEADER_SIZE, iSegmentLength - 2);
            }

            iOffset += 2 + iSegmentLength;
        }

        return false; // ran out of segments without finding APP13
    }

    // Create the IPTC namespace when nothing has defined it yet, so this
    // assignment does not throw a ReferenceError; an existing object is reused.
    var IPTC = IPTC || {};

    /**
     * Public entry point: reads IPTC fields from a JPEG binary file wrapper.
     *
     * @param {Object} oFile - Binary file wrapper passed straight to findIPTCinJPEG.
     * @returns {*} Whatever findIPTCinJPEG returns for the file.
     */
    IPTC.readFromBinaryFile = function(oFile) {
        return findIPTCinJPEG(oFile);
    };
user67821
  • 76
  • 1
  • 3
  • What is 'oFile' expected to be? Where is the definition for IPTC? Will it work for PNG as well? – ItayB Aug 19 '14 at 08:20
1

I'd like to suggest the exifr library, which works in both Node.js and the browser. It also supports the newer HEIC image format.

// Request IPTC explicitly through the options and log the parsed result.
exifr.parse(input, {iptc: true})
  .then(output => {
    console.log('IPTC', output);
  })
  .catch(error => {
    // Report read/parse failures instead of leaving the rejection unhandled.
    console.error('Failed to read IPTC data', error);
  });

It parses multiple data formats (TIFF/EXIF, ICC, IPTC, XMP, JFIF), but IPTC isn't enabled by default, so you need to enable it in the options, as shown in the example.

Mike Kovařík
  • 244
  • 2
  • 8
0

Well, this should get you going on creating your own javascript parser if you can't find a library that already does this.

http://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata%28200907%29_1.pdf

netbrain
  • 9,194
  • 6
  • 42
  • 68