1

How do I extract metadata from ONLY JPEG images using Java? I tried the code from the question "Read Image Metadata from single file with Java":

import org.w3c.dom.*;

import java.io.*;
import java.util.*;
import javax.imageio.*;
import javax.imageio.stream.*;
import javax.imageio.metadata.*;

/**
 * Prints the ImageIO metadata of an image file to standard output as an
 * XML-like tree, one tree per metadata format reported by the reader.
 */
public class Metadata {

    /**
     * Entry point. Uses the first command-line argument as the image path when
     * one is given; otherwise falls back to the built-in sample path.
     *
     * @param args optional: {@code args[0]} is the path of the image to inspect
     */
    public static void main(String[] args) {
        Metadata meta = new Metadata();
        String filename = (args != null && args.length > 0)
                ? args[0]
                : "C:\\Users\\Public\\Pictures\\download.jpg";
        if (new File(filename).exists()) {
            meta.readAndDisplayMetadata(filename);
        } else {
            System.out.println("cannot find file: " + filename);
        }
    }

    /**
     * Reads the metadata of the first image in the given file and prints every
     * metadata format tree to standard output.
     *
     * <p>This method never throws: problems (unreadable file, no matching
     * reader, decoding failure) are reported as a single line on standard error.
     *
     * @param fileName path of the image file to inspect
     */
    void readAndDisplayMetadata(String fileName) {
        // try-with-resources releases the underlying file handle on every exit path;
        // a null resource is permitted and simply not closed.
        try (ImageInputStream iis = ImageIO.createImageInputStream(new File(fileName))) {
            if (iis == null) {
                // createImageInputStream signals "could not open" with null, not an exception
                System.err.println("cannot open image stream for: " + fileName);
                return;
            }

            Iterator<ImageReader> readers = ImageIO.getImageReaders(iis);
            if (!readers.hasNext()) {
                System.err.println("no image reader found for: " + fileName);
                return;
            }

            // pick the first available ImageReader
            ImageReader reader = readers.next();
            try {
                // attach source to the reader (seek-forward-only is enough for one pass)
                reader.setInput(iis, true);

                // read metadata of first image; may be null if none is available
                IIOMetadata metadata = reader.getImageMetadata(0);
                if (metadata == null) {
                    System.err.println("no metadata available for: " + fileName);
                    return;
                }

                String[] names = metadata.getMetadataFormatNames();
                if (names == null) {
                    return;
                }
                for (String name : names) {
                    System.out.println("Format name: " + name);
                    displayMetadata(metadata.getAsTree(name));
                }
            } finally {
                // release any native resources held by the reader
                reader.dispose();
            }
        } catch (IOException | RuntimeException e) {
            // Unchecked exceptions are caught as well so that this method keeps its
            // original best-effort, non-throwing behaviour for callers.
            System.err.println("cannot read metadata from " + fileName + ": " + e);
        }
    }

    /**
     * Prints the tree rooted at {@code root}, starting at indentation level 0.
     *
     * @param root root node of the metadata tree
     */
    void displayMetadata(Node root) {
        displayMetadata(root, 0);
    }

    /**
     * Prints four spaces per indentation level, without a trailing newline.
     *
     * @param level number of indentation steps to print
     */
    void indent(int level) {
        for (int i = 0; i < level; i++) {
            System.out.print("    ");
        }
    }

    /**
     * Recursively prints {@code node} as an XML-like element: its name, its
     * attributes, then its children one level deeper. Nodes without children
     * are printed as a self-closing tag.
     *
     * @param node  node to print
     * @param level indentation level of {@code node}
     */
    void displayMetadata(Node node, int level) {
        // print open tag of element
        indent(level);
        System.out.print("<" + node.getNodeName());
        NamedNodeMap map = node.getAttributes();
        if (map != null) {
            // print attribute values
            int length = map.getLength();
            for (int i = 0; i < length; i++) {
                Node attr = map.item(i);
                System.out.print(" " + attr.getNodeName()
                        + "=\"" + attr.getNodeValue() + "\"");
            }
        }

        Node child = node.getFirstChild();
        if (child == null) {
            // no children, so close element and return
            System.out.println("/>");
            return;
        }

        // children, so close current tag
        System.out.println(">");
        while (child != null) {
            // print children recursively
            displayMetadata(child, level + 1);
            child = child.getNextSibling();
        }

        // print close tag of element
        indent(level);
        System.out.println("</" + node.getNodeName() + ">");
    }
}

This is the code that I used. The output that I got is a little weird. The metadata that I want to extract is the filename, file type, MIME type, dimensions, encoding process, bits per sample, color channels, file size, JFIF version, resolution unit, X and Y resolution, as well as the MD5 and SHA-1 hash values.

OUTPUT:

Format name: javax_imageio_jpeg_image_1.0
<javax_imageio_jpeg_image_1.0>
<JPEGvariety>
    <app0JFIF majorVersion="1" minorVersion="1" resUnits="1" Xdensity="96"    Ydensity="96" thumbWidth="0" thumbHeight="0"/>
</JPEGvariety>
<markerSequence>
    <unknown MarkerTag="225"/>
    <unknown MarkerTag="225"/>
    <dqt>
        <dqtable elementPrecision="0" qtableId="0"/>
    </dqt>
    <dqt>
        <dqtable elementPrecision="0" qtableId="1"/>
    </dqt>
    <sof process="0" samplePrecision="8" numLines="239" samplesPerLine="211" numFrameComponents="3">
        <componentSpec componentId="1" HsamplingFactor="2" VsamplingFactor="2" QtableSelector="0"/>
        <componentSpec componentId="2" HsamplingFactor="1" VsamplingFactor="1" QtableSelector="1"/>
        <componentSpec componentId="3" HsamplingFactor="1" VsamplingFactor="1" QtableSelector="1"/>
    </sof>
    <dht>
        <dhtable class="0" htableId="0"/>
    </dht>
    <dht>
        <dhtable class="1" htableId="0"/>
    </dht>
    <dht>
        <dhtable class="0" htableId="1"/>
    </dht>
    <dht>
        <dhtable class="1" htableId="1"/>
    </dht>
    <sos numScanComponents="3" startSpectralSelection="0" endSpectralSelection="63" approxHigh="0" approxLow="0">
        <scanComponentSpec componentSelector="1" dcHuffTable="0" acHuffTable="0"/>
        <scanComponentSpec componentSelector="2" dcHuffTable="1" acHuffTable="1"/>
        <scanComponentSpec componentSelector="3" dcHuffTable="1" acHuffTable="1"/>
    </sos>
   </markerSequence>
</javax_imageio_jpeg_image_1.0>
Format name: javax_imageio_1.0
<javax_imageio_1.0>
<Chroma>
    <ColorSpaceType name="YCbCr"/>
    <NumChannels value="3"/>
</Chroma>
<Compression>
    <CompressionTypeName value="JPEG"/>
    <Lossless value="FALSE"/>
    <NumProgressiveScans value="1"/>
</Compression>
<Dimension>
    <PixelAspectRatio value="1.0"/>
    <ImageOrientation value="normal"/>
    <HorizontalPixelSize value="0.26458332"/>
    <VerticalPixelSize value="0.26458332"/>
</Dimension>
</javax_imageio_1.0>
Community
  • 1
  • 1
user3847620
  • 59
  • 4
  • 9
  • 3
    What metadata do you intend to extract? Please provide some code and more information to your question! – Daniel B Jul 17 '14 at 07:41
  • Some code that shows what you tried will be useful. Is this metadata the JPEG headers? If the headers is what you mean you can always extract them at the bit level. Just google how JPEG format is structured and read the bytes. – Razvan Jul 17 '14 at 07:43
  • i edited and added the codes i have :) – user3847620 Jul 17 '14 at 08:52
  • _"the output that i got is a little weird"_, could you show it please ? http://docs.oracle.com/javase/7/docs/api/javax/imageio/metadata/doc-files/jpeg_metadata.html gives information on the metadata handled by Java for the JPEG format – superbob Jul 17 '14 at 09:57
  • i added the output. pls help! – user3847620 Jul 17 '14 at 12:48
  • The output seems to conform to what is described in the last link I posted. There are 2 SGML structures that define the metadata. You have to parse them or use Apache Tika as suggested in @YMomb's answer – superbob Jul 17 '14 at 13:05

2 Answers2

2

You should probably have a look at Apache Tika.

YMomb
  • 2,366
  • 1
  • 27
  • 36
0

Check out metadata-extractor. It lets you read many kinds of image metadata from JPEG and other kinds of image files.

Drew Noakes
  • 300,895
  • 165
  • 679
  • 742