1

When grabbing an email I run the gmail.users.messages.get() and then run the following two functions to process the payload.

/**
 * Extracts and decodes the body of a Gmail message payload.
 *
 * For a payload without `parts`, the body is read from `message.body.data`;
 * otherwise the HTML part is looked up with `getHTMLPart` (defined elsewhere
 * in this file). The base64url text is converted to standard base64, decoded,
 * and interpreted as UTF-8.
 *
 * @param {Object} message - Message payload (or part) with either `parts` or `body.data`.
 * @returns {string} The decoded body, or '' when no body data could be located.
 * @throws {Error} If the located data is not valid base64 or not valid UTF-8.
 */
function getBody(message) {
  let encodedBody = '';
  try {
    if (typeof message.parts === 'undefined') {
      encodedBody = message.body.data;
    } else {
      encodedBody = getHTMLPart(message.parts);
    }
  } catch (e) {
    // Deliberate best-effort: a payload that lacks the expected structure is
    // treated as having no body rather than aborting the caller.
    encodedBody = '';
  }

  // `body.data` is absent on parts whose content is stored as an attachment;
  // without this guard `atob(undefined)` would throw on the string "undefined".
  if (typeof encodedBody !== 'string' || encodedBody === '') {
    return '';
  }

  // base64url -> base64, and drop any whitespace embedded in the payload.
  const base64 = encodedBody.replace(/-/g, '+').replace(/_/g, '/').replace(/\s/g, '');

  // atob yields one char per byte; escape + decodeURIComponent re-reads those bytes as UTF-8.
  return decodeURIComponent(escape(atob(base64)));
}
/**
 * Recursively searches a list of message parts for the first `text/html`
 * part and returns its still-encoded body data.
 *
 * Parts that have their own `parts` array are descended into; if a nested
 * container holds no HTML part, the search continues with the remaining
 * siblings instead of stopping.
 *
 * @param {Array<Object>} arr - Message parts to search.
 * @returns {string} The `body.data` of the first HTML part found, or '' if none.
 */
function getHTMLPart(arr) {
  if (!Array.isArray(arr)) {
    return '';
  }

  for (const part of arr) {
    if (!part) {
      continue;
    }

    if (typeof part.parts === 'undefined') {
      const isHtml = part.mimeType === 'text/html';
      if (isHtml && part.body && typeof part.body.data === 'string') {
        return part.body.data;
      }
    } else {
      // Only stop when the nested container actually produced HTML.
      const nested = getHTMLPart(part.parts);
      if (nested !== '') {
        return nested;
      }
    }
  }

  return '';
}

I then save that data into an .html file for later use. The problem is that inline images aren't embedded with base64 or any other way to retrieve that data, but are instead embedded using a unique CID.

So what I need to do is, when retrieving the payload with the above functions, also retrieve each embedded image and save it locally as <CID>.png (or .jpg, or whatever the type is). I can then do a replace on the message to swap the CID reference in the HTML for the local path of the image.

So does anyone know how or have any advice on how to get those embedded images? Thanks in advance!

Harmonic
  • 367
  • 1
  • 3
  • 18

1 Answer

2

The images will be extracted into attachments. Look for the part in the response that includes the cid in the Content-ID or X-Attachment-Id headers, get the attachment, and insert the base64 data as the image source instead of the cid.

Example

// Sample message object used by the example below (presumably the result of
// a Gmail users.messages.get call). Its payload is a multipart/related
// container holding a multipart/alternative part (text/plain + text/html)
// and one inline image/png part whose data is referenced by attachmentId.
var response = {
 "id": "15ade50437b9aa01",
 "threadId": "15ade50437b9aa01",
 "labelIds": [
  "UNREAD",
  "IMPORTANT",
  "SENT",
  "INBOX"
 ],
 "snippet": "",
 "historyId": "1171380",
 "internalDate": "1489788486000",
 "payload": {
  "mimeType": "multipart/related",
  "filename": "",
  "headers": [
   {
    "name": "Content-Type",
    "value": "multipart/related; boundary=94eb2c034184892a95054af46913"
   }
  ],
  "body": {
   "size": 0
  },
  "parts": [
   {
    "mimeType": "multipart/alternative",
    "filename": "",
    "headers": [
     {
      "name": "Content-Type",
      "value": "multipart/alternative; boundary=94eb2c034184892a93054af46912"
     }
    ],
    "body": {
     "size": 0
    },
    "parts": [
     {
      "partId": "0.0",
      "mimeType": "text/plain",
      "filename": "",
      "headers": [
       {
        "name": "Content-Type",
        "value": "text/plain; charset=UTF-8"
       }
      ],
      "body": {
       "size": 25,
       "data": "W2ltYWdlOiBJbmZvZ2FkIGJpbGQgMV0NCg=="
      }
     },
     {
      "partId": "0.1",
      "mimeType": "text/html",
      "filename": "",
      "headers": [
       {
        "name": "Content-Type",
        "value": "text/html; charset=UTF-8"
       }
      ],
      "body": {
       "size": 106,
       // base64url-encoded HTML; its <img> src is "cid:ii_15ade502eea840ce".
       "data": "PGRpdiBkaXI9Imx0ciI-PGltZyBzcmM9ImNpZDppaV8xNWFkZTUwMmVlYTg0MGNlIiBhbHQ9IkluZm9nYWQgYmlsZCAxIiB3aWR0aD0iNTgiIGhlaWdodD0iNTQiPjxicj48L2Rpdj4NCg=="
      }
     }
    ]
   },
   {
    "partId": "1",
    "mimeType": "image/png",
    "filename": "smile.png",
    "headers": [
     {
      "name": "Content-Type",
      "value": "image/png; name=\"smile.png\""
     },
     {
      "name": "Content-Disposition",
      "value": "inline; filename=\"smile.png\""
     },
     {
      "name": "Content-Transfer-Encoding",
      "value": "base64"
     },
     {
      // The cid wrapped in angle brackets (\u003c is "<", \u003e is ">").
      "name": "Content-ID",
      "value": "\u003cii_15ade502eea840ce\u003e"
     },
     {
      // The same cid without the angle brackets.
      "name": "X-Attachment-Id",
      "value": "ii_15ade502eea840ce"
     }
    ],
    "body": {
     // No inline "data" here: the image bytes are referenced by this id only.
     "attachmentId": "ANGjdJ8Xh1_0DBjFbc2qKRHD8uTw-9nkPP30v-vohJforDg54EHPHf3Obd2P9W6Wfss0cwfmblQWi5F3958vcEi0HyiMNgpKJbsQAVP9viUOY4LzyxwAvR7-dis4PNGflBpkZFMHv62LGKkQ1-ZPG3Go_Xh_sXJUveHl4JjmwLpNp6LjlHzuA_3XOkY2LLQLFmXNTo_dJbqDQWvMb8UTGnATMOoTNKvNQ4Ndr9pgQYI1SBvtdThgUDmlOGKYLHM6qR4AlrNNFnPUCZZU-BB7o7Dt2dhj-kexiIdvaB2LEnoeCBth_oK9HELt2tw4rlY",
     "size": 8539
    }
   }
  ]
 },
 "sizeEstimate": 12800
};

/**
 * Finds the first `text/html` part of a message, searching the part tree
 * breadth-first from `res.payload`, and returns its decoded content.
 *
 * @param {Object} res - Message object with a `payload` part tree.
 * @returns {string} The decoded HTML, or '' when the tree has no HTML part.
 */
function getHtml(res) {
  let pending = [res.payload];

  // A cursor over a growing list visits parts in the same order as a FIFO queue.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const node = pending[cursor];

    if (node.parts) {
      pending = pending.concat(node.parts);
    }

    if (node.mimeType !== 'text/html') {
      continue;
    }

    // base64url -> base64, then decode the bytes and re-read them as UTF-8.
    const base64 = node.body.data.replace(/\-/g, '+').replace(/\_/g, '/');
    const byteString = atob(base64);
    return decodeURIComponent(escape(byteString));
  }

  return '';
}

/**
 * Looks up the attachment id of the part whose `Content-ID` or
 * `X-Attachment-Id` header value contains the given cid.
 *
 * Parts are visited breadth-first starting at `res.payload`; header names
 * are compared case-insensitively and the cid is matched as a substring.
 *
 * @param {Object} res - Message object with a `payload` part tree.
 * @param {string} cid - Content id to look for (without the `cid:` prefix).
 * @returns {string} `body.attachmentId` of the first matching part, or '' if none matches.
 */
function getAttachmentId(res, cid) {
  let pending = [res.payload];

  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const node = pending[cursor];

    if (node.parts) {
      pending = pending.concat(node.parts);
    }

    // Index this part's headers by lower-cased name (last occurrence wins).
    const headerByName = {};
    node.headers.forEach((header) => {
      headerByName[header.name.toLowerCase()] = header.value;
    });

    const contentId = headerByName['content-id'] || '';
    if (contentId.includes(cid)) {
      return node.body.attachmentId;
    }

    const xAttachmentId = headerByName['x-attachment-id'] || '';
    if (xAttachmentId.includes(cid)) {
      return node.body.attachmentId;
    }
  }

  return '';
}

// Decode the HTML body of the sample response and print it.
var html = getHtml(response);
console.log(html);
// Extract the cids and find the matching attachments in the response.
// Here the cid is hard-coded to the one used by the sample message.
var attachmentId = getAttachmentId(response, 'ii_15ade502eea840ce');
console.log(attachmentId);
// Remaining step (not implemented here): get the attachment from the
// Gmail API and replace the cid in the HTML with the base64 data.
Tholle
  • 108,070
  • 19
  • 198
  • 189
  • 1
    Thanks! I've already successfully extracted message attachments but didn't know inline were stored as attachments as well, and had no idea how to match the `cid` with the file name. This answer is super helpful, thanks! – Harmonic Mar 17 '17 at 23:04
  • @Harmonic Great! Happy to help. – Tholle Mar 17 '17 at 23:05