I currently have a really basic MD5 hash algorithm working. (All I can call atm is MD5(const char*)) However, it's going to be limited to small files. i.e 32bit system will only give me up to 4GB or less of a file. Plus why in the world, would I want to load anything that's even close to 1GB into memory. ;) So that brings me to this question...
How can I hash large files? I noticed OpenSSL (Which I will use sometime in the future) use MD5 hash update function, when loading parts of a file into memory. So what exactly happens when 'updating' a MD5 hash? Is there an pseudo code on the internet to do this or examples anywhere?
P.S I'm sorta new to the crypto world. So forgive me if I ask any follow up questions to anyone's answer. Like to trying things out the hard way before I take the easy way. Best way to learn! ;)
My MD5 Header
#ifndef MD5_H
#define MD5_H
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#define MAX_MD5_HASH_LENGTH 32
typedef union uwb {
unsigned w;
unsigned char b[4];
} WBunion;
typedef unsigned Digest[4];
unsigned f0( unsigned abcd[] );
unsigned f1( unsigned abcd[] );
unsigned f2( unsigned abcd[] );
unsigned f3( unsigned abcd[] );
typedef unsigned (*DgstFctn)(unsigned a[]);
unsigned *calcKs( unsigned *k);
unsigned rol( unsigned v, short amt );
unsigned *md5( const char *msg, int mlen);
char* convertRawMd5HashToString(unsigned* rawMd5);
int isValidMd5(const char* md5String);
#endif
Source File
#include "md5.h"
unsigned *calcKs( unsigned *k)
{
double s, pwr;
int i;
pwr = pow( 2, MAX_MD5_HASH_LENGTH);
for (i=0; i<64; i++) {
s = fabs(sin(1+i));
k[i] = (unsigned)( s * pwr );
}
return k;
}
// ROtate v Left by amt bits
unsigned rol( unsigned v, short amt )
{
unsigned msk1 = (1<<amt) -1;
return ((v>>(MAX_MD5_HASH_LENGTH-amt)) & msk1) | ((v<<amt) & ~msk1);
}
unsigned *md5( const char *message, int messageLength)
{
static const Digest h0 = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
static const DgstFctn ff[] = { &f0, &f1, &f2, &f3 };
static const short M[] = { 1, 5, 3, 7 };
static const short O[] = { 0, 1, 5, 0 };
static const short rot0[] = { 7,12,17,22};
static const short rot1[] = { 5, 9,14,20};
static const short rot2[] = { 4,11,16,23};
static const short rot3[] = { 6,10,15,21};
static const short *rots[] = {rot0, rot1, rot2, rot3 };
static unsigned kspace[64];
static unsigned *k;
static Digest h;
Digest abcd;
DgstFctn fctn;
short m, o, g;
unsigned f;
short *rotn;
union {
unsigned w[16];
char b[64];
}mm;
int os = 0;
int grp, grps, q, p;
unsigned char *msg2;
if (k==NULL) k= calcKs(kspace);
for (q=0; q<4; q++) h[q] = h0[q]; // initialize
{
grps = 1 + (messageLength+8)/64;
msg2 = malloc( 64*grps);
memcpy( msg2, message, messageLength);
msg2[messageLength] = (unsigned char)0x80;
q = messageLength + 1;
while (q < 64*grps){ msg2[q] = 0; q++ ; }
{
WBunion u;
u.w = 8*messageLength;
q -= 8;
memcpy(msg2+q, &u.w, 4 );
}
}
for (grp=0; grp<grps; grp++)
{
memcpy( mm.b, msg2+os, 64);
for(q=0;q<4;q++) abcd[q] = h[q];
for (p = 0; p<4; p++) {
fctn = ff[p];
rotn = rots[p];
m = M[p]; o= O[p];
for (q=0; q<16; q++) {
g = (m*q + o) % 16;
f = abcd[1] + rol( abcd[0]+ fctn(abcd) + k[q+16*p] + mm.w[g], rotn[q%4]);
abcd[0] = abcd[3];
abcd[3] = abcd[2];
abcd[2] = abcd[1];
abcd[1] = f;
}
}
for (p=0; p<4; p++)
h[p] += abcd[p];
os += 64;
}
if( msg2 )
free( msg2 );
return h;
}
char* convertRawMd5HashToString(unsigned* rawMd5)
{
static char* outputBuffer[MAX_MD5_HASH_LENGTH];
memset(outputBuffer, 0, MAX_MD5_HASH_LENGTH);
int j, k;
WBunion u;
for (j=0;j<4; j++){
u.w = rawMd5[j];
for (k=0;k<4;k++) sprintf(outputBuffer, "%s%02x", outputBuffer, u.b[k]);
}
return outputBuffer;
}
unsigned f0( unsigned abcd[] ){
return ( abcd[1] & abcd[2]) | (~abcd[1] & abcd[3]); }
unsigned f1( unsigned abcd[] ){
return ( abcd[3] & abcd[1]) | (~abcd[3] & abcd[2]);}
unsigned f2( unsigned abcd[] ){
return abcd[1] ^ abcd[2] ^ abcd[3];}
unsigned f3( unsigned abcd[] ){
return abcd[2] ^ (abcd[1] |~ abcd[3]);}
int isValidMd5(const char* md5String)
{
if(strlen(md5String) != MAX_MD5_HASH_LENGTH)
return 0;
for (int i = 0; i < MAX_MD5_HASH_LENGTH; ++i) {
char c = tolower(md5String[i]);
if((c >= 'a' && c <= 'f') || isdigit(c)) {
continue;
} else {
return 0;
}
}
return 1;
}
I couldn't find the original author, but if anyone knows who written most of this snippet, let me know. Thanks. :)