22

I was hanging out in my profiler for a while trying to figure out how to speed up a common log parser which was bottlenecked around the date parsing, and I tried various algorithms to speed things up.

The thing I tried that was fastest for me was also by far the most readable, but potentially non-standard C.

This worked quite well in GCC, icc, and my really old and picky SGI compiler. As it's a quite readable optimization, where doesn't it do what I want?

static int parseMonth(const char *input) {
    int rv=-1;
    int inputInt=0;
    int i=0;

    for(i=0; i<4 && input[i]; i++) {
        inputInt = (inputInt << 8) | input[i];
    }

    switch(inputInt) {
        case 'Jan/': rv=0; break;
        case 'Feb/': rv=1; break;
        case 'Mar/': rv=2; break;
        case 'Apr/': rv=3; break;
        case 'May/': rv=4; break;
        case 'Jun/': rv=5; break;
        case 'Jul/': rv=6; break;
        case 'Aug/': rv=7; break;
        case 'Sep/': rv=8; break;
        case 'Oct/': rv=9; break;
        case 'Nov/': rv=10; break;
        case 'Dec/': rv=11; break;
    }
    return rv;
}
Peter Mortensen
  • 30,738
  • 21
  • 105
  • 131
Dustin
  • 89,080
  • 21
  • 111
  • 133

13 Answers13

23

Solaris 10 - SPARC - SUN Compiler.

Test code:

#include <stdio.h>

static int parseMonth(const char *input) {
    int rv=-1;
    int inputInt=0;
    int i=0;

    for(i=0; i<4 && input[i]; i++) {
        inputInt = (inputInt << 8) | input[i];
    }

    switch(inputInt) {
        case 'Jan/': rv=0; break;
        case 'Feb/': rv=1; break;
        case 'Mar/': rv=2; break;
        case 'Apr/': rv=3; break;
        case 'May/': rv=4; break;
        case 'Jun/': rv=5; break;
        case 'Jul/': rv=6; break;
        case 'Aug/': rv=7; break;
        case 'Sep/': rv=8; break;
        case 'Oct/': rv=9; break;
        case 'Nov/': rv=10; break;
        case 'Dec/': rv=11; break;
    }

    return rv;
}

static const struct
{
    char *data;
    int   result;
} test_case[] =
{
    { "Jan/", 0 },
    { "Feb/", 1 },
    { "Mar/", 2 },
    { "Apr/", 3 },
    { "May/", 4 },
    { "Jun/", 5 },
    { "Jul/", 6 },
    { "Aug/", 7 },
    { "Sep/", 8 },
    { "Oct/", 9 },
    { "Nov/", 10 },
    { "Dec/", 11 },
    { "aJ/n", -1 },
};

#define DIM(x) (sizeof(x)/sizeof(*(x)))

int main(void)
{
    size_t i;
    int    result;

    for (i = 0; i < DIM(test_case); i++)
    {
        result = parseMonth(test_case[i].data);
        if (result != test_case[i].result)
            printf("!! FAIL !! %s (got %d, wanted %d)\n",
                   test_case[i].data, result, test_case[i].result);
    }
    return(0);
}

Results (GCC 3.4.2 and Sun):

$ gcc -O xx.c -o xx
xx.c:14:14: warning: multi-character character constant
xx.c:15:14: warning: multi-character character constant
xx.c:16:14: warning: multi-character character constant
xx.c:17:14: warning: multi-character character constant
xx.c:18:14: warning: multi-character character constant
xx.c:19:14: warning: multi-character character constant
xx.c:20:14: warning: multi-character character constant
xx.c:21:14: warning: multi-character character constant
xx.c:22:14: warning: multi-character character constant
xx.c:23:14: warning: multi-character character constant
xx.c:24:14: warning: multi-character character constant
xx.c:25:14: warning: multi-character character constant
$ ./xx
$ cc -o xx xx.c
$ ./xx
!! FAIL !! Jan/ (got -1, wanted 0)
!! FAIL !! Feb/ (got -1, wanted 1)
!! FAIL !! Mar/ (got -1, wanted 2)
!! FAIL !! Apr/ (got -1, wanted 3)
!! FAIL !! May/ (got -1, wanted 4)
!! FAIL !! Jun/ (got -1, wanted 5)
!! FAIL !! Jul/ (got -1, wanted 6)
!! FAIL !! Aug/ (got -1, wanted 7)
!! FAIL !! Sep/ (got -1, wanted 8)
!! FAIL !! Oct/ (got -1, wanted 9)
!! FAIL !! Nov/ (got -1, wanted 10)
!! FAIL !! Dec/ (got -1, wanted 11)
$

Note that the last test case still passed - that is, it generated a -1.

Here's a revised - more verbose - version of parseMonth() which does work the same under both GCC and Sun C compiler:

#include <stdio.h>

/* MONTH_CODE("Jan/") does not reduce to an integer constant */
#define MONTH_CODE(x)   ((((((x[0]<<8)|x[1])<<8)|x[2])<<8)|x[3])

#define MONTH_JAN       (((((('J'<<8)|'a')<<8)|'n')<<8)|'/')
#define MONTH_FEB       (((((('F'<<8)|'e')<<8)|'b')<<8)|'/')
#define MONTH_MAR       (((((('M'<<8)|'a')<<8)|'r')<<8)|'/')
#define MONTH_APR       (((((('A'<<8)|'p')<<8)|'r')<<8)|'/')
#define MONTH_MAY       (((((('M'<<8)|'a')<<8)|'y')<<8)|'/')
#define MONTH_JUN       (((((('J'<<8)|'u')<<8)|'n')<<8)|'/')
#define MONTH_JUL       (((((('J'<<8)|'u')<<8)|'l')<<8)|'/')
#define MONTH_AUG       (((((('A'<<8)|'u')<<8)|'g')<<8)|'/')
#define MONTH_SEP       (((((('S'<<8)|'e')<<8)|'p')<<8)|'/')
#define MONTH_OCT       (((((('O'<<8)|'c')<<8)|'t')<<8)|'/')
#define MONTH_NOV       (((((('N'<<8)|'o')<<8)|'v')<<8)|'/')
#define MONTH_DEC       (((((('D'<<8)|'e')<<8)|'c')<<8)|'/')

static int parseMonth(const char *input) {
    int rv=-1;
    int inputInt=0;
    int i=0;

    for(i=0; i<4 && input[i]; i++) {
        inputInt = (inputInt << 8) | input[i];
    }

    switch(inputInt) {
        case MONTH_JAN: rv=0; break;
        case MONTH_FEB: rv=1; break;
        case MONTH_MAR: rv=2; break;
        case MONTH_APR: rv=3; break;
        case MONTH_MAY: rv=4; break;
        case MONTH_JUN: rv=5; break;
        case MONTH_JUL: rv=6; break;
        case MONTH_AUG: rv=7; break;
        case MONTH_SEP: rv=8; break;
        case MONTH_OCT: rv=9; break;
        case MONTH_NOV: rv=10; break;
        case MONTH_DEC: rv=11; break;
    }

    return rv;
}

static const struct
{
    char *data;
    int   result;
} test_case[] =
{
    { "Jan/", 0 },
    { "Feb/", 1 },
    { "Mar/", 2 },
    { "Apr/", 3 },
    { "May/", 4 },
    { "Jun/", 5 },
    { "Jul/", 6 },
    { "Aug/", 7 },
    { "Sep/", 8 },
    { "Oct/", 9 },
    { "Nov/", 10 },
    { "Dec/", 11 },
    { "aJ/n", -1 },
    { "/naJ", -1 },
};

#define DIM(x) (sizeof(x)/sizeof(*(x)))

int main(void)
{
    size_t i;
    int    result;

    for (i = 0; i < DIM(test_case); i++)
    {
        result = parseMonth(test_case[i].data);
        if (result != test_case[i].result)
            printf("!! FAIL !! %s (got %d, wanted %d)\n",
                   test_case[i].data, result, test_case[i].result);
    }
    return(0);
}

I wanted to use MONTH_CODE() but the compilers did not cooperate.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • 3
    Is SPARC is big-endian? If so, reversing the characters (or the loop ordering) ought to fix it. – Mr Fooz Nov 30 '08 at 01:34
  • I'm very intrigued at the difference between GCC and Sun C. Granted this is at best implementation defined behaviour, but normally, GCC emulates the native compilers pretty well. I don't regard it as a bug, though - not in either compiler. It is simply 'implementation specific' behaviour and AOK. – Jonathan Leffler Nov 30 '08 at 01:42
  • 1
    Also, I tried: "/naJ" wanting -1 and got back a 'test failure' with the function returning 0. So, 4-byte word reverse ordering lets the test pass. But it hardly resolves the problem - that the parseMonth() code is not reliable across platforms. – Jonathan Leffler Nov 30 '08 at 01:45
  • @Jonathan - agreed. People have already posted a couple faster solutions, or solutions just as pretty to my problem. I have no excuse to leave this code in, pretty as I think it is, when there are real compilers I might encounter that will fail on it. – Dustin Nov 30 '08 at 01:52
  • @Dustin: see my revision which provides a portable solution. – Jonathan Leffler Nov 30 '08 at 02:03
  • @Jonathan Great. Should MONTH_CODE be used on the input instead of the loop? – Dustin Nov 30 '08 at 02:08
  • @Dustin - yes, you could do that; it would probably be marginally faster. – Jonathan Leffler Nov 30 '08 at 02:26
  • A MONTH_CODE variant which probably would work: #define MONTH_CODE(a,b,c,d) (((uint32_t)(a) << 24) | ((uint32_t)(b) << 16) | ((uint32_t)(c) << 8) | (uint32_t)(d)) – CesarB Nov 30 '08 at 15:35
  • @CesarB; yes, that should work - invoked MONTH_CODE('J','a','n','/'); it would be tempting to hard-code the '/' and reduce the parameters to three. – Jonathan Leffler Nov 30 '08 at 16:55
  • Technically, of course, the Sun C compiler does compile the code fine; it just doesn't work the same as it does under GCC or on Intel chips. So, this isn't quite an accurate answer for the question. :D – Jonathan Leffler Dec 01 '08 at 01:32
13
if ( !input[0] || !input[1] || !input[2] || input[3] != '/' )
    return -1;

switch ( input[0] )
{
    case 'F': return 1; // Feb
    case 'S': return 8; // Sep
    case 'O': return 9; // Oct
    case 'N': return 10; // Nov
    case 'D': return 11; // Dec;
    case 'A': return input[1] == 'p' ? 3 : 7; // Apr, Aug
    case 'M': return input[2] == 'r' ? 2 : 4; // Mar, May
    default: return input[1] == 'a' ? 0 : (input[2] == 'n' ? 5 : 6); // Jan, Jun, Jul
}

Slightly less readable and not so much validating, but perhaps even faster, no?

paxdiablo
  • 854,327
  • 234
  • 1,573
  • 1,953
Vilx-
  • 104,512
  • 87
  • 279
  • 422
  • Typo on the last line ('a' ? 0 -- not 1), but yes, this is slightly over twice as fast on my machine. Where performance matters, you win. I'm wondering where this isn't readable enough, though. – Dustin Nov 30 '08 at 00:22
  • 2
    @Dustin, it's readable if you add comments explaining it - won't affect run speed, will make it as readable as yours for the next bod looking at it. I would make sure the 31-day months are listed first since that would give a tiny bit of extra speed (statistically). – paxdiablo Nov 30 '08 at 00:27
  • Assuming the case statements are compiled in the given order, that is. – paxdiablo Nov 30 '08 at 00:28
  • @Pax Agreed. I didn't find it particularly confusing and my test pointed out a typo that led to an incorrect return. This may just be a case where the right answer to my question is that I asked the wrong question. – Dustin Nov 30 '08 at 00:35
  • 8
    I'm not convinced that 'Zax' should be accepted as January; nor that Xxx should be accepted as July. – Jonathan Leffler Nov 30 '08 at 21:02
  • 1
    As I said - it's not validating. If you want validation, then these are note the lines are looking for. – Vilx- Dec 06 '08 at 16:47
  • OK - the original code was validating (rv is initialized to -1, though it would be clearer if that was in a default case in the switch), so you're addressing a different problem. – Jonathan Leffler Dec 13 '08 at 04:14
11

You're just computing a hash of those four characters. Why not predefine some integer constants that compute the hash in the same way and use those? Same readability and you're not depending on any implementation specific idiosyncrasies of the compiler.

uint32_t MONTH_JAN = 'J' << 24 + 'a' << 16 + 'n' << 8 + '/';
uint32_t MONTH_FEB = 'F' << 24 + 'e' << 16 + 'b' << 8 + '/';

...

static uint32_t parseMonth(const char *input) {
    uint32_t rv=-1;
    uint32_t inputInt=0;
    int i=0;

    for(i=0; i<4 && input[i]; i++) {
        inputInt = (inputInt << 8) | (input[i] & 0x7f); // clear top bit
    }

    switch(inputInt) {
        case MONTH_JAN: rv=0; break;
        case MONTH_FEB: rv=1; break;

        ...
    }

    return rv;
}
tvanfosson
  • 524,688
  • 99
  • 697
  • 795
  • I do like this one aesthetically. I think it looks just as nice as what I made, but can be made to work more consistently and without the occasional compiler warning. Thanks. – Dustin Nov 30 '08 at 01:54
  • Use macros; `#define MON_HASH(s) ((((s)[0]) & 0xff << 24) | (((s)[1]) & 0xff << 16) | (((s)[2]) & 0xff << 8) | (((s)[3]) & 0xff))` ... `switch (MON_HASH(input)) {` ... `case MON_HASH("Jan/")`. *NOTE* that using something like `gperf` to generate an 8-bit (or shorter) hash (`Vilx-` comes closest to what `gperf` does) will be MUCH more efficient than the `int` "hashes" above (a `switch` on an `int` will NOT be implemented with a jump table, whereas a `switch` on a `char` or `__int8` probably will.) – vladr Feb 27 '12 at 16:27
10

I only know what the C Standard says about this (C99):

The value of an integer character constant containing more than one character (e.g., 'ab'), or containing a character or escape sequence that does not map to a single-byte execution character, is implementation-defined. If an integer character constant contains a single character or escape sequence, its value is the one that results when an object with type char whose value is that of the single character or escape sequence is converted to type int.

(6.4.4.4/10 taken from a draft)

So it's implementation defined. Meaning it is not guaranteed it works the same everywhere, but the behavior must be documented by the implementation. For example if int is only 16 bits wide in a particular implementation, then 'Jan/' can't be represented anymore like you intend it (char must be at least 8 bits, while a character literal is always of type int).

Johannes Schaub - litb
  • 496,577
  • 130
  • 894
  • 1,212
  • It's theoretically wrong, I understand. I'm wondering if there are any C compilers where it doesn't work in practice. It's completely out of curiosity as it worked for me even on an ancient compiler that fails on over half the open source stuff I find. – Dustin Nov 30 '08 at 00:10
  • 1
    @Dustin, try compiling for PPC (non-Intel Mac OS X). I think that's big-endian. – strager Nov 30 '08 at 00:11
  • @strager Did... I ran this on my G3 as well as my SGI (which is sort of bidirectional, but runs IRIX in big-endian). – Dustin Nov 30 '08 at 00:14
  • @Dustin, I have an old 8051 compiler that fails to compile this - it only had 16-bit ints and it gags on the 'xxxx' constants (it would also fail at runtime with the shift-or code). In fact it also gags on 'xx' constants. – paxdiablo Nov 30 '08 at 00:21
  • Dustin. you will probably not figure out by compiler, but by their backends. i can change by GCC backend to make int 16bits wide, and then gcc-eco32 will miscompile it. – Johannes Schaub - litb Nov 30 '08 at 00:24
  • @Pax neat, thanks. I suppose I should've qualified it with a 32-bit compiler, but the second failure is half of the type of error I was looking for. – Dustin Nov 30 '08 at 00:26
  • In practice? For example the world's first C compiler? (16-bit ints.) – Windows programmer Dec 02 '08 at 04:11
6
char *months = "Jan/Feb/Mar/Apr/May/Jun/Jul/Aug/Sep/Oct/Nov/Dec/";
char *p = strnstr(months, input, 4);
return p ? (p - months) / 4 : -1;
Scott Evernden
  • 39,136
  • 15
  • 78
  • 84
  • I did that one as well. It's certainly smaller, but wasn't faster as it took considerably longer to parse December dates than January dates. I had one that would adapt the haystack to take advantage of the input being chronological as well. More complicated, but not really faster. – Dustin Nov 30 '08 at 01:57
  • that surprising as the strstr() should compile into nearly a single machine instruction on most PCs – Scott Evernden Nov 30 '08 at 04:17
  • @Dustin, I ran some benchmarks without optimization using your original method and Scott's method on my 5 year old computer. Scott found Jan ten million times in 1.6s and Dec in 8.3s, your program found both Jan and Dec in 2.6s. So while significantly clearer, Scott's method is a little slower... – Robert Gamble Nov 30 '08 at 06:27
  • in some cases. My question is this: what exactly are you doing that only being able to call this function a million times a second causes a bottleneck? – Robert Gamble Nov 30 '08 at 06:29
  • This is a neat solution; the only downside I see is that strnstr() is not a standard C function. – Jonathan Leffler Nov 30 '08 at 21:17
  • @Jonathan's right about strnstr, I tested with strstr instead which I forgot to mention. – Robert Gamble Dec 01 '08 at 02:12
  • 1
    @Robert, "ran some benchmarks without optimization" <-- the results are meaningless. Turn optimization on when you're benchmarking! – vladr Feb 27 '12 at 16:24
4

There are at least 3 things that keep this program from being portable:

  1. Multi-character constants are implementation-defined so different compilers may handle them differently.
  2. A byte can be more than 8 bits, there is plenty of hardware where the smallest addressable unit of memory is 16 or even 32 bits, you often find this in DSPs for example. If a byte is more than 8 bits then so will char since char is by definition one byte long; your program will not function properly on such systems.
  3. Lastly, there are many machines where int is only 16-bits (which is the smallest size allowed for int) including embedded devices and legacy machines, your program will fail on these machines as well.
Robert Gamble
  • 106,424
  • 25
  • 145
  • 137
  • My 64-bit processor has 32-bit int's, and it works fine. Maybe you meant less than? – strager Nov 30 '08 at 00:02
  • Really or the BYTE thing? AFAIK byte is by definition 8 bits – JaredPar Nov 30 '08 at 00:03
  • @JaredPar: A byte in C has at least 8 bits but may have more. There are plenty of systems around today (new ones too!) with bytes larger than 8 bits. – Robert Gamble Nov 30 '08 at 00:06
  • Okay, you're using the C definition (notoriously vague). I've gotten to the point now that I use explicit sized types in my code to stop confusing other people. Ex: __int16, __int32, etc ... – JaredPar Nov 30 '08 at 00:18
  • @JaredPar: There is plenty of hardware where the smallest addressable unit of memory is 16 or 32 bits, this is not just a C definition thing. On such systems a char will be 16 or 32 bits and an int may be only 1 byte. The world isn't a Windows PC ;) – Robert Gamble Nov 30 '08 at 00:32
  • @Robert Gamble, Doesn't the C spec say the size order must be: char <= short <= int <= long ? – strager Nov 30 '08 at 00:40
  • @strager, yes and on a system where a byte is 32 bits: char, short, int and long could all be 1 byte. – Robert Gamble Nov 30 '08 at 00:51
  • @strager: specifically, there were Cray machines where sizeof(char) == sizeof(short) == sizeof(int); I believe they were all 32-bit quantities. – Jonathan Leffler Nov 30 '08 at 21:15
4

National Instrument's CVI 8.5 for Windows compiler fails on your original code with multiple warnings:

  Warning: Excess characters in multibyte character literal ignored.

and errors of the form:

  Duplicate case label '77'.

It succeeds on Jonathan's code.

AShelly
  • 34,686
  • 15
  • 91
  • 152
2

I get warnings, but no errors (gcc). Seems to compile and operate fine. May not work for big-endian systems, though!

I wouldn't suggest this method, though. Perhaps you can xor instead of or-shift, to create a single byte. Then use the case statement on a byte (or, faster, use a LUT of the first N bits).

strager
  • 88,763
  • 26
  • 134
  • 176
  • That's effectively a perfect hash. It doesn't matter too much for this application, but the downside of a perfect hash is that it can return wrong answers for bad input. – Dustin Nov 30 '08 at 00:06
  • @Dustin, then you can do a strcmp once you have your possible answer to check it. Use a LUT for this. Easy. – strager Nov 30 '08 at 00:09
  • One of my earlier attempts at this was to write a program that took a set of words as input and generated a hierarchical case statement switching on letters before doing a strcmp. For example: http://pastebin.com/f30e09462 (optionally breaking the Js down further). – Dustin Nov 30 '08 at 00:13
  • @Dustin, you can perhaps break your switch up into further switches. This should be faster than calling memcmp. If you don't want to do that, you can take advantage of the fact that you already know the first character, and don't need to compare it. – strager Nov 30 '08 at 00:41
  • @strager I wrote this thing to build that for me: http://github.com/dustin/snippets/tree/master/python/misc/gensearch.py – Dustin Nov 30 '08 at 00:41
1

The fact that a four character constant is equivalent to an particular 32-bit integer is a non-standard feature often seen on compilers for MS Windows and Mac computers (and PalmOS, AFAICR).

On theses systems a four character string is commonly used as a tag for identifying chunks of data files, or as an application / data-type identifier (e.g. "APPL").

It's a convenience then for the developer that they can store such a string into various data-structures without worrying about zero-byte termination, pointers, etc.

Alnitak
  • 334,560
  • 70
  • 407
  • 495
1

Comeau compiler

Comeau C/C++ 4.3.10.1 (Oct  6 2008 11:28:09) for ONLINE_EVALUATION_BETA2
Copyright 1988-2008 Comeau Computing.  All rights reserved.
MODE:strict errors C99 

"ComeauTest.c", line 11: warning: multicharacter character literal (potential
          portability problem)
          case 'Jan/': rv=0; break;
               ^

"ComeauTest.c", line 12: warning: multicharacter character literal (potential
          portability problem)
          case 'Feb/': rv=1; break;
               ^

"ComeauTest.c", line 13: warning: multicharacter character literal (potential
          portability problem)
          case 'Mar/': rv=2; break;
               ^

"ComeauTest.c", line 14: warning: multicharacter character literal (potential
          portability problem)
          case 'Apr/': rv=3; break;
               ^

"ComeauTest.c", line 15: warning: multicharacter character literal (potential
          portability problem)
          case 'May/': rv=4; break;
               ^

"ComeauTest.c", line 16: warning: multicharacter character literal (potential
          portability problem)
          case 'Jun/': rv=5; break;
               ^

"ComeauTest.c", line 17: warning: multicharacter character literal (potential
          portability problem)
          case 'Jul/': rv=6; break;
               ^

"ComeauTest.c", line 18: warning: multicharacter character literal (potential
          portability problem)
          case 'Aug/': rv=7; break;
               ^

"ComeauTest.c", line 19: warning: multicharacter character literal (potential
          portability problem)
          case 'Sep/': rv=8; break;
               ^

"ComeauTest.c", line 20: warning: multicharacter character literal (potential
          portability problem)
          case 'Oct/': rv=9; break;
               ^

"ComeauTest.c", line 21: warning: multicharacter character literal (potential
          portability problem)
          case 'Nov/': rv=10; break;
               ^

"ComeauTest.c", line 22: warning: multicharacter character literal (potential
          portability problem)
          case 'Dec/': rv=11; break;
               ^

"ComeauTest.c", line 1: warning: function "parseMonth" was declared but never
          referenced
  static int parseMonth(const char *input) {
             ^
Alessandro Jacopson
  • 18,047
  • 15
  • 98
  • 153
  • Thanks for the compiler pointer. I'm not worried about compilers complaining about potential portability problems, just compilers producing code that behaves differently. – Dustin Nov 30 '08 at 20:14
0

Machine word size issues aside, your compiler may promote input[i] to a negative integer which will just set the upper bits of inputInt with or operation, so I suggest you to be explicit about signedness of char variables.

But since in US, no one cares about the 8th bit, it is probably a non-issue for you.

artificialidiot
  • 5,309
  • 29
  • 27
0

I'd sure love to see the profiling that shows this is your most significant bottleneck, but in any case if you're going to pull something like this, use a union instead of 50 instructions looping and shifting. Here's a little example program, I'll leave it to you to fit it into your program.

/* union -- demonstrate union for characters */

#include <stdio.h>

union c4_i {
    char c4[5];
    int  i ;
} ;

union c4_i ex;

int main (){
    ex.c4[0] = 'a';
    ex.c4[1] = 'b';
    ex.c4[2] = 'c';
    ex.c4[3] = 'd';
    ex.c4[4] = '\0';
    printf("%s 0x%08x\n", ex.c4, ex.i );
    return 0;
}

Here's example output:

bash $ ./union
abcd 0x64636261
bash $ 
Charlie Martin
  • 110,348
  • 25
  • 193
  • 263
  • I did a similar thing by just casting the char* to an int* and then reading from within it. That's where I got into endian problems. – Dustin Nov 30 '08 at 01:36
  • None of these things is going to be immune to problems with endian-ness, etc, plus this is vulnerable to someone mixing case. – Charlie Martin Nov 30 '08 at 20:56
0

As mentioned by others, that code throws a bunch of warnings and is probably not endian-safe.

Was your original date parser hand-written as well? Have you tried strptime(3)?

HUAGHAGUAH
  • 1,063
  • 6
  • 3