1

I have searched for how to parse an XML file using the Expat library, but found nothing useful for a beginner.

All I want is to read the three numbers 6, 7 and 8 from a configuration file.

<?xml version="1.0" encoding="UTF-8"?>
<Configuration>
    <board_height> 6 </board_height>
    <board_width> 7 </board_width>
    <maximum_highscores> 8 </maximum_highscores>    
</Configuration>
Mayar
  • 13
  • 1
  • 5

1 Answers1

5

Expat is a streaming XML parser. Ergo, it parses the document in chunks, and it's up to you (the programmer) to maintain state, monitor progress, and capture the data you need as it's parsed, not afterwards.

There are three primary handlers you need to build:

  1. XML_StartElementHandler
  2. XML_EndElementHandler
  3. XML_CharacterDataHandler

You'll find the function signatures for these in expat.h. The first function is invoked when a start tag is found (<tag>), the second is invoked when an end tag is found (</tag>), and the third receives the text between the tags. This is a rudimentary but fast parsing technique.

Here's a proof of concept using Expat 2.1:

#include <expat.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * printf length modifier used when printing the line number returned by
 * XML_GetCurrentLineNumber().  "l" is used unless XML_LARGE_SIZE is defined;
 * in that case "I64" is chosen when XML_USE_MSC_EXTENSIONS is defined and
 * _MSC_VER is below 1400, and "ll" otherwise.
 */
#if !defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "l"
#elif defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
#  define XML_FMT_INT_MOD "I64"
#else
#  define XML_FMT_INT_MOD "ll"
#endif

/* One configuration entry: an XML element name and the text captured for it. */
struct setting {
    const char *key;   /* element name this entry is matched against */
    char *value;       /* heap-allocated text stored by handler(); NULL until set */
};

/*
 * Table of recognised settings.  startElement() looks entries up with
 * bsearch() using key_cmp(), and bsearch() requires the array to be ordered
 * by key (strcmp() order) -- keep that in mind when adding entries.
 */
struct setting config[] = {
    { .key = "board_height",       .value = NULL },
    { .key = "board_width",        .value = NULL },
    { .key = "maximum_highscores", .value = NULL },
};

/*
 * Entry selected by the most recent start tag, or NULL when no known
 * element is open; handler() ignores text while this is NULL.
 */
struct setting *current_setting = NULL;

/*
 * Comparison callback for bsearch(): orders two struct setting objects by
 * their key strings.
 *
 * ld, rd  pointers to the struct setting objects being compared.
 * Returns a value <0, 0 or >0, exactly as strcmp() on the two keys does.
 */
int
key_cmp(void const *ld, void const *rd)
{
    const char *left_key = ((struct setting const *)ld)->key;
    const char *right_key = ((struct setting const *)rd)->key;

    return strcmp(left_key, right_key);
}

/*
 * Expat character-data callback: stores the text found inside the element
 * selected by current_setting.
 *
 * userData  unused.
 * s         text reported by Expat; it is NOT NUL-terminated.
 * len       number of characters available at s.
 *
 * Expat may report one contiguous run of text through several calls, so each
 * chunk is appended to the value collected so far rather than replacing it.
 * The stored value is always NUL-terminated and must be released with free().
 * If memory cannot be obtained the chunk is dropped and the value collected
 * so far is left untouched.
 *
 * NOTE(review): the copy treats XML_Char as char, as the original code did
 * (true for the default UTF-8 build of Expat) -- confirm if XML_UNICODE is used.
 */
void XMLCALL
handler(void *userData, const XML_Char *s, int len)
{
    (void)userData;

    if (len <= 0 || !current_setting) {
        return;
    }

    size_t old_len = current_setting->value ? strlen(current_setting->value) : 0;
    size_t add_len = (size_t)len;

    /* realloc(NULL, n) behaves like malloc(n), so the first chunk needs no special case. */
    char *grown = realloc(current_setting->value, old_len + add_len + 1);
    if (!grown) {
        return;
    }

    memcpy(grown + old_len, s, add_len);
    grown[old_len + add_len] = '\0';
    current_setting->value = grown;
}

/*
 * Expat start-tag callback: points current_setting at the config entry whose
 * key equals the element name, or sets it to NULL when the element is not a
 * known setting.
 *
 * userData  unused.
 * name      name of the element that was just opened.
 * atts      attribute name/value pairs; unused.
 *
 * Any value left over from an earlier occurrence of the same element is
 * released here, so a repeated element neither leaks the old buffer nor
 * mixes old text with new: the last occurrence wins.
 */
static void XMLCALL
startElement(void *userData, const char *name, const char **atts)
{
    (void)userData;
    (void)atts;

    struct setting key = { .key = name };
    current_setting = bsearch(&key, config, sizeof(config)/sizeof(config[0]), sizeof(config[0]), key_cmp);

    if (current_setting) {
        free(current_setting->value);
        current_setting->value = NULL;
    }
}

/*
 * Expat end-tag callback: clears current_setting on every closing tag, so
 * handler() ignores any text that follows until another known element opens.
 *
 * userData  unused.
 * name      name of the element being closed; unused.
 */
static void XMLCALL
endElement(void *userData, const char *name)
{
    (void)userData;
    (void)name;

    current_setting = NULL;
}

int
main(int argc, char *argv[])
{
    char buf[BUFSIZ];

    XML_Parser parser = XML_ParserCreate(NULL);

    int done;
    int depth = 0;

    XML_SetUserData(parser, &depth);
    XML_SetElementHandler(parser, startElement, endElement);
    XML_SetCharacterDataHandler(parser, handler);

    FILE *fp = fopen("config.xml", "r");

    do {
        int len = (int)fread(buf, 1, sizeof(buf), fp);
        done = len < sizeof(buf);
        if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
            fprintf(stderr,
                    "%s at line %" XML_FMT_INT_MOD "u\n",
                    XML_ErrorString(XML_GetErrorCode(parser)),
                    XML_GetCurrentLineNumber(parser));
            return 1;
        }
    } while (!done);

    XML_ParserFree(parser);

    int i;
    for (i = 0; i < (sizeof(config)/sizeof(config[0])); i++) {
        struct setting current = config[i];
        printf("%s: %s\n", current.key, current.value);
        free(current.value);
    }

    return 0;
}

This program opens a file config.xml for reading, sets the appropriate Expat handlers, and duplicates the "value" strings it finds (assuming the key is the XML tag).

The code provided is merely an example. I'm not a professional C programmer.

Jacob Budin
  • 9,753
  • 4
  • 32
  • 35
  • Thanks so much for your detailed answer, but when I build the program I get an "undefined reference" error for XML_Parser, although I have included expat.h. Do you know where the problem is? – Mayar Dec 29 '14 at 17:46
  • @Mayar You've run `./configure && make installlib` on the Expat library to install it, right? If you're new to C, I'd recommend this resource: ["The Definitive C Book Guide and List"](http://stackoverflow.com/questions/562303/the-definitive-c-book-guide-and-list). – Jacob Budin Dec 30 '14 at 00:47
  • The program prints null as output if more than three keys are listed. – Ashwant Manikoth Aug 10 '19 at 04:48