6

I want to read an XML file into a char *buffer using C.

What is the best way to do this?

How should I get started?

Dinah
  • 52,922
  • 30
  • 133
  • 149
UcanDoIt
  • 1,775
  • 7
  • 20
  • 27
  • I think you have your levels of abstraction confused here. You're asking specifically about XML file, but XML file is no different from any other random access file or a byte stream. Please be sure that you're approaching your problem at the right level of abstraction. –  Dec 19 '08 at 19:40
  • Reading a file in C is fairly straightforward. Processing the XML tags is another problem entirely. – Anthony Giorgio Dec 19 '08 at 19:44
  • 2
    I'm not sure why this has been down-voted. It's a reasonable question. He's not asking to be handed the code on a platter -- he's (or she's) asking how to get started. Surely there are libraries out there and people with experience of importing XML files? – Ben Dec 19 '08 at 22:01
  • I strongly believe that people who downvote should add a comment but, in this case, it may be because reading a file into a char *buffer is not XML-specific at all. The OP probably wanted to *parse* the file but got the vocabulary wrong. – bortzmeyer Dec 20 '08 at 11:33

8 Answers8

12

And if you want to parse XML, not just reading it into a buffer (something which would not be XML-specific, see Christoph's and Baget's answers), you can use for instance libxml2:

#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>

/*
 * Parse the XML file named by argv[1] with libxml2 and print the root
 * element followed by each of its direct children (name and node type).
 *
 * Returns 0 on success and 1 on a usage error, when the file cannot be
 * parsed, or when the parsed document has no root element.
 */
int main(int argc, char **argv) {
   xmlDoc *document;
   xmlNode *root, *node;
   const char *filename;

   if (argc < 2) {
     fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
     return 1;
   }
   filename = argv[1];

   /* xmlReadFile() yields NULL when the file is unreadable or malformed. */
   document = xmlReadFile(filename, NULL, 0);
   if (document == NULL) {
     fprintf(stderr, "Could not parse %s\n", filename);
     return 1;
   }

   /* An empty document has no root element; do not dereference NULL. */
   root = xmlDocGetRootElement(document);
   if (root == NULL) {
     fprintf(stderr, "%s has no root element\n", filename);
     xmlFreeDoc(document);
     return 1;
   }

   fprintf(stdout, "Root is <%s> (%i)\n", root->name, root->type);
   for (node = root->children; node; node = node->next) {
     fprintf(stdout, "\t Child is <%s> (%i)\n", node->name, node->type);
   }
   fprintf(stdout, "...\n");

   /* Release the tree built by the parser. */
   xmlFreeDoc(document);
   return 0;
}

On a Unix machine, you typically compile the above with:

% gcc -o read-xml $(xml2-config --cflags) -Wall read-xml.c $(xml2-config --libs)
bortzmeyer
  • 34,164
  • 12
  • 67
  • 91
  • 1.0E99 thanks ! The code *and the compilation flags* are a great example. Can't believe no one flagged this as useful in 8 years? – phs Feb 24 '17 at 15:16
7

Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, to give just one example (notably, one that is implemented in C).

Nietzche-jou
  • 14,415
  • 4
  • 34
  • 45
4

Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:

#include <stdlib.h>
#include <stdio.h>

/*
 * Report the length of an open stream in bytes by seeking to its end.
 *
 * On success the stream is repositioned to its beginning and the length is
 * returned. If any seek or the position query fails, -1 is returned.
 */
long fsize(FILE * file)
{
    long length;

    if(fseek(file, 0, SEEK_END) != 0)
        return -1;

    /* The offset at end-of-file is the length; then rewind for the caller. */
    length = ftell(file);
    if(length < 0 || fseek(file, 0, SEEK_SET) != 0)
        return -1;

    return length;
}

/*
 * Read the whole file `name` into a freshly allocated, NUL-terminated buffer.
 *
 * str   - receives the buffer (caller must free() it), or NULL when the file
 *         could not be opened, sized, or the buffer could not be allocated.
 * name  - path of the file; it is opened in binary mode.
 * error - optional (may be NULL); set to 0 only when the complete file was
 *         read, and to 1 on any failure, including allocation failure or a
 *         short read.
 *
 * Returns the number of bytes read, not counting the terminating NUL.
 */
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
    FILE * file = NULL;
    size_t read = 0;
    *str = NULL;
    if(error) *error = 1;

    do
    {
        file = fopen(name, "rb");
        if(!file) break;

        long size = fsize(file);
        if(size < 0) break;

        /* One extra byte for the terminating NUL. */
        char * buffer = malloc((size_t)size + 1);
        if(!buffer) break;  /* *error stays 1: nothing was read */

        read = fread(buffer, 1, (size_t)size, file);
        buffer[read] = 0;

        /* Trim after a short read. Go through a temporary so that a failed
           realloc() keeps the still-valid original buffer instead of leaking
           it and handing NULL to the caller. */
        char * trimmed = realloc(buffer, read + 1);
        *str = trimmed ? trimmed : buffer;

        if(error) *error = (read != (size_t)size);
    }
    while(0);

    if(file) fclose(file);
    return read;
}
Christoph
  • 164,997
  • 36
  • 182
  • 240
3
  1. Install libxml2 as a NuGet package in Visual studio(I am using Vs 2015 to test this)
  2. Copy and paste the contents under example XML file in a notepad and save the file as example.xml
  3. Copy and paste the code under //xml parsing into VS
  4. Call the function from main with xml file name as an argument
  5. You will be getting the xml data in configReceive

That's all...

example XML file:

<?xml version="1.0" encoding="utf-8"?>
    <config>
        <xmlConfig value1="This is a simple XML parsing program in C"/>
        <xmlConfig value2="Thank you : Banamali Mishra"/>
        <xmlConfig value3="2000000"/>
        <xmlConfig value4="80"/>
        <xmlConfig value5="10"/>
        <xmlConfig value6="1"/>
    </config>

Here is the source code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>

char configReceive[6][80] = { " " };

//xml parsing
/*
 * Parse the XML file `filename` and copy attribute values from the
 * <xmlConfig> children of the root element into the global configReceive
 * table. The n-th <xmlConfig> element is asked for attribute "value<n>"
 * and the result is stored in configReceive[n-1].
 *
 * Values longer than a configReceive row are truncated. An element that
 * lacks the expected attribute leaves its row untouched. Elements beyond
 * the number of known attribute names are ignored. If the file cannot be
 * parsed or has no root element, configReceive is left unchanged.
 */
void ParsingXMLFile(char *filename) {
    static const char *const config[] = {
        "value1", "value2", "value3", "value4", "value5", "value6"
    };
    const size_t name_count = sizeof config / sizeof config[0];
    const size_t row_count = sizeof configReceive / sizeof configReceive[0];
    xmlDocPtr    doc;
    xmlNodePtr   cur;
    size_t       slot = 0;

    doc = xmlParseFile(filename);
    if (doc == NULL) {
        return;
    }

    cur = xmlDocGetRootElement(doc);
    if (cur != NULL) {
        cur = cur->xmlChildrenNode;
    }

    /* Stop once every name/row has been used so neither array is overrun. */
    for (; cur != NULL && slot < name_count && slot < row_count; cur = cur->next) {
        xmlChar *value;

        if (xmlStrcmp(cur->name, (const xmlChar *)"xmlConfig") != 0) {
            continue;
        }

        /* xmlGetProp() returns NULL when the attribute is absent. */
        value = xmlGetProp(cur, (const xmlChar *)config[slot]);
        if (value != NULL) {
            /* Bounded copy: always NUL-terminated, truncates if too long. */
            snprintf(configReceive[slot], sizeof configReceive[slot],
                     "%s", (const char *)value);
            xmlFree(value);
        }
        slot++;
    }

    xmlFreeDoc(doc);
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
2

You can use the stat() function to get the file size, then allocate a buffer with malloc() and read the file into it with fread().

The code will look something like this:

struct stat file_status;
char *buf = NULL;
FILE * pFile;

stat("tmp.xml", &file_status);
buf = (char*)malloc(file_status.st_size);
pFile = fopen ("tmp.xml","r");
fread (buf,1,file_status.st_size,pFile);

fclose(pFile);
Baget
  • 3,318
  • 1
  • 24
  • 44
  • You shouldn't cast malloc(), and should be checking return values. Additionally, stat() is non-standard (I mean the ISO C standard). – Nietzche-jou Dec 19 '08 at 17:01
  • @sgm: Revise the code and post it, along with your changes and why you made them. I would be happy to vote it up. – GEOCHET Dec 19 '08 at 17:02
  • The answer is correct but probably not what the OP wanted :-) He probably wanted to *parse* the XML file. – bortzmeyer Dec 20 '08 at 11:31
  • @sgm: casting malloc is only incorrect in C90. You should cast it to be compatible with C++, and there's no danger in C99 since implicit function declarations are no longer legal. – Adam Rosenfield Dec 21 '08 at 07:08
1

Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.

N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.

(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)

Comments on this will be addressed in approximately eight hours.

#include <stdio.h>
#include <stdlib.h>


/*
 * Read the file named by argv[1] into a heap buffer one character at a
 * time, growing the buffer in BUFSIZ steps, then NUL-terminate it and
 * print it with puts().
 *
 * Exits with EXIT_FAILURE if no filename is given, the file cannot be
 * opened, memory runs out, or a read error occurs; returns 0 otherwise.
 * Extra arguments beyond the first filename are reported and ignored.
 */
int main (int argc, char *argv[]) {
 char   *buffer;        /* holds the file contents. */
 size_t  i;             /* indexing into buffer. */
 size_t  buffer_size;   /* size of the buffer. */
 char   *temp;          /* for realloc(). */
 int     c;             /* fgetc() result: must be int to tell EOF from data. */
 FILE   *input;         /* our input stream. */


 if (argc < 2) {
      fprintf(stderr, "Needs a filename argument.\n");
      exit(EXIT_FAILURE);
 }
 else if (argc > 2) {
      fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
 }

 if ((input = fopen(argv[1], "r")) == NULL) {
      fprintf(stderr, "Error opening input file %s\n", argv[1]);
      exit(EXIT_FAILURE);
 }

 /* Initial allocation of buffer */
 i = 0;
 buffer_size = BUFSIZ;
 if ((buffer = malloc(buffer_size)) == NULL) {
      fprintf(stderr, "Error allocating memory (before reading file).\n");
      fclose(input);
      /* Without a buffer there is nothing to do; do not fall through. */
      exit(EXIT_FAILURE);
 }

 while ((c = fgetc(input)) != EOF) {
      /* Enlarge buffer if necessary. */
      if (i == buffer_size) {
       buffer_size += BUFSIZ;
       if ((temp = realloc(buffer, buffer_size)) == NULL) {
        fprintf(stderr, "Ran out of core while reading file.\n");
        fclose(input);
        free(buffer);
        exit(EXIT_FAILURE);
       }
       buffer = temp;
      }

      /* Add input char to the buffer. */
      buffer[i++] = (char)c;
 }

 /* Test if loop terminated from error. */
 if (ferror(input)) {
      fprintf(stderr, "There was a file input error.\n");
      free(buffer);
      fclose(input);
      exit(EXIT_FAILURE);
 }

 /* Make the buffer a bona-fide string. */
 if (i == buffer_size) {
      buffer_size += 1;
      if ((temp = realloc(buffer, buffer_size)) == NULL) {
       fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
       fclose(input);
       free(buffer);
       exit(EXIT_FAILURE);
      }
      buffer = temp;
 }
 buffer[i] = '\0';

 puts(buffer);

 /* Clean up. */
 free(buffer);
 fclose(input);

 return 0;
}
Nietzche-jou
  • 14,415
  • 4
  • 34
  • 45
0

I believe the question was about XML parsing and not about file reading; however, the OP should really clarify this.
Anyway, you now have plenty of examples of how to read a file.
Another option for XML parsing, in addition to sgm's suggestion, is the Expat library.

Community
  • 1
  • 1
Ilya
  • 3,104
  • 3
  • 23
  • 30
0

Suggestion: Use memory mapping

This has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it. Here's an implementation I made earlier:

mmap.h

#ifndef MMAP_H
#define MMAP_H

#include <sys/types.h>

/* A contiguous memory region: `size` bytes starting at `head`. */
struct region_t {
  void *head;   /* first byte of the region */
  off_t size;   /* length of the region in bytes */
};

/*
 * True when pointer p lies outside [head, head + size) of region *reg.
 * Arithmetic is done on char pointers because arithmetic on void * is a
 * compiler extension, not standard C.
 */
#define OUT_OF_BOUNDS(reg, p) \
  (((const char *)(p) < (const char *)(reg)->head) || \
   ((const char *)(p) >= (const char *)(reg)->head + (reg)->size))

/*
 * Print head, size and one-past-the-end address of region *reg.
 * The size is cast to long long because off_t need not be long.
 * The trailing semicolon is kept so existing uses written without one
 * still compile.
 */
#define REG_SHOW(reg) \
  printf("h: %p, s: %lld (e: %p)\n", (void *)(reg)->head, \
         (long long)(reg)->size, \
         (void *)((char *)(reg)->head + (reg)->size));

struct region_t *do_mmap(const char *fn);
#endif

mmap.c

#include <stdlib.h>

#include <sys/types.h>  /* open lseek             */
#include <sys/stat.h>   /* open                   */
#include <fcntl.h>      /* open                   */
#include <unistd.h>     /*      lseek             */
#include <sys/mman.h>   /*            mmap        */

#include "mmap.h"

/*
 * Map the file `fn` read-only (PROT_READ, MAP_PRIVATE) into memory.
 *
 * Returns a calloc'ed region_t whose head/size describe the mapping, or
 * NULL if allocation, open(), lseek() or mmap() fails. The file descriptor
 * is closed before returning in every case.
 */
struct region_t *do_mmap(const char *fn)
{
  struct region_t *R = calloc(1, sizeof *R);
  int fd;

  if(R == NULL)
    return NULL;

  fd = open(fn, O_RDONLY);
  if(fd == -1)
    goto fail_free;

  /* Seeking to the end yields the file length. */
  R->size = lseek(fd, 0, SEEK_END);
  if(R->size == -1)
    goto fail_close;

  /* mmap() signals failure with MAP_FAILED, not NULL. */
  R->head = mmap(NULL, (size_t)R->size, PROT_READ, MAP_PRIVATE, fd, 0);
  if(R->head == MAP_FAILED)
    goto fail_close;

  close(fd); /* don't need the file descriptor anymore. */
  return R;

fail_close:
  close(fd);
fail_free:
  free(R);
  return NULL;
}
Anders Eurenius
  • 4,170
  • 2
  • 24
  • 20