1

I'm writing a program that use OpenXML to read an excel file. The file contains also dates, so i need a function that recognize them. Here on stackoverflow i've found this solution: How to distinguish inline numbers from OLE Automation date numbers in OpenXML SpreadSheet? .

I've tryed to apply it to my code, but every time I get the following error:

"An unhandled exception of type 'System.NullReferenceException' occurred in OpenXML_Prova.exe

Additional information: Object reference not set to an instance of an object."

on the first line of the GetDateTimeCellFormats, in which it seems that NumberingFormats is always null.

Here is the complete code:

using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;



namespace OpenXML_Prova
{
    class Program
    {

        DocumentFormat.OpenXml.Spreadsheet.NumberingFormats numb = new NumberingFormats();

        static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, /*27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 */};
        static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
        static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);

        static void Main(string[] args) {
            Program prova = new Program();
            prova.ReadFile();
        }

        void ReadFile()
        {

            var filePath = @"C:\\Users\\m.p\\Desktop\\report_fatturato_brevissimo.xlsx";
            using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false))
                {
                    WorkbookPart workbookPart = doc.WorkbookPart;
                    SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First();
                    SharedStringTable sst = sstpart.SharedStringTable;

                    WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
                    Worksheet sheet = worksheetPart.Worksheet;

                    var cells = sheet.Descendants<Cell>();
                    var rows = sheet.Descendants<Row>();

                    Console.WriteLine("Row count = {0}", rows.LongCount());
                    Console.WriteLine("Cell count = {0}", cells.LongCount());

                    // Or... via each row
                    foreach (Row row in rows)
                    {
                        foreach (Cell c in row.Elements<Cell>())
                        {
                            if ((c.DataType != null) && (c.DataType == CellValues.SharedString))
                            {
                                int ssid = int.Parse(c.CellValue.Text);
                                string str = sst.ChildElements[ssid].InnerText;
                                Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */str + "\t");
                            }
                            else
                            {
                                if (IsDateTimeCell(workbookPart, c) == true)
                                {
                                    int ssid = int.Parse(c.CellValue.Text);
                                    DateTime date = DateTime.FromOADate(Double.Parse(c.CellValue.Text));
                                    Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */date + "\t");
                                }
                                else if (c.CellValue != null)
                                {
                                    Console.Write/*Line*/(/*"Cell contents: {0}", */c.CellValue.Text + "\t");
                                }
                            }
                        }
                        Console.WriteLine("");
                    }
                }
            }
            Console.Read();
        }


        public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart wbPart)
        {

//Error on the following instruction: NumberingFormats seems to be always null 
            var dateNumberFormats = wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats
                .Descendants<NumberingFormat>()
                .Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
                .ToDictionary(nf => nf.NumberFormatId.Value);

            var cellFormats = wbPart.WorkbookStylesPart.Stylesheet.CellFormats
                .Descendants<CellFormat>();

            var dateCellFormats = new Dictionary<uint, NumberingFormat>();
            uint styleIndex = 0;
            foreach (var cellFormat in cellFormats)
            {
                if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
                {
                    if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                    {
                        dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
                    }
                    else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                    {
                        dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
                    }
                }

                styleIndex++;
            }

            return dateCellFormats;
        }

        //Usage Example
        public static bool IsDateTimeCell(WorkbookPart wbPart, Cell cell)
        {
            if (cell.StyleIndex == null)
                return false;

            var dateTimeCellFormats = GetDateTimeCellFormats(wbPart);

            return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
        }


    }
}
Massimo P.
  • 55
  • 1
  • 10

2 Answers2

1

OpenXML has 164 built-in formats that Excel uses for basic formats in spreadsheet cells. The default DateTime formats are part of these built-in types.

Only if you create new format with Excel (custom format), will these be included in the NumberingFormats section of the Stylesheet.

I suggest you wrap your code like below to check for NumberingFormats, to avoid the NullReferenceException

if (wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats != null) {

Then you will need to add some code to detect for a datetime format. This SO article tries to determine the possible datetime styles. See some of the comments and the first link above for references to the specification which provides more detail.

Taterhead
  • 5,763
  • 4
  • 31
  • 40
0

OpenXML has 164 built-in formats that Excel uses for basic formats in spreadsheet cells. The default DateTime formats are part of these built-in types.

Only if you create new format with Excel (custom format), these will be included in the NumberingFormats section of the Stylesheet.

I suggest you wrap your code like below to check for NumberingFormats, to avoid the NullReferenceException

if (wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats != null)
        {

Then you will need to add some code to detect for a datetime format.

Sample code:

        if (cellFormat?.NumberFormatId == 14)
        {
            double d = double.Parse(cell.CellValue.InnerText);

            string val = DateTime.FromOADate(d).ToString();
            return val;
        }

As per this link - NumberFormatId 14 is a datetime number format with

MM-DD-YY format. We can use the above sample to convert it to datetime value.

This SO article tries to determine the possible datetime styles. See some of the comments and the first link above for references to the specification which provides more detail.

Referred and added to the answer given by @Taterhead

Sanjeevi Subramani
  • 501
  • 1
  • 5
  • 16