2

Edit:

Due to memory issues, I am attempting to now use: http://blogs.msdn.com/b/brian_jones/archive/2010/06/22/writing-large-excel-files-with-the-open-xml-sdk.aspx

But still running into some problems.

String strWham = strExtract + strExtract2012;

System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
objCmd.CommandTimeout = 3000;

System.Data.SqlClient.SqlDataReader objReader;
objReader = objCmd.ExecuteReader();

string path = @"\\wsi\userdata\pterrazas\AccountingReports\ExpThrough201212.xlsx";

while (objReader.Read())
{         
    using (var myDoc = SpreadsheetDocument.Create(path, DocumentFormat.OpenXml.SpreadsheetDocumentType.Workbook))
    {               
        WorkbookPart workbookPart = myDoc.WorkbookPart;
        /* The next line causes the error:
           ** Error: Object Reference not set to an instance of an object**
        */
        WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();

        string origninalSheetId = workbookPart.GetIdOfPart(worksheetPart);

        WorksheetPart replacementPart = workbookPart.AddNewPart<WorksheetPart>();
        string replacementPartId = workbookPart.GetIdOfPart(replacementPart);

        OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
        OpenXmlWriter writer = OpenXmlWriter.Create(replacementPart);

        Row r = new Row();
        Cell c = new Cell();
        CellFormula f = new CellFormula();
        f.CalculateCell = true;
        f.Text = "RAND()";
        c.Append(f);
        CellValue v = new CellValue();
        c.Append(v);
        int numRows = 1;
        int numCols = 1;
        while (reader.Read())
        {
            if (reader.ElementType == typeof(SheetData))
            {
                if (reader.IsEndElement)
                    continue;
                writer.WriteStartElement(new SheetData());

                for (int row = 0; row < numRows; row++)
                {
                    writer.WriteStartElement(r);
                    for (int col = 0; col < numCols; col++)
                    {
                         writer.WriteElement(c);
                         numCols++;
                    }
                    writer.WriteEndElement();
                    numRows++;
                }

                writer.WriteEndElement();
            }
            else
            {
                if (reader.IsStartElement)
                {
                    writer.WriteStartElement(reader);
                }
                else if (reader.IsEndElement)
                {
                    writer.WriteEndElement();
                }
            }
        }

        reader.Close();
        writer.Close();

        Sheet sheet = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Id.Value.Equals(origninalSheetId)).First();
        sheet.Id.Value = replacementPartId;
        workbookPart.DeletePart(worksheetPart);

     }
}    

objConn.Close();

}
catch (Exception ex)

Can't get it configured properly to run without exception!

Thanks for any help!

petelids
  • 12,305
  • 3
  • 47
  • 57
user3930607
  • 41
  • 1
  • 5
  • Have you tried eliminating the code line by line to see which line is causing the issue? – RQDQ Aug 11 '14 at 17:48
  • Based on the performance report, OpenXML is storing all this data in memory as it goes through (massive) and I presume it is maxing out the memory given the error. – user3930607 Aug 11 '14 at 17:57
  • You'll need to use the SAX-Like Approach mentioned in the link you provide. This writes the XML out piece by piece rather than having to have large parts of the document in memory in one go. What have you tried from that link and what errors did you get? – petelids Aug 11 '14 at 22:13
  • Updated with problems when using SAX-Like Approach. – user3930607 Aug 13 '14 at 16:53

2 Answers2

2

I figured how to use xmlreader/xmlwriter with sqldatareader from a sql db string and it uses very little memory. This produced a 300,000 row xlsx file in ten minutes using hardly any memory!

//Exp Through 2012

            String strExtract2012 = "WHERE client.typecode = 'I' AND Policy.UniqAgency IN(65536,65537,65538,65539,65540) AND Line.ExpirationDate < '1/1/2013' " +
                "ORDER BY polagencycode, polbranch, clientlookup, policynumber, lineeff, linetypecode";


            String strWham = strExtract + strExtract2012;

            System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
            objCmd.CommandTimeout = 3000;

            System.Data.SqlClient.SqlDataReader objReader;
            objReader = objCmd.ExecuteReader();


            string path = @"\\filepath\ExpThrough201212.xlsx";
            string blankpath = @"\\filepath\blank.xlsx"; - put this blank xlsx file in the *filepath*

            File.Copy(blankpath, path, true);





            if (objReader.Read())
            {
                using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(path, true))
                {                      
                    WorkbookPart workbookPart = myDoc.WorkbookPart;

                    WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
                    string origninalSheetId = workbookPart.GetIdOfPart(worksheetPart);

                    WorksheetPart replacementPart = workbookPart.AddNewPart<WorksheetPart>();
                    string replacementPartId = workbookPart.GetIdOfPart(replacementPart);


                    OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
                    OpenXmlWriter writer = OpenXmlWriter.Create(replacementPart);

                    while (reader.Read())
                    {
                        if (reader.ElementType == typeof(SheetData))
                        {
                            if (reader.IsEndElement)
                                continue;
                            writer.WriteStartElement(new SheetData());

                            Row rr = new Row();
                            writer.WriteStartElement(rr);

                            //Add Header          
                            for (int count = 0; count < objReader.FieldCount; count++)
                            {
                                String FieldName = objReader.GetName(count);



                                DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
                                cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
                                cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(objReader.GetName(count));
                                //headerRow.AppendChild(cell);

                                writer.WriteElement(cell);
                            }

                            writer.WriteEndElement();


                            //writer.WriteEndElement();
                            //sheetData.AppendChild(headerRow);



                            while (objReader.Read())
                            {
                                //DocumentFormat.OpenXml.Spreadsheet.Row newRow = new DocumentFormat.OpenXml.Spreadsheet.Row();
                                Row r = new Row();
                                writer.WriteStartElement(r);

                                for (int col = 0; col < objReader.FieldCount; col++)
                                {
                                    String FieldValue = objReader.GetValue(col).ToString();

                                    //columns.Add(FieldValue);

                                    DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
                                    cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
                                    cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(FieldValue);
                                    //newRow.AppendChild(cell);
                                    writer.WriteElement(cell);

                                }
                                //.AppendChild(newRow);
                                writer.WriteEndElement();                                    
                            }

                            writer.WriteEndElement();
                        }
                        else
                        {
                            if (reader.IsStartElement)
                            {
                                writer.WriteStartElement(reader);
                            }
                            else if (reader.IsEndElement)
                            {
                                writer.WriteEndElement();
                            }
                        }
                    }

                    reader.Close();
                    writer.Close();

                    Sheet sheet = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Id.Value.Equals(origninalSheetId)).First();
                    sheet.Id.Value = replacementPartId;
                    workbookPart.DeletePart(worksheetPart);



                }
            }
            objReader.Close();
user3930607
  • 41
  • 1
  • 5
1

You create a new document for each row in objReader; the while (objReader.Read()) line should be moved inside the file creation code but this isn't causing the issue you are currently seeing.

The issue you are seeing is because the sample you have used as a basis reads an existing file (using SpreadsheetDocument.Open) and copies data to it but you are creating a brand new one from scratch (using SpreadsheetDocument.Create). That is why his document already contains a workbookpart but yours doesn't. As yours doesn't, you are getting an object reference error because your workbookPart variable is null.

Vincent Tan has a great post about how to write a large Excel file using a pure SAX approach here. He mentions some issues you might come across using the code in the link you provide so I highly recommend you have a read.

Using that as a basis you could achieve more or less what you are after with something like this:

String strWham = strExtract + strExtract2012;

System.Data.SqlClient.SqlCommand objCmd = new System.Data.SqlClient.SqlCommand(strWham, objConn);
objCmd.CommandTimeout = 3000;

System.Data.SqlClient.SqlDataReader objReader;
objReader = objCmd.ExecuteReader();

string path = @"\\wsi\userdata\pterrazas\AccountingReports\ExpThrough201212.xlsx";

using (SpreadsheetDocument myDoc = SpreadsheetDocument.Create(path, SpreadsheetDocumentType.Workbook))
{
    List<OpenXmlAttribute> xmlAttributes;
    OpenXmlWriter writer;

    //add a workbookpart manually
    myDoc.AddWorkbookPart();
    WorksheetPart worksheetpart = myDoc.WorkbookPart.AddNewPart<WorksheetPart>();

    //create an XML writer for the worksheetpart
    writer = OpenXmlWriter.Create(worksheetpart);
    writer.WriteStartElement(new Worksheet());
    writer.WriteStartElement(new SheetData());

    int rowNumber = 1;

    while (objReader.Read())
    {
        xmlAttributes = new List<OpenXmlAttribute>();
        // this is the row index
        xmlAttributes.Add(new OpenXmlAttribute("r", null, rowNumber.ToString()));

        //write the row start element with the attributes added above
        writer.WriteStartElement(new Row(), xmlAttributes);

        for (int columnNumber = 1; columnNumber < objReader.FieldCount; columnNumber++)
        {
            //reset the attributes
            xmlAttributes = new List<OpenXmlAttribute>();
            // this is the data type ("t"), with CellValues.String ("str")
            // you might need to change this depending on your source data
            // you might also consider using the Shared Strings table instead
            xmlAttributes.Add(new OpenXmlAttribute("t", null, "str"));

            //add the cell reference (A1, B1, A2... etc)
            xmlAttributes.Add(new OpenXmlAttribute("r", null, GetExcelColumnName(rowNumber, columnNumber)));

            //write the start of the cell element with the type and cell reference attributes
            writer.WriteStartElement(new Cell(), xmlAttributes);

            //write the cell value
            writer.WriteElement(new CellValue(objReader[columnNumber].ToString()));

            //write the cell end element
            writer.WriteEndElement();
        }

        //write the row end element
        writer.WriteEndElement();

        rowNumber++;
    }

    //write the sheetdata end element
    writer.WriteEndElement();
    //write the worksheet end element
    writer.WriteEndElement();
    writer.Close();

    //create a writer for the workbookpart
    writer = OpenXmlWriter.Create(myDoc.WorkbookPart);
    //write the start element of a workbook to the workbook part
    writer.WriteStartElement(new Workbook());
    //write the start element of a sheets item to the workbook part
    writer.WriteStartElement(new Sheets());
    //write the whole element of a sheet to the workbook part
    //note we link it to the id of the worksheetpart populated above
    writer.WriteElement(new Sheet()
    {
        Name = "Sheet1",
        SheetId = 1,
        Id = myDoc.WorkbookPart.GetIdOfPart(worksheetpart)
    });

    //write the sheets end element
    writer.WriteEndElement();
    //write the workbook end element
    writer.WriteEndElement();
    writer.Close();

    myDoc.Close();
}

objConn.Close();

The method GetExcelColumnName I have used is adapted from this StackOverflow question.

private static string GetExcelColumnName(int rowNumber, int columnNumber)
{
    int dividend = columnNumber;
    string columnName = String.Empty;
    int modulo;

    while (dividend > 0)
    {
        modulo = (dividend - 1) % 26;
        columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
        dividend = (int)((dividend - modulo) / 26);
    }

    return columnName + rowNumber.ToString();
}

Note that you might want to change the type that you output as per the comment in the code depending on the type of the source data. You might also want to look at the Shared Strings concept whereby strings are written to a separate file and then the index of that string is written as the cell value rather than the string itself.

In terms of raw performance for writing an Excel file I think the above is probably the fastest method using the OpenXML SDK. This does of course come at the cost of complexity however.

I hope that helps.

Community
  • 1
  • 1
petelids
  • 12,305
  • 3
  • 47
  • 57
  • 1
    This solution doesn't work for .net Core 3.1 and OpenXML 2.11.0 becuase to generate ~70 MB file, this code required 3GB of memory (at peaks) – KondzioSSJ4 Jun 12 '20 at 10:36