0

I am receiving an uploaded Excel file on my server and I want to parse the file. The data with in the table is processed in the correct order, but for some strange reason my header row gets alphabetized. If I have the first row's columns as such First Col, A Second Col, Third Col, and A Fourth Col, the code returns an array of [A Fourth Col, A Second Col, First Col, Third Col], i.e. alphabetized with strings starting with 'A's first, then 'F', then 'S', etc.

My C# code goes like this for reading the header row:

private JsonResult ProcessExcelFile(String filename, String connectionString)
{
    DataTable ExcelFile = null;
    List<Dictionary<string,dynamic>> dataCells = new List<Dictionary<string,dynamic>>();
    var dataCount = 0;
    dataCells.Add(new Dictionary<string, dynamic>());
    dataCells.Add("fileName", "Excel," + fileName);

    using (var conn = new OleDbConnection(connectionString))
    {
        if (conn.State == System.Data.ConnectionState.Closed)
        {
            conn.Open();

            DataTable tableSheets = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
            List<string> sheetList = new List<string>();
            foreach (DataRow tableSheet in tableSheets.Rows)
            {
                string currentSheetName = tableSheet[2].ToString().Replace("'", "");
                sheetList.Add(currentSheetName);
            }

            DataSet data = new DataSet();
            foreach(var sheetName in sheetList)
            {
                var dataTable = new DataTable();
                string query = string.Format("SELECT * FROM [{0}]", sheetName);
                OleDbDataAdapter adapter = new OleDbDataAdapter(query, conn);
                adapter.Fill(dataTable);
                data.Tables.Add(dataTable);
            }

            ExcelFile = new DataTable("ExcelFile");
            List<string> columnNames = new List<string>();
            DataTable dataColumns = conn.GetSchema("Columns");
            foreach (DataRow dataColumn in dataColumns.Rows)
            {
                string currentSheet = dataColumn.ItemArray[2].ToString();
                if(currentSheet == "Sheet1!")
                {
                    columnsNames.Add(dataColumn["COLUMN_NAME"].ToString());
                    DataColumn tableHeader = new DataColumn(dataColumn["COLUMN_NAME"].ToString(), Type.GetType("System.String"));
                    ExcelFile.Columns.Add(tableHeader);
                }
            }

            var cmd = conn.CreateCommand();

            cmd.CommandText = String.Format(
                @"SELECT * FROM [{0}$]",
                "Sheet1"
            );

            using (var rdr = cmd.ExecuteReader())
            {
                DataRow NewRow;
                if (rdr.HasRows)
                {
                    while (rdr.Read())
                    {
                        dataCount++;
                        NewRow = ExcelFile.NewRow();
                        dataCells.Add(new Dictionary<string, dynamic>());

                        for (int index = 0; index < rdr.FieldCount; index++)
                        {
                            string fieldName = columnsList[index];
                            var fieldType = rdr.GetFieldType(index).FullName;

                            switch(fieldType)
                            {
                                case ("System.Byte"):
                                    byte byteFieldValue = rdr.GetByte(index);
                                    dataCells[dataCount].Add(fieldname, byteFieldValue);
                                    NewRow[fieldname] = byteFieldValue;
                                    break;

                                case ("System.Int16"):
                                    Int16 int16FieldValue = rdr.GetInt16(index);
                                    dataCells[dataCount].Add(fieldname, int16FieldValue);
                                    NewRow[fieldname] = int16FieldValue;
                                    break;

                                case ("System.Int32"):
                                    Int32 int32FieldValue = rdr.GetInt32(index);
                                    dataCells[dataCount].Add(fieldname, int32FieldValue);
                                    NewRow[fieldname] = int32FieldValue;
                                    break;

                                case ("System.Int64"):
                                    Int64 int64FieldValue = rdr.GetInt64(index);
                                    dataCells[dataCount].Add(fieldname, int64FieldValue);
                                    NewRow[fieldname] = int64FieldValue;
                                    break;

                                case ("System.Double"):
                                    double doubleFieldValue = rdr.GetDouble(index);
                                    dataCells[dataCount].Add(fieldname, doubleFieldValue);
                                    NewRow[fieldname] = doubleFieldValue;
                                    break;

                                case ("System.Boolean"):
                                    bool boolFieldValue = rdr.GetBoolean(index);
                                    dataCells[dataCount].Add(fieldname, boolFieldValue);
                                    NewRow[fieldname] = boolFieldValue;
                                    break;

                                case ("System.Char"):
                                    char charFieldValue = rdr.GetChar(index);
                                    dataCells[dataCount].Add(fieldname, charFieldValue);
                                    NewRow[fieldname] = charFieldValue;
                                    break;

                                case ("System.Decimal"):
                                    decimal decimalFieldValue = rdr.GetDecimal(index);
                                    dataCells[dataCount].Add(fieldname, decimalFieldValue);
                                    NewRow[fieldname] = decimalFieldValue;
                                    break;

                                case ("System.DateTime"):
                                    DateTime datetimeFieldValue = rdr.GetDateTime(index);
                                    dataCells[dataCount].Add(fieldname, datetimeFieldValue);
                                    NewRow[fieldname] = datetimeFieldValue;
                                    break;

                                case ("System.String"):
                                    string stringFieldValue = "";
                                    stringFieldValue = rdr.GetDouble(index);
                                    dataCells[dataCount].Add(fieldname, stringFieldValue);
                                    NewRow[fieldname] = stringFieldValue;
                                    break;

                                default:
                                    string defaultFieldValue = "type not found";
                                    dataCells[dataCount].Add(fieldname, defaultFieldValue);
                                    NewRow[fieldname] = defaultFieldValue;
                                    break;
                            }
                        }
                    }
                }
            }
            rdr.Close();
        }
        conn.Close();
    }

    JsonResult jsonObject = Json(dataCells);
    return jsonObject

}

Anyone know why this is doing this?

Matthew Dewell
  • 563
  • 7
  • 30
  • I take it the columnNames list doesn't behave the same way? i.e. It isn't alphabetized. – Jacob Barnes Nov 01 '17 at 14:36
  • This is the code where you output the column names; where you look at what was parsed. Show the code where you read and parse the column names from the file. We're also missing important information about some of the variables used in this code snippet. Where are `columnNames` and `ExcelFile` declared and instantiated? We can't help if you don't give us even basic information from your code. – Joel Coehoorn Nov 01 '17 at 14:39
  • Sorry about that, not providing other variables. `List columnNames = new List();` and `ExcelFile = new DataTable("ExcelFile");` Sorry about not giving all the code. I don't think it would fit within the post, and as I said, JSFiddle would not help. – Matthew Dewell Nov 01 '17 at 14:53
  • You can't *parse* an Excel file. `xlsx` files are zipped XML files. Where is the code that loads the Excel file? Is it a real Excel file or a CSV with a fake extension? What you posted has nothing to do with Excel, it shows a DataColumnCollection that probably came from a DataTable. Post your code – Panagiotis Kanavos Nov 01 '17 at 15:42
  • If you want to process real xlsx files use a library like EPPlus. It looks like you are using the Jet driver to read from the Excel file as if it were a table and possibly reading the schema with `GetSchemaTable()`? Why not just load the data into a DataTable? Don't force people to guess what you did and what went wrong – Panagiotis Kanavos Nov 01 '17 at 15:46
  • @PanagiotisKanavos, I am only loading a xls file, not a xlsx or csv. Please do not get angry at me because I can't post all my code, unless you know where I can post it all. Do you? – Matthew Dewell Nov 01 '17 at 16:53
  • @MatthewDewell `xls` is deprecated for over 10 years. Online services like Google Sheets and Office Online only support it on *paid* versions. It's *trivial* to generate an `xlsx` file with libraries like EPPlus without requiring either Excel or the Jet driver on the server. In fact, that's why it was created. As for your code, post the part that "parses" the file in the question itself. – Panagiotis Kanavos Nov 01 '17 at 16:57
  • @MatthewDewell check this [possibly duplicate question](https://stackoverflow.com/questions/2507502/asp-net-c-read-excel-sheet). It shows how the file is read using the Jet driver and the schema loaded in a DataTable. Are you using similar code? It's not so much that you can't post it – Panagiotis Kanavos Nov 01 '17 at 17:00
  • @PanagiotisKanavos there is a bit more code, but still not all of it for the .cs, but enough for the class. – Matthew Dewell Nov 01 '17 at 18:27
  • @MatthewDewell so this *is* a duplicate and the code redundant. You've already loaded the sheet's data in the `data` datatable. All the rest of the code tries to copy that DataTable into another, `ExcelTable` in a very convoluted way. And all that, just to return Json? – Panagiotis Kanavos Nov 02 '17 at 08:52
  • @MatthewDewell you could delete everything from `ExcelFile = new DataTable("ExcelFile");` down to `JsonResult jsonObject = Json(dataCells);` and replace it with `return Json(data);`. If you don't like the column names of the tables, you can change them – Panagiotis Kanavos Nov 02 '17 at 08:55
  • @PanagiotisKanavos I did as you said, removing the code from `ExcelFile= new DataTable("ExcelFile");` down to `JsonResult = jsonObject(dataCells);` and replaced it with `return Json(data);` and got a response back in my JavaScript of "ERROR:A Circular reference was detected while serializing an object of type 'System.Globalization.CultureInfo'." – Matthew Dewell Nov 02 '17 at 15:45
  • @MatthewDewell are you using ASP.NET MVC? Web API uses Json.NET which [can serialize Datasets](https://www.newtonsoft.com/json/help/html/SerializeDataSet.htm). ASP.NET MVC (before Core) uses the outdated JavascriptSerializer. You [can replace it with Json.NET](https://stackoverflow.com/questions/14591750/setting-the-default-json-serializer-in-asp-net-mvc) or [call JsonConvert.SerializeObject to get the string and return it as a ContentResult](https://stackoverflow.com/questions/34091056/asp-mvc-5-and-json-net-action-return-type) with the proper content type – Panagiotis Kanavos Nov 02 '17 at 15:55
  • @PanagiotisKanavos I am using ASP.NET MVC, and cannot use Newtonsoft code. Any other ideas? – Matthew Dewell Nov 02 '17 at 18:20
  • @PanagiotisKanavos I am stuck with MVC 4.0.0.1 so the only option, I believe, I might have, is the JavaScriptSerializer().Serialize(), but that gives me the same error of: "ERROR:A Circular reference was detected while serializing an object of type 'System.Globalization.CultureInfo'." – Matthew Dewell Nov 02 '17 at 18:59
  • @PanagiotisKanavos I have found this: https://forums.asp.net/t/1291572.aspx?getSchema+does+not+preserve+order may explain what's going on. I guess I need to pull the A1 to the last column entries in the `columnNames` array, as that is where the data is loaded. – Matthew Dewell Nov 02 '17 at 21:41

1 Answers1

1

You need to use the ORDINAL_POSITION column to order the results:

var dataColumns = conn.GetSchema("Columns").AsEnumerable().OrderBy(c => c["TABLE_NAME"]).ThenBy(c => c["ORDINAL_POSITION"]);
NetMage
  • 26,163
  • 3
  • 34
  • 55
  • Sorry I didn't see your entry, before I entered my last comment above. Yes, I need to get it in `ORDINAL_POSITION`. It looks like this is going to work, but I am getting an error about **"Error:'System.Data.TypedTableBaseExtensions.AsEnumerable(System.Data.TypedTableBase)' is a 'method', which is not valid in this given context** – Matthew Dewell Nov 02 '17 at 21:53
  • Sorry, I forgot the method call parens: `()`. – NetMage Nov 02 '17 at 22:11