I am not experienced with C#. I need to read a parquet file and then use LINQ to query the data read from the file. I don't know if I need to deserialise.
The following is the data in the parquet file
The data is being read into the 'records' variable. But when I use LINQ on it, I get the error, "Unable to cast object of type 'Parquet.Data.DataColumn' to type 'LinqAndParquet.DataFrame'." at the LINQ query.
public class Program
{
public static DataColumn[] allData;
public static DataColumn[] ReadParquetFile()
{
using (Stream fileStream = File.OpenRead(@"F:\AutomationRunStation\11_12.parquet"))
{
// open parquet file reader
using (var parquetReader = new Parquet.ParquetReader(fileStream))
{
// get file schema (available straight after opening parquet reader)
// however, get only data fields as only they contain data values
DataField[] dataFields = parquetReader.Schema.GetDataFields();
// enumerate through row groups in this file
for (int i = 0; i < parquetReader.RowGroupCount; i++)
{
// create row group reader
using (ParquetRowGroupReader groupReader = parquetReader.OpenRowGroupReader(i))
{
// read all columns inside each row group (you have an option to read only
// required columns if you need to.
allData = dataFields.Select(groupReader.ReadColumn).ToArray();
}
}
return allData;
}
}
}
static void Main(string[] args)
{
var records = ReadParquetFile();
var queryResult = from DataFrame data in records
where data.EventId == 280000001
select data.Loss;
Console.WriteLine(queryResult);
Console.ReadKey();
}
}