I am unable to extract data from a Java `InputStream` containing Parquet file data without first creating a file. I need to write a method that reads the Parquet data directly from a Java `InputStream`, without creating any intermediate file.
The following code works, but it requires me to create a file. How can I read the data without having to create any file?
public static void main(String[] args) throws FileNotFoundException {
SpringApplication.run(ParquetstreamApplication.class, args);
Path path=new Path(Paths.get("").toAbsolutePath().toString()+"\\src\\main\\resources\\test-parquet.parquet");
InputStream fileInputStream = new FileInputStream(path.toString());
List<GenericRecord> recordList = new ArrayList<>();
try (ParquetFileReader parquetFileReader = new ParquetFileReader(HadoopInputFile.fromPath(path, new Configuration()), ParquetReadOptions.builder().build())) {
final ParquetMetadata footer = parquetFileReader.getFooter();
final MessageType schema = createdParquetSchema(footer);
PageReadStore pages;
while ((pages = parquetFileReader.readNextRowGroup()) != null) {
final long rows = pages.getRowCount();
final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
final RecordReader<Group> recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema));
for (int row = 0; row < rows; row++) {
final Map<String, Object> eventData = new HashMap<>();
int fieldIndex = 0;
final SimpleGroup simpleGroup = (SimpleGroup) recordReader.read();
GenericRecord record = new GenericData.Record(parseSchema());
for (Type field : schema.getFields()) {
try {
//System.out.print(field.getName()+": "+simpleGroup.getValueToString(fieldIndex, 0)+" ");
record.put(field.getName(),simpleGroup.getValueToString(fieldIndex, 0));
}
catch (Exception parquetException){
System.out.println("error");
}
fieldIndex++;
}
//System.out.println();
recordList.add(record);
}
}