0

I am unable to extract data from a Java `InputStream` containing Parquet file data without first creating a file. Imagine I have to write a method that reads the Parquet data from a Java `InputStream` without creating any interim file.

The code below works, but it requires me to create a file. How can I read the data without having to create any file?

/**
 * Starts the Spring application, then reads every row of
 * {@code src\main\resources\test-parquet.parquet} (resolved against the current
 * working directory) and collects one Avro {@link GenericRecord} per row.
 *
 * <p>The Parquet reader is opened from the file path, not from an
 * {@link InputStream}; this is the dependency on an on-disk file that the
 * question asks how to avoid.
 *
 * <p>NOTE(review): this snippet is truncated. The try-with-resources statement and
 * the method body are not closed in the visible text, and {@code createdParquetSchema}
 * and {@code parseSchema} are defined elsewhere.
 *
 * @param args command-line arguments, forwarded to {@code SpringApplication.run}
 * @throws FileNotFoundException if the Parquet file cannot be opened by {@code FileInputStream}
 */
public static void main(String[] args) throws FileNotFoundException {
    SpringApplication.run(ParquetstreamApplication.class, args);
    // Windows-style separators are hard-coded; the path is relative to the working directory.
    Path path=new Path(Paths.get("").toAbsolutePath().toString()+"\\src\\main\\resources\\test-parquet.parquet");
    // NOTE(review): this stream is never read from or closed in the visible code;
    // the reader below opens the file by path instead.
    InputStream fileInputStream = new FileInputStream(path.toString());
    // Accumulates one record per Parquet row across all row groups.
    List<GenericRecord> recordList = new ArrayList<>();
     // The reader is closed automatically by try-with-resources.
     try (ParquetFileReader parquetFileReader = new ParquetFileReader(HadoopInputFile.fromPath(path, new Configuration()), ParquetReadOptions.builder().build())) {
            final ParquetMetadata footer = parquetFileReader.getFooter();
            // Schema is derived from the footer by a helper that is not shown here.
            final MessageType schema = createdParquetSchema(footer);
            PageReadStore pages;
            // Iterate row group by row group until the reader reports no more.
            while ((pages = parquetFileReader.readNextRowGroup()) != null) {
                final long rows = pages.getRowCount();
                final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
                final RecordReader<Group> recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema));

                for (int row = 0; row < rows; row++) {

                        // NOTE(review): eventData is created but never used in the visible code.
                        final Map<String, Object> eventData = new HashMap<>();

                        // Position of the current field within the schema's field list.
                        int fieldIndex = 0;
                        final SimpleGroup simpleGroup = (SimpleGroup) recordReader.read();
                        // parseSchema() is called once per row; it is defined outside this snippet.
                        GenericRecord record = new GenericData.Record(parseSchema());
                        for (Type field : schema.getFields()) {

                            try {
                               //System.out.print(field.getName()+": "+simpleGroup.getValueToString(fieldIndex, 0)+" ");
                                // Stores the value at index 0 of this field, in its string form, under the field's name.
                                record.put(field.getName(),simpleGroup.getValueToString(fieldIndex, 0));
                            }
                            // Any failure is reduced to a fixed message; the field is left unset
                            // on the record and the exception details are discarded.
                            catch (Exception parquetException){
                               System.out.println("error");
                            }

                            fieldIndex++;

                        }
                        //System.out.println();
                    recordList.add(record);
                }
            }
  • afaics that's not possible – g00se May 12 '23 at 16:20
  • https://stackoverflow.com/questions/58141248/read-parquet-data-from-bytearrayoutputstream-instead-of-file What is this answer trying to say? – Umair Husain May 12 '23 at 17:54
  • Please trim your code to make it easier to find your problem. Follow these guidelines to create a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example). – Community May 13 '23 at 10:15

0 Answers