I have a 10GB PDF file that I would like to break up into 10 files, each 1GB in size. I need to do this operation in parallel, which means spinning up 10 threads, each of which starts from a different position, reads up to 1GB of data, and writes it to a file. Basically, the final result should be 10 files that each contain a portion of the original 10GB file.
I looked at FileChannel, but the position is shared, so once I modify the position in one thread, it impacts the other thread. I also looked at AsynchronousFileChannel in Java 7 but I'm not sure if that's the way to go. I appreciate any suggestion on this issue.
I wrote this simple program, which reads a small text file, to test the FileChannel idea, but it doesn't seem to work for what I'm trying to achieve.
package org.cas.filesplit;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Runnable that opens the file at {@link #filePath}, moves to a configurable byte offset, and
 * prints everything from that offset to the end of the file on standard output, in chunks of up
 * to 8 bytes.
 *
 * <p>Every invocation of {@link #run()} opens a new {@link FileChannel} and closes it again
 * before returning.
 */
public class ConcurrentRead implements Runnable {

    /** Byte offset in the file at which reading starts. */
    private int myPosition = 0;

    /**
     * Returns the byte offset at which reading starts.
     *
     * @return the configured start offset
     */
    public int getPosition() {
        return myPosition;
    }

    /**
     * Sets the byte offset at which reading starts.
     *
     * @param position the start offset, in bytes from the beginning of the file
     */
    public void setPosition(int position) {
        this.myPosition = position;
    }

    /** Location of the file that is read. */
    static final String filePath = "C:\\Users\\temp.txt";

    /**
     * Reads and prints the file; an {@link IOException} is reported on standard error instead of
     * being propagated, because {@link Runnable#run()} cannot throw checked exceptions.
     */
    @Override
    public void run() {
        try {
            readFile();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the file content from {@link #getPosition()} to end of file, one chunk per line
     * group, each preceded by the id of the current thread.
     *
     * @throws IOException if the file cannot be opened, positioned, or read
     */
    private void readFile() throws IOException {
        Path path = Paths.get(filePath);
        // try-with-resources closes the channel even when position() or read() throws;
        // previously close() was only reached when the whole loop completed normally.
        try (FileChannel fileChannel = FileChannel.open(path)) {
            fileChannel.position(myPosition);
            ByteBuffer buffer = ByteBuffer.allocate(8);
            // read() returns -1 once end of file has been reached.
            while (fileChannel.read(buffer) != -1) {
                buffer.flip(); // switch the buffer from being filled to being drained
                System.out.println("Thread - " + Thread.currentThread().getId());
                while (buffer.hasRemaining()) {
                    // Each byte is printed as one char, so only single-byte text shows correctly.
                    System.out.print((char) buffer.get());
                }
                System.out.println(" ");
                buffer.clear(); // make the whole buffer available for the next read
            }
        }
    }
}