I am trying to split a text file with multiple threads. The file is of 1 GB. I am reading the file by char. The Execution time is 24 min 54 seconds. Instead of reading a file by char is their any better way where I can reduce the execution time. I'm having a hard time figuring out an approach that will reduce the execution time. Please do suggest me also, if there is any other better way to split file with multiple threads. I am very new to java.
Any help will be appreciated. :)
public static void main(String[] args) throws Exception {
RandomAccessFile raf = new RandomAccessFile("D:\\sample\\file.txt", "r");
long numSplits = 10;
long sourceSize = raf.length();
System.out.println("file length:" + sourceSize);
long bytesPerSplit = sourceSize / numSplits;
long remainingBytes = sourceSize % numSplits;
int maxReadBufferSize = 9 * 1024;
List<String> filePositionList = new ArrayList<String>();
long startPosition = 0;
long endPosition = bytesPerSplit;
for (int i = 0; i < numSplits; i++) {
raf.seek(endPosition);
String strData = raf.readLine();
if (strData != null) {
endPosition = endPosition + strData.length();
}
String str = startPosition + "|" + endPosition;
if (sourceSize > endPosition) {
startPosition = endPosition;
endPosition = startPosition + bytesPerSplit;
} else {
break;
}
filePositionList.add(str);
}
for (int i = 0; i < filePositionList.size(); i++) {
String str = filePositionList.get(i);
String[] strArr = str.split("\\|");
String strStartPosition = strArr[0];
String strEndPosition = strArr[1];
long startPositionFile = Long.parseLong(strStartPosition);
long endPositionFile = Long.parseLong(strEndPosition);
MultithreadedSplit objMultithreadedSplit = new MultithreadedSplit(startPositionFile, endPositionFile);
objMultithreadedSplit.start();
}
long endTime = System.currentTimeMillis();
System.out.println("It took " + (endTime - startTime) + " milliseconds");
}
}
public class MultithreadedSplit extends Thread {
public static String filePath = "D:\\tenlakh\\file.txt";
private int localCounter = 0;
private long start;
private long end;
public static String outPath;
List<String> result = new ArrayList<String>();
public MultithreadedSplit(long startPos, long endPos) {
start = startPos;
end = endPos;
}
@Override
public void run() {
try {
String threadName = Thread.currentThread().getName();
long currentTime = System.currentTimeMillis();
RandomAccessFile file = new RandomAccessFile("D:\\sample\\file.txt", "r");
String outFile = "out_" + threadName + ".txt";
System.out.println("Thread Reading started for start:" + start + ";End:" + end+";threadname:"+threadName);
FileOutputStream out2 = new FileOutputStream("D:\\sample\\" + outFile);
file.seek(start);
int nRecordCount = 0;
char c = (char) file.read();
StringBuilder objBuilder = new StringBuilder();
int nCounter = 1;
while (c != -1) {
objBuilder.append(c);
// System.out.println("char-->" + c);
if (c == '\n') {
nRecordCount++;
out2.write(objBuilder.toString().getBytes());
objBuilder.delete(0, objBuilder.length());
//System.out.println("--->" + nRecordCount);
// break;
}
c = (char) file.read();
nCounter++;
if (nCounter > end) {
break;
}
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
}