how to write a Java class to sort 10 billion integers, assuming we can only fit a subset of them in memory at once.
I have done sorting but questions is how i would get the 1 billion values ?
How I am gonna sort them if i am going to load a portion of them in memory ?
If you can help me with source code it would be much appreciated.
Thanks in advance.
here is my last code, you can run it and guide me now.
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.Scanner;
/**
* @project jqcontacts
* @date Mar 26, 2012
* @time 11:16:35 AM
*/
public class SortIntegers {
private static String BIG_FILE="g:/bigFile.txt";
private static String SORT_FILE_PREFIX = "g:/sortFile";
/*private static final int SORT_FILE_MAX_SIZE = 1000000;*/
private static final int SORT_FILE_MAX_SIZE = 10;
private static final String MAIN_FILE = "g:/rawfile1.txt";
private static int RAWA_FILE_MAX_SIZE = 100;
// if i keep the size of MERGE_BUFFER_INITIAL_SIZE = SORT_FILE_MAX_SIZE, the end big file is sorted.
private static int MERGE_BUFFER_INITIAL_SIZE=5;
private static int MERGE_BUFFER_SIZE_NEXT = MERGE_BUFFER_INITIAL_SIZE;
private static int MERGE_BUFFER_SIZE_PREVIOUS = 0;
private static int countFile = 0;
public static void readFile(String name) throws FileNotFoundException{
Scanner scanner = new Scanner(new File(name));
List<Integer> intList = new ArrayList<Integer>();
int fileSize = 0 ;
while(scanner.hasNextInt()){
intList.add(scanner.nextInt());
++fileSize;
if(fileSize>=SORT_FILE_MAX_SIZE){
Collections.sort(intList);
/*System.out.println("list size: " + intList.size());*/
String fileName = SORT_FILE_PREFIX + countFile +".txt";
++fileSize;
PrintWriter out = openWriter(fileName);
for(int i:intList){
writeFile(i, out);
}
out.close();
intList.clear();
++countFile;
fileSize = 0;
}
}
System.out.println("done!");
}
public static List<Integer> readSortFile(String name, List<Integer> list) throws FileNotFoundException{
Scanner scanner = new Scanner(new File(name));
int bufferSize = 0;
while(scanner.hasNextInt()){
++bufferSize;
if(bufferSize>=MERGE_BUFFER_SIZE_PREVIOUS && bufferSize<=MERGE_BUFFER_SIZE_NEXT){
list.add(scanner.nextInt());
}
if(bufferSize>=MERGE_BUFFER_SIZE_NEXT){
break;
}
}
Collections.sort(list);
return list;
}
private static PrintWriter openWriter(String name) {
try {
File file = new File(name);
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(file)), true);
return out;
} catch (IOException e) {
//System.out.println("I/O Error");
e.printStackTrace();
System.exit(0);
}
return null;
}
private static void writeFile(int i, PrintWriter out) {
/* String line = "0" + "\t" + Integer.toString(i);*/
String line = Integer.toString(i) + "\t";
out.println(line);
}
/**
* @param args
*/
public static void main(String[] args) {
generateRawIntFile();
try {
readFile(MAIN_FILE);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
System.out.println("countFile: " + countFile);
// merge sort here, merge the sorted files into one
List<Integer> comboList = new ArrayList<Integer>();
boolean isDone = true;
PrintWriter outP = openWriter(BIG_FILE);
while(isDone){
for(int i=0;i<countFile;i++){
try {
//TODO: do we need the return type for readSortFile ????
comboList = readSortFile(SORT_FILE_PREFIX+i+".txt", comboList);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// System.out.println("hun writing on big file " + comboList.size());
// add the list into bigfile and clear it for further processing
try{
for(int value:comboList){
writeFile(value, outP);
}
comboList.clear();
MERGE_BUFFER_SIZE_PREVIOUS = MERGE_BUFFER_SIZE_NEXT;
MERGE_BUFFER_SIZE_NEXT += MERGE_BUFFER_INITIAL_SIZE;
System.out.println("MERGE_BUFFER_SIZE_PREVIOUS: " + MERGE_BUFFER_SIZE_PREVIOUS + " MERGE_BUFFER_SIZE_NEXT:" + MERGE_BUFFER_SIZE_NEXT);
if(MERGE_BUFFER_SIZE_PREVIOUS >= RAWA_FILE_MAX_SIZE){
System.out.println("sorting is finished");
isDone = false;
break;
}
}catch (Exception e) {
e.printStackTrace();
}
}
}
/**
*
*/
public static void generateRawIntFile() {
Random randomGenerator = new Random();
PrintWriter out = openWriter(MAIN_FILE);
for (Integer i = 0; i < RAWA_FILE_MAX_SIZE;i++){
Integer value = randomGenerator.nextInt(RAWA_FILE_MAX_SIZE);
writeFile(value, out);
}
out.close();
}
}