I rewrote the test to test different sizes of buffer.
Here is the new code:
public class ReadFileInChunks {
public static void main(String[] args) throws IOException {
String path = "C:\\\\tmp\\1GB.zip";
readFileInChuncks(path, new byte[1024 * 128], false);
for (int i = 1; i <= 1024; i+=10) {
readFileInChuncks(path, new byte[1024 * i], true);
}
}
public static void readFileInChuncks(String path, byte[] buffer, boolean report) throws IOException {
long t = System.currentTimeMillis();
InputStream is = new FileInputStream(path);
while ((readToArray(is, buffer)) != 0) {
}
if (report) {
System.out.println("buffer size = " + buffer.length/1024 + "kB , duration = " + (System.currentTimeMillis() - t) + " ms");
}
}
public static int readToArray(InputStream is, byte[] buffer) throws IOException {
int index = 0;
while (index != buffer.length) {
int read = is.read(buffer, index, buffer.length - index);
if (read == -1) {
break;
}
index += read;
}
return index;
}
}
And here are the results...
buffer size = 121kB , duration = 320 ms
buffer size = 131kB , duration = 330 ms
buffer size = 141kB , duration = 330 ms
buffer size = 151kB , duration = 323 ms
buffer size = 161kB , duration = 320 ms
buffer size = 171kB , duration = 320 ms
buffer size = 181kB , duration = 320 ms
buffer size = 191kB , duration = 310 ms
buffer size = 201kB , duration = 320 ms
buffer size = 211kB , duration = 310 ms
buffer size = 221kB , duration = 310 ms
buffer size = 231kB , duration = 310 ms
buffer size = 241kB , duration = 310 ms
buffer size = 251kB , duration = 310 ms
buffer size = 261kB , duration = 320 ms
buffer size = 271kB , duration = 310 ms
buffer size = 281kB , duration = 320 ms
buffer size = 291kB , duration = 310 ms
buffer size = 301kB , duration = 319 ms
buffer size = 311kB , duration = 320 ms
buffer size = 321kB , duration = 310 ms
buffer size = 331kB , duration = 320 ms
buffer size = 341kB , duration = 310 ms
buffer size = 351kB , duration = 320 ms
buffer size = 361kB , duration = 310 ms
buffer size = 371kB , duration = 320 ms
buffer size = 381kB , duration = 311 ms
buffer size = 391kB , duration = 310 ms
buffer size = 401kB , duration = 310 ms
buffer size = 411kB , duration = 320 ms
buffer size = 421kB , duration = 310 ms
buffer size = 431kB , duration = 310 ms
buffer size = 441kB , duration = 310 ms
buffer size = 451kB , duration = 320 ms
buffer size = 461kB , duration = 310 ms
buffer size = 471kB , duration = 310 ms
buffer size = 481kB , duration = 310 ms
buffer size = 491kB , duration = 310 ms
buffer size = 501kB , duration = 310 ms
buffer size = 511kB , duration = 320 ms
buffer size = 521kB , duration = 300 ms
buffer size = 531kB , duration = 310 ms
buffer size = 541kB , duration = 312 ms
buffer size = 551kB , duration = 311 ms
buffer size = 561kB , duration = 320 ms
buffer size = 571kB , duration = 310 ms
buffer size = 581kB , duration = 314 ms
buffer size = 591kB , duration = 320 ms
buffer size = 601kB , duration = 310 ms
buffer size = 611kB , duration = 310 ms
buffer size = 621kB , duration = 310 ms
buffer size = 631kB , duration = 310 ms
buffer size = 641kB , duration = 310 ms
buffer size = 651kB , duration = 310 ms
buffer size = 661kB , duration = 301 ms
buffer size = 671kB , duration = 310 ms
buffer size = 681kB , duration = 310 ms
buffer size = 691kB , duration = 310 ms
buffer size = 701kB , duration = 310 ms
buffer size = 711kB , duration = 300 ms
buffer size = 721kB , duration = 310 ms
buffer size = 731kB , duration = 310 ms
buffer size = 741kB , duration = 310 ms
buffer size = 751kB , duration = 310 ms
buffer size = 761kB , duration = 311 ms
buffer size = 771kB , duration = 310 ms
buffer size = 781kB , duration = 300 ms
buffer size = 791kB , duration = 300 ms
buffer size = 801kB , duration = 310 ms
buffer size = 811kB , duration = 310 ms
buffer size = 821kB , duration = 300 ms
buffer size = 831kB , duration = 310 ms
buffer size = 841kB , duration = 310 ms
buffer size = 851kB , duration = 300 ms
buffer size = 861kB , duration = 310 ms
buffer size = 871kB , duration = 310 ms
buffer size = 881kB , duration = 310 ms
buffer size = 891kB , duration = 304 ms
buffer size = 901kB , duration = 310 ms
buffer size = 911kB , duration = 310 ms
buffer size = 921kB , duration = 310 ms
buffer size = 931kB , duration = 299 ms
buffer size = 941kB , duration = 321 ms
buffer size = 951kB , duration = 310 ms
buffer size = 961kB , duration = 310 ms
buffer size = 971kB , duration = 310 ms
buffer size = 981kB , duration = 310 ms
buffer size = 991kB , duration = 295 ms
buffer size = 1001kB , duration = 339 ms
buffer size = 1011kB , duration = 302 ms
buffer size = 1021kB , duration = 610 ms
It looks like some sort of threshold is hit at around 1021kB size buffer. Looking deeper into this I see...
buffer size = 1017kB , duration = 310 ms
buffer size = 1018kB , duration = 310 ms
buffer size = 1019kB , duration = 602 ms
buffer size = 1020kB , duration = 600 ms
So it looks like there is some sort of doubling effect is going on when this threshold is hit. My initial thoughts are that the readToArray while loop was looping double the amount of times when the threshold was hit, but that's not the case, the while loop only goes through one iteration whether 300ms run or 600ms run. So lets look at the actual io_utils.c
that implements actually reads the data from the disk for some clues.
jint
readBytes(JNIEnv *env, jobject this, jbyteArray bytes,
jint off, jint len, jfieldID fid)
{
jint nread;
char stackBuf[BUF_SIZE];
char *buf = NULL;
FD fd;
if (IS_NULL(bytes)) {
JNU_ThrowNullPointerException(env, NULL);
return -1;
}
if (outOfBounds(env, off, len, bytes)) {
JNU_ThrowByName(env, "java/lang/IndexOutOfBoundsException", NULL);
return -1;
}
if (len == 0) {
return 0;
} else if (len > BUF_SIZE) {
buf = malloc(len);
if (buf == NULL) {
JNU_ThrowOutOfMemoryError(env, NULL);
return 0;
}
} else {
buf = stackBuf;
}
fd = GET_FD(this, fid);
if (fd == -1) {
JNU_ThrowIOException(env, "Stream Closed");
nread = -1;
} else {
nread = (jint)IO_Read(fd, buf, len);
if (nread > 0) {
(*env)->SetByteArrayRegion(env, bytes, off, nread, (jbyte *)buf);
} else if (nread == JVM_IO_ERR) {
JNU_ThrowIOExceptionWithLastError(env, "Read error");
} else if (nread == JVM_IO_INTR) {
JNU_ThrowByName(env, "java/io/InterruptedIOException", NULL);
} else { /* EOF */
nread = -1;
}
}
if (buf != stackBuf) {
free(buf);
}
return nread;
}
One thing to note is that BUF_SIZE is set to 8192. The doubling effect happens way above that. So the next culprit would be the IO_Read
method.
windows/native/java/io/io_util_md.h:#define IO_Read handleRead
So we go to handleRead method.
windows/native/java/io/io_util_md.c:handleRead(jlong fd, void *buf, jint len)
This method hands of the request to a method called ReadFile.
JNIEXPORT
size_t
handleRead(jlong fd, void *buf, jint len)
{
DWORD read = 0;
BOOL result = 0;
HANDLE h = (HANDLE)fd;
if (h == INVALID_HANDLE_VALUE) {
return -1;
}
result = ReadFile(h, /* File handle to read */
buf, /* address to put data */
len, /* number of bytes to read */
&read, /* number of bytes read */
NULL); /* no overlapped struct */
if (result == 0) {
int error = GetLastError();
if (error == ERROR_BROKEN_PIPE) {
return 0; /* EOF */
}
return -1;
}
return read;
}
And this is where the trail runs cold.... for now. If I find the code for ReadFile I will take a look and post back.