Well, adding the question again since it got deleted the last time I posted.
So I've been running some tests on buffered vs. unbuffered reads on Windows and Linux. I downloaded a text file, read n blocks from it in a loop for several block sizes, and compared the time taken by buffered and unbuffered reads. I noticed that on both Windows and Linux, the first block of a buffered read took a little longer each time, but the rest were faster (due to read-ahead?). On Windows, the unbuffered read time for each block was similar to the time taken for the first block read in buffered mode. On Linux, however, the unbuffered read times were all over the place, most of them being at least 20 times longer than the buffered read times. I'm trying to figure out why, but I'm stuck.
Here's the code I used to check this. The commented-out part reads all the blocks and measures the total time; after that I tried timing the reads block by block. I also tried different block sizes, such as 64 KB, 1 MB, 2 MB, etc. The results were similar, i.e., the gap between unbuffered and buffered reads on Linux was significantly larger than on Windows. buffered:
/*#ifdef _WIN32
#include <Windows.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <chrono>
const wchar_t* FILE_PATH = L".\\10mb.txt";
const int BLOCK_SIZE = 64 * 1024; // 64 KB
const int NUM_BLOCKS = 100;
void performBufferedRead() {
HANDLE hFile = CreateFileW(FILE_PATH, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
std::cerr << "Error opening the file. Error code: " << GetLastError() << std::endl;
return;
}
char buffer[BLOCK_SIZE];
DWORD bytesRead;
LARGE_INTEGER offset;
offset.QuadPart = 0;
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_BLOCKS; ++i) {
ReadFile(hFile, buffer, BLOCK_SIZE, &bytesRead, NULL);
}
auto end = std::chrono::steady_clock::now();
double totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
CloseHandle(hFile);
std::cout << "buffered: " << totalTime << " microseconds" << std::endl;
std::ofstream outfile("buffered_read_times_win.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Buffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
}
int main() {
performBufferedRead();
return 0;
}
#elif defined __linux__
#include <iostream>
#include <fstream>
#include <chrono>
#include <fcntl.h>
#include <unistd.h>
#include <sys/statvfs.h>
const char* FILE_PATH = "/home/ubuntu/10mb.txt";
const int BLOCK_SIZE = 64 * 1024; // 64 KB
const int NUM_BLOCKS = 100;
void performBufferedRead() {
int fd = open(FILE_PATH, O_RDONLY);
if (fd == -1) {
std::cerr << "Error opening the file." << errno << std::endl;
return;
}
char* buffer = new char[BLOCK_SIZE];
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_BLOCKS; ++i) {
ssize_t bytesRead = read(fd, buffer, BLOCK_SIZE);
if (bytesRead == -1) {
std::cerr << "Error reading the file." << errno << std::endl;
close(fd);
delete[] buffer;
return;
}
}
auto end = std::chrono::steady_clock::now();
auto totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
std::cout << "buffered: " << totalTime << " microseconds" << std::endl;
std::ofstream outfile("buffered_read_times_lin.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Buffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
close(fd);
delete[] buffer;
}
int main() {
performBufferedRead();
return 0;
}
#endif
*/
#ifdef _WIN32
#include <Windows.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <chrono>
// File read by the benchmark, given relative to the working directory.
const wchar_t* FILE_PATH{L".\\10mb.txt"};
// Number of bytes requested per read call (64 KB).
constexpr int BLOCK_SIZE{64 << 10};
// Upper bound on the number of blocks read in one run.
constexpr int NUM_BLOCKS{100};
void performBufferedRead() {
HANDLE hFile = CreateFileW(FILE_PATH, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
std::cerr << "Error opening the file. Error code: " << GetLastError() << std::endl;
return;
}
char buffer[BLOCK_SIZE];
DWORD bytesRead;
LARGE_INTEGER offset;
offset.QuadPart = 0;
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
auto start = std::chrono::steady_clock::now();
double totalBlockTime = 0.0;
for (int i = 0; i < NUM_BLOCKS; ++i) {
auto blockStart = std::chrono::steady_clock::now();
ReadFile(hFile, buffer, BLOCK_SIZE, &bytesRead, NULL);
auto blockEnd = std::chrono::steady_clock::now();
double blockTime = std::chrono::duration_cast<std::chrono::microseconds>(blockEnd - blockStart).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
std::cout << "Block " << (i + 1) << " read time: " << blockTime << " microseconds" << std::endl;
totalBlockTime += blockTime;
}
auto end = std::chrono::steady_clock::now();
double totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
CloseHandle(hFile);
std::cout << "Total time: " << totalTime << " microseconds" << std::endl;
std::cout << "Total block time: " << totalBlockTime << " microseconds" << std::endl;
std::ofstream outfile("buffered_read_block_times_win.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Buffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
}
/// Entry point: runs the buffered read benchmark once and exits with status 0.
int main()
{
    performBufferedRead();
}
#elif defined __linux__
#include <iostream>
#include <fstream>
#include <chrono>
#include <fcntl.h>
#include <unistd.h>
#include <sys/statvfs.h>
// Absolute path of the file read by the benchmark.
const char* FILE_PATH{"/home/ubuntu/10mb.txt"};
// Number of bytes requested per read call (64 KB).
constexpr int BLOCK_SIZE{64 << 10};
// Upper bound on the number of blocks read in one run.
constexpr int NUM_BLOCKS{100};
/// Benchmarks buffered sequential reads of FILE_PATH on Linux.
///
/// Opens the file with O_RDONLY (no O_DIRECT), issues up to NUM_BLOCKS read()
/// calls of BLOCK_SIZE bytes, prints the duration of every call plus the
/// totals, and appends the total to buffered_read_block_times_lin.txt. If
/// opening the file or a read fails, the errno value is reported on std::cerr
/// and the function returns without touching the result file.
void performBufferedRead() {
    // Fractional microseconds: whole-microsecond truncation per block would
    // add up to a visible error in the summed block time.
    using Microseconds = std::chrono::duration<double, std::micro>;

    const int fd = open(FILE_PATH, O_RDONLY);
    if (fd == -1) {
        const int openErrno = errno;  // capture before stream I/O can overwrite it
        std::cerr << "Error opening the file." << openErrno << std::endl;
        return;
    }

    char* buffer = new char[BLOCK_SIZE];

    double totalBlockTime = 0.0;
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < NUM_BLOCKS; ++i) {
        const auto blockStart = std::chrono::steady_clock::now();
        const ssize_t bytesRead = read(fd, buffer, BLOCK_SIZE);
        if (bytesRead == -1) {
            const int readErrno = errno;
            std::cerr << "Error reading the file." << readErrno << std::endl;
            close(fd);
            delete[] buffer;
            return;
        }
        const auto blockEnd = std::chrono::steady_clock::now();

        if (bytesRead == 0) {
            // End of file: a zero-byte read is not a block worth timing.
            std::cerr << "Reached end of file after " << i << " blocks." << std::endl;
            break;
        }

        const double blockTime = Microseconds(blockEnd - blockStart).count();
        std::cout << "Block " << (i + 1) << " read time: " << blockTime << " microseconds" << '\n';
        totalBlockTime += blockTime;
    }
    const auto end = std::chrono::steady_clock::now();

    // totalTime spans the whole loop, including the console output above;
    // totalBlockTime is the sum of the read() calls alone.
    const double totalTime = Microseconds(end - start).count();

    std::cout << "Total time: " << totalTime << " microseconds" << std::endl;
    std::cout << "Total block time: " << totalBlockTime << " microseconds" << std::endl;

    std::ofstream outfile("buffered_read_block_times_lin.txt", std::ios_base::app);
    if (outfile.is_open()) {
        outfile << "Buffered Read Time: " << totalTime << " microseconds" << std::endl;
        outfile.close();
    }

    close(fd);
    delete[] buffer;
}
/// Entry point: runs the buffered read benchmark once and exits with status 0.
int main()
{
    performBufferedRead();
}
#endif
unbuffered:
/*#ifdef _WIN32
#include <Windows.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <chrono>
const wchar_t* FILE_PATH = L".\\10mb.txt";
const int BLOCK_SIZE = 64 * 1024; // 64 KB
const int NUM_BLOCKS = 100;
void performUnbufferedRead() {
HANDLE hFile = CreateFileW(FILE_PATH, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
std::cerr << "Error opening the file. Error code: " << GetLastError() << std::endl;
return;
}
char buffer[BLOCK_SIZE];
DWORD bytesRead;
LARGE_INTEGER offset;
offset.QuadPart = 0;
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_BLOCKS; ++i) {
ReadFile(hFile, buffer, BLOCK_SIZE, &bytesRead, NULL);
}
auto end = std::chrono::steady_clock::now();
double totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
CloseHandle(hFile);
std::cout << "unbuffered: " << totalTime << " microseconds\n";
std::ofstream outfile("unbuffered_read_times_win.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Unbuffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
}
int main() {
performUnbufferedRead();
return 0;
}
#elif defined __linux__
#include <iostream>
#include <fstream>
#include <chrono>
#include <fcntl.h>
#include <unistd.h>
#include <sys/statvfs.h>
const char* FILE_PATH = "/home/ubuntu/10mb.txt";
const int BLOCK_SIZE = 64 * 1024; // 64 KB
const int NUM_BLOCKS = 100;
void performUnbufferedRead() {
int fd = open(FILE_PATH, O_RDONLY | O_DIRECT);
if (fd == -1) {
std::cerr << "Error opening the file." << errno << std::endl;
return;
}
// Allocate memory for the aligned buffer
void* alignedBuffer;
if (posix_memalign(&alignedBuffer, BLOCK_SIZE, BLOCK_SIZE) != 0) {
std::cerr << "Error allocating aligned buffer." << std::endl;
close(fd);
return;
}
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < NUM_BLOCKS; ++i) {
ssize_t bytesRead = read(fd, alignedBuffer, BLOCK_SIZE);
if (bytesRead == -1) {
std::cerr << "Error reading the file." << errno << std::endl;
close(fd);
free(alignedBuffer);
return;
}
}
auto end = std::chrono::steady_clock::now();
auto totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
std::cout << "unbuffered: " << totalTime << " microseconds" << std::endl;
std::ofstream outfile("unbuffered_read_times_lin.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Unbuffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
close(fd);
free(alignedBuffer);
}
int main() {
performUnbufferedRead();
return 0;
}
#endif
*/
#ifdef _WIN32
#include <Windows.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <chrono>
// File read by the benchmark, given relative to the working directory.
const wchar_t* FILE_PATH{L".\\10mb.txt"};
// Number of bytes requested per read call (64 KB).
constexpr int BLOCK_SIZE{64 << 10};
// Upper bound on the number of blocks read in one run.
constexpr int NUM_BLOCKS{100};
void performUnbufferedRead() {
HANDLE hFile = CreateFileW(FILE_PATH, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, NULL);
if (hFile == INVALID_HANDLE_VALUE) {
std::cerr << "Error opening the file. Error code: " << GetLastError() << std::endl;
return;
}
char buffer[BLOCK_SIZE];
DWORD bytesRead;
LARGE_INTEGER offset;
offset.QuadPart = 0;
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
auto start = std::chrono::steady_clock::now();
double totalBlockTime = 0.0;
for (int i = 0; i < NUM_BLOCKS; ++i) {
auto blockStart = std::chrono::steady_clock::now();
ReadFile(hFile, buffer, BLOCK_SIZE, &bytesRead, NULL);
auto blockEnd = std::chrono::steady_clock::now();
double blockTime = std::chrono::duration_cast<std::chrono::microseconds>(blockEnd - blockStart).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
std::cout << "Block " << (i + 1) << " read time: " << blockTime << " microseconds" << std::endl;
totalBlockTime += blockTime;
}
auto end = std::chrono::steady_clock::now();
double totalTime = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / static_cast<double>(frequency.QuadPart) * 1'000'000.0;
CloseHandle(hFile);
std::cout << "Total time: " << totalTime << " microseconds" << std::endl;
std::cout << "Total block time: " << totalBlockTime << " microseconds" << std::endl;
std::ofstream outfile("unbuffered_read_block_times_win.txt", std::ios_base::app);
if (outfile.is_open()) {
outfile << "Unbuffered Read Time: " << totalTime << " microseconds" << std::endl;
outfile.close();
}
}
/// Entry point: runs the unbuffered read benchmark once and exits with status 0.
int main()
{
    performUnbufferedRead();
}
#elif defined __linux__
#include <iostream>
#include <fstream>
#include <chrono>
#include <fcntl.h>
#include <unistd.h>
#include <sys/statvfs.h>
// Absolute path of the file read by the benchmark.
const char* FILE_PATH{"/home/ubuntu/10mb.txt"};
// Number of bytes requested per read call (64 KB).
constexpr int BLOCK_SIZE{64 << 10};
// Upper bound on the number of blocks read in one run.
constexpr int NUM_BLOCKS{100};
/// Benchmarks unbuffered sequential reads of FILE_PATH on Linux.
///
/// Opens the file with O_RDONLY | O_DIRECT, issues up to NUM_BLOCKS read()
/// calls of BLOCK_SIZE bytes into an aligned buffer, prints the duration of
/// every call plus the totals, and appends the total to
/// unbuffered_read_block_times_lin.txt. If opening the file, allocating the
/// buffer, or a read fails, the error is reported on std::cerr and the
/// function returns without touching the result file.
void performUnbufferedRead() {
    // Fractional microseconds: whole-microsecond truncation per block would
    // add up to a visible error in the summed block time.
    using Microseconds = std::chrono::duration<double, std::micro>;

    const int fd = open(FILE_PATH, O_RDONLY | O_DIRECT);
    if (fd == -1) {
        const int openErrno = errno;  // capture before stream I/O can overwrite it
        std::cerr << "Error opening the file." << openErrno << std::endl;
        return;
    }

    // O_DIRECT transfers need an aligned user buffer; aligning to BLOCK_SIZE
    // (a power of two) over-satisfies typical block-size alignment rules.
    void* alignedBuffer = nullptr;
    if (posix_memalign(&alignedBuffer, BLOCK_SIZE, BLOCK_SIZE) != 0) {
        std::cerr << "Error allocating aligned buffer." << std::endl;
        close(fd);
        return;
    }

    double totalBlockTime = 0.0;
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < NUM_BLOCKS; ++i) {
        const auto blockStart = std::chrono::steady_clock::now();
        const ssize_t bytesRead = read(fd, alignedBuffer, BLOCK_SIZE);
        if (bytesRead == -1) {
            const int readErrno = errno;
            std::cerr << "Error reading the file." << readErrno << std::endl;
            close(fd);
            free(alignedBuffer);
            return;
        }
        const auto blockEnd = std::chrono::steady_clock::now();

        if (bytesRead == 0) {
            // End of file: a zero-byte read is not a block worth timing.
            std::cerr << "Reached end of file after " << i << " blocks." << std::endl;
            break;
        }

        const double blockTime = Microseconds(blockEnd - blockStart).count();
        std::cout << "Block " << (i + 1) << " read time: " << blockTime << " microseconds" << '\n';
        totalBlockTime += blockTime;
    }
    const auto end = std::chrono::steady_clock::now();

    // totalTime spans the whole loop, including the console output above;
    // totalBlockTime is the sum of the read() calls alone.
    const double totalTime = Microseconds(end - start).count();

    std::cout << "Total time: " << totalTime << " microseconds" << std::endl;
    std::cout << "Total block time: " << totalBlockTime << " microseconds" << std::endl;

    std::ofstream outfile("unbuffered_read_block_times_lin.txt", std::ios_base::app);
    if (outfile.is_open()) {
        outfile << "Unbuffered Read Time: " << totalTime << " microseconds" << std::endl;
        outfile.close();
    }

    close(fd);
    free(alignedBuffer);
}
/// Entry point: runs the unbuffered read benchmark once and exits with status 0.
int main()
{
    performUnbufferedRead();
}
#endif
The hardware configurations of the two systems were similar: Intel i7 processor, 16 GB DDR4 RAM, SSD storage, etc. There are minor differences, such as the processor generation (a few generations apart at most) and the SSD capacity, but would minor differences like these result in an over 20x difference between buffered and unbuffered read times? Note also that the buffered read times on Windows and Linux were almost the same; only on Linux was there a significant change.
Here are the details of the operating systems used: Linux 5.15.0-73-generic x86_64, and Windows 11 Enterprise, version 22H2, OS build 22621.1848.
I expected buffered reads to be faster than unbuffered reads because of read-ahead and other optimizations, but I could not figure out why the gap between buffered and unbuffered reads is so large on Linux.