0

I ran two experiments to check my understanding of I/O performance, both on a single mechanical CMR hard disk and both written in C/C++. My understanding is that a mechanical hard disk is a block device, so data is written to it block by block. Suppose that the disk's best sequential write performance is 200MB/s.

Experiment 1: 10 threads write files sequentially without calling flush; each thread writes its own 100 files (not written by any other thread, 1000 files in total). I found that the total throughput is nearly 200MB/s.

Experiment 2: T (64 or more) threads write files at random positions without calling flush; each thread writes its own file (not written by any other thread, T files in total). Each thread writes N (large) batches. For each batch, it seeks to a random position in its file and writes M (1~4096) bytes. I found that the total throughput is very low (<3MB/s), and that a smaller M leads to higher throughput.

I do not understand why. Both random and sequential writes go to the disk in blocks, and both go through some variation of the elevator algorithm when multiple files are written. So random writes with a large M (3072~4096) should not be that much slower than sequential writes. But the test result really shocked me. Does anyone know what is going on?

A related question already exists: Difference between sequential write and random write. But I could not find a solution there.

Here is my random-write test code (the main source file). Some explanations: this program can both generate the test files and run the random-write test on them. There are FILES_GEN files in total, each written by one thread. Each thread runs WRITE_TURNS turns; in each turn it prints a log line and performs WRITES_EACH_TURN writes. For each write, it seeks to a random position and writes WRITE_BYTES_ONCE bytes.


#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <thread>
#include <mutex>
#include <atomic>

#include <cstdlib>
#include <cstring>
#include <stdio.h>

#include <boost/filesystem.hpp>

#include "../lib/time.h"
#include "_random.h"

using namespace std;
using namespace boost::filesystem;

/******* DECL *******/

// Uncomment to make main() only generate the sample files and exit.
//#define MODE_GEN
// Number of sample files created by sample_gen().
#define FILES_GEN 512
// Length in bytes of each generated line, including the trailing '\n'.
#define FILE_GEN_LINE_SIZE 111
// sample_gen() keeps appending whole lines until at least this many bytes
// have been written to a file (4 MiB).
size_t FILE_GEN_SIZE = 1L * 1024 * 1024 * 4;

// Number of turns each writer thread runs in thread_main().
size_t WRITE_TURNS = 1L * 1000 * 100;
// Number of random-position writes performed in each turn.
size_t WRITES_EACH_TURN = 1L * 1000 * 1;
// Number of bytes written by a single write_once_old()/write_once_new() call.
#define WRITE_BYTES_ONCE 128
// Bounds handed to simp_rand.rand_double() to pick the per-turn pause that is
// passed to sleep_seconds(); when both bounds are equal thread_main() does not pause.
#define WRITE_INTERVAL_BEG 0.1
#define WRITE_INTERVAL_END 0.1
// Directory that holds the sample files (one file per writer thread).
#define WRITE_DIR "/home/bosser/mnt/sdc1/multiple_random_write"
// Filler character used for every non-newline byte of a generated file.
#define INIT_CHAR '.'
// Uncomment to fflush() the file at the end of every turn in thread_main().
//#define FLUSH_EVERY_TURN

// Running total of write operations; thread_main() adds WRITES_EACH_TURN per
// turn and thread_monitor() reads it once per second.
atomic_size_t write_cnt;
// Random source shared by all writer threads.
// NOTE(review): thread-safety of simple_random is not visible here - confirm.
simple_random simp_rand;

// Creates the sample files under WRITE_DIR.
void sample_gen();
// Body of one writer thread; f_i is the numeric name of the file it owns.
void thread_main(size_t f_i);
// Legacy writer: one fseek()+fputc() per byte.
void write_once_old(FILE *f, size_t f_size, size_t f_line_size, size_t w_pos, char write_buff[]);
// Current writer: fills write_buff, then writes it with one or two fwrite() calls.
void write_once_new(FILE *f, size_t f_size, size_t f_line_size, size_t w_pos, char write_buff[]);
// Body of the monitor thread that logs progress once per second.
void thread_monitor();

/******* IMPL *******/

int main(int argc, char *argv[]) {
    #ifdef MODE_GEN
    sample_gen();
    return 0;
    #else
    if (argc > 1 && strcmp(argv[1], "gen") == 0) {
        sample_gen();
        return 0;
    }
    #endif
    vector<size_t> f_i_vect;
    path write_dir_path(WRITE_DIR);
    for (directory_iterator iter(write_dir_path), end; iter != end; ++iter) {
        if (!is_regular_file(iter->path())) continue;
        try { f_i_vect.push_back(stol(iter->path().filename().string())); } catch (...) {}
    }
    sort(f_i_vect.begin(), f_i_vect.end());
    vector<thread*> threads;
    for (auto f_i : f_i_vect) {
        thread *td = new thread(thread_main, f_i);
        threads.push_back(td);
    }
    threads.push_back(new thread(thread_monitor));
    for (thread *td : threads) td->join();
    return 0;
}

void sample_gen() {
    system((string("rm -f ")+WRITE_DIR+"/*").c_str());
    for (size_t f_i = 0; f_i < FILES_GEN; ++f_i) {
        printf("gen file %d\n", f_i);
        string file_path = string(WRITE_DIR) + "/" + to_string(f_i);
        FILE *f = fopen(file_path.c_str(), "w");
        size_t wrote_size = 0;
        for (size_t wrote_size = 0; wrote_size < FILE_GEN_SIZE; wrote_size += FILE_GEN_LINE_SIZE) {
            for (size_t i = 0; i < FILE_GEN_LINE_SIZE - 1; ++i) fputc(INIT_CHAR, f);
            fputc('\n', f);
        }
        fclose(f);
    }
}

void thread_main(size_t f_i) {
    static mutex stdio_lock;
    #define within_stdio_lock(blk) { stdio_lock.lock(); blk; stdio_lock.unlock(); }
    size_t td_id = f_i;
    #define print_td_info() printf("TD[%3d]", td_id)
    within_stdio_lock(print_td_info(); printf(", begin\n"););
    string file_path = string(WRITE_DIR) + "/" + to_string(f_i);
    FILE *f = fopen(file_path.c_str(), "rb+");
    while (fgetc(f) != '\n') {}
    size_t f_line_size = ftell(f);
    fseek(f, 0, SEEK_END);
    size_t f_size = ftell(f);
    within_stdio_lock(print_td_info(); printf(", f_line_size: %ld, f_size: %ld\n", f_line_size, f_size););
    char write_buff[WRITE_BYTES_ONCE];
    //for (size_t i = 0; i < WRITE_BYTES_ONCE; ++i) { write_buff[i] = 'x'; }
    for (size_t turn = 0; turn < WRITE_TURNS; ++turn) {
        double write_interval = WRITE_INTERVAL_BEG == WRITE_INTERVAL_END ?
            0 : simp_rand.rand_double(WRITE_INTERVAL_BEG, WRITE_INTERVAL_END);
        within_stdio_lock(print_td_info(); printf(", write_interval: %.6f, turn: %ld\n", write_interval, turn););
        for (size_t w_i = 0; w_i < WRITES_EACH_TURN; ++w_i) {
            size_t w_pos = simp_rand.rand_access_pos(f_line_size, f_size);
            write_once_new(f, f_size, f_line_size, w_pos, write_buff);
        }
        write_cnt += WRITES_EACH_TURN;
        #ifdef FLUSH_EVERY_TURN
        fflush(f);
        #endif
        if (write_interval != 0) sleep_seconds(write_interval);
    }
    fclose(f);
    within_stdio_lock(print_td_info(); printf(", over\n"););
}

// Legacy (slow) random-access writer, kept for comparison.
// Starting at w_pos and wrapping at f_size, overwrites WRITE_BYTES_ONCE bytes
// with characters from simp_rand.rand_char_vsb(), one fseek()+fputc() per byte.
// Positions for which (pos + 1) is a multiple of f_line_size are skipped and
// do not count towards the total. write_buff is not used.
void write_once_old(FILE *f, size_t f_size, size_t f_line_size, size_t w_pos, char write_buff[]) {
    size_t bytes_done = 0;
    size_t cursor = w_pos;
    while (bytes_done < WRITE_BYTES_ONCE) {
        const bool skip_here = ((cursor + 1) % f_line_size) == 0;
        if (!skip_here) {
            fseek(f, cursor, SEEK_SET);
            const char fill = simp_rand.rand_char_vsb();
            fputc(fill, f);
            ++bytes_done;
        }
        // Advance with wrap-around, whether or not a byte was written.
        cursor = (cursor + 1) % f_size;
    }
}

// Overwrites WRITE_BYTES_ONCE bytes of f starting at w_pos, wrapping to the
// beginning of the file when the end (f_size) is reached. The byte count
// includes the '\n' bytes: every position that is the last column of a line
// (pos % f_line_size == f_line_size-1) gets '\n', every other position '+',
// so the line structure of the file is preserved.
//
//   f            file opened for update
//   f_size       size of the file in bytes
//   f_line_size  length of one line including its '\n'
//   w_pos        offset of the first byte to overwrite
//   write_buff   caller-provided scratch buffer of at least WRITE_BYTES_ONCE bytes
//
// Does nothing when f is null or f_size/f_line_size is 0. Stops early if a
// seek or write fails.
void write_once_new(FILE *f, size_t f_size, size_t f_line_size, size_t w_pos, char write_buff[]) {
    if (f == nullptr || f_size == 0 || f_line_size == 0) return;  // avoid modulo by zero
    const size_t total = WRITE_BYTES_ONCE;
    w_pos %= f_size;  // keeps f_size - w_pos from underflowing

    // Build the payload. Positions are tracked as size_t: an int would
    // truncate offsets in files larger than INT_MAX bytes.
    size_t pos = w_pos;
    for (size_t i = 0; i < total; ++i) {
        write_buff[i] = (pos % f_line_size == f_line_size - 1) ? '\n' : '+';
        pos = (pos + 1) % f_size;
    }

    // Write in chunks that never run past the end of the file, so the file is
    // not extended even when it is shorter than WRITE_BYTES_ONCE.
    size_t done = 0;
    pos = w_pos;
    while (done < total) {
        const size_t left = total - done;
        const size_t to_end = f_size - pos;
        const size_t chunk = left < to_end ? left : to_end;
        // Skip the write if the seek failed, rather than writing at a wrong offset.
        if (fseek(f, static_cast<long>(pos), SEEK_SET) != 0) return;
        if (fwrite(write_buff + done, 1, chunk, f) != chunk) return;
        done += chunk;
        pos = (pos + chunk) % f_size;
    }
}

void thread_monitor() {
    FILE *f = fopen("../log/multiple_random_write.log", "w");
    double time_beg = 0.001 * get_time_millisec();
    while (true) {
        sleep_seconds(1);
        double time_now = 0.001 * get_time_millisec();
        double time_psd = time_now - time_beg;
        double write_cnt_mb = 1.0 * write_cnt / 1024 / 1024;
        double write_rate_mbps = write_cnt_mb / time_psd;
        fprintf(f, "TD[monitor], time_psd, %.3fs, write_cnt: %.3fMB, write_rate: %.3fMB/s\n", time_psd, write_cnt_mb, write_rate_mbps);
        fflush(f);
    }
    fclose(f);
}


  • Since you're writing files to a filesystem (rather than disk blocks to a raw disk device), performance is likely to be dominated by filesystem and file cache effects -- does the filesystem do write journalling? How well does the file cache handle your files? By having more threads you're particularly putting more stress on the file cache. – Chris Dodd Apr 15 '22 at 08:30
  • Maybe you are right. I tested random writes on ext2 (non-journaling) again and got slightly higher performance (<4MB/s), but it is still low. I am giving up on this problem for now and will study it further in the future. – 万里独行侠 Apr 16 '22 at 09:03

0 Answers0