I have a pretty high-traffic nginx server dishing out static content for a family of websites, and I can't figure out why the disk writes are so high.
The setup: a VMware ESXi 6.0 host (datastore on 4x enterprise SSDs in RAID 10) running an Ubuntu 14.04.3 LTS VM with 4 cores and 16 GB RAM, with nginx 1.4.6.
The instance is currently handling roughly 75,000 connections (I know netstat lists all states, not just ESTABLISHED, but TIME_WAIT connections still tie up ports):
$ netstat -tn | wc -l
75237
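Breaking that down by state (my assumption being that a large chunk of it is TIME_WAIT), something like this should work:
$ netstat -tn | awk 'NR>2 {print $6}' | sort | uniq -c | sort -rn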
It's also pushing roughly 50 MiB/s of outbound traffic:
$ sudo bmon
Interfaces          | RX bps      pps     | TX bps      pps
 ->lo               |      4B       0     |       4B      0
   eth0             | 1.60MiB  17.57K     | 52.93MiB 13.57K
   qdisc none (mq)  |       0       0     | 54.64MiB 41.08K
     class :1 (mq)  |       0       0     | 21.49MiB 15.46K
     class :2 (mq)  |       0       0     | 11.65MiB  9.57K
     class :3 (mq)  |       0       0     | 11.62MiB  8.65K
     class :4 (mq)  |       0       0     |  9.88MiB  7.40K
And the write rate is through the roof!
$ sudo iostat
Linux 3.13.0-52-generic (hostname)  11/30/2015  _x86_64_  (4 CPU)

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           2.13    0.00    4.80   62.99    0.00   30.08

Device:            tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn
sda              93.21       696.88     34015.01    6759664  329942882
dm-0            105.09       696.56     34022.58    6756593  330016284
dm-1              0.02         0.09         0.00        896          0
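The numbers above are averages since boot; per-second extended stats (await and %util per device) are available with:
$ iostat -dxk 1
iotop shows that it's the nginx workers doing the writing: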
$ sudo iotop -k -o
Total DISK READ : 71.03 K/s | Total DISK WRITE : 45959.36 K/s
Actual DISK READ: 71.03 K/s | Actual DISK WRITE: 33324.19 K/s
TID PRIO USER DISK READ DISK WRITE> SWAPIN IO COMMAND
1128 be/4 www-data 0.00 K/s 12134.03 K/s 0.00 % 78.36 % nginx: worker process
1119 be/4 www-data 0.00 K/s 8073.57 K/s 0.00 % 89.34 % nginx: worker process
1109 be/4 www-data 71.03 K/s 6065.04 K/s 0.00 % 26.60 % nginx: worker process
1110 be/4 www-data 0.00 K/s 4032.84 K/s 0.00 % 89.23 % nginx: worker process
1105 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1113 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 20.72 % nginx: worker process
1115 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1120 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1121 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 61.78 % nginx: worker process
1114 be/4 www-data 0.00 K/s 2020.37 K/s 0.00 % 0.00 % nginx: worker process
1106 be/4 www-data 0.00 K/s 2016.42 K/s 0.00 % 48.97 % nginx: worker process
1122 be/4 www-data 0.00 K/s 1365.32 K/s 0.00 % 0.00 % nginx: worker process
184 be/3 root 0.00 K/s 126.27 K/s 0.00 % 90.53 % [jbd2/dm-0-8]
1127 be/4 www-data 0.00 K/s 3.95 K/s 0.00 % 0.00 % nginx: worker process
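To figure out what those workers are actually writing, one way to narrow it down is to take the busiest one (PID 1128 above), list its open file descriptors, and attach strace to its write syscalls:
$ sudo ls -l /proc/1128/fd
$ sudo strace -f -e trace=write,writev,pwrite64 -p 1128
The fd listing maps descriptor numbers to paths, so the strace output should reveal whether the writes are going to logs, temp files, or something else.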
This is my current nginx config; the commented-out lines are variants I have tried:
user www-data;
worker_processes 32; # I know the recommendation is 1 per core, but with this set to auto, images started breaking instead of just lagging (and my boss considered lagging the lesser evil short-term)
#worker_processes auto;
worker_rlimit_nofile 100000;
pid /run/nginx.pid;
events {
worker_connections 4000;
multi_accept on;
use epoll;
# accept_mutex off;
}
http {
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 15;
keepalive_requests 200;
reset_timedout_connection on;
types_hash_max_size 2048;
server_tokens off;
open_file_cache max=200000 inactive=20s;
open_file_cache_valid 30s;
open_file_cache_min_uses 2;
open_file_cache_errors on;
include /etc/nginx/mime.types;
default_type application/octet-stream;
index index.php index.htm index.html;
# client_body_buffer_size 10k;
# client_body_buffer_size 16K;
# client_body_buffer_size 128K;
client_body_buffer_size 1m;
client_header_buffer_size 1k;
# client_header_buffer_size 2k;
client_max_body_size 25m;
# large_client_header_buffers 2 1k;
large_client_header_buffers 4 8k;
client_body_timeout 15;
client_header_timeout 15;
send_timeout 2;
access_log off;
error_log /var/log/nginx/error.log crit;
gzip on;
gzip_disable "msie6";
gzip_proxied expired no-cache no-store private auth;
gzip_comp_level 2;
gzip_min_length 10240;
gzip_types text/plain text/css text/xml text/javascript application/json application/x-javascript application/xml application/xml+rss;
include /etc/nginx/conf.d/*.conf;
include /etc/nginx/sites-enabled/*;
}
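One theory I haven't ruled out: nginx spills request bodies (and proxied/FastCGI responses, where applicable) to temp files whenever they don't fit in the in-memory buffers. I'm assuming the stock Ubuntu temp paths under /var/lib/nginx here (nginx -V lists the compiled-in *_temp_path defaults), so watching those directories for growth should confirm or eliminate this:
$ sudo watch -n1 'du -sk /var/lib/nginx/* 2>/dev/null'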
These are the modifications I've made to /etc/sysctl.conf:
fs.file-max = 2097152
vm.swappiness = 10
vm.dirty_ratio = 60
vm.dirty_background_ratio = 2
net.ipv4.tcp_synack_retries = 2
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_rfc1337 = 1
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.tcp_keepalive_time = 300
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_intvl = 15
net.core.rmem_default = 31457280
net.core.rmem_max = 33554432
net.core.wmem_default = 31457280
net.core.wmem_max = 33554432
#net.core.somaxconn = 4096
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.core.netdev_max_backlog = 65536
net.core.optmem_max = 25165824
net.ipv4.tcp_mem = 65536 131072 262144
net.ipv4.udp_mem = 65536 131072 262144
net.ipv4.tcp_rmem = 8192 87380 16777216
net.ipv4.udp_rmem_min = 16384
net.ipv4.tcp_max_tw_buckets = 1440000
net.ipv4.tcp_tw_recycle = 0
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_congestion_control = cubic
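With 16 GB of RAM, vm.dirty_background_ratio = 2 means background writeback kicks in at roughly 330 MB of dirty pages, so the kernel flusher may simply be keeping pace with a constant stream of newly dirtied data. Watching the dirty-page counters should show whether that's happening:
$ watch -n1 'grep -E "^(Dirty|Writeback):" /proc/meminfo'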
I'm at a loss as to what's causing the high write rate. I thought it was the client buffers, but none of the changes to them made any difference. I made sure all updates were installed and rebooted the server, but nothing brought the write rate down. Any help would be appreciated!
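(If it would help, I can also capture a short system-wide trace of file events and aggregate the nginx lines; fatrace is in the Ubuntu repos:
$ sudo timeout 30 fatrace | grep nginx | sort | uniq -c | sort -rn | head
I'll post that output if nothing obvious stands out from the configs above.)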