I have a pretty high-traffic nginx server dishing out static content for a family of websites, and I can't figure out why the disk writes are so high.
The setup: a VMware ESXi 6.0 host (datastore on 4x enterprise SSDs in RAID 10) running an Ubuntu 14.04.3 LTS VM with 4 cores and 16 GB RAM, with nginx 1.4.6.
The instance is currently handling roughly 75,000 connections (I know netstat lists all states, not just ESTABLISHED, but TIME_WAIT connections still tie up ports):
$ netstat -tn | wc -l
75237
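Breaking that down by state (my assumption being that a large chunk of it is TIME_WAIT), something like this should work:
$ netstat -tn | awk 'NR>2 {print $6}' | sort | uniq -c | sort -rn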
It's also pushing roughly 50 MiB/s of outbound traffic:
$ sudo bmon
Interfaces          | RX bps      pps     | TX bps      pps
 ->lo               |      4B       0     |       4B      0
   eth0             | 1.60MiB  17.57K     | 52.93MiB 13.57K
   qdisc none (mq)  |       0       0     | 54.64MiB 41.08K
     class :1 (mq)  |       0       0     | 21.49MiB 15.46K
     class :2 (mq)  |       0       0     | 11.65MiB  9.57K
     class :3 (mq)  |       0       0     | 11.62MiB  8.65K
     class :4 (mq)  |       0       0     |  9.88MiB  7.40K
And the write rate is through the roof!
$ sudo iostat
Linux 3.13.0-52-generic (hostname)  11/30/2015  _x86_64_  (4 CPU)

avg-cpu:  %user   %nice %system %iowait  %steal   %idle
           2.13    0.00    4.80   62.99    0.00   30.08

Device:            tps    kB_read/s    kB_wrtn/s    kB_read    kB_wrtn
sda              93.21       696.88     34015.01    6759664  329942882
dm-0            105.09       696.56     34022.58    6756593  330016284
dm-1              0.02         0.09         0.00        896          0
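The numbers above are averages since boot; per-second extended stats (await and %util per device) are available with:
$ iostat -dxk 1
iotop shows that it's the nginx workers doing the writing: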
$ sudo iotop -k -o
Total DISK READ : 71.03 K/s | Total DISK WRITE : 45959.36 K/s
Actual DISK READ: 71.03 K/s | Actual DISK WRITE: 33324.19 K/s
TID PRIO USER DISK READ DISK WRITE> SWAPIN IO COMMAND
1128 be/4 www-data 0.00 K/s 12134.03 K/s 0.00 % 78.36 % nginx: worker process
1119 be/4 www-data 0.00 K/s 8073.57 K/s 0.00 % 89.34 % nginx: worker process
1109 be/4 www-data 71.03 K/s 6065.04 K/s 0.00 % 26.60 % nginx: worker process
1110 be/4 www-data 0.00 K/s 4032.84 K/s 0.00 % 89.23 % nginx: worker process
1105 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1113 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 20.72 % nginx: worker process
1115 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1120 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 0.00 % nginx: worker process
1121 be/4 www-data 0.00 K/s 2024.31 K/s 0.00 % 61.78 % nginx: worker process
1114 be/4 www-data 0.00 K/s 2020.37 K/s 0.00 % 0.00 % nginx: worker process
1106 be/4 www-data 0.00 K/s 2016.42 K/s 0.00 % 48.97 % nginx: worker process
1122 be/4 www-data 0.00 K/s 1365.32 K/s 0.00 % 0.00 % nginx: worker process
184 be/3 root 0.00 K/s 126.27 K/s 0.00 % 90.53 % [jbd2/dm-0-8]
1127 be/4 www-data 0.00 K/s 3.95 K/s 0.00 % 0.00 % nginx: worker process
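To figure out what those workers are actually writing, one way to narrow it down is to take the busiest one (PID 1128 above), list its open file descriptors, and attach strace to its write syscalls:
$ sudo ls -l /proc/1128/fd
$ sudo strace -f -e trace=write,writev,pwrite64 -p 1128
The fd listing maps descriptor numbers to paths, so the strace output should reveal whether the writes are going to logs, temp files, or something else.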
This is my current nginx config; the commented-out lines are variants I have tried:
user www-data;
worker_processes 32; # I know the recommendation is 1 per core, but with this set to auto, images started breaking instead of just lagging (and my boss considered lagging the lesser evil short-term)
#worker_processes auto;
worker_rlimit_nofile 100000;
pid /run/nginx.pid;
events {
worker_connections 4000;
multi_accept on;
use epoll;
# accept_mutex off;
}
http {
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 15;
keepalive_requests 200;
reset_timedout_connection on;
types_hash_max_size 2048;
server_tokens off;
open_file_cache max=200000 inactive=20s;
open_file_cache_valid 30s;
open_file_cache_min_uses 2;
open_file_cache_errors on;
include /etc/nginx/mime.types;
default_type application/octet-stream;
index index.php index.htm index.html;
# client_body_buffer_size 10k;
# client_body_buffer_size 16K;
# client_body_buffer_size 128K;
client_body_buffer_size 1m;
client_header_buffer_size 1k;
# client_header_buffer_size 2k;
client_max_body_size 25m;
# large_client_header_buffers 2 1k;
large_client_header_buffers 4 8k;
client_body_timeout 15;
client_header_timeout 15;
send_timeout 2;
access_log off;
error_log /var/log/nginx/error.log crit;
gzip on;
gzip_disable "msie6";
gzip_proxied expired no-cache no-store private auth;
gzip_comp_level 2;
gzip_min_length 10240;
gzip_types text/plain text/css text/xml text/javascript application/json application/x-javascript application/xml application/xml+rss;
include /etc/nginx/conf.d/*.conf;
include /etc/nginx/sites-enabled/*;
}
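One theory I haven't ruled out: nginx spills request bodies (and proxied/FastCGI responses, where applicable) to temp files whenever they don't fit in the in-memory buffers. I'm assuming the stock Ubuntu temp paths under /var/lib/nginx here (nginx -V lists the compiled-in *_temp_path defaults), so watching those directories for growth should confirm or eliminate this:
$ sudo watch -n1 'du -sk /var/lib/nginx/* 2>/dev/null'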
These are the modifications I've made to /etc/sysctl.conf:
fs.file-max = 2097152
vm.swappiness = 10
vm.dirty_ratio = 60
vm.dirty_background_ratio = 2
net.ipv4.tcp_synack_retries = 2
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_rfc1337 = 1
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.tcp_keepalive_time = 300
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_intvl = 15
net.core.rmem_default = 31457280
net.core.rmem_max = 33554432
net.core.wmem_default = 31457280
net.core.wmem_max = 33554432
#net.core.somaxconn = 4096
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.core.netdev_max_backlog = 65536
net.core.optmem_max = 25165824
net.ipv4.tcp_mem = 65536 131072 262144
net.ipv4.udp_mem = 65536 131072 262144
net.ipv4.tcp_rmem = 8192 87380 16777216
net.ipv4.udp_rmem_min = 16384
net.ipv4.tcp_max_tw_buckets = 1440000
net.ipv4.tcp_tw_recycle = 0
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_congestion_control = cubic
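With 16 GB of RAM, vm.dirty_background_ratio = 2 means background writeback kicks in at roughly 330 MB of dirty pages, so the kernel flusher may simply be keeping pace with a constant stream of newly dirtied data. Watching the dirty-page counters should show whether that's happening:
$ watch -n1 'grep -E "^(Dirty|Writeback):" /proc/meminfo'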
I'm at a loss as to what's causing the high write rate. I thought it was the client buffers, but none of the changes to them made any difference. I made sure all updates were installed and rebooted the server, but nothing brought the write rate down. Any help would be appreciated!
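(If it would help, I can also capture a short system-wide trace of file events and aggregate the nginx lines; fatrace is in the Ubuntu repos:
$ sudo timeout 30 fatrace | grep nginx | sort | uniq -c | sort -rn | head
I'll post that output if nothing obvious stands out from the configs above.)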