21

I have a problem setting up mailcow behind traefik: I keep getting gateway timeouts. I have the same problem with nextcloud, so I would really like to know what causes these gateway timeouts.

I guess it has something to do with port 9000 and the php-fpm upstream, or something similar.

But I want to know for sure, and how to deal with it.

My traefik.toml:

debug = true
checkNewVersion = true
defaultEntryPoints = ["http", "https"]

[web]
address = ":8080"
[web.auth.basic]
users = ["admin:undecipherablestring"]

[entryPoints]
  [entryPoints.http]
  address = ":80"
    [entryPoints.http.redirect]
      entryPoint = "https"
  [entryPoints.https]
  address = ":443"
    [entryPoints.https.tls]

[acme]
email = "email@address.com"
storage = "acme.json"
entryPoint = "https"
onHostRule = true

[[acme.domains]]
  main = "main.com"
  sans = ["monitor.main.com", "ports.main.com", "git.main.com", "cloud.main.com", "mail.main.com"]

My traefik docker-compose.yml:

version: '2'
services:
  proxy:
    image: traefik
    container_name: traefik
    restart: always
    command: |- 
      --docker 
      --docker.domain=docker.localhost 
      --logLevel=DEBUG
    networks:
      - webgateway
    labels:
      - "traefik.frontend.rule=Host: monitor.main.com"
      - "traefik.port=8080"
    ports:
      - "80:80"
      - "443:443"
      - "8080:8080"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./traefik.toml:/traefik.toml
      - ./acme.json:/acme.json
      - acme:/opt/traefik/acme

networks:
  webgateway:
    driver: bridge

volumes:
  acme:
    driver: local

My mailcow docker-compose.yml:

version: '2.1'
services:

    unbound-mailcow:
      image: mailcow/unbound:1.0
      build: ./data/Dockerfiles/unbound
      command: /usr/sbin/unbound
      depends_on:
        mysql-mailcow:
          condition: service_healthy
      healthcheck:
        test: ["CMD", "nslookup", "google.com", "127.0.0.1"]
        interval: 30s
        timeout: 3s
        retries: 10
      volumes:
        - ./data/conf/unbound/unbound.conf:/etc/unbound/unbound.conf:ro
      restart: always
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.254
          aliases:
            - unbound

    mysql-mailcow:
      image: mariadb:10.1
      command: mysqld --max_allowed_packet=128M
      healthcheck:
        test: ["CMD", "mysqladmin", "ping", "--host", "localhost", "--silent"]
        interval: 5s
        timeout: 5s
        retries: 10
      volumes:
        - mysql-vol-1:/var/lib/mysql/
        - ./data/conf/mysql/:/etc/mysql/conf.d/:ro
      environment:
        - MYSQL_ROOT_PASSWORD=${DBROOT}
        - MYSQL_DATABASE=${DBNAME}
        - MYSQL_USER=${DBUSER}
        - MYSQL_PASSWORD=${DBPASS}
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.250
          aliases:
            - mysql

    redis-mailcow:
      image: redis:alpine
      depends_on:
        unbound-mailcow:
          condition: service_healthy
      volumes:
        - redis-vol-1:/data/
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.249
          aliases:
            - redis

    clamd-mailcow:
      image: mailcow/clamd:1.1
      build: ./data/Dockerfiles/clamd
      restart: always
      environment:
        - SKIP_CLAMD=${SKIP_CLAMD:-n}
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          aliases:
            - clamd

    rspamd-mailcow:
      image: mailcow/rspamd:1.3
      build: ./data/Dockerfiles/rspamd
      command: > 
        /bin/bash -c "
        sleep 5;
        /usr/bin/rspamd -f -u _rspamd -g _rspamd
        "
      depends_on:
        - nginx-mailcow
      volumes:
        - ./data/conf/rspamd/override.d/:/etc/rspamd/override.d:ro
        - ./data/conf/rspamd/local.d/:/etc/rspamd/local.d:ro
        - ./data/conf/rspamd/lua/:/etc/rspamd/lua/:ro
        - dkim-vol-1:/data/dkim
        - rspamd-vol-1:/var/lib/rspamd
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      hostname: rspamd
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.253
          aliases:
            - rspamd

    php-fpm-mailcow:
      image: mailcow/phpfpm:1.0
      build: ./data/Dockerfiles/phpfpm
      command: "php-fpm -d date.timezone=${TZ}"
      depends_on:
        - redis-mailcow
      volumes:
        - ./data/web:/web:ro
        - ./data/conf/rspamd/dynmaps:/dynmaps:ro
        - dkim-vol-1:/data/dkim
      environment:
        - DBNAME=${DBNAME}
        - DBUSER=${DBUSER}
        - DBPASS=${DBPASS}
        - MAILCOW_HOSTNAME=${MAILCOW_HOSTNAME}
        - IMAP_PORT=${IMAP_PORT:-143}
        - IMAPS_PORT=${IMAPS_PORT:-993}
        - POP_PORT=${POP_PORT:-110}
        - POPS_PORT=${POPS_PORT:-995}
        - SIEVE_PORT=${SIEVE_PORT:-4190}
        - SUBMISSION_PORT=${SUBMISSION_PORT:-587}
        - SMTPS_PORT=${SMTPS_PORT:-465}
        - SMTP_PORT=${SMTP_PORT:-25}
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          aliases:
            - phpfpm

    sogo-mailcow:
      image: mailcow/sogo:1.3
      build: ./data/Dockerfiles/sogo
      depends_on:
        unbound-mailcow:
          condition: service_healthy
      environment:
        - DBNAME=${DBNAME}
        - DBUSER=${DBUSER}
        - DBPASS=${DBPASS}
        - TZ=${TZ}
        - MAILCOW_HOSTNAME=${MAILCOW_HOSTNAME}
      volumes:
        - ./data/conf/sogo/:/etc/sogo/
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.252
          aliases:
            - sogo

    dovecot-mailcow:
      image: mailcow/dovecot:1.4
      build: ./data/Dockerfiles/dovecot
      depends_on:
        unbound-mailcow:
          condition: service_healthy
      volumes:
        - ./data/conf/dovecot:/usr/local/etc/dovecot
        - ./data/assets/ssl:/etc/ssl/mail/:ro
        - ./data/conf/sogo/:/etc/sogo/
        - vmail-vol-1:/var/vmail
        - crypt-vol-1:/mail_crypt/
      environment:
        - DBNAME=${DBNAME}
        - DBUSER=${DBUSER}
        - DBPASS=${DBPASS}
      ports:
        - "${DOVEADM_PORT:-127.0.0.1:19991}:12345"
        - "${IMAP_PORT:-143}:143"
        - "${IMAPS_PORT:-993}:993"
        - "${POP_PORT:-110}:110"
        - "${POPS_PORT:-995}:995"
        - "${SIEVE_PORT:-4190}:4190"
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      hostname: ${MAILCOW_HOSTNAME}
      networks:
        mailcow-network:
          aliases:
            - dovecot

    postfix-mailcow:
      image: mailcow/postfix:1.2
      build: ./data/Dockerfiles/postfix
      depends_on:
        unbound-mailcow:
          condition: service_healthy
      volumes:
        - ./data/conf/postfix:/opt/postfix/conf
        - ./data/assets/ssl:/etc/ssl/mail/:ro
        - postfix-vol-1:/var/spool/postfix
        - crypt-vol-1:/var/lib/zeyple
      environment:
        - DBNAME=${DBNAME}
        - DBUSER=${DBUSER}
        - DBPASS=${DBPASS}
      ports:
        - "${SMTP_PORT:-25}:25"
        - "${SMTPS_PORT:-465}:465"
        - "${SUBMISSION_PORT:-587}:587"
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      hostname: ${MAILCOW_HOSTNAME}
      networks:
        mailcow-network:
          aliases:
            - postfix

    memcached-mailcow:
      image: memcached:alpine
      depends_on:
        unbound-mailcow:
          condition: service_healthy
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      networks:
        mailcow-network:
          aliases:
            - memcached

    nginx-mailcow:
      depends_on:
        - sogo-mailcow
        - php-fpm-mailcow
      image: nginx:mainline-alpine
      healthcheck:
        test: ["CMD", "ping", "php-fpm-mailcow", "-c", "5"]
        interval: 5s
        timeout: 5s
        retries: 10
      command: /bin/sh -c "envsubst < /etc/nginx/conf.d/templates/listen_plain.template > /etc/nginx/conf.d/listen_plain.active &&
        envsubst < /etc/nginx/conf.d/templates/listen_ssl.template > /etc/nginx/conf.d/listen_ssl.active &&
        envsubst < /etc/nginx/conf.d/templates/server_name.template > /etc/nginx/conf.d/server_name.active &&
        nginx -g 'daemon off;'"
      environment:
        - HTTPS_PORT=${HTTPS_PORT:-443}
        - HTTP_PORT=${HTTP_PORT:-80}
        - MAILCOW_HOSTNAME=${MAILCOW_HOSTNAME}
      volumes:
        - ./data/web:/web:ro
        - ./data/conf/rspamd/dynmaps:/dynmaps:ro
        - ./data/assets/ssl/:/etc/ssl/mail/:ro
        - ./data/conf/nginx/:/etc/nginx/conf.d/:rw
      expose:
        - "${HTTP_PORT:-80}"      
      #ports:
        #- "${HTTPS_BIND:-0.0.0.0}:${HTTPS_PORT:-443}:${HTTPS_PORT:-443}"
        #- "${HTTP_BIND:-0.0.0.0}:${HTTP_PORT:-80}:${HTTP_PORT:-80}"
      restart: always
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      labels:
      - "traefik.frontend.rule=Host: ${MAILCOW_HOSTNAME}"
      - "traefik.backend=mailcow"
      - "traefik.port=80"
      - "traefik.frontend.entryPoints=http,https"
      - "traefik.docker.network=traefik"
      networks:
        mailcow-network:
          ipv4_address: 172.22.1.251
          aliases:
            - nginx
        traefik:

    acme-mailcow:
      depends_on:
        - nginx-mailcow
      image: mailcow/acme:1.12
      build: ./data/Dockerfiles/acme
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      environment:
        - ADDITIONAL_SAN=${ADDITIONAL_SAN}
        - MAILCOW_HOSTNAME=${MAILCOW_HOSTNAME}
        - DBNAME=${DBNAME}
        - DBUSER=${DBUSER}
        - DBPASS=${DBPASS}
        - SKIP_LETS_ENCRYPT=${SKIP_LETS_ENCRYPT:-n}
        - SKIP_IP_CHECK=${SKIP_IP_CHECK:-n}
      volumes:
        - ./data/web/.well-known/acme-challenge:/var/www/acme:rw
        - ./data/assets/ssl:/var/lib/acme/:rw
        - ./data/assets/ssl-example:/var/lib/ssl-example/:ro
        - /var/run/docker.sock:/var/run/docker.sock:ro
      # do not restart the container too often. Things get worse when we hit let's encrypt's ratelimit.
      restart: on-failure:1
      networks:
        mailcow-network:
          aliases:
            - acme

    fail2ban-mailcow:
      image: mailcow/fail2ban:1.5
      build: ./data/Dockerfiles/fail2ban
      depends_on:
        - dovecot-mailcow
        - postfix-mailcow
        - sogo-mailcow
        - php-fpm-mailcow
        - redis-mailcow
      restart: always
      privileged: true
      environment:
        - TZ=${TZ}
        - SKIP_FAIL2BAN=${SKIP_FAIL2BAN:-no}
      network_mode: "host"
      dns:
        - 172.22.1.254
      dns_search: mailcow-network
      volumes:
        - /lib/modules:/lib/modules:ro

    ipv6nat:
      image: robbertkl/ipv6nat
      restart: always
      privileged: true
      network_mode: "host"
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock:ro
        - /lib/modules:/lib/modules:ro

networks:
  mailcow-network:
    driver: bridge
    enable_ipv6: true
    ipam:
      driver: default
      config:
        - subnet: 172.22.1.0/24
        - subnet: fd4d:6169:6c63:6f77::/64
  traefik:
    external:
      name: traefik_webgateway

volumes:
  vmail-vol-1:
  mysql-vol-1:
  dkim-vol-1:
  redis-vol-1:
  rspamd-vol-1:
  postfix-vol-1:
  crypt-vol-1:
Nicola Ben
  • 10,615
  • 8
  • 41
  • 65
metanerd
  • 713
  • 1
  • 6
  • 21
  • Heyo, serdmanczyk from reddit. I took a look but not sure, sorry :/. – Serdmanczyk Sep 15 '17 at 16:08
  • Just in case anyone has the same issue as me. I had a docker-compose file for my traefik setup in one folder and I was trying to run apache containers from another directory with a separate docker-compose file. This caused 504 gateway timeout errors for those apache containers. I (temporarily) moved everything in the same docker-compose file and it worked instantly. Adding the `traefik.docker.network` label didn't help. – milosa Nov 24 '19 at 12:15

4 Answers

27

I think I may have had a similar issue to what you are/were experiencing. Take a look at this GitHub issue: https://github.com/containous/traefik/issues/979

If your problem is the same as mine, here is the issue:

Traefik is on a "front facing" network, and so is one of your services, but that service is also part of a "back facing" network. Traefik, by default, doesn't know which network to send requests to... so it sends them to a randomly chosen one of the two IP address options (picked at the creation of that container). If Traefik isn't part of that network, it won't be able to reach that container, and will give you a Gateway Timeout.

Solution: add a label to your container to directly specify to Traefik what network it should be communicating on:

 labels:
      - "traefik.enable=true"
      - "traefik.docker.network=<folder prefix>webgateway"
      - "traefik.backend=<backend service>"
      - "traefik.frontend.rule=Host:<host setting>"

Pro tip: use docker network ls to figure out what the actual name of your network is, because it is not what docker-compose says in the file. The actual network name is prefixed based on the name of the folder that it is run in. (I don't know why, and I don't like it, but that is the world we live in)

Hence the <folder prefix> in my above example.

CenterOrbit
  • 6,446
  • 1
  • 28
  • 34
  • `- "traefik.docker.network=traefik"` I already specified the network, because I already had a look at this issue. It still does not work, the question also is for me, which containers all need to be in this network. – metanerd Sep 27 '17 at 07:25
  • All containers that need to talk to traefik need to be on this network. Also, make sure you run a `docker network ls`, because your actual network name will not be as simple as "traefik", the docker-compose command alters the name, prefixing the folder name that you're running the command. Use that real network name in your label. – CenterOrbit Sep 29 '17 at 18:23
  • 2
    It's a silly nuance with Traefik config and Docker networking. The name specifying is more of a "work around" until a more "it just works" solution is available. It took me (sadly) about a week to track down why this was happening. And, as soon as I figured it out, I circled back to this StackOverflow to report my findings to help others! Glad it helped you! – CenterOrbit Oct 17 '17 at 15:46
  • 1
    Wow, this is the correct answer... I had to add the _full network name_, which is contrary to the GitHub issues, example configurations, and so many other places. – Cobertos May 21 '20 at 08:19
  • @Coburn I feel your pain. If you had any idea how long it took me to track down the root cause... – CenterOrbit May 21 '20 at 19:28
3

What I do is define a variable for the project name in a file called .env in the same directory as docker-compose.yml (the project name ends up as the prefix for all containers, networks, etc.). It should contain only plain alphanumeric characters: no spaces, dashes, etc.

COMPOSE_PROJECT_NAME=myproject

Then, in your application label, you use the variable followed by the name of the created network, separated by "_":

 - "traefik.docker.network=${COMPOSE_PROJECT_NAME}_mynetwork"
Carlosedp
  • 1,951
  • 1
  • 17
  • 15
2

FWIW, if you define the network in the provider section in your traefik service definition, you do not need to add it to each service you add going forward.

Example:

version: "3.3"

services:
  traefik:
    image: "traefik:v2.2"
    container_name: "traefik"
    command:
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.network=traefik"     # <=========
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:80"
    ports:
      - "80:80"
      - "8090:8080"
    networks:
      - traefik
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock:ro"

networks:
  traefik:
    external: true

You should create the traefik network outside your compose file.

tokynet
  • 21
  • 1
  • Why should we create the `Traefik` network outside the Compose file ? Can't we create it in the Compose file when running a Swarm ? – Stephane Jun 25 '21 at 19:33
  • 1
    Mainly because you will reference that network on other compose files. I believe if you try to "docker-compose down" on the traefik directory, it will fail because the network is in-use on other compose files. By decoupling the network from the compose file, you can restart the traefik compose as needed without having to go stop the other "stacks". For example if you needed to update traefik. Yes, when you stop the traefik compose you will have an "outage" but the apps behind traefik could/would stay up and working as expected while you bring traefik back "up". – tokynet Jun 28 '21 at 20:34
1

I think this issue has been resolved by the traefik community. You can check this: https://github.com/containous/traefik/pull/1873

Mohamed Chaawa
  • 918
  • 1
  • 9
  • 23