
I have a MongoDB 4.0.1 replica set with 4 nodes, set up with the following configuration:

var cfg = {
    "_id": "rs0",
    "version": 1,
    "members": [
        {
            "_id": 0,
            "host": "mongo-1:27017",
            "priority": 3
        },
        {
            "_id": 1,
            "host": "mongo-2:27017",
            "priority": 2
        },
        {
            "_id": 2,
            "host": "mongo-3:27017",
            "priority": 1
        },
        {
            "_id": 3,
            "host": "mongo-4:27017",
            "arbiterOnly": true
        }
    ]
};
rs.initiate(cfg, { force: true });
rs.reconfig(cfg, { force: true });
rs.status();

It's all on localhost and connecting works fine. I'm basically testing that when I bring down the primary, mongo-1, the remaining nodes elect a new primary within 10-12 seconds, ideally mongo-2 since it has the next highest priority.
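
For reference, here is a quick check I can run from the mongo shell to confirm the member priorities and the election timeout (this assumes the default settings.electionTimeoutMillis of 10000 ms, since the config above doesn't override it):

var conf = rs.conf();
conf.members.forEach(function (m) {
    print(m.host + " priority=" + m.priority + (m.arbiterOnly ? " (arbiter)" : ""));
});
// electionTimeoutMillis defaults to 10000 (10 seconds) when not set explicitly
print("electionTimeoutMillis: " + conf.settings.electionTimeoutMillis);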

But if I take down the mongo-1 container, the other 3 nodes just keep trying to connect to mongo-1 indefinitely. I've waited over 10 minutes, but all the logs show are repeated connection attempts and host-unreachable errors.
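
To see how the surviving members view the set while mongo-1 is down, I can run a short diagnostic loop over rs.status() on mongo-2 or mongo-3 (just a sketch, using the hostnames from the config above):

rs.status().members.forEach(function (m) {
    // health is 0 for unreachable members; stateStr shows "(not reachable/healthy)" in that case
    print(m.name + " health=" + m.health + " state=" + m.stateStr);
});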

If anyone has any ideas about what I might be missing, or a config option I'm not setting, please feel free to comment or suggest a solution. Thanks!

EDIT: Here is the output from rs.status():

rs0:PRIMARY> rs.status()
{
    "set" : "rs0",
    "date" : ISODate("2018-10-27T00:47:23.582Z"),
    "myState" : 1,
    "term" : NumberLong(4),
    "syncingTo" : "",
    "syncSourceHost" : "",
    "syncSourceId" : -1,
    "heartbeatIntervalMillis" : NumberLong(2000),
    "optimes" : {
        "lastCommittedOpTime" : {
            "ts" : Timestamp(1540601235, 1),
            "t" : NumberLong(4)
        },
        "readConcernMajorityOpTime" : {
            "ts" : Timestamp(1540601235, 1),
            "t" : NumberLong(4)
        },
        "appliedOpTime" : {
            "ts" : Timestamp(1540601235, 1),
            "t" : NumberLong(4)
        },
        "durableOpTime" : {
            "ts" : Timestamp(1540601235, 1),
            "t" : NumberLong(4)
        }
    },
    "lastStableCheckpointTimestamp" : Timestamp(1540601162, 1),
    "members" : [
        {
            "_id" : 0,
            "name" : "mongo-1:27017",
            "health" : 1,
            "state" : 1,
            "stateStr" : "PRIMARY",
            "uptime" : 227,
            "optime" : {
                "ts" : Timestamp(1540601235, 1),
                "t" : NumberLong(4)
            },
            "optimeDate" : ISODate("2018-10-27T00:47:15Z"),
            "syncingTo" : "",
            "syncSourceHost" : "",
            "syncSourceId" : -1,
            "infoMessage" : "could not find member to sync from",
            "electionTime" : Timestamp(1540601173, 1),
            "electionDate" : ISODate("2018-10-27T00:46:13Z"),
            "configVersion" : 1,
            "self" : true,
            "lastHeartbeatMessage" : ""
        },
        {
            "_id" : 1,
            "name" : "mongo-2:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 31,
            "optime" : {
                "ts" : Timestamp(1540601235, 1),
                "t" : NumberLong(4)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1540601235, 1),
                "t" : NumberLong(4)
            },
            "optimeDate" : ISODate("2018-10-27T00:47:15Z"),
            "optimeDurableDate" : ISODate("2018-10-27T00:47:15Z"),
            "lastHeartbeat" : ISODate("2018-10-27T00:47:22.085Z"),
            "lastHeartbeatRecv" : ISODate("2018-10-27T00:47:22.295Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "mongo-1:27017",
            "syncSourceHost" : "mongo-1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        },
        {
            "_id" : 2,
            "name" : "mongo-3:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 225,
            "optime" : {
                "ts" : Timestamp(1540601235, 1),
                "t" : NumberLong(4)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1540601235, 1),
                "t" : NumberLong(4)
            },
            "optimeDate" : ISODate("2018-10-27T00:47:15Z"),
            "optimeDurableDate" : ISODate("2018-10-27T00:47:15Z"),
            "lastHeartbeat" : ISODate("2018-10-27T00:47:21.677Z"),
            "lastHeartbeatRecv" : ISODate("2018-10-27T00:47:22.491Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "mongo-1:27017",
            "syncSourceHost" : "mongo-1:27017",
            "syncSourceId" : 0,
            "infoMessage" : "",
            "configVersion" : 1
        },
        {
            "_id" : 3,
            "name" : "mongo-4:27017",
            "health" : 1,
            "state" : 7,
            "stateStr" : "ARBITER",
            "uptime" : 225,
            "lastHeartbeat" : ISODate("2018-10-27T00:47:21.657Z"),
            "lastHeartbeatRecv" : ISODate("2018-10-27T00:47:21.804Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "",
            "syncSourceHost" : "",
            "syncSourceId" : -1,
            "infoMessage" : "",
            "configVersion" : 1
        }
    ],
    "ok" : 1,
    "operationTime" : Timestamp(1540601235, 1),
    "$clusterTime" : {
        "clusterTime" : Timestamp(1540601235, 1),
        "signature" : {
            "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
            "keyId" : NumberLong(0)
        }
    }
}
  • Not sure if this is your problem, but MongoDB requires an odd number of voting members in a replica set. For an explanation see this (https://stackoverflow.com/questions/16150409/why-does-a-mongodb-replica-set-require-an-odd-number-of-voting-nodes). So the recommendation is 3, 5, 7, and so on. What is the purpose of the arbiter here? Primary - Secondary - Secondary is a valid configuration (see the sketch after these comments). – gamepop Oct 26 '18 at 22:09
  • I was having the same issue with 1 primary and 2 secondaries. – Andrew Edwards Oct 26 '18 at 22:14
  • Can you edit your question to include the output of `rs.status()`? With 4 configured voting members you need 3/4 available in order to elect a primary. As noted in an earlier comment, you also don't need an arbiter in this scenario. Lastly, you should remove `force:true` from any normal reconfiguration. The `force` option should only be used in catastrophic failure scenarios where you need to [reconfigure without a majority of replica set members available](https://docs.mongodb.com/manual/tutorial/reconfigure-replica-set-with-unavailable-members/). – Stennie Oct 27 '18 at 00:17
  • @Stennie I've updated with the output and this output is with the `force: true` removed. And it's still having the same problem. – Andrew Edwards Oct 27 '18 at 00:52
  • The example `rs.status()` output suggests all of the members are currently healthy. Are you certain that `mongo-1` was shut down at the time this output was captured? Of the 4 members, only `mongo-2` has noticeably less uptime, so perhaps this member was inadvertently shut down (if so, it would be expected that the current primary would remain unchanged). Can you replace this with `rs.status()` output when `mongo-1` is definitely shut down and also note which member the `rs.status()` is from? Some of the log lines of concern would be helpful. – Stennie Oct 30 '18 at 09:28
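
As suggested in the comments, here is a minimal sketch of dropping the arbiter so the set runs as three data-bearing voting members (primary plus two secondaries), using a normal reconfig without force (hostnames as in the config above):

// Run against the current primary.
var conf = rs.conf();
// Keep only the data-bearing members; this removes mongo-4, the arbiter.
conf.members = conf.members.filter(function (m) { return !m.arbiterOnly; });
rs.reconfig(conf);   // no { force: true } for a routine reconfiguration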
