PageRenderTime 49ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/Rachis/Rachis.Tests/TopologyChangesTests.cs

http://github.com/ayende/ravendb
C# | 427 lines | 328 code | 95 blank | 4 comment | 23 complexity | bfa52a329918cb75b6e5f9f8749a8052 MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1. using System;
  2. using System.Diagnostics;
  3. using System.Linq;
  4. using System.Threading;
  5. using FizzWare.NBuilder;
  6. using FluentAssertions;
  7. using Rachis.Transport;
  8. using Xunit;
  9. using Xunit.Extensions;
  10. namespace Rachis.Tests
  11. {
  12. public class TopologyChangesTests : RaftTestsBase
  13. {
  // Running counter used only to number the banner written by the constructor.
  private static int testCount = 0;

  /// <summary>
  /// Writes a three-line banner containing the current value of <c>testCount</c>
  /// (which is then incremented) whenever the test class is constructed.
  /// </summary>
  public TopologyChangesTests()
  {
      const string separator = "------------------------------------------------";
      var currentTest = testCount++;

      WriteLine(separator);
      WriteLine("--== Starting test " + currentTest + " ==--");
      WriteLine(separator);
  }
  /// <summary>
  /// Requests the addition of "node3" while the leader is disconnected from sending, forces
  /// timeouts, and then checks that one of the other nodes became leader and that no node
  /// treats "node3" as a voter.
  /// </summary>
  [Fact]
  public void CanRevertTopologyChange()
  {
      var leader = CreateNetworkAndGetLeader(3);
      var followers = Nodes.Where(node => node != leader).ToList();
      var leaderTransport = (InMemoryTransportHub.InMemoryTransport) leader.Transport;

      DisconnectNodeSending(leader.Name);
      WriteLine("Initial leader is " + leader.Name);

      // The returned task is intentionally not waited on.
      leader.AddToClusterAsync(new NodeConnectionInfo { Name = "node3" });
      var topologyChanged = WaitForToplogyChange(leader);
      Assert.True(leader.CurrentTopology.Contains("node2"));

      WriteLine("<-- should switch leaders now");

      // One forced timeout per follower, all issued through the leader's transport object.
      // NOTE(review): the follower itself is never used here - confirm this is intended.
      for (var i = 0; i < followers.Count; i++)
      {
          leaderTransport.ForceTimeout();
      }
      leaderTransport.ForceTimeout(); // force it to win

      Assert.True(topologyChanged.Wait(3000));
      Assert.True(followers.Any(node => node.State == RaftEngineState.Leader));

      foreach (var follower in followers)
      {
          Assert.False(follower.CurrentTopology.IsVoter("node3"));
      }
      Assert.False(leader.CurrentTopology.IsVoter("node3"));
  }
  43. //fire up console to listen for log : nc -l -u -p 7071
  /// <summary>
  /// Adds a node name that no running node answers to and verifies that every existing node
  /// still reports a topology change containing it and ends up listing it in its topology.
  /// </summary>
  [Fact]
  public void New_node_can_be_added_even_if_it_is_down()
  {
      const int nodeCount = 3;
      const string missingNodeName = "non-existing-node";

      var topologyChangeFinishedOnAllNodes = new CountdownEvent(nodeCount);
      var leader = CreateNetworkAndGetLeader(nodeCount);

      // Count one signal for each topology change whose requested topology includes the new name.
      foreach (var node in Nodes.ToList())
      {
          node.TopologyChanged += cmd =>
          {
              if (cmd.Requested.AllNodeNames.Contains(missingNodeName))
              {
                  topologyChangeFinishedOnAllNodes.Signal();
              }
          };
      }

      // ReSharper disable once PossibleNullReferenceException
      leader.AddToClusterAsync(new NodeConnectionInfo { Name = missingNodeName }).Wait();

      Assert.True(topologyChangeFinishedOnAllNodes.Wait(5000),"Topology changes should happen in less than 5 sec for 3 node network");

      foreach (var node in Nodes.ToList())
      {
          var allNodeNames = node.CurrentTopology.AllNodeNames.ToList();
          allNodeNames.Should().Contain(missingNodeName);
      }
  }
  /// <summary>
  /// Adds a new node that gets disconnected as soon as its topology starts changing, reconnects
  /// it after two election timeouts, and verifies the leader still observes the topology change.
  /// </summary>
  [Fact]
  public void Adding_additional_node_that_goes_offline_and_then_online_should_still_work()
  {
      var leaderNode = CreateNetworkAndGetLeader(3);
      using (var additionalNode = NewNodeFor(leaderNode))
      {
          // Disconnect the same node that is reconnected below. The original hard-coded "node3",
          // which only worked while that happened to be the name given to the additional node.
          var additionalNodeName = additionalNode.Name;
          additionalNode.TopologyChanging += () => DisconnectNode(additionalNodeName);

          var waitForTopologyChangeInLeader = WaitForToplogyChange(leaderNode);

          leaderNode.AddToClusterAsync(new NodeConnectionInfo { Name = additionalNodeName }).Wait();

          // Keep the node offline for two election timeouts before bringing it back.
          Thread.Sleep(additionalNode.Options.ElectionTimeout * 2);
          ReconnectNode(additionalNodeName);

          Assert.True(waitForTopologyChangeInLeader.Wait(3000));
      }
  }
  /// <summary>
  /// Asking the leader to add a node that is already part of the cluster must throw
  /// <see cref="InvalidOperationException"/>.
  /// </summary>
  [Fact]
  public void Adding_already_existing_node_should_throw()
  {
      var leader = CreateNetworkAndGetLeader(2);

      leader.Invoking(engine =>
          {
              // Pick the other node of the two-node cluster as the "new" node.
              var existingPeer = Nodes.First(node => node.Name != leader.Name);
              engine.AddToClusterAsync(new NodeConnectionInfo { Name = existingPeer.Name });
          })
          .ShouldThrow<InvalidOperationException>();
  }
  /// <summary>
  /// Asking the leader to remove a node name that was never added must throw
  /// <see cref="InvalidOperationException"/>.
  /// </summary>
  [Fact]
  public void Removal_of_non_existing_node_should_throw()
  {
      var leader = CreateNetworkAndGetLeader(2);

      leader.Invoking(engine =>
          {
              engine.RemoveFromClusterAsync(new NodeConnectionInfo { Name = "santa" });
          })
          .ShouldThrow<InvalidOperationException>();
  }
  /// <summary>
  /// Starts one node removal without waiting for it and verifies that requesting a second
  /// removal right away throws <see cref="InvalidOperationException"/>.
  /// </summary>
  [Fact]
  public void Cluster_cannot_have_two_concurrent_node_removals()
  {
      var leader = CreateNetworkAndGetLeader(4, messageTimeout: 1500);

      var follower = Nodes.FirstOrDefault(node => node.State != RaftEngineState.Leader);
      Assert.NotNull(follower);

      // First removal: deliberately not waited on.
      leader.RemoveFromClusterAsync(new NodeConnectionInfo { Name = follower.Name });

      // A second removal requested while the first was just started is expected to be rejected.
      Assert.Throws<InvalidOperationException>(() =>
      {
          leader.RemoveFromClusterAsync(new NodeConnectionInfo { Name = leader.Name }).Wait();
      });
  }
  /// <summary>
  /// Commits one command on the whole cluster, removes a non-leader node, and verifies that each
  /// remaining node's topology has as many names as there are remaining nodes and contains each
  /// of them.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the initial cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(5)]
  [InlineData(7)]
  public void Non_leader_Node_removed_from_cluster_should_update_peers_list(int nodeCount)
  {
      var leader = CreateNetworkAndGetLeader(nodeCount);

      var setCommand = new DictionaryCommand.Set { Key = "a", Value = 1 };
      var commitsApplied = WaitForCommitsOnCluster(machine => machine.LastAppliedIndex == setCommand.AssignedIndex);
      leader.AppendCommand(setCommand);
      Assert.True(commitsApplied.Wait(3000));

      var nodeToRemove = Nodes.First(node => node.State != RaftEngineState.Leader);
      var nodesThatShouldRemain = Nodes
          .Where(node => !ReferenceEquals(node, nodeToRemove))
          .ToList();

      var topologyChanged = WaitForToplogyChangeOnCluster(nodesThatShouldRemain);
      leader.RemoveFromClusterAsync(new NodeConnectionInfo { Name = nodeToRemove.Name }).Wait();
      Assert.True(topologyChanged.Wait(5000));

      // Query the node list again after the removal, as the original test does.
      var nodesToCheck = Nodes
          .Where(node => !ReferenceEquals(node, nodeToRemove))
          .ToList();
      var expectedCount = nodesThatShouldRemain.Count;

      foreach (var node in nodesToCheck)
      {
          Assert.Equal(expectedCount, node.CurrentTopology.AllNodeNames.Count());

          foreach (var expected in nodesThatShouldRemain)
          {
              Assert.True(node.CurrentTopology.Contains(expected.Name));
          }
      }
  }
  /// <summary>
  /// Begins adding "nodeC", stops the non-leader from sending once its topology change starts,
  /// restarts all nodes, and verifies that every node ends up with "nodeC" in its topology.
  /// </summary>
  [Fact]
  public void Cluster_nodes_are_able_to_recover_after_shutdown_in_the_middle_of_topology_change()
  {
      var leader = CreateNetworkAndGetLeader(2);
      var follower = Nodes.First(node => node != leader);
      var topologyChangeStarted = new ManualResetEventSlim();

      // When the follower begins the topology change, cut its outgoing traffic and
      // let the test thread know that the change is in flight.
      follower.TopologyChanging += () =>
      {
          Console.WriteLine("<---disconnected from sending : " + follower.Name);
          DisconnectNodeSending(follower.Name);
          topologyChangeStarted.Set();
      };

      leader.AddToClusterAsync(new NodeConnectionInfo { Name = "nodeC" });
      Assert.True(topologyChangeStarted.Wait(2000));

      RestartAllNodes();
      WriteLine("<---nodeA, nodeB are down");
      ReconnectNodeSending(follower.Name);

      var topologyChangesFinished = WaitForToplogyChangeOnCluster();
      Assert.True(topologyChangesFinished.Wait(3000));

      foreach (var node in Nodes)
      {
          node.CurrentTopology.AllNodeNames.Should().Contain("nodeC");
      }
  }
  /// <summary>
  /// Starts one node addition without waiting for it and verifies that requesting a second
  /// addition right away throws <see cref="InvalidOperationException"/>.
  /// </summary>
  [Fact]
  public void Cluster_cannot_have_two_concurrent_node_additions()
  {
      var leader = CreateNetworkAndGetLeader(4, messageTimeout: 1500);

      // First addition: deliberately not waited on.
      leader.AddToClusterAsync(new NodeConnectionInfo { Name = "extra1" });

      // A second addition requested while the first was just started is expected to be rejected.
      Assert.Throws<InvalidOperationException>(() =>
      {
          leader.AddToClusterAsync(new NodeConnectionInfo { Name = "extra2" }).Wait();
      });
  }
  /// <summary>
  /// Adds a freshly created node to a cluster of <paramref name="nodeCount"/> nodes and verifies
  /// that every existing node lists the new node and that the new node lists every existing node.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the initial cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(4)]
  [InlineData(7)]
  public void Node_added_to_cluster_should_update_peers_list(int nodeCount)
  {
      WriteLine("--> Started test");
      var leader = CreateNetworkAndGetLeader(nodeCount);
      WriteLine("--> Selected leader, creating additional node");

      using (var additionalNode = NewNodeFor(leader))
      {
          // Same budget for both waits: ten seconds per node, counting the new one.
          var timeoutInSeconds = (nodeCount + 1) * 10;

          var clusterChanged = WaitForToplogyChangeOnCluster();
          var newNodeAdded = WaitForNodeToBecomeVoter(additionalNode);

          WriteLine("Adding the additional node (name = {0}) to cluster", additionalNode.Name);
          leader.AddToClusterAsync(new NodeConnectionInfo { Name = additionalNode.Name }).Wait();

          // Bounded wait: an unbounded Wait() would hang the whole test run if the
          // topology change never completes, instead of failing this test.
          Assert.True(clusterChanged.Wait(timeoutInSeconds * 1000),
              "Cluster topology change did not complete in time");

          Assert.True(newNodeAdded.Wait(TimeSpan.FromSeconds(timeoutInSeconds)),
              "New node never became a voter");

          WriteLine("--> Cluster finished changing, new node added.");

          var raftNodes = Nodes.ToList();
          foreach (var node in raftNodes)
          {
              // On failure, report the offending node's topology and name.
              Assert.True(node.CurrentTopology.Contains(additionalNode.Name),
                  node.CurrentTopology + " on " + node.Name);
          }

          additionalNode.CurrentTopology.AllNodeNames.Should().Contain(raftNodes.Select(node => node.Name));
      }
  }
  /// <summary>
  /// Commits a few commands, appends one more, has the leader step down, and verifies that the
  /// state machine of the newly elected leader contains the last appended value.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(4)]
  public void Can_step_down(int nodeCount)
  {
      var leader = CreateNetworkAndGetLeader(nodeCount);

      // Keys "0".."4" are appended; the wait completes on the last of them.
      var firstCommits = WaitForCommitsOnCluster(machine => machine.Data.ContainsKey("4"));
      foreach (var i in Enumerable.Range(0, 5))
      {
          var command = new DictionaryCommand.Set
          {
              Key = i.ToString(),
              Value = i
          };
          leader.AppendCommand(command);
      }
      firstCommits.Wait();

      var nextCommit = WaitForCommitsOnCluster(machine => machine.Data.ContainsKey("c"));
      var lastCommand = new DictionaryCommand.Set
      {
          Key = "c",
          Value = 3
      };
      leader.AppendCommand(lastCommand);

      var newLeader = WaitForNewLeaderAsync();
      leader.StepDownAsync().Wait();
      nextCommit.Wait();

      var newLeaderStateMachine = (DictionaryStateMachine) newLeader.Result.StateMachine;
      WriteLine("<-- have new leader state machine");

      Assert.Equal(3, newLeaderStateMachine.Data["c"]);
  }
  /// <summary>
  /// Makes the leader step down, has the new leader remove it from the cluster, and verifies
  /// that every remaining node lists exactly the remaining node names. All nodes are released
  /// at the end regardless of the outcome.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the initial cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(4)]
  public void Leader_removed_from_cluster_modifies_member_lists_on_remaining_nodes(int nodeCount)
  {
      try
      {
          var oldLeader = CreateNetworkAndGetLeader(nodeCount);
          var remainingNodes = Nodes.ToList();
          var someFollower = remainingNodes.FirstOrDefault(node => node.State != RaftEngineState.Leader);

          Assert.NotNull(oldLeader);
          Assert.NotNull(someFollower);

          remainingNodes.Remove(oldLeader);

          var newLeaderElected = WaitForNewLeaderAsync();
          oldLeader.StepDownAsync().Wait();

          var topologyChanged = WaitForToplogyChangeOnCluster(remainingNodes);
          var newLeader = newLeaderElected.Result;
          newLeader.RemoveFromClusterAsync(new NodeConnectionInfo {Name = oldLeader.Name}).Wait();

          // NOTE(review): 300 ms is far shorter than the waits in the sibling tests - confirm it is intended.
          Assert.True(topologyChanged.Wait(300));

          var expectedNodeNameList = remainingNodes.Select(node => node.Name).ToList();
          foreach (var node in remainingNodes)
          {
              node.CurrentTopology.AllNodeNames.Should()
                  .BeEquivalentTo(expectedNodeNameList, "node " + node.Name + " should have expected AllVotingNodes list");
          }
      }
      finally
      {
          ReleaseAllNodes();
      }
  }
  /// <summary>
  /// Commits two commands, removes a follower from the cluster, commits three more commands, and
  /// verifies through another follower that all five commands are in its log in order and that
  /// its commit index matches the leader's.
  /// </summary>
  [Fact]
  public void Follower_removed_from_cluster_does_not_affect_leader_and_commits()
  {
      var commands = Builder<DictionaryCommand.Set>.CreateListOfSize(5)
          .All()
          .With(x => x.Completion = null)
          .Build()
          .ToList();

      var leader = CreateNetworkAndGetLeader(4, messageTimeout: 1500);
      var nonLeaderNode = Nodes.First(x => x.State != RaftEngineState.Leader);

      // Wait until the node that is about to be removed has applied the first two Set commands.
      var someCommitsAppliedEvent = new CountdownEvent(2);
      nonLeaderNode.CommitApplied += (cmd) =>
      {
          if (cmd is DictionaryCommand.Set && someCommitsAppliedEvent.CurrentCount > 0)
              someCommitsAppliedEvent.Signal();
      };

      leader.AppendCommand(commands[0]);
      leader.AppendCommand(commands[1]);
      Assert.True(someCommitsAppliedEvent.Wait(2000));

      Assert.Equal(3, leader.CurrentTopology.QuorumSize);

      WriteLine(string.Format("<--- Removing from cluster {0} --->", nonLeaderNode.Name));
      leader.RemoveFromClusterAsync(new NodeConnectionInfo { Name = nonLeaderNode.Name }).Wait();

      // All further observations go through a follower that is still part of the cluster.
      var otherNonLeaderNode = Nodes.First(x => x.State != RaftEngineState.Leader && !ReferenceEquals(x, nonLeaderNode));

      var allCommitsAppliedEvent = new ManualResetEventSlim();
      otherNonLeaderNode.CommitApplied += (cmd) =>
      {
          if (commands[4].AssignedIndex == cmd.AssignedIndex)
              allCommitsAppliedEvent.Set();
      };

      // Fixed: the message previously reported the removed node as the one being read from.
      WriteLine(string.Format("<--- Appending remaining commands ---> (leader name = {0}, reading from {1})", leader.Name, otherNonLeaderNode.Name));
      leader.AppendCommand(commands[2]);
      leader.AppendCommand(commands[3]);
      leader.AppendCommand(commands[4]);

      var condition = allCommitsAppliedEvent.Wait(3000);
      Assert.True(condition);

      // Fixed: deserialize with the serializer of the node whose log is being read, rather than
      // reaching into the persistent state of the node that was removed from the cluster.
      var persistentState = otherNonLeaderNode.PersistentState;
      var committedCommands = persistentState.LogEntriesAfter(0)
          .Select(x => persistentState.CommandSerializer.Deserialize(x.Data))
          .OfType<DictionaryCommand.Set>()
          .ToList();

      committedCommands.Should().HaveCount(5);
      for (int i = 0; i < 5; i++)
      {
          Assert.Equal(commands[i].Value, committedCommands[i].Value);
          Assert.Equal(commands[i].AssignedIndex, committedCommands[i].AssignedIndex);
      }

      otherNonLeaderNode.CommitIndex.Should().Be(leader.CommitIndex, "after all commands have been committed, on non-leader nodes should be the same commit index as on index node");
  }
  /// <summary>
  /// Removes a follower from the cluster and verifies that every remaining node lists exactly
  /// the remaining node names in its topology.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the initial cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  public void Follower_removed_from_cluster_modifies_member_lists_on_remaining_nodes(int nodeCount)
  {
      var leader = CreateNetworkAndGetLeader(nodeCount);
      var raftNodes = Nodes.ToList();
      var removedNode = raftNodes.FirstOrDefault(n => n.State != RaftEngineState.Leader);

      Assert.NotNull(leader);
      Assert.NotNull(removedNode);

      Trace.WriteLine(string.Format("<-- Leader chosen: {0} -->", leader.Name));
      Trace.WriteLine(string.Format("<-- Node to be removed: {0} -->", removedNode.Name));

      raftNodes.Remove(removedNode);

      // One signal is expected from each node that stays in the cluster.
      var topologyChangeComittedEvent = new CountdownEvent(nodeCount - 1);
      raftNodes.ForEach(node => node.TopologyChanged += cmd => topologyChangeComittedEvent.Signal());

      Trace.WriteLine(string.Format("<-- Removing {0} from the cluster -->", removedNode.Name));
      leader.RemoveFromClusterAsync(new NodeConnectionInfo { Name = removedNode.Name }).Wait();
      Assert.True(topologyChangeComittedEvent.Wait(nodeCount * 2500));

      var expectedNodeNameList = raftNodes.Select(x => x.Name).ToList();

      // string.Join avoids the stray leading ", " that the hand-rolled Aggregate produced.
      Trace.WriteLine("<-- expectedNodeNameList:" + string.Join(", ", expectedNodeNameList));

      raftNodes.ForEach(node => node.CurrentTopology.AllNodeNames.Should().BeEquivalentTo(expectedNodeNameList, "node " + node.Name + " should have expected AllVotingNodes list"));
  }
  331. }
  332. }