PageRenderTime 51ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/Rachis/Rachis.Tests/ElectionRelatedTests.cs

http://github.com/ayende/ravendb
C# | 308 lines | 245 code | 54 blank | 9 comment | 15 complexity | dbedfd6d2c3b19a230e5e74748cc9020 MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Threading;
  6. using System.Threading.Tasks;
  7. using FizzWare.NBuilder;
  8. using FluentAssertions;
  9. using Rachis.Storage;
  10. using Rachis.Transport;
  11. using Voron;
  12. using Xunit;
  13. using Xunit.Extensions;
  14. namespace Rachis.Tests
  15. {
  16. public class ElectionRelatedTests : RaftTestsBase
  17. {
  /// <summary>
  /// Bootstraps a cluster that consists of a single node ("node1") and checks that the
  /// engine reports <see cref="RaftEngineState.Leader"/> right after it is constructed.
  /// </summary>
  [Fact]
  public void Follower_as_a_single_node_becomes_leader_automatically()
  {
      const string NodeName = "node1";
      const int ElectionTimeoutMs = 1000;

      var transportHub = new InMemoryTransportHub();
      var storageOptions = StorageEnvironmentOptions.CreateMemoryOnly();

      // Pager ownership is turned off for the bootstrap step and turned back on before the
      // engine is created. NOTE(review): presumably so that bootstrapping does not release
      // the in-memory pagers the engine reuses afterwards - confirm.
      storageOptions.OwnsPagers = false;

      var engineOptions = new RaftEngineOptions(
          new NodeConnectionInfo { Name = NodeName },
          storageOptions,
          transportHub.CreateTransportFor(NodeName),
          new DictionaryStateMachine());
      engineOptions.ElectionTimeout = ElectionTimeoutMs;
      // Integer division on purpose: the heartbeat interval is a sixth of the election timeout.
      engineOptions.HeartbeatTimeout = ElectionTimeoutMs / 6;

      PersistentState.ClusterBootstrap(engineOptions);
      storageOptions.OwnsPagers = true;

      using (var singleNode = new RaftEngine(engineOptions))
      {
          Assert.Equal(RaftEngineState.Leader, singleNode.State);
      }
  }
  /// <summary>
  /// Disconnects "node0".."node2" (both directions), forces the leader to time out five times
  /// and expects the leader's <c>ElectionStarted</c> event to be raised at least twice within
  /// 1500 ms. Afterwards the nodes are reconnected and the first node must see a leader again.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the "node" + index names are assumed to match the names that
  /// <c>CreateNetworkAndGetLeader</c> assigns - confirm against the test base class.
  /// </remarks>
  [Fact]
  public void Network_partition_should_cause_message_resend()
  {
      const int ClusterSize = 3;
      const int ForcedTimeouts = 5;

      var leader = CreateNetworkAndGetLeader(ClusterSize, messageTimeout: 300);
      var nodeNames = Enumerable.Range(0, ClusterSize).Select(index => "node" + index).ToList();

      var electionsSeen = new CountdownEvent(2);
      leader.ElectionStarted += () =>
      {
          // Signal() throws once the count has reached zero, so further elections are ignored.
          if (electionsSeen.CurrentCount <= 0)
          {
              return;
          }

          electionsSeen.Signal();
      };

      WriteLine("Disconnecting network");
      foreach (var nodeName in nodeNames)
      {
          DisconnectNode(nodeName);
          DisconnectNodeSending(nodeName);
      }

      for (var attempt = 0; attempt < ForcedTimeouts; attempt++)
      {
          ForceTimeout(leader.Name);
      }

      var sawEnoughElections = electionsSeen.Wait(1500);
      Assert.True(sawEnoughElections);

      foreach (var nodeName in nodeNames)
      {
          ReconnectNode(nodeName);
          ReconnectNodeSending(nodeName);
      }

      var leaderFound = Nodes.First().WaitForLeader();
      Assert.True(leaderFound);
  }
  /// <summary>
  /// Simulates a network partition in which the leader is detached from the rest of the
  /// cluster for longer than the election timeout, and checks that the cluster heals.
  /// </summary>
  /// <remarks>
  /// Before the partition the leader distributes the first three commands. While it is
  /// detached, two more commands are appended to it; because of the partition they cannot be
  /// distributed to the other nodes. The test waits for the detached leader to step down and
  /// for at least one other node to become a candidate, reconnects the former leader and waits
  /// until every node sees a leader again.
  /// The original author's note states that the new leader then rolls back the former leader's
  /// log so that only the first three commands remain; what is actually asserted here is that
  /// the first three commands are still present, unchanged, at the head of that log.
  /// </remarks>
  /// <param name="nodeCount">Number of nodes in the simulated cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(5)]
  [InlineData(7)]
  public void Network_partition_for_more_time_than_timeout_can_be_healed(int nodeCount)
  {
      const int CommandCount = 5;
      const int CommandsBeforePartition = 3;

      var commands = Builder<DictionaryCommand.Set>.CreateListOfSize(CommandCount)
          .All()
          .With(x => x.Completion = new TaskCompletionSource<object>())
          .With(x => x.AssignedIndex = -1)
          .Build()
          .ToList();

      var leader = CreateNetworkAndGetLeader(nodeCount);
      var nonLeaderNode = Nodes.First(x => x.State != RaftEngineState.Leader);

      // Subscribe before appending so the commit notification for the third command cannot be missed.
      var commitsAppliedEvent = new ManualResetEventSlim();
      nonLeaderNode.CommitIndexChanged += (oldIndex, newIndex) =>
      {
          if (newIndex == commands[CommandsBeforePartition - 1].AssignedIndex)
              commitsAppliedEvent.Set();
      };

      commands.Take(CommandsBeforePartition).ToList().ForEach(leader.AppendCommand);
      var waitForCommitsOnCluster = WaitForCommitsOnCluster(
          machine => machine.LastAppliedIndex == commands[CommandsBeforePartition - 1].AssignedIndex);

      // With the in-memory transport in use, it shouldn't take more than 5 sec.
      Assert.True(commitsAppliedEvent.Wait(5000), "Took too much time to commit the initial commands");

      // Both kinds of state-change waiters are created before the partition starts,
      // so a fast transition is not lost.
      var steppedDownMre = CreateWaitForStateChangeEvent(leader, RaftEngineState.FollowerAfterStepDown);
      var candidacyEvents = Nodes
          .Where(x => x != leader)
          .Select(node => CreateWaitForStateChangeEvent(node, RaftEngineState.Candidate))
          .ToArray();

      WriteLine("<Disconnecting leader!> (" + leader.Name + ")");
      DisconnectNode(leader.Name);

      // These commands are appended to the detached leader only.
      commands.Skip(CommandsBeforePartition).ToList().ForEach(leader.AppendCommand);
      var formerLeader = leader;

      Assert.True(steppedDownMre.Wait(leader.Options.ElectionTimeout * 2),
          "The leader didn't step down after being disconnected from the cluster even after leader.Options.ElectionTimeout*2");
      Assert.True(WaitHandle.WaitAny(candidacyEvents.Select(x => x.WaitHandle).ToArray(),
              Nodes.Select(x => x.Options.ElectionTimeout).Min() * 2) != WaitHandle.WaitTimeout,
          "In the specified timeout there was no state change on any of the non-leader nodes in the cluster");

      WriteLine("<Reconnecting leader!> (" + leader.Name + ")");
      ReconnectNode(leader.Name);

      foreach (var raftEngine in Nodes)
      {
          Assert.True(raftEngine.WaitForLeader(), $"raftEngine.WaitForLeader() timed out for node {raftEngine.Name}");
      }

      // After the reconnection, and after every node has seen a leader, at least _one_ node must be in leader state.
      leader = Nodes.FirstOrDefault(x => x.State == RaftEngineState.Leader);
      Assert.NotNull(leader);

      Assert.True(waitForCommitsOnCluster.Wait(3000), "Failed waiting for initial commits of the commands in the cluster");

      // Read the former leader's log with the former leader's own serializer.
      var serializer = formerLeader.PersistentState.CommandSerializer;
      var committedCommands = formerLeader.PersistentState.LogEntriesAfter(0)
          .Select(x => serializer.Deserialize(x.Data))
          .OfType<DictionaryCommand.Set>()
          .ToList();

      // Fail with a readable message instead of an ArgumentOutOfRangeException from the indexer below.
      Assert.True(committedCommands.Count >= CommandsBeforePartition,
          $"Expected at least {CommandsBeforePartition} commands in the log of {formerLeader.Name}, but found {committedCommands.Count}");

      for (int i = 0; i < CommandsBeforePartition; i++)
      {
          // Subject = what was found in the log, argument = what was sent, so failure messages read correctly.
          committedCommands[i].Value.Should().Be(commands[i].Value);
          committedCommands[i].AssignedIndex.Should().Be(commands[i].AssignedIndex);
      }
  }
  /// <summary>
  /// Detaches the leader only briefly: one command is appended while it is disconnected and
  /// the leader is reconnected straight away. The test expects the same node to still be the
  /// leader and all five commands to be applied on the cluster and readable from a
  /// follower's log.
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the simulated cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  [InlineData(4)]
  [InlineData(5)]
  public void Network_partition_for_less_time_than_timeout_can_be_healed_without_elections(int nodeCount)
  {
      const int CommandCount = 5;
      const int InitialCommitTimeoutMs = 5000;

      var commands = Builder<DictionaryCommand.Set>.CreateListOfSize(CommandCount)
          .All()
          .With(x => x.Completion = new TaskCompletionSource<object>())
          .With(x => x.AssignedIndex = -1)
          .Build()
          .ToList();

      var leader = CreateNetworkAndGetLeader(nodeCount, messageTimeout: 1500);
      var nonLeaderNode = Nodes.First(x => x.State != RaftEngineState.Leader);

      commands.Take(CommandCount - 1).ToList().ForEach(leader.AppendCommand);

      // Make sure at least one command is committed before the partition starts.
      // The poll is bounded so that a cluster which never commits fails the test instead of hanging it.
      var commitDeadline = DateTime.UtcNow.AddMilliseconds(InitialCommitTimeoutMs);
      while (nonLeaderNode.CommitIndex < 2)
      {
          Assert.True(DateTime.UtcNow < commitDeadline,
              $"Node {nonLeaderNode.Name} did not reach commit index 2 within {InitialCommitTimeoutMs} ms");
          Thread.Sleep(50);
      }

      WriteLine("<Disconnecting leader!> (" + leader.Name + ")");
      DisconnectNode(leader.Name);

      // The last command is appended while the leader cannot reach the other nodes.
      DictionaryCommand.Set command = commands.Last();
      leader.AppendCommand(command);
      var waitForCommitsOnCluster = WaitForCommitsOnCluster(machine => machine.LastAppliedIndex == command.AssignedIndex);

      WriteLine("<Reconnecting leader!> (" + leader.Name + ")");
      ReconnectNode(leader.Name);

      Assert.Equal(RaftEngineState.Leader, leader.State);
      Assert.True(waitForCommitsOnCluster.Wait(3000),
          "Failed waiting for the command appended during the partition to be applied on the cluster");

      var serializer = nonLeaderNode.PersistentState.CommandSerializer;
      var committedCommands = nonLeaderNode.PersistentState.LogEntriesAfter(0)
          .Select(x => serializer.Deserialize(x.Data))
          .OfType<DictionaryCommand.Set>()
          .ToList();

      // Fail with a readable message instead of an ArgumentOutOfRangeException from the indexer below.
      Assert.True(committedCommands.Count >= CommandCount,
          $"Expected at least {CommandCount} commands in the log of {nonLeaderNode.Name}, but found {committedCommands.Count}");

      for (int i = 0; i < CommandCount; i++)
      {
          // Subject = what was found in the log, argument = what was sent, so failure messages read correctly.
          committedCommands[i].Value.Should().Be(commands[i].Value);
          committedCommands[i].AssignedIndex.Should().Be(commands[i].AssignedIndex);
      }
  }
  /// <summary>
  /// After a leader has been established, every node's <c>CurrentLeader</c> must be set and
  /// must name that leader (compared case-insensitively).
  /// </summary>
  /// <param name="nodeCount">Number of nodes in the simulated cluster.</param>
  [Theory]
  [InlineData(2)]
  [InlineData(3)]
  public void On_many_node_network_after_leader_establishment_all_nodes_know_who_is_leader(int nodeCount)
  {
      var leader = CreateNetworkAndGetLeader(nodeCount);

      // Snapshot the leader each node currently reports.
      var leadersOfNodes = Nodes.Select(x => x.CurrentLeader).ToList();

      leadersOfNodes.Should().NotContainNulls("After leader is established, all nodes should know that leader exists");

      // Node names are identifiers, not linguistic text, so they are compared ordinally.
      leadersOfNodes.Should().OnlyContain(l => l.Equals(leader.Name, StringComparison.OrdinalIgnoreCase),
          "after leader establishment, all nodes should know only one, selected leader");
  }
  /// <summary>
  /// Creates a single engine ("real") whose stored topology also lists two peers that are never
  /// started ("u2" and "pj"), forces a timeout on it and checks that it becomes a candidate.
  /// </summary>
  [Fact]
  public void Follower_on_timeout_should_become_candidate()
  {
      // Upper bound for the forced timeout to be observed, so a missing event fails the test
      // instead of blocking the test run forever.
      const int StateTimeoutWaitMs = 5000;

      var storageEnvironmentOptions = StorageEnvironmentOptions.CreateMemoryOnly();

      // Pager ownership is turned off while the topology is written and turned back on before
      // the engine is created. NOTE(review): presumably so that writing the topology does not
      // release the in-memory pagers the engine reuses afterwards - confirm.
      storageEnvironmentOptions.OwnsPagers = false;

      var nodeOptions = new RaftEngineOptions(
          new NodeConnectionInfo { Name = "real" },
          storageEnvironmentOptions,
          _inMemoryTransportHub.CreateTransportFor("real"),
          new DictionaryStateMachine());

      PersistentState.SetTopologyExplicitly(nodeOptions,
          new Topology(
              new Guid("355a589b-cadc-463d-a515-5add2ea47205"),
              new[]
              {
                  new NodeConnectionInfo { Name = "real" },
                  new NodeConnectionInfo { Name = "u2" },
                  new NodeConnectionInfo { Name = "pj" },
              },
              new NodeConnectionInfo[0],
              new NodeConnectionInfo[0]),
          throwIfTopologyExists: true);

      storageEnvironmentOptions.OwnsPagers = true;

      using (var node = new RaftEngine(nodeOptions))
      {
          var timeoutEvent = new ManualResetEventSlim();
          node.StateTimeout += timeoutEvent.Set;

          ForceTimeout("real");

          Assert.True(timeoutEvent.Wait(StateTimeoutWaitMs),
              $"StateTimeout was not raised within {StateTimeoutWaitMs} ms after forcing a timeout");
          Assert.Equal(RaftEngineState.Candidate, node.State);
      }
  }
  /// <summary>
  /// Writes a topology with six voting nodes into a <c>PersistentState</c> stored on disk,
  /// reopens the same path with a fresh instance and checks that the same voting nodes are
  /// read back. The storage directory is removed afterwards.
  /// </summary>
  [Fact]
  public void AllPeers_and_AllVotingPeers_can_be_persistantly_saved_and_loaded()
  {
      // A unique relative directory per run, so that runs do not see each other's data.
      var path = "test" + Guid.NewGuid();

      using (var cancellationTokenSource = new CancellationTokenSource())
      {
          try
          {
              var expectedAllVotingPeers = new List<string> { "Node123", "Node1", "Node2", "NodeG", "NodeB", "NodeABC" };

              // First pass: a new store starts without voting nodes; save the topology.
              using (var options = StorageEnvironmentOptions.ForPath(path))
              {
                  using (var persistentState = new PersistentState("self", options, cancellationTokenSource.Token)
                  {
                      CommandSerializer = new JsonCommandSerializer()
                  })
                  {
                      var currentConfiguration = persistentState.GetCurrentTopology();
                      Assert.Empty(currentConfiguration.AllVotingNodes);

                      var currentTopology = new Topology(
                          new Guid("355a589b-cadc-463d-a515-5add2ea47205"),
                          expectedAllVotingPeers.Select(x => new NodeConnectionInfo { Name = x }),
                          Enumerable.Empty<NodeConnectionInfo>(),
                          Enumerable.Empty<NodeConnectionInfo>());
                      persistentState.SetCurrentTopology(currentTopology, 1);
                  }
              }

              // Second pass: reopen the same path and verify what was persisted.
              using (var options = StorageEnvironmentOptions.ForPath(path))
              {
                  using (var persistentState = new PersistentState("self", options, cancellationTokenSource.Token)
                  {
                      CommandSerializer = new JsonCommandSerializer()
                  })
                  {
                      var currentConfiguration = persistentState.GetCurrentTopology();
                      Assert.Equal(expectedAllVotingPeers.Count, currentConfiguration.AllVotingNodes.Count());

                      foreach (var nodeConnectionInfo in currentConfiguration.AllVotingNodes)
                      {
                          Assert.Contains(nodeConnectionInfo.Name, expectedAllVotingPeers);
                      }
                  }
              }
          }
          finally
          {
              // Only delete what exists: if the storage was never created, an unconditional delete
              // would throw DirectoryNotFoundException and hide the failure that got us here.
              var directory = new DirectoryInfo(path);
              if (directory.Exists)
              {
                  directory.Delete(true);
              }
          }
      }
  }
  /// <summary>
  /// Builds a three-node network and checks that the node returned as leader is in the
  /// <see cref="RaftEngineState.Leader"/> state.
  /// </summary>
  /// <remarks>
  /// NOTE(review): despite the test name, no vote request is sent and no rejection is
  /// verified here - confirm whether the rest of the scenario is still to be written.
  /// </remarks>
  [Fact]
  public void Request_vote_when_leader_exists_will_be_rejected()
  {
      const int ClusterSize = 3;

      var electedLeader = CreateNetworkAndGetLeader(ClusterSize);

      electedLeader.State.Should().Be(RaftEngineState.Leader);
  }
  253. }
  254. }