PageRenderTime 1279ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 1ms

/internal/praefect/checks_test.go

https://gitlab.com/gitlab-org/gitaly
Go | 554 lines | 498 code | 55 blank | 1 comment | 26 complexity | c39fecbfd78331cae61eea9849662f25 MD5 | raw file
  1. package praefect
  2. import (
  3. "bytes"
  4. "context"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net"
  9. "path/filepath"
  10. "strings"
  11. "testing"
  12. "time"
  13. "github.com/google/uuid"
  14. "github.com/stretchr/testify/assert"
  15. "github.com/stretchr/testify/require"
  16. "gitlab.com/gitlab-org/gitaly/v15/internal/praefect/config"
  17. "gitlab.com/gitlab-org/gitaly/v15/internal/praefect/datastore"
  18. "gitlab.com/gitlab-org/gitaly/v15/internal/praefect/datastore/migrations"
  19. "gitlab.com/gitlab-org/gitaly/v15/internal/praefect/nodes"
  20. "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
  21. "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testdb"
  22. "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
  23. "google.golang.org/grpc"
  24. "google.golang.org/grpc/health"
  25. "google.golang.org/grpc/health/grpc_health_v1"
  26. )
  27. func TestPraefectMigrations_success(t *testing.T) {
  28. testCases := []struct {
  29. desc string
  30. prepare func(cfg config.Config) error
  31. expectedErr error
  32. }{
  33. {
  34. desc: "no migrations have run",
  35. prepare: func(cfg config.Config) error {
  36. _, err := datastore.MigrateDown(cfg, len(migrations.All()))
  37. if err != nil {
  38. return err
  39. }
  40. return nil
  41. },
  42. expectedErr: fmt.Errorf("%d migrations have not been run", len(migrations.All())),
  43. },
  44. {
  45. desc: "some migrations have run",
  46. prepare: func(cfg config.Config) error {
  47. _, err := datastore.MigrateDown(cfg, 3)
  48. if err != nil {
  49. return err
  50. }
  51. return nil
  52. },
  53. expectedErr: fmt.Errorf("3 migrations have not been run"),
  54. },
  55. {
  56. desc: "all migrations have run",
  57. prepare: func(cfg config.Config) error {
  58. return nil
  59. },
  60. expectedErr: nil,
  61. },
  62. }
  63. for _, tc := range testCases {
  64. t.Run(tc.desc, func(t *testing.T) {
  65. ctx := testhelper.Context(t)
  66. var cfg config.Config
  67. db := testdb.New(t)
  68. cfg.DB = testdb.GetConfig(t, db.Name)
  69. require.NoError(t, tc.prepare(cfg))
  70. migrationCheck := NewPraefectMigrationCheck(cfg, io.Discard, false)
  71. assert.Equal(t, "praefect migrations", migrationCheck.Name)
  72. assert.Equal(t, "confirms whether or not all praefect migrations have run", migrationCheck.Description)
  73. assert.Equal(t, tc.expectedErr, migrationCheck.Run(ctx))
  74. })
  75. }
  76. }
  77. type nodeAssertion struct {
  78. storage string
  79. token string
  80. servingStatus grpc_health_v1.HealthCheckResponse_ServingStatus
  81. serverReadable, serverWriteable bool
  82. }
  83. type mockServerServer struct {
  84. gitalypb.UnimplementedServerServiceServer
  85. node nodeAssertion
  86. }
  87. func (m *mockServerServer) ServerInfo(ctx context.Context, in *gitalypb.ServerInfoRequest) (*gitalypb.ServerInfoResponse, error) {
  88. return &gitalypb.ServerInfoResponse{
  89. StorageStatuses: []*gitalypb.ServerInfoResponse_StorageStatus{
  90. {
  91. StorageName: m.node.storage,
  92. Readable: m.node.serverReadable,
  93. Writeable: m.node.serverWriteable,
  94. },
  95. },
  96. }, nil
  97. }
  98. func TestGitalyNodeConnectivityCheck(t *testing.T) {
  99. testCases := []struct {
  100. desc string
  101. expectErr bool
  102. nodes []nodeAssertion
  103. }{
  104. {
  105. desc: "all nodes are healthy",
  106. expectErr: false,
  107. nodes: []nodeAssertion{
  108. {
  109. storage: "storage-0",
  110. token: "token-0",
  111. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  112. serverReadable: true,
  113. serverWriteable: true,
  114. },
  115. {
  116. storage: "storage-1",
  117. token: "token-1",
  118. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  119. serverReadable: true,
  120. serverWriteable: true,
  121. },
  122. },
  123. },
  124. {
  125. desc: "one node failed healthcheck",
  126. expectErr: true,
  127. nodes: []nodeAssertion{
  128. {
  129. storage: "storage-0",
  130. token: "token-0",
  131. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  132. serverReadable: true,
  133. serverWriteable: true,
  134. },
  135. {
  136. storage: "storage-1",
  137. token: "token-1",
  138. servingStatus: grpc_health_v1.HealthCheckResponse_NOT_SERVING,
  139. serverReadable: true,
  140. serverWriteable: true,
  141. },
  142. },
  143. },
  144. {
  145. desc: "one node failed consistency check",
  146. expectErr: true,
  147. nodes: []nodeAssertion{
  148. {
  149. storage: "storage-0",
  150. token: "token-0",
  151. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  152. serverReadable: false,
  153. serverWriteable: true,
  154. },
  155. {
  156. storage: "storage-1",
  157. token: "token-1",
  158. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  159. serverReadable: true,
  160. serverWriteable: true,
  161. },
  162. },
  163. },
  164. {
  165. desc: "all nodes failed",
  166. expectErr: true,
  167. nodes: []nodeAssertion{
  168. {
  169. storage: "storage-0",
  170. token: "token-0",
  171. servingStatus: grpc_health_v1.HealthCheckResponse_NOT_SERVING,
  172. serverReadable: false,
  173. serverWriteable: true,
  174. },
  175. {
  176. storage: "storage-1",
  177. token: "token-1",
  178. servingStatus: grpc_health_v1.HealthCheckResponse_NOT_SERVING,
  179. serverReadable: true,
  180. serverWriteable: false,
  181. },
  182. },
  183. },
  184. }
  185. for _, tc := range testCases {
  186. t.Run(tc.desc, func(t *testing.T) {
  187. cfgNodes, cleanup := runNodes(t, tc.nodes)
  188. defer cleanup()
  189. check := NewGitalyNodeConnectivityCheck(
  190. config.Config{
  191. VirtualStorages: []*config.VirtualStorage{
  192. {
  193. Name: "default",
  194. Nodes: cfgNodes,
  195. },
  196. },
  197. },
  198. io.Discard,
  199. false,
  200. )
  201. ctx := testhelper.Context(t)
  202. err := check.Run(ctx)
  203. if tc.expectErr {
  204. assert.Regexp(t, "^the following nodes are not healthy: .+", err)
  205. return
  206. }
  207. assert.Nil(t, err)
  208. })
  209. }
  210. t.Run("server not listening", func(t *testing.T) {
  211. tmp := testhelper.TempDir(t)
  212. socketAddr := fmt.Sprintf("unix://%s", filepath.Join(tmp, "storage"))
  213. cfgNodes := []*config.Node{
  214. {
  215. Storage: "storage",
  216. Token: "token",
  217. Address: socketAddr,
  218. },
  219. }
  220. check := NewGitalyNodeConnectivityCheck(
  221. config.Config{
  222. VirtualStorages: []*config.VirtualStorage{
  223. {
  224. Name: "default",
  225. Nodes: cfgNodes,
  226. },
  227. },
  228. },
  229. io.Discard,
  230. false,
  231. )
  232. ctx, cancel := context.WithCancel(testhelper.Context(t))
  233. // Cancel the context directly such that dialling the node will fail.
  234. cancel()
  235. require.Equal(t, &nodes.PingError{
  236. UnhealthyAddresses: []string{socketAddr},
  237. }, check.Run(ctx))
  238. })
  239. t.Run("output check details", func(t *testing.T) {
  240. quietSettings := []bool{true, false}
  241. nodes := []nodeAssertion{
  242. {
  243. storage: "storage-0",
  244. token: "token-0",
  245. servingStatus: grpc_health_v1.HealthCheckResponse_SERVING,
  246. serverReadable: true,
  247. serverWriteable: true,
  248. },
  249. }
  250. expectedLogLines := []string{
  251. "dialing...",
  252. "dialed successfully!",
  253. "checking health...",
  254. "SUCCESS: node is healthy!",
  255. "checking consistency...",
  256. "SUCCESS: confirmed Gitaly storage \"storage-0\" in virtual storages [default] is served",
  257. "SUCCESS: node configuration is consistent!",
  258. }
  259. for _, isQuiet := range quietSettings {
  260. var output bytes.Buffer
  261. cfgNodes, cleanup := runNodes(t, nodes)
  262. defer cleanup()
  263. check := NewGitalyNodeConnectivityCheck(
  264. config.Config{
  265. VirtualStorages: []*config.VirtualStorage{
  266. {
  267. Name: "default",
  268. Nodes: cfgNodes,
  269. },
  270. },
  271. },
  272. &output,
  273. isQuiet,
  274. )
  275. ctx := testhelper.Context(t)
  276. require.NoError(t, check.Run(ctx))
  277. for _, logLine := range expectedLogLines {
  278. if isQuiet {
  279. assert.NotContains(t, output.String(), logLine)
  280. continue
  281. }
  282. assert.Contains(t, output.String(), logLine)
  283. }
  284. }
  285. })
  286. }
  287. func runNodes(t *testing.T, nodes []nodeAssertion) ([]*config.Node, func()) {
  288. tmp := testhelper.TempDir(t)
  289. var cfgNodes []*config.Node
  290. var cleanupFns []func()
  291. for _, n := range nodes {
  292. socket := filepath.Join(tmp, n.storage)
  293. ln, err := net.Listen("unix", socket)
  294. require.NoError(t, err)
  295. healthSrv := health.NewServer()
  296. healthSrv.SetServingStatus("", n.servingStatus)
  297. srvSrv := &mockServerServer{
  298. node: n,
  299. }
  300. srv := grpc.NewServer()
  301. grpc_health_v1.RegisterHealthServer(srv, healthSrv)
  302. gitalypb.RegisterServerServiceServer(srv, srvSrv)
  303. cleanupFns = append(cleanupFns, srv.Stop)
  304. go func() {
  305. assert.NoError(t, srv.Serve(ln))
  306. }()
  307. cfgNodes = append(cfgNodes, &config.Node{
  308. Storage: n.storage,
  309. Token: n.token,
  310. Address: fmt.Sprintf("%s://%s", ln.Addr().Network(), ln.Addr().String()),
  311. })
  312. }
  313. return cfgNodes, func() {
  314. for _, cleanupFn := range cleanupFns {
  315. cleanupFn()
  316. }
  317. }
  318. }
  319. func TestPostgresReadWriteCheck(t *testing.T) {
  320. testCases := []struct {
  321. desc string
  322. setup func(t *testing.T, db testdb.DB) config.DB
  323. expectedErr string
  324. expectedLog string
  325. }{
  326. {
  327. desc: "read and write work",
  328. setup: func(t *testing.T, db testdb.DB) config.DB {
  329. return testdb.GetConfig(t, db.Name)
  330. },
  331. expectedLog: "successfully read from database\nsuccessfully wrote to database\n",
  332. },
  333. {
  334. desc: "read only",
  335. setup: func(t *testing.T, db testdb.DB) config.DB {
  336. role := "praefect_ro_role_" + strings.ReplaceAll(uuid.New().String(), "-", "")
  337. _, err := db.Exec(fmt.Sprintf(`
  338. CREATE ROLE %[1]s LOGIN;
  339. GRANT SELECT ON ALL TABLES IN SCHEMA public TO %[1]s;`, role))
  340. require.NoError(t, err)
  341. t.Cleanup(func() {
  342. _, err := db.Exec(fmt.Sprintf(`
  343. DROP OWNED BY %[1]s;
  344. DROP ROLE %[1]s;`, role))
  345. require.NoError(t, err)
  346. })
  347. dbCfg := testdb.GetConfig(t, db.Name)
  348. dbCfg.User = role
  349. dbCfg.Password = ""
  350. return dbCfg
  351. },
  352. expectedErr: "error writing to table: ERROR: permission denied for table hello_world",
  353. expectedLog: "successfully read from database\n",
  354. },
  355. }
  356. for _, tc := range testCases {
  357. t.Run(tc.desc, func(t *testing.T) {
  358. ctx := testhelper.Context(t)
  359. db := testdb.New(t)
  360. t.Cleanup(func() { require.NoError(t, db.Close()) })
  361. dbConf := tc.setup(t, db)
  362. conf := config.Config{DB: dbConf}
  363. var out bytes.Buffer
  364. c := NewPostgresReadWriteCheck(conf, &out, false)
  365. err := c.Run(ctx)
  366. if tc.expectedErr != "" {
  367. require.Error(t, err)
  368. require.Contains(t, err.Error(), tc.expectedErr)
  369. } else {
  370. require.NoError(t, err)
  371. }
  372. require.Equal(t, tc.expectedLog, out.String())
  373. })
  374. }
  375. }
  376. func TestNewUnavailableReposCheck(t *testing.T) {
  377. conf := config.Config{
  378. VirtualStorages: []*config.VirtualStorage{
  379. {
  380. Name: "virtual-storage-1",
  381. Nodes: []*config.Node{
  382. {Storage: "storage-0"},
  383. {Storage: "storage-1"},
  384. {Storage: "storage-2"},
  385. },
  386. },
  387. },
  388. }
  389. testCases := []struct {
  390. desc string
  391. healthyNodes map[string]map[string][]string
  392. expectedMsg string
  393. expectedErr error
  394. }{
  395. {
  396. desc: "all repos available",
  397. healthyNodes: map[string]map[string][]string{
  398. "praefect-0": {"virtual-storage-1": []string{"storage-0", "storage-1", "storage-2"}},
  399. },
  400. expectedMsg: "All repositories are available.\n",
  401. expectedErr: nil,
  402. },
  403. {
  404. desc: "one unavailable",
  405. healthyNodes: map[string]map[string][]string{
  406. "praefect-0": {"virtual-storage-1": []string{"storage-1", "storage-2"}},
  407. },
  408. expectedMsg: "virtual-storage \"virtual-storage-1\" has 1 repository that is unavailable.\n",
  409. expectedErr: errors.New("repositories unavailable"),
  410. },
  411. {
  412. desc: "three unavailable",
  413. healthyNodes: map[string]map[string][]string{
  414. "praefect-0": {"virtual-storage-1": []string{}},
  415. },
  416. expectedMsg: "virtual-storage \"virtual-storage-1\" has 3 repositories that are unavailable.\n",
  417. expectedErr: errors.New("repositories unavailable"),
  418. },
  419. }
  420. for _, tc := range testCases {
  421. t.Run(tc.desc, func(t *testing.T) {
  422. ctx := testhelper.Context(t)
  423. db := testdb.New(t)
  424. dbCfg := testdb.GetConfig(t, db.Name)
  425. conf.DB = dbCfg
  426. rs := datastore.NewPostgresRepositoryStore(db, nil)
  427. for path, storage := range map[string]string{
  428. "repo-0": "storage-0",
  429. "repo-1": "storage-1",
  430. "repo-2": "storage-2",
  431. } {
  432. repositoryID, err := rs.ReserveRepositoryID(ctx, "virtual-storage-1", path)
  433. require.NoError(t, err)
  434. require.NoError(t, rs.CreateRepository(
  435. ctx,
  436. repositoryID,
  437. "virtual-storage-1",
  438. path,
  439. path,
  440. storage,
  441. nil, nil, true, false,
  442. ))
  443. require.NoError(t, err)
  444. require.NoError(t, rs.SetGeneration(ctx, repositoryID, storage, path, 1))
  445. require.NoError(t, err)
  446. }
  447. testdb.SetHealthyNodes(t, ctx, db, tc.healthyNodes)
  448. var stdout bytes.Buffer
  449. check := NewUnavailableReposCheck(conf, &stdout, false)
  450. assert.Equal(t, tc.expectedErr, check.Run(ctx))
  451. assert.Equal(t, tc.expectedMsg, stdout.String())
  452. })
  453. }
  454. }
  455. func TestNewClockSyncCheck(t *testing.T) {
  456. for _, tt := range []struct {
  457. desc string
  458. offsetCheck func(ntpURL string, allowedOffset time.Duration) (bool, error)
  459. setup func(t *testing.T)
  460. expErr error
  461. }{
  462. {
  463. desc: "synced",
  464. offsetCheck: func(_ string, _ time.Duration) (bool, error) { return true, nil },
  465. },
  466. {
  467. desc: "not synced",
  468. offsetCheck: func(_ string, _ time.Duration) (bool, error) { return false, nil },
  469. expErr: errors.New("praefect: clock is not synced"),
  470. },
  471. {
  472. desc: "failure",
  473. offsetCheck: func(_ string, _ time.Duration) (bool, error) { return false, assert.AnError },
  474. expErr: fmt.Errorf("praefect: %w", assert.AnError),
  475. },
  476. {
  477. desc: "custom url",
  478. offsetCheck: func(url string, _ time.Duration) (bool, error) {
  479. if url != "custom" {
  480. return false, assert.AnError
  481. }
  482. return true, nil
  483. },
  484. setup: func(t *testing.T) {
  485. testhelper.ModifyEnvironment(t, "NTP_HOST", "custom")
  486. },
  487. },
  488. } {
  489. t.Run(tt.desc, func(t *testing.T) {
  490. ctx := testhelper.Context(t)
  491. if tt.setup != nil {
  492. tt.setup(t)
  493. }
  494. check := NewClockSyncCheck(tt.offsetCheck)
  495. err := check(config.Config{}, bytes.NewBuffer(nil), false).Run(ctx)
  496. require.Equal(t, tt.expErr, err)
  497. })
  498. }
  499. }