PageRenderTime 44ms CodeModel.GetById 29ms RepoModel.GetById 2ms app.codeStats 0ms

/agent/functional_tests/tests/functionaltests_test.go

https://gitlab.com/CORP-RESELLER/amazon-ecs-agent
Go | 721 lines | 552 code | 88 blank | 81 comment | 193 complexity | c6a2ff9b2528d19cdd122c5b4bfaf6ee MD5 | raw file
  1. // +build functional
  2. // Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License"). You may
  5. // not use this file except in compliance with the License. A copy of the
  6. // License is located at
  7. //
  8. // http://aws.amazon.com/apache2.0/
  9. //
  10. // or in the "license" file accompanying this file. This file is distributed
  11. // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  12. // express or implied. See the License for the specific language governing
  13. // permissions and limitations under the License.
  14. package functional_tests
  15. import (
  16. "bytes"
  17. "fmt"
  18. "io/ioutil"
  19. "os"
  20. "path/filepath"
  21. "reflect"
  22. "regexp"
  23. "strings"
  24. "testing"
  25. "time"
  26. "github.com/aws/amazon-ecs-agent/agent/ecs_client/model/ecs"
  27. . "github.com/aws/amazon-ecs-agent/agent/functional_tests/util"
  28. "github.com/aws/aws-sdk-go/aws"
  29. "github.com/aws/aws-sdk-go/aws/session"
  30. "github.com/aws/aws-sdk-go/service/cloudwatch"
  31. "github.com/aws/aws-sdk-go/service/cloudwatchlogs"
  32. docker "github.com/fsouza/go-dockerclient"
  33. "github.com/pborman/uuid"
  34. )
  // Timeouts shared by the functional tests in this file.
  const (
  	// waitTaskStateChangeDuration is how long tests wait for a task to reach
  	// the RUNNING or STOPPED state.
  	waitTaskStateChangeDuration = 2 * time.Minute
  	// waitMetricsInCloudwatchDuration is how long TestTelemetry sleeps before
  	// querying CloudWatch; it is also the width of the queried time window.
  	waitMetricsInCloudwatchDuration = 4 * time.Minute
  )

  // awslogsLogGroupName is the CloudWatch Logs log group used by
  // TestAwslogsDriver.
  const awslogsLogGroupName = "ecs-functional-tests"
  40. // TestRunManyTasks runs several tasks in short succession and expects them to
  41. // all run.
  42. func TestRunManyTasks(t *testing.T) {
  43. agent := RunAgent(t, nil)
  44. defer agent.Cleanup()
  45. numToRun := 15
  46. tasks := []*TestTask{}
  47. attemptsTaken := 0
  48. for numRun := 0; len(tasks) < numToRun; attemptsTaken++ {
  49. startNum := 10
  50. if numToRun-len(tasks) < 10 {
  51. startNum = numToRun - len(tasks)
  52. }
  53. startedTasks, err := agent.StartMultipleTasks(t, "simple-exit", startNum)
  54. if err != nil {
  55. continue
  56. }
  57. tasks = append(tasks, startedTasks...)
  58. numRun += 10
  59. }
  60. t.Logf("Ran %v containers; took %v tries\n", numToRun, attemptsTaken)
  61. for _, task := range tasks {
  62. err := task.WaitStopped(10 * time.Minute)
  63. if err != nil {
  64. t.Error(err)
  65. }
  66. if code, ok := task.ContainerExitcode("exit"); !ok || code != 42 {
  67. t.Error("Wrong exit code")
  68. }
  69. }
  70. }
  71. // TestPullInvalidImage verifies that an invalid image returns an error
  72. func TestPullInvalidImage(t *testing.T) {
  73. agent := RunAgent(t, nil)
  74. defer agent.Cleanup()
  75. testTask, err := agent.StartTask(t, "invalid-image")
  76. if err != nil {
  77. t.Fatalf("Expected to start invalid-image task: %v", err)
  78. }
  79. if err = testTask.ExpectErrorType("error", "CannotPullContainerError", 1*time.Minute); err != nil {
  80. t.Error(err)
  81. }
  82. }
  83. // TestOOMContainer verifies that an OOM container returns an error
  84. func TestOOMContainer(t *testing.T) {
  85. RequireDockerVersion(t, "<1.9.0,>1.9.1") // https://github.com/docker/docker/issues/18510
  86. agent := RunAgent(t, nil)
  87. defer agent.Cleanup()
  88. testTask, err := agent.StartTask(t, "oom-container")
  89. if err != nil {
  90. t.Fatalf("Expected to start invalid-image task: %v", err)
  91. }
  92. if err = testTask.ExpectErrorType("error", "OutOfMemoryError", 1*time.Minute); err != nil {
  93. t.Error(err)
  94. }
  95. }
  96. // This test addresses a deadlock issue which was noted in GH:313 and fixed
  97. // in GH:320. It runs a service with 10 containers, waits for cleanup, starts
  98. // another two instances of that service and ensures that those tasks complete.
  99. func TestTaskCleanupDoesNotDeadlock(t *testing.T) {
  100. // Set the ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION to its lowest permissible value
  101. os.Setenv("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION", "60s")
  102. defer os.Unsetenv("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION")
  103. agent := RunAgent(t, nil)
  104. defer agent.Cleanup()
  105. // This bug was fixed in v1.8.1
  106. agent.RequireVersion(">=1.8.1")
  107. // Run two Tasks after cleanup, as the deadlock does not consistently occur after
  108. // after just one task cleanup cycle.
  109. for i := 0; i < 3; i++ {
  110. // Start a task with ten containers
  111. testTask, err := agent.StartTask(t, "ten-containers")
  112. if err != nil {
  113. t.Fatalf("Cycle %d: There was an error starting the Task: %v", i, err)
  114. }
  115. isTaskRunning, err := agent.WaitRunningViaIntrospection(testTask)
  116. if err != nil || !isTaskRunning {
  117. t.Fatalf("Cycle %d: Task should be RUNNING but is not: %v", i, err)
  118. }
  119. // Get the dockerID so we can later check that the container has been cleaned up.
  120. dockerId, err := agent.ResolveTaskDockerID(testTask, "1")
  121. if err != nil {
  122. t.Fatalf("Cycle %d: Error resolving docker id for container in task: %v", i, err)
  123. }
  124. // 2 minutes should be enough for the Task to have completed. If the task has not
  125. // completed and is in PENDING, the agent is most likely deadlocked.
  126. err = testTask.WaitStopped(2 * time.Minute)
  127. if err != nil {
  128. t.Fatalf("Cycle %d: Task did not transition into to STOPPED in time: %v", i, err)
  129. }
  130. isTaskStopped, err := agent.WaitStoppedViaIntrospection(testTask)
  131. if err != nil || !isTaskStopped {
  132. t.Fatalf("Cycle %d: Task should be STOPPED but is not: %v", i, err)
  133. }
  134. // Wait for the tasks to be cleaned up
  135. time.Sleep(90 * time.Second)
  136. // Ensure that tasks are cleaned up. WWe should not be able to describe the
  137. // container now since it has been cleaned up.
  138. _, err = agent.DockerClient.InspectContainer(dockerId)
  139. if err == nil {
  140. t.Fatalf("Cycle %d: Expected error inspecting container in task.", i)
  141. }
  142. }
  143. }
  144. // TestSavedState verifies that stopping the agent, stopping a container under
  145. // its control, and starting the agent results in that container being moved to
  146. // 'stopped'
  147. func TestSavedState(t *testing.T) {
  148. agent := RunAgent(t, nil)
  149. defer agent.Cleanup()
  150. testTask, err := agent.StartTask(t, "nginx")
  151. if err != nil {
  152. t.Fatal(err)
  153. }
  154. err = testTask.WaitRunning(1 * time.Minute)
  155. if err != nil {
  156. t.Fatal(err)
  157. }
  158. dockerId, err := agent.ResolveTaskDockerID(testTask, "nginx")
  159. if err != nil {
  160. t.Fatal(err)
  161. }
  162. err = agent.StopAgent()
  163. if err != nil {
  164. t.Fatal(err)
  165. }
  166. err = agent.DockerClient.StopContainer(dockerId, 1)
  167. if err != nil {
  168. t.Fatal(err)
  169. }
  170. err = agent.StartAgent()
  171. if err != nil {
  172. t.Fatal(err)
  173. }
  174. testTask.WaitStopped(1 * time.Minute)
  175. }
  176. // TestPortResourceContention verifies that running two tasks on the same port
  177. // in quick-succession does not result in the second one failing to run. It
  178. // verifies the 'seqnum' serialization stuff works.
  179. func TestPortResourceContention(t *testing.T) {
  180. agent := RunAgent(t, nil)
  181. defer agent.Cleanup()
  182. testTask, err := agent.StartTask(t, "busybox-port-5180")
  183. if err != nil {
  184. t.Fatal(err)
  185. }
  186. err = testTask.WaitRunning(2 * time.Minute)
  187. if err != nil {
  188. t.Fatal(err)
  189. }
  190. err = testTask.Stop()
  191. if err != nil {
  192. t.Fatal(err)
  193. }
  194. testTask2, err := agent.StartTask(t, "busybox-port-5180")
  195. if err != nil {
  196. t.Fatal(err)
  197. }
  198. err = testTask2.WaitRunning(4 * time.Minute)
  199. if err != nil {
  200. t.Fatal(err)
  201. }
  202. testTask2.Stop()
  203. go testTask.WaitStopped(2 * time.Minute)
  204. testTask2.WaitStopped(2 * time.Minute)
  205. }
  // strptr returns a pointer to a fresh copy of s.
  func strptr(s string) *string {
  	p := new(string)
  	*p = s
  	return p
  }
  207. func TestCommandOverrides(t *testing.T) {
  208. agent := RunAgent(t, nil)
  209. defer agent.Cleanup()
  210. task, err := agent.StartTaskWithOverrides(t, "simple-exit", []*ecs.ContainerOverride{
  211. &ecs.ContainerOverride{
  212. Name: strptr("exit"),
  213. Command: []*string{strptr("sh"), strptr("-c"), strptr("exit 21")},
  214. },
  215. })
  216. if err != nil {
  217. t.Fatal(err)
  218. }
  219. err = task.WaitStopped(2 * time.Minute)
  220. if err != nil {
  221. t.Fatal(err)
  222. }
  223. if exitCode, _ := task.ContainerExitcode("exit"); exitCode != 21 {
  224. t.Errorf("Expected exit code of 21; got %v", exitCode)
  225. }
  226. }
  227. func TestLabels(t *testing.T) {
  228. agent := RunAgent(t, nil)
  229. defer agent.Cleanup()
  230. agent.RequireVersion(">=1.5.0")
  231. task, err := agent.StartTask(t, "labels")
  232. if err != nil {
  233. t.Fatal(err)
  234. }
  235. err = task.WaitStopped(2 * time.Minute)
  236. if err != nil {
  237. t.Fatal(err)
  238. }
  239. dockerId, err := agent.ResolveTaskDockerID(task, "labeled")
  240. if err != nil {
  241. t.Fatal(err)
  242. }
  243. container, err := agent.DockerClient.InspectContainer(dockerId)
  244. if err != nil {
  245. t.Fatal(err)
  246. }
  247. if container.Config.Labels["label1"] != "" || container.Config.Labels["com.foo.label2"] != "value" {
  248. t.Fatalf("Labels did not match expected; expected to contain label1: com.foo.label2:value, got %v", container.Config.Labels)
  249. }
  250. }
  251. func TestLogdriverOptions(t *testing.T) {
  252. agent := RunAgent(t, nil)
  253. defer agent.Cleanup()
  254. agent.RequireVersion(">=1.5.0")
  255. task, err := agent.StartTask(t, "logdriver-jsonfile")
  256. if err != nil {
  257. t.Fatal(err)
  258. }
  259. err = task.WaitStopped(2 * time.Minute)
  260. if err != nil {
  261. t.Fatal(err)
  262. }
  263. dockerId, err := agent.ResolveTaskDockerID(task, "exit")
  264. if err != nil {
  265. t.Fatal(err)
  266. }
  267. container, err := agent.DockerClient.InspectContainer(dockerId)
  268. if err != nil {
  269. t.Fatal(err)
  270. }
  271. if container.HostConfig.LogConfig.Type != "json-file" {
  272. t.Errorf("Expected json-file type logconfig, was %v", container.HostConfig.LogConfig.Type)
  273. }
  274. if !reflect.DeepEqual(map[string]string{"max-file": "50", "max-size": "50k"}, container.HostConfig.LogConfig.Config) {
  275. t.Errorf("Expected max-file:50 max-size:50k for logconfig options, got %v", container.HostConfig.LogConfig.Config)
  276. }
  277. }
  278. func TestDockerAuth(t *testing.T) {
  279. agent := RunAgent(t, &AgentOptions{
  280. ExtraEnvironment: map[string]string{
  281. "ECS_ENGINE_AUTH_TYPE": "dockercfg",
  282. "ECS_ENGINE_AUTH_DATA": `{"127.0.0.1:51671":{"auth":"dXNlcjpzd29yZGZpc2g=","email":"foo@example.com"}}`, // user:swordfish
  283. },
  284. })
  285. defer agent.Cleanup()
  286. task, err := agent.StartTask(t, "simple-exit-authed")
  287. if err != nil {
  288. t.Fatal(err)
  289. }
  290. err = task.WaitStopped(2 * time.Minute)
  291. if err != nil {
  292. t.Fatal(err)
  293. }
  294. if exitCode, _ := task.ContainerExitcode("exit"); exitCode != 42 {
  295. t.Errorf("Expected exit code of 42; got %v", exitCode)
  296. }
  297. // verify there's no sign of auth details in the config; action item taken as
  298. // a result of accidentally logging them once
  299. logdir := agent.Logdir
  300. badStrings := []string{"user:swordfish", "swordfish", "dXNlcjpzd29yZGZpc2g="}
  301. err = filepath.Walk(logdir, func(path string, info os.FileInfo, err error) error {
  302. if info.IsDir() {
  303. return nil
  304. }
  305. data, err := ioutil.ReadFile(path)
  306. if err != nil {
  307. return err
  308. }
  309. for _, badstring := range badStrings {
  310. if strings.Contains(string(data), badstring) {
  311. t.Fatalf("log data contained bad string: %v, %v", string(data), badstring)
  312. }
  313. if strings.Contains(string(data), fmt.Sprintf("%v", []byte(badstring))) {
  314. t.Fatalf("log data contained byte-slice representation of bad string: %v, %v", string(data), badstring)
  315. }
  316. gobytes := fmt.Sprintf("%#v", []byte(badstring))
  317. // format is []byte{0x12, 0x34}
  318. // if it were json.RawMessage or another alias, it would print as json.RawMessage ... in the log
  319. // Because of this, strip down to just the comma-seperated hex and look for that
  320. if strings.Contains(string(data), gobytes[len(`[]byte{`):len(gobytes)-1]) {
  321. t.Fatalf("log data contained byte-hex representation of bad string: %v, %v", string(data), badstring)
  322. }
  323. }
  324. return nil
  325. })
  326. if err != nil {
  327. t.Errorf("Could not walk logdir: %v", err)
  328. }
  329. }
  330. func TestSquidProxy(t *testing.T) {
  331. // Run a squid proxy manually, verify that the agent can connect through it
  332. client, err := docker.NewVersionedClientFromEnv("1.17")
  333. if err != nil {
  334. t.Fatal(err)
  335. }
  336. dockerConfig := docker.Config{
  337. Image: "127.0.0.1:51670/amazon/squid:latest",
  338. }
  339. dockerHostConfig := docker.HostConfig{}
  340. squidContainer, err := client.CreateContainer(docker.CreateContainerOptions{
  341. Config: &dockerConfig,
  342. HostConfig: &dockerHostConfig,
  343. })
  344. if err != nil {
  345. t.Fatal(err)
  346. }
  347. if err := client.StartContainer(squidContainer.ID, &dockerHostConfig); err != nil {
  348. t.Fatal(err)
  349. }
  350. defer func() {
  351. client.RemoveContainer(docker.RemoveContainerOptions{
  352. Force: true,
  353. ID: squidContainer.ID,
  354. RemoveVolumes: true,
  355. })
  356. }()
  357. // Resolve the name so we can use it in the link below; the create returns an ID only
  358. squidContainer, err = client.InspectContainer(squidContainer.ID)
  359. if err != nil {
  360. t.Fatal(err)
  361. }
  362. // Squid startup time
  363. time.Sleep(1 * time.Second)
  364. t.Logf("Started squid container: %v", squidContainer.Name)
  365. agent := RunAgent(t, &AgentOptions{
  366. ExtraEnvironment: map[string]string{
  367. "HTTP_PROXY": "squid:3128",
  368. "NO_PROXY": "169.254.169.254,/var/run/docker.sock",
  369. },
  370. ContainerLinks: []string{squidContainer.Name + ":squid"},
  371. })
  372. defer agent.Cleanup()
  373. agent.RequireVersion(">1.5.0")
  374. task, err := agent.StartTask(t, "simple-exit")
  375. if err != nil {
  376. t.Fatal(err)
  377. }
  378. // Verify the agent can run a container using the proxy
  379. task.WaitStopped(1 * time.Minute)
  380. // stop the agent, thus forcing it to close its connections; this is needed
  381. // because squid's access logs are written on DC not connect
  382. err = agent.StopAgent()
  383. if err != nil {
  384. t.Fatal(err)
  385. }
  386. // Now verify it actually used the proxy via squids access logs. Get all the
  387. // unique addresses that squid proxied for (assume nothing else used the
  388. // proxy).
  389. // This should be '3' currently, for example I see the following at the time of writing
  390. // ecs.us-west-2.amazonaws.com:443
  391. // ecs-a-1.us-west-2.amazonaws.com:443
  392. // ecs-t-1.us-west-2.amazonaws.com:443
  393. // Note, it connects multiple times to the first one which is an
  394. // implementation detail we might change/optimize, intentionally dedupe so
  395. // we're not tied to that sorta thing
  396. // Note, do a docker exec instead of bindmount the logs out because the logs
  397. // will not be permissioned correctly in the bindmount. Once we have proper
  398. // user namespacing we could revisit this
  399. logExec, err := client.CreateExec(docker.CreateExecOptions{
  400. AttachStdout: true,
  401. AttachStdin: false,
  402. Container: squidContainer.ID,
  403. // Takes a second to flush the file sometimes, so slightly complicated command to wait for it to be written
  404. Cmd: []string{"sh", "-c", "FILE=/var/log/squid/access.log; while [ ! -s $FILE ]; do sleep 1; done; cat $FILE"},
  405. })
  406. if err != nil {
  407. t.Fatal(err)
  408. }
  409. t.Logf("Execing cat of /var/log/squid/access.log on %v", squidContainer.ID)
  410. var squidLogs bytes.Buffer
  411. err = client.StartExec(logExec.ID, docker.StartExecOptions{
  412. OutputStream: &squidLogs,
  413. })
  414. if err != nil {
  415. t.Fatal(err)
  416. }
  417. for {
  418. tmp, _ := client.InspectExec(logExec.ID)
  419. if !tmp.Running {
  420. break
  421. }
  422. time.Sleep(100 * time.Millisecond)
  423. }
  424. t.Logf("Squid logs: %v", squidLogs.String())
  425. // Of the format:
  426. // 1445018173.730 3163 10.0.0.1 TCP_MISS/200 5706 CONNECT ecs.us-west-2.amazonaws.com:443 - HIER_DIRECT/54.240.250.253 -
  427. // 1445018173.730 3103 10.0.0.1 TCP_MISS/200 3117 CONNECT ecs.us-west-2.amazonaws.com:443 - HIER_DIRECT/54.240.250.253 -
  428. // 1445018173.730 3025 10.0.0.1 TCP_MISS/200 3336 CONNECT ecs-a-1.us-west-2.amazonaws.com:443 - HIER_DIRECT/54.240.249.4 -
  429. // 1445018173.731 3086 10.0.0.1 TCP_MISS/200 3411 CONNECT ecs-t-1.us-west-2.amazonaws.com:443 - HIER_DIRECT/54.240.254.59
  430. allAddressesRegex := regexp.MustCompile("CONNECT [^ ]+ ")
  431. // Match just the host+port it's proxying to
  432. matches := allAddressesRegex.FindAllStringSubmatch(squidLogs.String(), -1)
  433. t.Logf("Proxy connections: %v", matches)
  434. dedupedMatches := map[string]struct{}{}
  435. for _, match := range matches {
  436. dedupedMatches[match[0]] = struct{}{}
  437. }
  438. if len(dedupedMatches) < 3 {
  439. t.Errorf("Expected 3 matches, actually had %d matches: %+v", len(dedupedMatches), dedupedMatches)
  440. }
  441. }
  442. // TestAwslogsDriver verifies that container logs are sent to Amazon CloudWatch Logs with awslogs as the log driver
  443. func TestAwslogsDriver(t *testing.T) {
  444. RequireDockerVersion(t, ">=1.9.0") // awslogs drivers available from docker 1.9.0
  445. cwlClient := cloudwatchlogs.New(session.New(), aws.NewConfig().WithRegion(*ECS.Config.Region))
  446. // Test whether the log group existed or not
  447. respDescribeLogGroups, err := cwlClient.DescribeLogGroups(&cloudwatchlogs.DescribeLogGroupsInput{
  448. LogGroupNamePrefix: aws.String(awslogsLogGroupName),
  449. })
  450. if err != nil {
  451. t.Fatalf("CloudWatchLogs describe log groups error: %v", err)
  452. }
  453. logGroupExists := false
  454. for i := 0; i < len(respDescribeLogGroups.LogGroups); i++ {
  455. if *respDescribeLogGroups.LogGroups[i].LogGroupName == awslogsLogGroupName {
  456. logGroupExists = true
  457. break
  458. }
  459. }
  460. if !logGroupExists {
  461. _, err := cwlClient.CreateLogGroup(&cloudwatchlogs.CreateLogGroupInput{
  462. LogGroupName: aws.String(awslogsLogGroupName),
  463. })
  464. if err != nil {
  465. t.Fatalf("Failed to create log group %s : %v", awslogsLogGroupName, err)
  466. }
  467. }
  468. agentOptions := AgentOptions{
  469. ExtraEnvironment: map[string]string{
  470. "ECS_AVAILABLE_LOGGING_DRIVERS": `["awslogs"]`,
  471. },
  472. }
  473. agent := RunAgent(t, &agentOptions)
  474. defer agent.Cleanup()
  475. agent.RequireVersion(">=1.9.0") //Required for awslogs driver
  476. testTask, err := agent.StartTask(t, "awslogs")
  477. if err != nil {
  478. t.Fatalf("Expected to start task using awslogs driver failed: %v", err)
  479. }
  480. // Wait for the container to start
  481. testTask.WaitRunning(waitTaskStateChangeDuration)
  482. containerId, err := agent.ResolveTaskDockerID(testTask, "awslogs")
  483. if err != nil {
  484. t.Fatalf("Failed to get the container ID")
  485. }
  486. // Delete the log stream after the test
  487. defer func() {
  488. cwlClient.DeleteLogStream(&cloudwatchlogs.DeleteLogStreamInput{
  489. LogGroupName: aws.String(awslogsLogGroupName),
  490. LogStreamName: aws.String(containerId),
  491. })
  492. }()
  493. params := &cloudwatchlogs.GetLogEventsInput{
  494. LogGroupName: aws.String(awslogsLogGroupName),
  495. LogStreamName: aws.String(containerId),
  496. }
  497. resp, err := cwlClient.GetLogEvents(params)
  498. if err != nil {
  499. t.Fatalf("CloudWatchLogs get log failed: %v", err)
  500. }
  501. if len(resp.Events) != 1 {
  502. t.Errorf("Get unexpected number of log events: %d", len(resp.Events))
  503. } else if *resp.Events[0].Message != "hello world" {
  504. t.Errorf("Got log events message unexpected: %s", *resp.Events[0].Message)
  505. }
  506. }
  507. func TestTaskCleanup(t *testing.T) {
  508. // Set the task cleanup time to just over a minute.
  509. os.Setenv("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION", "70s")
  510. agent := RunAgent(t, nil)
  511. defer func() {
  512. agent.Cleanup()
  513. os.Unsetenv("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION")
  514. }()
  515. // Start a task and get the container id once the task transitions to RUNNING.
  516. task, err := agent.StartTask(t, "nginx")
  517. if err != nil {
  518. t.Fatalf("Error starting task: %v", err)
  519. }
  520. err = task.WaitRunning(2 * time.Minute)
  521. if err != nil {
  522. t.Fatalf("Error waiting for running task: %v", err)
  523. }
  524. dockerId, err := agent.ResolveTaskDockerID(task, "nginx")
  525. if err != nil {
  526. t.Fatalf("Error resolving docker id for container in task: %v", err)
  527. }
  528. // We should be able to inspect the container ID from docker at this point.
  529. _, err = agent.DockerClient.InspectContainer(dockerId)
  530. if err != nil {
  531. t.Fatalf("Error inspecting container in task: %v", err)
  532. }
  533. // Stop the task and sleep for 2 minutes to let the task be cleaned up.
  534. err = agent.DockerClient.StopContainer(dockerId, 1)
  535. if err != nil {
  536. t.Fatalf("Error stoppping task: %v", err)
  537. }
  538. err = task.WaitStopped(1 * time.Minute)
  539. if err != nil {
  540. t.Fatalf("Error waiting for task stopped: %v", err)
  541. }
  542. time.Sleep(2 * time.Minute)
  543. // We should not be able to describe the container now since it has been cleaned up.
  544. _, err = agent.DockerClient.InspectContainer(dockerId)
  545. if err == nil {
  546. t.Fatalf("Expected error inspecting container in task")
  547. }
  548. }
  549. // TestTelemetry tests whether agent can send metrics to TACS
  550. func TestTelemetry(t *testing.T) {
  551. // Try to use a new cluster for this test, ensure no other task metrics for this cluster
  552. newClusterName := "ecstest-telemetry-" + uuid.New()
  553. _, err := ECS.CreateCluster(&ecs.CreateClusterInput{
  554. ClusterName: aws.String(newClusterName),
  555. })
  556. if err != nil {
  557. t.Fatalf("Failed to create cluster %s : %v", newClusterName, err)
  558. }
  559. defer DeleteCluster(t, newClusterName)
  560. agentOptions := AgentOptions{
  561. ExtraEnvironment: map[string]string{
  562. "ECS_CLUSTER": newClusterName,
  563. },
  564. }
  565. agent := RunAgent(t, &agentOptions)
  566. defer agent.Cleanup()
  567. params := &cloudwatch.GetMetricStatisticsInput{
  568. MetricName: aws.String("CPUUtilization"),
  569. Namespace: aws.String("AWS/ECS"),
  570. Period: aws.Int64(60),
  571. Statistics: []*string{
  572. aws.String("Average"),
  573. aws.String("SampleCount"),
  574. },
  575. Dimensions: []*cloudwatch.Dimension{
  576. {
  577. Name: aws.String("ClusterName"),
  578. Value: aws.String(newClusterName),
  579. },
  580. },
  581. }
  582. params.StartTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC())
  583. params.EndTime = aws.Time((*params.StartTime).Add(waitMetricsInCloudwatchDuration).UTC())
  584. // wait for the agent start and ensure no task is running
  585. time.Sleep(waitMetricsInCloudwatchDuration)
  586. cwclient := cloudwatch.New(session.New(), aws.NewConfig().WithRegion(*ECS.Config.Region))
  587. if err = VerifyMetrics(cwclient, params, true); err != nil {
  588. t.Errorf("Before task running, verify metrics for CPU utilization failed: %v", err)
  589. }
  590. params.MetricName = aws.String("MemoryUtilization")
  591. if err = VerifyMetrics(cwclient, params, true); err != nil {
  592. t.Errorf("Before task running, verify metrics for memory utilization failed: %v", err)
  593. }
  594. testTask, err := agent.StartTask(t, "telemetry")
  595. if err != nil {
  596. t.Fatalf("Expected to start telemetry task: %v", err)
  597. }
  598. // Wait for the task to run and the agent to send back metrics
  599. err = testTask.WaitRunning(waitTaskStateChangeDuration)
  600. if err != nil {
  601. t.Fatalf("Error start telemetry task: %v", err)
  602. }
  603. time.Sleep(waitMetricsInCloudwatchDuration)
  604. params.EndTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC())
  605. params.StartTime = aws.Time((*params.EndTime).Add(-waitMetricsInCloudwatchDuration).UTC())
  606. params.MetricName = aws.String("CPUUtilization")
  607. if err = VerifyMetrics(cwclient, params, false); err != nil {
  608. t.Errorf("Task is running, verify metrics for CPU utilization failed: %v", err)
  609. }
  610. params.MetricName = aws.String("MemoryUtilization")
  611. if err = VerifyMetrics(cwclient, params, false); err != nil {
  612. t.Errorf("Task is running, verify metrics for memory utilization failed: %v", err)
  613. }
  614. err = testTask.Stop()
  615. if err != nil {
  616. t.Fatalf("Failed to stop the telemetry task: %v", err)
  617. }
  618. err = testTask.WaitStopped(waitTaskStateChangeDuration)
  619. if err != nil {
  620. t.Fatalf("Waiting for task stop error: %v", err)
  621. }
  622. time.Sleep(waitMetricsInCloudwatchDuration)
  623. params.EndTime = aws.Time(RoundTimeUp(time.Now(), time.Minute).UTC())
  624. params.StartTime = aws.Time((*params.EndTime).Add(-waitMetricsInCloudwatchDuration).UTC())
  625. params.MetricName = aws.String("CPUUtilization")
  626. if err = VerifyMetrics(cwclient, params, true); err != nil {
  627. t.Errorf("Task stopped: verify metrics for CPU utilization failed: %v", err)
  628. }
  629. params.MetricName = aws.String("MemoryUtilization")
  630. if err = VerifyMetrics(cwclient, params, true); err != nil {
  631. t.Errorf("Task stopped, verify metrics for memory utilization failed: %v", err)
  632. }
  633. }