
/test/e2e/framework/util.go

https://gitlab.com/CORP-RESELLER/kubernetes
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package framework
  14. import (
  15. "bytes"
  16. "encoding/json"
  17. "fmt"
  18. "io"
  19. "math"
  20. "math/rand"
  21. "net"
  22. "net/http"
  23. "net/url"
  24. "os"
  25. "os/exec"
  26. "path"
  27. "path/filepath"
  28. "regexp"
  29. goRuntime "runtime"
  30. "sort"
  31. "strconv"
  32. "strings"
  33. "sync"
  34. "time"
  35. "k8s.io/kubernetes/federation/client/clientset_generated/federation_internalclientset"
  36. unversionedfederation "k8s.io/kubernetes/federation/client/clientset_generated/federation_internalclientset/typed/federation/unversioned"
  37. "k8s.io/kubernetes/federation/client/clientset_generated/federation_release_1_3"
  38. "k8s.io/kubernetes/federation/client/clientset_generated/federation_release_1_4"
  39. "k8s.io/kubernetes/pkg/api"
  40. apierrs "k8s.io/kubernetes/pkg/api/errors"
  41. "k8s.io/kubernetes/pkg/api/resource"
  42. "k8s.io/kubernetes/pkg/api/unversioned"
  43. "k8s.io/kubernetes/pkg/apis/extensions"
  44. "k8s.io/kubernetes/pkg/client/cache"
  45. clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
  46. "k8s.io/kubernetes/pkg/client/restclient"
  47. "k8s.io/kubernetes/pkg/client/typed/discovery"
  48. client "k8s.io/kubernetes/pkg/client/unversioned"
  49. "k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
  50. clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
  51. gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
  52. deploymentutil "k8s.io/kubernetes/pkg/controller/deployment/util"
  53. "k8s.io/kubernetes/pkg/fields"
  54. "k8s.io/kubernetes/pkg/kubectl"
  55. "k8s.io/kubernetes/pkg/kubelet/util/format"
  56. "k8s.io/kubernetes/pkg/labels"
  57. "k8s.io/kubernetes/pkg/master/ports"
  58. "k8s.io/kubernetes/pkg/runtime"
  59. sshutil "k8s.io/kubernetes/pkg/ssh"
  60. "k8s.io/kubernetes/pkg/types"
  61. labelsutil "k8s.io/kubernetes/pkg/util/labels"
  62. "k8s.io/kubernetes/pkg/util/sets"
  63. "k8s.io/kubernetes/pkg/util/system"
  64. "k8s.io/kubernetes/pkg/util/uuid"
  65. "k8s.io/kubernetes/pkg/util/wait"
  66. utilyaml "k8s.io/kubernetes/pkg/util/yaml"
  67. "k8s.io/kubernetes/pkg/version"
  68. "k8s.io/kubernetes/pkg/watch"
  69. "github.com/blang/semver"
  70. "golang.org/x/crypto/ssh"
  71. "golang.org/x/net/websocket"
  72. . "github.com/onsi/ginkgo"
  73. . "github.com/onsi/gomega"
  74. gomegatypes "github.com/onsi/gomega/types"
  75. )
  76. const (
  77. // How long to wait for the pod to be listable
  78. PodListTimeout = time.Minute
  79. // Initial pod start can be delayed O(minutes) by slow docker pulls
  80. // TODO: Make this 30 seconds once #4566 is resolved.
  81. PodStartTimeout = 5 * time.Minute
  82. // How long to wait for the pod to no longer be running
  83. podNoLongerRunningTimeout = 30 * time.Second
  84. // If there are any orphaned namespaces to clean up, this test is running
  85. // on a long lived cluster. A long wait here is preferable to spurious test
  86. // failures caused by leaked resources from a previous test run.
  87. NamespaceCleanupTimeout = 15 * time.Minute
  88. // Some pods can take much longer to get ready due to volume attach/detach latency.
  89. slowPodStartTimeout = 15 * time.Minute
  90. // How long to wait for a service endpoint to be resolvable.
  91. ServiceStartTimeout = 1 * time.Minute
  92. // String used to mark pod deletion
  93. nonExist = "NonExist"
  94. // How often to Poll pods, nodes and claims.
  95. Poll = 2 * time.Second
  96. // service accounts are provisioned after namespace creation
  97. // a service account is required to support pod creation in a namespace as part of admission control
  98. ServiceAccountProvisionTimeout = 2 * time.Minute
  99. // How long to try single API calls (like 'get' or 'list'). Used to prevent
  100. // transient failures from failing tests.
  101. // TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
  102. SingleCallTimeout = 5 * time.Minute
  103. // How long nodes have to be "ready" when a test begins. They should already
  104. // be "ready" before the test starts, so this is small.
  105. NodeReadyInitialTimeout = 20 * time.Second
  106. // How long pods have to be "ready" when a test begins.
  107. PodReadyBeforeTimeout = 5 * time.Minute
  108. // How long pods have to become scheduled onto nodes
  109. podScheduledBeforeTimeout = PodListTimeout + (20 * time.Second)
  110. podRespondingTimeout = 2 * time.Minute
  111. ServiceRespondingTimeout = 2 * time.Minute
  112. EndpointRegisterTimeout = time.Minute
  113. // How long claims have to become dynamically provisioned
  114. ClaimProvisionTimeout = 5 * time.Minute
  115. // When these values are updated, also update cmd/kubelet/app/options/options.go
  116. currentPodInfraContainerImageName = "gcr.io/google_containers/pause"
  117. currentPodInfraContainerImageVersion = "3.0"
  118. // How long each node is given during a process that restarts all nodes
  119. // before the test is considered failed. (Note that the total time to
  120. // restart all nodes will be this number times the number of nodes.)
  121. RestartPerNodeTimeout = 5 * time.Minute
  123. // How often to Poll the status of a restart.
  123. RestartPoll = 20 * time.Second
  124. // How long a node is allowed to become "Ready" after it is restarted before
  125. // the test is considered failed.
  126. RestartNodeReadyAgainTimeout = 5 * time.Minute
  127. // How long a pod is allowed to become "running" and "ready" after a node
  128. // restart before test is considered failed.
  129. RestartPodReadyAgainTimeout = 5 * time.Minute
  130. // Number of times we want to retry Updates in case of conflict
  131. UpdateRetries = 5
  132. )
  133. var (
  134. // Label allocated to the image puller static pod that runs on each node
  135. // before e2es.
  136. ImagePullerLabels = map[string]string{"name": "e2e-image-puller"}
  137. // For parsing Kubectl version for version-skewed testing.
  138. gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"")
  139. )
  140. // GetServerArchitecture fetches the architecture of the cluster's apiserver.
  141. func GetServerArchitecture(c *client.Client) string {
  142. arch := ""
  143. sVer, err := c.Discovery().ServerVersion()
  144. if err != nil || sVer.Platform == "" {
  145. // If we failed to get the server version for some reason, default to amd64.
  146. arch = "amd64"
  147. } else {
  148. // Split the platform string into OS and Arch separately.
  149. // The platform string may for example be "linux/amd64", "linux/arm" or "windows/amd64".
  150. osArchArray := strings.Split(sVer.Platform, "/")
  151. arch = osArchArray[1]
  152. }
  153. return arch
  154. }
  155. // GetPauseImageName fetches the pause image name for the same architecture as the apiserver.
  156. func GetPauseImageName(c *client.Client) string {
  157. return currentPodInfraContainerImageName + "-" + GetServerArchitecture(c) + ":" + currentPodInfraContainerImageVersion
  158. }
  159. // GetPauseImageNameForHostArch fetches the pause image name for the same architecture the test is running on.
  160. func GetPauseImageNameForHostArch() string {
  161. return currentPodInfraContainerImageName + "-" + goRuntime.GOARCH + ":" + currentPodInfraContainerImageVersion
  162. }
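// A minimal sketch of how the pause image reference is assembled from the
// constants above, assuming the apiserver reports an "amd64" platform (any
// other architecture is substituted the same way):
//
//	name := currentPodInfraContainerImageName + "-" + "amd64" + ":" + currentPodInfraContainerImageVersion
//	// name == "gcr.io/google_containers/pause-amd64:3.0"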
  163. // SubResource proxy should have been functional in v1.0.0, but SubResource
  164. // proxy via tunneling is known to be broken in v1.0. See
  165. // https://github.com/kubernetes/kubernetes/pull/15224#issuecomment-146769463
  166. //
  167. // TODO(ihmccreery): remove once we don't care about v1.0 anymore, (tentatively
  168. // in v1.3).
  169. var SubResourcePodProxyVersion = version.MustParse("v1.1.0")
  170. var subResourceServiceAndNodeProxyVersion = version.MustParse("v1.2.0")
  171. func GetServicesProxyRequest(c *client.Client, request *restclient.Request) (*restclient.Request, error) {
  172. subResourceProxyAvailable, err := ServerVersionGTE(subResourceServiceAndNodeProxyVersion, c)
  173. if err != nil {
  174. return nil, err
  175. }
  176. if subResourceProxyAvailable {
  177. return request.Resource("services").SubResource("proxy"), nil
  178. }
  179. return request.Prefix("proxy").Resource("services"), nil
  180. }
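// A hedged usage sketch for GetServicesProxyRequest; it assumes a
// *client.Client named c and a service "my-service" on port 80 in namespace
// ns (both names are illustrative). The helper only chooses between the old
// and new proxy URL layouts; the caller still fills in namespace, name and
// suffix:
//
//	proxyRequest, err := GetServicesProxyRequest(c, c.Get())
//	if err == nil {
//		body, _ := proxyRequest.Namespace(ns).Name("my-service:80").Suffix("healthz").Do().Raw()
//		_ = body
//	}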
  181. // unique identifier of the e2e run
  182. var RunId = uuid.NewUUID()
  183. type CreateTestingNSFn func(baseName string, c *client.Client, labels map[string]string) (*api.Namespace, error)
  184. type ContainerFailures struct {
  185. status *api.ContainerStateTerminated
  186. Restarts int
  187. }
  188. func GetMasterHost() string {
  189. masterUrl, err := url.Parse(TestContext.Host)
  190. ExpectNoError(err)
  191. return masterUrl.Host
  192. }
  193. // Convenient wrapper around cache.Store that returns list of api.Pod instead of interface{}.
  194. type PodStore struct {
  195. cache.Store
  196. stopCh chan struct{}
  197. }
  198. func NewPodStore(c *client.Client, namespace string, label labels.Selector, field fields.Selector) *PodStore {
  199. lw := &cache.ListWatch{
  200. ListFunc: func(options api.ListOptions) (runtime.Object, error) {
  201. options.LabelSelector = label
  202. options.FieldSelector = field
  203. return c.Pods(namespace).List(options)
  204. },
  205. WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
  206. options.LabelSelector = label
  207. options.FieldSelector = field
  208. return c.Pods(namespace).Watch(options)
  209. },
  210. }
  211. store := cache.NewStore(cache.MetaNamespaceKeyFunc)
  212. stopCh := make(chan struct{})
  213. cache.NewReflector(lw, &api.Pod{}, store, 0).RunUntil(stopCh)
  214. return &PodStore{store, stopCh}
  215. }
  216. func (s *PodStore) List() []*api.Pod {
  217. objects := s.Store.List()
  218. pods := make([]*api.Pod, 0)
  219. for _, o := range objects {
  220. pods = append(pods, o.(*api.Pod))
  221. }
  222. return pods
  223. }
  224. func (s *PodStore) Stop() {
  225. close(s.stopCh)
  226. }
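// A hedged usage sketch for PodStore (assumes a *client.Client named c): the
// reflector started by NewPodStore keeps the store in sync until Stop is
// called, and List returns the pods currently known to it.
//
//	store := NewPodStore(c, api.NamespaceDefault, labels.Everything(), fields.Everything())
//	defer store.Stop()
//	pods := store.List() // []*api.Pod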
  227. type RCConfig struct {
  228. Client *client.Client
  229. Image string
  230. Command []string
  231. Name string
  232. Namespace string
  233. PollInterval time.Duration
  234. Timeout time.Duration
  235. PodStatusFile *os.File
  236. Replicas int
  237. CpuRequest int64 // millicores
  238. CpuLimit int64 // millicores
  239. MemRequest int64 // bytes
  240. MemLimit int64 // bytes
  241. ReadinessProbe *api.Probe
  242. DNSPolicy *api.DNSPolicy
  243. // Env vars, set the same for every pod.
  244. Env map[string]string
  245. // Extra labels added to every pod.
  246. Labels map[string]string
  247. // Node selector for pods in the RC.
  248. NodeSelector map[string]string
  249. // Ports to declare in the container (map of name to containerPort).
  250. Ports map[string]int
  251. // Ports to declare in the container as host and container ports.
  252. HostPorts map[string]int
  253. Volumes []api.Volume
  254. VolumeMounts []api.VolumeMount
  255. // Pointer to a list of pods; if non-nil, will be set to a list of pods
  256. // created by this RC by RunRC.
  257. CreatedPods *[]*api.Pod
  258. // Maximum allowable container failures. If exceeded, RunRC returns an error.
  259. // Defaults to replicas*0.1 if unspecified.
  260. MaxContainerFailures *int
  261. // If set to false starting RC will print progress, otherwise only errors will be printed.
  262. Silent bool
  263. }
  264. type DeploymentConfig struct {
  265. RCConfig
  266. }
  267. type ReplicaSetConfig struct {
  268. RCConfig
  269. }
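// A hedged sketch of filling in an RCConfig, which is consumed by RunRC
// elsewhere in this package; the client c, namespace ns, name and replica
// count are purely illustrative:
//
//	cfg := RCConfig{
//		Client:    c,
//		Image:     GetPauseImageName(c),
//		Name:      "example-rc",
//		Namespace: ns,
//		Replicas:  3,
//		Timeout:   10 * time.Minute,
//	}
//	err := RunRC(cfg) // creates the RC and waits for its pods to run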
  270. func nowStamp() string {
  271. return time.Now().Format(time.StampMilli)
  272. }
  273. func log(level string, format string, args ...interface{}) {
  274. fmt.Fprintf(GinkgoWriter, nowStamp()+": "+level+": "+format+"\n", args...)
  275. }
  276. func Logf(format string, args ...interface{}) {
  277. log("INFO", format, args...)
  278. }
  279. func Failf(format string, args ...interface{}) {
  280. msg := fmt.Sprintf(format, args...)
  281. log("INFO", msg)
  282. Fail(nowStamp()+": "+msg, 1)
  283. }
  284. func Skipf(format string, args ...interface{}) {
  285. msg := fmt.Sprintf(format, args...)
  286. log("INFO", msg)
  287. Skip(nowStamp() + ": " + msg)
  288. }
  289. func SkipUnlessNodeCountIsAtLeast(minNodeCount int) {
  290. if TestContext.CloudConfig.NumNodes < minNodeCount {
  291. Skipf("Requires at least %d nodes (not %d)", minNodeCount, TestContext.CloudConfig.NumNodes)
  292. }
  293. }
  294. func SkipUnlessAtLeast(value int, minValue int, message string) {
  295. if value < minValue {
  296. Skipf(message)
  297. }
  298. }
  299. func SkipIfProviderIs(unsupportedProviders ...string) {
  300. if ProviderIs(unsupportedProviders...) {
  301. Skipf("Not supported for providers %v (found %s)", unsupportedProviders, TestContext.Provider)
  302. }
  303. }
  304. func SkipUnlessProviderIs(supportedProviders ...string) {
  305. if !ProviderIs(supportedProviders...) {
  306. Skipf("Only supported for providers %v (not %s)", supportedProviders, TestContext.Provider)
  307. }
  308. }
  309. func SkipIfContainerRuntimeIs(runtimes ...string) {
  310. for _, runtime := range runtimes {
  311. if runtime == TestContext.ContainerRuntime {
  312. Skipf("Not supported under container runtime %s", runtime)
  313. }
  314. }
  315. }
  316. func ProviderIs(providers ...string) bool {
  317. for _, provider := range providers {
  318. if strings.ToLower(provider) == strings.ToLower(TestContext.Provider) {
  319. return true
  320. }
  321. }
  322. return false
  323. }
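// Typical (illustrative) usage of the skip helpers at the top of a test:
//
//	SkipUnlessProviderIs("gce", "gke") // run only on the listed providers
//	SkipIfProviderIs("vagrant")        // or bail out on specific ones
//	if ProviderIs("aws") {
//		// provider-specific setup would go here
//	}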
  324. func SkipUnlessServerVersionGTE(v semver.Version, c discovery.ServerVersionInterface) {
  325. gte, err := ServerVersionGTE(v, c)
  326. if err != nil {
  327. Failf("Failed to get server version: %v", err)
  328. }
  329. if !gte {
  330. Skipf("Not supported for server versions before %q", v)
  331. }
  332. }
  333. // Detects whether the federation namespace exists in the underlying cluster
  334. func SkipUnlessFederated(c *client.Client) {
  335. federationNS := os.Getenv("FEDERATION_NAMESPACE")
  336. if federationNS == "" {
  337. federationNS = "federation"
  338. }
  339. _, err := c.Namespaces().Get(federationNS)
  340. if err != nil {
  341. if apierrs.IsNotFound(err) {
  342. Skipf("Could not find federation namespace %s: skipping federated test", federationNS)
  343. } else {
  344. Failf("Unexpected error getting namespace: %v", err)
  345. }
  346. }
  347. }
  348. // ProvidersWithSSH are those providers where each node is accessible with SSH
  349. var ProvidersWithSSH = []string{"gce", "gke", "aws"}
  350. // providersWithMasterSSH are those providers where master node is accessible with SSH
  351. var providersWithMasterSSH = []string{"gce", "gke", "kubemark", "aws"}
  352. type podCondition func(pod *api.Pod) (bool, error)
  353. // podReady returns whether pod has a condition of Ready with a status of true.
  354. // TODO: should be replaced with api.IsPodReady
  355. func podReady(pod *api.Pod) bool {
  356. for _, cond := range pod.Status.Conditions {
  357. if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
  358. return true
  359. }
  360. }
  361. return false
  362. }
  363. // logPodStates logs basic info of provided pods for debugging.
  364. func logPodStates(pods []api.Pod) {
  365. // Find maximum widths for pod, node, and phase strings for column printing.
  366. maxPodW, maxNodeW, maxPhaseW, maxGraceW := len("POD"), len("NODE"), len("PHASE"), len("GRACE")
  367. for i := range pods {
  368. pod := &pods[i]
  369. if len(pod.ObjectMeta.Name) > maxPodW {
  370. maxPodW = len(pod.ObjectMeta.Name)
  371. }
  372. if len(pod.Spec.NodeName) > maxNodeW {
  373. maxNodeW = len(pod.Spec.NodeName)
  374. }
  375. if len(pod.Status.Phase) > maxPhaseW {
  376. maxPhaseW = len(pod.Status.Phase)
  377. }
  378. }
  379. // Increase widths by one to separate by a single space.
  380. maxPodW++
  381. maxNodeW++
  382. maxPhaseW++
  383. maxGraceW++
  384. // Log pod info. * does space padding, - makes them left-aligned.
  385. Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
  386. maxPodW, "POD", maxNodeW, "NODE", maxPhaseW, "PHASE", maxGraceW, "GRACE", "CONDITIONS")
  387. for _, pod := range pods {
  388. grace := ""
  389. if pod.DeletionGracePeriodSeconds != nil {
  390. grace = fmt.Sprintf("%ds", *pod.DeletionGracePeriodSeconds)
  391. }
  392. Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
  393. maxPodW, pod.ObjectMeta.Name, maxNodeW, pod.Spec.NodeName, maxPhaseW, pod.Status.Phase, maxGraceW, grace, pod.Status.Conditions)
  394. }
  395. Logf("") // Final empty line helps for readability.
  396. }
  397. // PodRunningReady checks whether pod p's phase is running and it has a ready
  398. // condition of status true.
  399. func PodRunningReady(p *api.Pod) (bool, error) {
  400. // Check the phase is running.
  401. if p.Status.Phase != api.PodRunning {
  402. return false, fmt.Errorf("want pod '%s' on '%s' to be '%v' but was '%v'",
  403. p.ObjectMeta.Name, p.Spec.NodeName, api.PodRunning, p.Status.Phase)
  404. }
  405. // Check the ready condition is true.
  406. if !podReady(p) {
  407. return false, fmt.Errorf("pod '%s' on '%s' didn't have condition {%v %v}; conditions: %v",
  408. p.ObjectMeta.Name, p.Spec.NodeName, api.PodReady, api.ConditionTrue, p.Status.Conditions)
  409. }
  410. return true, nil
  411. }
  412. func PodRunningReadyOrSucceeded(p *api.Pod) (bool, error) {
  413. // Check if the phase is succeeded.
  414. if p.Status.Phase == api.PodSucceeded {
  415. return true, nil
  416. }
  417. return PodRunningReady(p)
  418. }
  419. // PodNotReady checks whether pod p has a ready condition of status false.
  420. func PodNotReady(p *api.Pod) (bool, error) {
  421. // Check the ready condition is false.
  422. if podReady(p) {
  423. return false, fmt.Errorf("pod '%s' on '%s' didn't have condition {%v %v}; conditions: %v",
  424. p.ObjectMeta.Name, p.Spec.NodeName, api.PodReady, api.ConditionFalse, p.Status.Conditions)
  425. }
  426. return true, nil
  427. }
  428. // check if a Pod is controlled by a Replication Controller in the List
  429. func hasReplicationControllersForPod(rcs *api.ReplicationControllerList, pod api.Pod) bool {
  430. for _, rc := range rcs.Items {
  431. selector := labels.SelectorFromSet(rc.Spec.Selector)
  432. if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
  433. return true
  434. }
  435. }
  436. return false
  437. }
  438. // WaitForPodsSuccess waits until all pods matching the given label selector enter
  439. // the Succeeded phase. The caller is expected to only invoke this method once the
  440. // pods have been created.
  441. func WaitForPodsSuccess(c *client.Client, ns string, successPodLabels map[string]string, timeout time.Duration) error {
  442. successPodSelector := labels.SelectorFromSet(successPodLabels)
  443. start, badPods := time.Now(), []api.Pod{}
  444. if wait.PollImmediate(30*time.Second, timeout, func() (bool, error) {
  445. podList, err := c.Pods(ns).List(api.ListOptions{LabelSelector: successPodSelector})
  446. if err != nil {
  447. Logf("Error getting pods in namespace %q: %v", ns, err)
  448. return false, nil
  449. }
  450. if len(podList.Items) == 0 {
  451. Logf("Waiting for pods to enter Success, but no pods in %q match label %v", ns, successPodLabels)
  452. return true, nil
  453. }
  454. badPods = []api.Pod{}
  455. for _, pod := range podList.Items {
  456. if pod.Status.Phase != api.PodSucceeded {
  457. badPods = append(badPods, pod)
  458. }
  459. }
  460. successPods := len(podList.Items) - len(badPods)
  461. Logf("%d / %d pods in namespace %q are in Success state (%d seconds elapsed)",
  462. successPods, len(podList.Items), ns, int(time.Since(start).Seconds()))
  463. if len(badPods) == 0 {
  464. return true, nil
  465. }
  466. return false, nil
  467. }) != nil {
  468. logPodStates(badPods)
  469. LogPodsWithLabels(c, ns, successPodLabels)
  470. return fmt.Errorf("Not all pods in namespace %q are successful within %v", ns, timeout)
  471. }
  472. return nil
  473. }
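// A hedged usage sketch (assumes a *client.Client named c): wait for the
// image-puller pods labelled with ImagePullerLabels (defined above) to finish
// before the suite starts; the namespace and timeout choices are illustrative.
//
//	if err := WaitForPodsSuccess(c, api.NamespaceSystem, ImagePullerLabels, PodListTimeout); err != nil {
//		Logf("image puller pods did not succeed: %v", err)
//	}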
  474. // WaitForPodsRunningReady waits up to timeout to ensure that all pods in
  475. // namespace ns are either running and ready, or failed but controlled by a
  476. // replication controller. Also, it ensures that at least minPods are running
  477. // and ready. It has separate behavior from other 'wait for' pods functions in
  478. // that it requests the list of pods on every iteration. This is useful, for
  479. // example, in cluster startup, because the number of pods increases while
  480. // waiting.
  481. // If ignoreLabels is not empty, pods matching this selector are ignored and
  482. // this function waits for minPods to enter Running/Ready and for all pods
  483. // matching ignoreLabels to enter Success phase. Otherwise an error is returned
  484. // even if there are minPods pods, some of which are in Running/Ready
  485. // and some in Success. This is to allow the client to decide if "Success"
  486. // means "Ready" or not.
  487. func WaitForPodsRunningReady(c *client.Client, ns string, minPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
  488. ignoreSelector := labels.SelectorFromSet(ignoreLabels)
  489. start := time.Now()
  490. Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
  491. timeout, minPods, ns)
  492. wg := sync.WaitGroup{}
  493. wg.Add(1)
  494. var waitForSuccessError error
  495. go func() {
  496. waitForSuccessError = WaitForPodsSuccess(c, ns, ignoreLabels, timeout)
  497. wg.Done()
  498. }()
  499. if wait.PollImmediate(Poll, timeout, func() (bool, error) {
  500. // We get the new list of pods and replication controllers in every
  501. // iteration because more pods come online during startup and we want to
  502. // ensure they are also checked.
  503. rcList, err := c.ReplicationControllers(ns).List(api.ListOptions{})
  504. if err != nil {
  505. Logf("Error getting replication controllers in namespace '%s': %v", ns, err)
  506. return false, nil
  507. }
  508. replicas := int32(0)
  509. for _, rc := range rcList.Items {
  510. replicas += rc.Spec.Replicas
  511. }
  512. podList, err := c.Pods(ns).List(api.ListOptions{})
  513. if err != nil {
  514. Logf("Error getting pods in namespace '%s': %v", ns, err)
  515. return false, nil
  516. }
  517. nOk, replicaOk, badPods := int32(0), int32(0), []api.Pod{}
  518. for _, pod := range podList.Items {
  519. if len(ignoreLabels) != 0 && ignoreSelector.Matches(labels.Set(pod.Labels)) {
  520. Logf("%v in state %v, ignoring", pod.Name, pod.Status.Phase)
  521. continue
  522. }
  523. if res, err := PodRunningReady(&pod); res && err == nil {
  524. nOk++
  525. if hasReplicationControllersForPod(rcList, pod) {
  526. replicaOk++
  527. }
  528. } else {
  529. if pod.Status.Phase != api.PodFailed {
  530. Logf("The status of Pod %s is %s, waiting for it to be either Running or Failed", pod.ObjectMeta.Name, pod.Status.Phase)
  531. badPods = append(badPods, pod)
  532. } else if !hasReplicationControllersForPod(rcList, pod) {
  533. Logf("Pod %s is Failed, but it's not controlled by a ReplicationController", pod.ObjectMeta.Name)
  534. badPods = append(badPods, pod)
  535. }
  536. // ignore failed pods that are controlled by a replication controller
  537. }
  538. }
  539. Logf("%d / %d pods in namespace '%s' are running and ready (%d seconds elapsed)",
  540. nOk, len(podList.Items), ns, int(time.Since(start).Seconds()))
  541. Logf("expected %d pod replicas in namespace '%s', %d are Running and Ready.", replicas, ns, replicaOk)
  542. if replicaOk == replicas && nOk >= minPods && len(badPods) == 0 {
  543. return true, nil
  544. }
  545. logPodStates(badPods)
  546. return false, nil
  547. }) != nil {
  548. return fmt.Errorf("Not all pods in namespace '%s' running and ready within %v", ns, timeout)
  549. }
  550. wg.Wait()
  551. if waitForSuccessError != nil {
  552. return waitForSuccessError
  553. }
  554. return nil
  555. }
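// A hedged usage sketch: require at least one running and ready pod in
// kube-system while ignoring the image-puller pods (the numbers and timeout
// are illustrative).
//
//	err := WaitForPodsRunningReady(c, api.NamespaceSystem, int32(1), PodReadyBeforeTimeout, ImagePullerLabels)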
  556. func podFromManifest(filename string) (*api.Pod, error) {
  557. var pod api.Pod
  558. Logf("Parsing pod from %v", filename)
  559. data := ReadOrDie(filename)
  560. json, err := utilyaml.ToJSON(data)
  561. if err != nil {
  562. return nil, err
  563. }
  564. if err := runtime.DecodeInto(api.Codecs.UniversalDecoder(), json, &pod); err != nil {
  565. return nil, err
  566. }
  567. return &pod, nil
  568. }
  569. // Run a test container to try and contact the Kubernetes api-server from a pod, wait for it
  570. // to flip to Ready, log its output and delete it.
  571. func RunKubernetesServiceTestContainer(c *client.Client, ns string) {
  572. path := "test/images/clusterapi-tester/pod.yaml"
  573. p, err := podFromManifest(path)
  574. if err != nil {
  575. Logf("Failed to parse clusterapi-tester from manifest %v: %v", path, err)
  576. return
  577. }
  578. p.Namespace = ns
  579. if _, err := c.Pods(ns).Create(p); err != nil {
  580. Logf("Failed to create %v: %v", p.Name, err)
  581. return
  582. }
  583. defer func() {
  584. if err := c.Pods(ns).Delete(p.Name, nil); err != nil {
  585. Logf("Failed to delete pod %v: %v", p.Name, err)
  586. }
  587. }()
  588. timeout := 5 * time.Minute
  589. if err := waitForPodCondition(c, ns, p.Name, "clusterapi-tester", timeout, PodRunningReady); err != nil {
  590. Logf("Pod %v took longer than %v to enter running/ready: %v", p.Name, timeout, err)
  591. return
  592. }
  593. logs, err := GetPodLogs(c, ns, p.Name, p.Spec.Containers[0].Name)
  594. if err != nil {
  595. Logf("Failed to retrieve logs from %v: %v", p.Name, err)
  596. } else {
  597. Logf("Output of clusterapi-tester:\n%v", logs)
  598. }
  599. }
  600. func kubectlLogPod(c *client.Client, pod api.Pod, containerNameSubstr string) {
  601. for _, container := range pod.Spec.Containers {
  602. if strings.Contains(container.Name, containerNameSubstr) {
  603. // Contains() matches all strings if substr is empty
  604. logs, err := GetPodLogs(c, pod.Namespace, pod.Name, container.Name)
  605. if err != nil {
  606. logs, err = getPreviousPodLogs(c, pod.Namespace, pod.Name, container.Name)
  607. if err != nil {
  608. Logf("Failed to get logs of pod %v, container %v, err: %v", pod.Name, container.Name, err)
  609. }
  610. }
  611. By(fmt.Sprintf("Logs of %v/%v:%v on node %v", pod.Namespace, pod.Name, container.Name, pod.Spec.NodeName))
  612. Logf("%s : STARTLOG\n%s\nENDLOG for container %v:%v:%v", containerNameSubstr, logs, pod.Namespace, pod.Name, container.Name)
  613. }
  614. }
  615. }
  616. func LogFailedContainers(c *client.Client, ns string) {
  617. podList, err := c.Pods(ns).List(api.ListOptions{})
  618. if err != nil {
  619. Logf("Error getting pods in namespace '%s': %v", ns, err)
  620. return
  621. }
  622. Logf("Running kubectl logs on non-ready containers in %v", ns)
  623. for _, pod := range podList.Items {
  624. if res, err := PodRunningReady(&pod); !res || err != nil {
  625. kubectlLogPod(c, pod, "")
  626. }
  627. }
  628. }
  629. func LogPodsWithLabels(c *client.Client, ns string, match map[string]string) {
  630. podList, err := c.Pods(ns).List(api.ListOptions{LabelSelector: labels.SelectorFromSet(match)})
  631. if err != nil {
  632. Logf("Error getting pods in namespace %q: %v", ns, err)
  633. return
  634. }
  635. Logf("Running kubectl logs on pods with labels %v in %v", match, ns)
  636. for _, pod := range podList.Items {
  637. kubectlLogPod(c, pod, "")
  638. }
  639. }
  640. func LogContainersInPodsWithLabels(c *client.Client, ns string, match map[string]string, containerSubstr string) {
  641. podList, err := c.Pods(ns).List(api.ListOptions{LabelSelector: labels.SelectorFromSet(match)})
  642. if err != nil {
  643. Logf("Error getting pods in namespace %q: %v", ns, err)
  644. return
  645. }
  646. for _, pod := range podList.Items {
  647. kubectlLogPod(c, pod, containerSubstr)
  648. }
  649. }
  650. // DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
  651. // Filter is by simple strings.Contains; first skip filter, then delete filter.
  652. // Returns the list of deleted namespaces or an error.
  653. func DeleteNamespaces(c *client.Client, deleteFilter, skipFilter []string) ([]string, error) {
  654. By("Deleting namespaces")
  655. nsList, err := c.Namespaces().List(api.ListOptions{})
  656. Expect(err).NotTo(HaveOccurred())
  657. var deleted []string
  658. var wg sync.WaitGroup
  659. OUTER:
  660. for _, item := range nsList.Items {
  661. if skipFilter != nil {
  662. for _, pattern := range skipFilter {
  663. if strings.Contains(item.Name, pattern) {
  664. continue OUTER
  665. }
  666. }
  667. }
  668. if deleteFilter != nil {
  669. var shouldDelete bool
  670. for _, pattern := range deleteFilter {
  671. if strings.Contains(item.Name, pattern) {
  672. shouldDelete = true
  673. break
  674. }
  675. }
  676. if !shouldDelete {
  677. continue OUTER
  678. }
  679. }
  680. wg.Add(1)
  681. deleted = append(deleted, item.Name)
  682. go func(nsName string) {
  683. defer wg.Done()
  684. defer GinkgoRecover()
  685. Expect(c.Namespaces().Delete(nsName)).To(Succeed())
  686. Logf("namespace : %v api call to delete is complete ", nsName)
  687. }(item.Name)
  688. }
  689. wg.Wait()
  690. return deleted, nil
  691. }
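// A hedged usage sketch: delete leftover e2e namespaces except the one still
// in use, then wait for them to vanish (the filter strings are illustrative).
//
//	deleted, err := DeleteNamespaces(c, []string{"e2e-tests-"}, []string{"e2e-tests-current"})
//	if err == nil {
//		err = WaitForNamespacesDeleted(c, deleted, NamespaceCleanupTimeout)
//	}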
  692. func WaitForNamespacesDeleted(c *client.Client, namespaces []string, timeout time.Duration) error {
  693. By("Waiting for namespaces to vanish")
  694. nsMap := map[string]bool{}
  695. for _, ns := range namespaces {
  696. nsMap[ns] = true
  697. }
  698. // Now poll until all namespaces have been eradicated.
  699. return wait.Poll(2*time.Second, timeout,
  700. func() (bool, error) {
  701. nsList, err := c.Namespaces().List(api.ListOptions{})
  702. if err != nil {
  703. return false, err
  704. }
  705. for _, item := range nsList.Items {
  706. if _, ok := nsMap[item.Name]; ok {
  707. return false, nil
  708. }
  709. }
  710. return true, nil
  711. })
  712. }
  713. func waitForServiceAccountInNamespace(c *client.Client, ns, serviceAccountName string, timeout time.Duration) error {
  714. w, err := c.ServiceAccounts(ns).Watch(api.SingleObject(api.ObjectMeta{Name: serviceAccountName}))
  715. if err != nil {
  716. return err
  717. }
  718. _, err = watch.Until(timeout, w, client.ServiceAccountHasSecrets)
  719. return err
  720. }
  721. func waitForPodCondition(c *client.Client, ns, podName, desc string, timeout time.Duration, condition podCondition) error {
  722. Logf("Waiting up to %[1]v for pod %[2]s status to be %[3]s", timeout, podName, desc)
  723. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  724. pod, err := c.Pods(ns).Get(podName)
  725. if err != nil {
  726. if apierrs.IsNotFound(err) {
  727. Logf("Pod %q in namespace %q disappeared. Error: %v", podName, ns, err)
  728. return err
  729. }
  730. // Aligning this text makes it much more readable
  731. Logf("Get pod %[1]s in namespace '%[2]s' failed, ignoring for %[3]v. Error: %[4]v",
  732. podName, ns, Poll, err)
  733. continue
  734. }
  735. done, err := condition(pod)
  736. if done {
  737. return err
  738. }
  739. Logf("Waiting for pod %[1]s in namespace '%[2]s' status to be '%[3]s'"+
  740. "(found phase: %[4]q, readiness: %[5]t) (%[6]v elapsed)",
  741. podName, ns, desc, pod.Status.Phase, podReady(pod), time.Since(start))
  742. }
  743. return fmt.Errorf("gave up waiting for pod '%s' to be '%s' after %v", podName, desc, timeout)
  744. }
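// A hedged usage sketch with an inline condition (waitForPodCondition is
// unexported, so this only applies inside the framework package; c, ns and
// podName are assumed to exist in the caller):
//
//	err := waitForPodCondition(c, ns, podName, "running", PodStartTimeout,
//		func(pod *api.Pod) (bool, error) {
//			return pod.Status.Phase == api.PodRunning, nil
//		})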
  745. // WaitForMatchPodsCondition finds matching pods based on the input ListOptions,
  746. // then waits and checks that all matching pods satisfy the given podCondition.
  747. func WaitForMatchPodsCondition(c *client.Client, opts api.ListOptions, desc string, timeout time.Duration, condition podCondition) error {
  748. Logf("Waiting up to %v for matching pods' status to be %s", timeout, desc)
  749. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  750. pods, err := c.Pods(api.NamespaceAll).List(opts)
  751. if err != nil {
  752. return err
  753. }
  754. conditionNotMatch := []string{}
  755. for _, pod := range pods.Items {
  756. done, err := condition(&pod)
  757. if done && err != nil {
  758. return fmt.Errorf("Unexpected error: %v", err)
  759. }
  760. if !done {
  761. conditionNotMatch = append(conditionNotMatch, format.Pod(&pod))
  762. }
  763. }
  764. if len(conditionNotMatch) <= 0 {
  765. return err
  766. }
  767. Logf("%d pods are not %s", len(conditionNotMatch), desc)
  768. }
  769. return fmt.Errorf("gave up waiting for matching pods to be '%s' after %v", desc, timeout)
  770. }
  771. // WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned.
  772. // The default service account is what is associated with pods when they do not specify a service account;
  773. // as a result, pods cannot be created in a namespace until the service account is provisioned.
  774. func WaitForDefaultServiceAccountInNamespace(c *client.Client, namespace string) error {
  775. return waitForServiceAccountInNamespace(c, namespace, "default", ServiceAccountProvisionTimeout)
  776. }
  777. // WaitForFederationApiserverReady waits for the federation apiserver to be ready.
  778. // It tests the readiness by sending a GET request and expecting a non error response.
  779. func WaitForFederationApiserverReady(c *federation_internalclientset.Clientset) error {
  780. return wait.PollImmediate(time.Second, 1*time.Minute, func() (bool, error) {
  781. _, err := c.Federation().Clusters().List(api.ListOptions{})
  782. if err != nil {
  783. return false, nil
  784. }
  785. return true, nil
  786. })
  787. }
  788. // WaitForPersistentVolumePhase waits for a PersistentVolume to be in a specific phase or until timeout occurs, whichever comes first.
  789. func WaitForPersistentVolumePhase(phase api.PersistentVolumePhase, c *client.Client, pvName string, Poll, timeout time.Duration) error {
  790. Logf("Waiting up to %v for PersistentVolume %s to have phase %s", timeout, pvName, phase)
  791. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  792. pv, err := c.PersistentVolumes().Get(pvName)
  793. if err != nil {
  794. Logf("Get persistent volume %s failed, ignoring for %v: %v", pvName, Poll, err)
  795. continue
  796. } else {
  797. if pv.Status.Phase == phase {
  798. Logf("PersistentVolume %s found and phase=%s (%v)", pvName, phase, time.Since(start))
  799. return nil
  800. } else {
  801. Logf("PersistentVolume %s found but phase is %s instead of %s.", pvName, pv.Status.Phase, phase)
  802. }
  803. }
  804. }
  805. return fmt.Errorf("PersistentVolume %s not in phase %s within %v", pvName, phase, timeout)
  806. }
  807. // WaitForPersistentVolumeDeleted waits for a PersistentVolume to get deleted or until timeout occurs, whichever comes first.
  808. func WaitForPersistentVolumeDeleted(c *client.Client, pvName string, Poll, timeout time.Duration) error {
  809. Logf("Waiting up to %v for PersistentVolume %s to get deleted", timeout, pvName)
  810. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  811. pv, err := c.PersistentVolumes().Get(pvName)
  812. if err == nil {
  813. Logf("PersistentVolume %s found and phase=%s (%v)", pvName, pv.Status.Phase, time.Since(start))
  814. continue
  815. } else {
  816. if apierrs.IsNotFound(err) {
  817. Logf("PersistentVolume %s was removed", pvName)
  818. return nil
  819. } else {
  820. Logf("Get persistent volume %s failed, ignoring for %v: %v", pvName, Poll, err)
  821. }
  822. }
  823. }
  824. return fmt.Errorf("PersistentVolume %s still exists within %v", pvName, timeout)
  825. }
  826. // WaitForPersistentVolumeClaimPhase waits for a PersistentVolumeClaim to be in a specific phase or until timeout occurs, whichever comes first.
  827. func WaitForPersistentVolumeClaimPhase(phase api.PersistentVolumeClaimPhase, c *client.Client, ns string, pvcName string, Poll, timeout time.Duration) error {
  828. Logf("Waiting up to %v for PersistentVolumeClaim %s to have phase %s", timeout, pvcName, phase)
  829. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  830. pvc, err := c.PersistentVolumeClaims(ns).Get(pvcName)
  831. if err != nil {
  832. Logf("Get persistent volume claim %s failed, ignoring for %v: %v", pvcName, Poll, err)
  833. continue
  834. } else {
  835. if pvc.Status.Phase == phase {
  836. Logf("PersistentVolumeClaim %s found and phase=%s (%v)", pvcName, phase, time.Since(start))
  837. return nil
  838. } else {
  839. Logf("PersistentVolumeClaim %s found but phase is %s instead of %s.", pvcName, pvc.Status.Phase, phase)
  840. }
  841. }
  842. }
  843. return fmt.Errorf("PersistentVolumeClaim %s not in phase %s within %v", pvcName, phase, timeout)
  844. }
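// A hedged usage sketch: wait for a claim to become Bound, polling every two
// seconds for up to the provisioning timeout defined above (c, ns and pvcName
// are assumed to exist in the caller).
//
//	err := WaitForPersistentVolumeClaimPhase(api.ClaimBound, c, ns, pvcName, 2*time.Second, ClaimProvisionTimeout)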
  845. // CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name.
  846. // Please see NewFramework instead of using this directly.
  847. func CreateTestingNS(baseName string, c *client.Client, labels map[string]string) (*api.Namespace, error) {
  848. if labels == nil {
  849. labels = map[string]string{}
  850. }
  851. labels["e2e-run"] = string(RunId)
  852. namespaceObj := &api.Namespace{
  853. ObjectMeta: api.ObjectMeta{
  854. GenerateName: fmt.Sprintf("e2e-tests-%v-", baseName),
  855. Namespace: "",
  856. Labels: labels,
  857. },
  858. Status: api.NamespaceStatus{},
  859. }
  860. // Be robust about making the namespace creation call.
  861. var got *api.Namespace
  862. if err := wait.PollImmediate(Poll, SingleCallTimeout, func() (bool, error) {
  863. var err error
  864. got, err = c.Namespaces().Create(namespaceObj)
  865. if err != nil {
  866. Logf("Unexpected error while creating namespace: %v", err)
  867. return false, nil
  868. }
  869. return true, nil
  870. }); err != nil {
  871. return nil, err
  872. }
  873. if TestContext.VerifyServiceAccount {
  874. if err := WaitForDefaultServiceAccountInNamespace(c, got.Name); err != nil {
  875. return nil, err
  876. }
  877. }
  878. return got, nil
  879. }
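// A hedged usage sketch (tests normally obtain a namespace through NewFramework
// rather than calling this directly; the base name and label are illustrative):
//
//	ns, err := CreateTestingNS("density", c, map[string]string{"suite": "scalability"})
//	if err == nil {
//		defer deleteNS(c, ns.Name, 5*time.Minute)
//	}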
  880. // CheckTestingNSDeletedExcept checks whether all e2e based existing namespaces are in the Terminating state
  881. // and waits until they are finally deleted. It ignores the namespace named by skip.
  882. func CheckTestingNSDeletedExcept(c *client.Client, skip string) error {
  883. // TODO: Since we don't have support for bulk resource deletion in the API,
  884. // while deleting a namespace we are deleting all objects from that namespace
  885. // one by one (one deletion == one API call). This basically exposes us to
  886. // throttling - currently controller-manager has a limit of max 20 QPS.
  887. // Once #10217 is implemented and used in namespace-controller, deleting all
  888. // object from a given namespace should be much faster and we will be able
  889. // to lower this timeout.
  890. // However, now Density test is producing ~26000 events and Load capacity test
  891. // is producing ~35000 events, thus assuming there are no other requests it will
  892. // take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
  893. // minutes to avoid any timeouts here.
  894. timeout := 60 * time.Minute
  895. Logf("Waiting for terminating namespaces to be deleted...")
  896. for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
  897. namespaces, err := c.Namespaces().List(api.ListOptions{})
  898. if err != nil {
  899. Logf("Listing namespaces failed: %v", err)
  900. continue
  901. }
  902. terminating := 0
  903. for _, ns := range namespaces.Items {
  904. if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
  905. if ns.Status.Phase == api.NamespaceActive {
  906. return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
  907. }
  908. terminating++
  909. }
  910. }
  911. if terminating == 0 {
  912. return nil
  913. }
  914. }
  915. return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
  916. }
  917. // deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
  918. // whether there are any pods remaining in a non-terminating state.
  919. func deleteNS(c *client.Client, namespace string, timeout time.Duration) error {
  920. if err := c.Namespaces().Delete(namespace); err != nil {
  921. return err
  922. }
  923. err := wait.PollImmediate(5*time.Second, timeout, func() (bool, error) {
  924. if _, err := c.Namespaces().Get(namespace); err != nil {
  925. if apierrs.IsNotFound(err) {
  926. return true, nil
  927. }
  928. Logf("Error while waiting for namespace to be terminated: %v", err)
  929. return false, nil
  930. }
  931. return false, nil
  932. })
  933. // check for pods that were not deleted
  934. remaining := []string{}
  935. remainingPods := []api.Pod{}
  936. missingTimestamp := false
  937. if pods, perr := c.Pods(namespace).List(api.ListOptions{}); perr == nil {
  938. for _, pod := range pods.Items {
  939. Logf("Pod %s %s on node %s remains, has deletion timestamp %s", namespace, pod.Name, pod.Spec.NodeName, pod.DeletionTimestamp)
  940. remaining = append(remaining, fmt.Sprintf("%s{Reason=%s}", pod.Name, pod.Status.Reason))
  941. remainingPods = append(remainingPods, pod)
  942. if pod.DeletionTimestamp == nil {
  943. missingTimestamp = true
  944. }
  945. }
  946. }
  947. // log pod status
  948. if len(remainingPods) > 0 {
  949. logPodStates(remainingPods)
  950. }
  951. // a timeout occurred
  952. if err != nil {
  953. if missingTimestamp {
  954. return fmt.Errorf("namespace %s was not deleted within limit: %v, some pods were not marked with a deletion timestamp, pods remaining: %v", namespace, err, remaining)
  955. }
  956. return fmt.Errorf("namespace %s was not deleted within limit: %v, pods remaining: %v", namespace, err, remaining)
  957. }
  958. // pods were not deleted but the namespace was deleted
  959. if len(remaining) > 0 {
  960. return fmt.Errorf("pods remained within namespace %s after deletion: %v", namespace, remaining)
  961. }
  962. return nil
  963. }
  964. func ContainerInitInvariant(older, newer runtime.Object) error {
  965. oldPod := older.(*api.Pod)
  966. newPod := newer.(*api.Pod)
  967. if len(oldPod.Spec.InitContainers) == 0 {
  968. return nil
  969. }
  970. if len(oldPod.Spec.InitContainers) != len(newPod.Spec.InitContainers) {
  971. return fmt.Errorf("init container list changed")
  972. }
  973. if oldPod.UID != newPod.UID {
  974. return fmt.Errorf("two different pods exist in the condition: %s vs %s", oldPod.UID, newPod.UID)
  975. }
  976. if err := initContainersInvariants(oldPod); err != nil {
  977. return err
  978. }
  979. if err := initContainersInvariants(newPod); err != nil {
  980. return err
  981. }
  982. oldInit, _, _ := podInitialized(oldPod)
  983. newInit, _, _ := podInitialized(newPod)
  984. if oldInit && !newInit {
  985. // TODO: we may in the future enable resetting PodInitialized = false if the kubelet needs to restart it
  986. // from scratch
  987. return fmt.Errorf("pod cannot be initialized and then regress to not being initialized")
  988. }
  989. return nil
  990. }
  991. func podInitialized(pod *api.Pod) (ok bool, failed bool, err error) {
  992. allInit := true
  993. initFailed := false
  994. for _, s := range pod.Status.InitContainerStatuses {
  995. switch {
  996. case initFailed && s.State.Waiting == nil:
  997. return allInit, initFailed, fmt.Errorf("container %s is after a failed container but isn't waiting", s.Name)
  998. case !allInit && s.State.Waiting == nil:
  999. return allInit, initFailed, fmt.Errorf("container %s is after an initializing container but isn't waiting", s.Name)
  1000. case s.State.Terminated == nil:
  1001. allInit = false
  1002. case s.State.Terminated.ExitCode != 0:
  1003. allInit = false
  1004. initFailed = true
  1005. case !s.Ready:
  1006. return allInit, initFailed, fmt.Errorf("container %s initialized but isn't marked as ready", s.Name)
  1007. }
  1008. }
  1009. return allInit, initFailed, nil
  1010. }
  1011. func initContainersInvariants(pod *api.Pod) error {
  1012. allInit, initFailed, err := podInitialized(pod)
  1013. if err != nil {
  1014. return err
  1015. }
  1016. if !allInit || initFailed {
  1017. for _, s := range pod.Status.ContainerStatuses {
  1018. if s.State.Waiting == nil || s.RestartCount != 0 {
  1019. return fmt.Errorf("container %s is not waiting but initialization not complete", s.Name)
  1020. }
  1021. if s.State.Waiting.Reason != "PodInitializing" {
  1022. return fmt.Errorf("container %s should have reason PodInitializing: %s", s.Name, s.State.Waiting.Reason)
  1023. }
  1024. }
  1025. }
  1026. _, c := api.GetPodCondition(&pod.Status, api.PodInitialized)
  1027. if c == nil {
  1028. return fmt.Errorf("pod does not have initialized condition")
  1029. }
  1030. if c.LastTransitionTime.IsZero() {
  1031. return fmt.Errorf("PodInitialized condition should always have a transition time")
  1032. }
  1033. switch {
  1034. case c.Status == api.ConditionUnknown:
  1035. return fmt.Errorf("PodInitialized condition should never be Unknown")
  1036. case c.Status == api.ConditionTrue && (initFailed || !allInit):
  1037. return fmt.Errorf("PodInitialized condition was True but not all containers initialized")
  1038. case c.Status == api.ConditionFalse && (!initFailed && allInit):
  1039. return fmt.Errorf("PodInitialized condition was False but all containers initialized")
  1040. }
  1041. return nil
  1042. }
  1043. type InvariantFunc func(older, newer runtime.Object) error
  1044. func CheckInvariants(events []watch.Event, fns ...InvariantFunc) error {
  1045. errs := sets.NewString()
  1046. for i := range events {
  1047. j := i + 1
  1048. if j >= len(events) {
  1049. continue
  1050. }
  1051. for _, fn := range fns {
  1052. if err := fn(events[i].Object, events[j].Object); err != nil {
  1053. errs.Insert(err.Error())
  1054. }
  1055. }
  1056. }
  1057. if errs.Len() > 0 {
  1058. return fmt.Errorf("invariants violated:\n* %s", strings.Join(errs.List(), "\n* "))
  1059. }
  1060. return nil
  1061. }
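// A hedged usage sketch: collect the events from a pod watch (not shown) and
// assert the init-container invariants over each adjacent pair of events.
//
//	var events []watch.Event
//	// ... append events observed while the pod starts ...
//	if err := CheckInvariants(events, ContainerInitInvariant); err != nil {
//		Failf("%v", err)
//	}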
  1062. // Waits default amount of time (PodStartTimeout) for the specified pod to become running.
  1063. // Returns an error if timeout occurs first, or pod goes into failed state.
  1064. func WaitForPodRunningInNamespace(c *client.Client, pod *api.Pod) error {
  1065. // this short-circuit is needed for cases when we pass a list of pods instead
  1066. // of a newly created pod (e.g. VerifyPods), which means we are getting an already
  1067. // running pod for which waiting does not make sense and will always fail
  1068. if pod.Status.Phase == api.PodRunning {
  1069. return nil
  1070. }
  1071. return waitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, pod.ResourceVersion, PodStartTimeout)
  1072. }
  1073. // Waits default amount of time (PodStartTimeout) for the specified pod to become running.
  1074. // Returns an error if timeout occurs first, or pod goes into failed state.
  1075. func WaitForPodNameRunningInNamespace(c *client.Client, podName, namespace string) error {
  1076. return waitTimeoutForPodRunningInNamespace(c, podName, namespace, "", PodStartTimeout)
  1077. }
  1078. // Waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running.
  1079. // The resourceVersion is used when watching object changes; it tells from which point on we care
  1080. // about changes to the pod. Returns an error if timeout occurs first, or pod goes into failed state.
  1081. func waitForPodRunningInNamespaceSlow(c *client.Client, podName, namespace, resourceVersion string) error {
  1082. return waitTimeoutForPodRunningInNamespace(c, podName, namespace, resourceVersion, slowPodStartTimeout)
  1083. }
  1084. func waitTimeoutForPodRunningInNamespace(c *client.Client, podName, namespace, resourceVersion string, timeout time.Duration) error {
  1085. w, err := c.Pods(namespace).Watch(api.SingleObject(api.ObjectMeta{Name: podName, ResourceVersion: resourceVersion}))
  1086. if err != nil {
  1087. return err
  1088. }
  1089. _, err = watch.Until(timeout, w, client.PodRunning)
  1090. return err
  1091. }
  1092. // Waits default amount of time (podNoLongerRunningTimeout) for the specified pod to stop running.
  1093. // Returns an error if timeout occurs first.
  1094. func WaitForPodNoLongerRunningInNamespace(c *client.Client, podName, namespace, resourceVersion string) error {
  1095. return waitTimeoutForPodNoLongerRunningInNamespace(c, podName, namespace, resourceVersion, podNoLongerRunningTimeout)
  1096. }
  1097. func waitTimeoutForPodNoLongerRunningInNamespace(c *client.Client, podName, namespace, resourceVersion string, timeout time.Duration) error {
  1098. w, err := c.Pods(namespace).Watch(api.SingleObject(api.ObjectMeta{Name: podName, ResourceVersion: resourceVersion}))
  1099. if err != nil {
  1100. return err
  1101. }
  1102. _, err = watch.Until(timeout, w, client.PodCompleted)
  1103. return err
  1104. }
  1105. func waitTimeoutForPodReadyInNamespace(c *client.Client, podName, namespace, resourceVersion string, timeout time.Duration) error {
  1106. w, err := c.Pods(namespace).Watch(api.SingleObject(api.ObjectMeta{Name: podName, ResourceVersion: resourceVersion}))
  1107. if err != nil {
  1108. return err
  1109. }
  1110. _, err = watch.Until(timeout, w, client.PodRunningAndReady)
  1111. return err
  1112. }
  1113. // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state.
  1114. // The resourceVersion is used when watching object changes; it tells from which point on we care
  1115. // about changes to the pod.
  1116. func WaitForPodNotPending(c *client.Client, ns, podName, resourceVersion string) error {
  1117. w, err := c.Pods(ns).Watch(api.SingleObject(api.ObjectMeta{Name: podName, ResourceVersion: resourceVersion}))
  1118. if err != nil {
  1119. return err
  1120. }
  1121. _, err = watch.Until(PodStartTimeout, w, client.PodNotPending)
  1122. return err
  1123. }
  1124. // waitForPodTerminatedInNamespace returns an error if it took too long for the pod
  1125. // to terminate or if the pod terminated with an unexpected reason.
  1126. func waitForPodTerminatedInNamespace(c *client.Client, podName, reason, namespace string) error {
  1127. return waitForPodCondition(c, namespace, podName, "terminated due to deadline exceeded", PodStartTimeout, func(pod *api.Pod) (bool, error) {
  1128. if pod.Status.Phase == api.PodFailed {
  1129. if pod.Status.Reason == reason {
  1130. return true, nil
  1131. } else {
  1132. return true, fmt.Errorf("Expected pod %v in namespace %v to be terminated with reason %v, got reason: %v", podName, namespace, reason, pod.Status.Reason)
  1133. }
  1134. }
  1135. return false, nil
  1136. })
  1137. }
  1138. // waitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long.
  1139. func waitForPodSuccessInNamespaceTimeout(c *client.Client, podName string, contName string, namespace string, timeout time.Duration) error {
  1140. return waitForPodCondition(c, namespace, podName, "success or failure", timeout, func(pod *api.Pod) (bool, error) {
  1141. // Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632
  1142. ci, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, contName)
  1143. if !ok {
  1144. Logf("No Status.Info for container '%s' in pod '%s' yet", contName, podName)
  1145. } else {
  1146. if ci.State.Terminated != nil {
  1147. if ci.State.Terminated.ExitCode == 0 {
  1148. By("Saw pod success")
  1149. return true, nil
  1150. }
  1151. return true, fmt.Errorf("pod '%s' terminated with failure: %+v", podName, ci.State.Terminated)
  1152. }
  1153. Logf("Nil State.Terminated for container '%s' in pod '%s' in namespace '%s' so far", contName, podName, namespace)
  1154. }
  1155. return false, nil
  1156. })
  1157. }
  1158. // WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or ran too long (PodStartTimeout).
  1159. func WaitForPodSuccessInNamespace(c *client.Client, podName string, contName string, namespace string) error {
  1160. return waitForPodSuccessInNamespaceTimeout(c, podName, contName, namespace, PodStartTimeout)
  1161. }
  1162. // WaitForPodSuccessInNamespaceSlow returns nil if the pod reached state success, or an error if it reached failure or ran too long (slowPodStartTimeout).
  1163. func WaitForPodSuccessInNamespaceSlow(c *client.Client, podName string, contName string, namespace string) error {
  1164. return waitForPodSuccessInNamespaceTimeout(c, podName, contName, namespace, slowPodStartTimeout)
  1165. }
  1166. // waitForRCPodOnNode returns the pod from the given replication controller (described by rcName) which is scheduled on the given node.
  1167. // In case of failure or too long waiting time, an error is returned.
  1168. func waitForRCPodOnNode(c *client.Client, ns, rcName, node string) (*api.Pod, error) {
  1169. label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
  1170. var p *api.Pod = nil
  1171. err := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) {
  1172. Logf("Waiting for pod %s to appear on node %s", rcName, node)
  1173. options := api.ListOptions{LabelSelector: label}
  1174. pods, err := c.Pods(ns).List(options)
  1175. if err != nil {
  1176. return false, err
  1177. }
  1178. for _, pod := range pods.Items {
  1179. if pod.Spec.NodeName == node {
  1180. Logf("Pod %s found on node %s", pod.Name, node)
  1181. p = &pod
  1182. return true, nil
  1183. }
  1184. }
  1185. return false, nil
  1186. })
  1187. return p, err
  1188. }
  1189. // WaitForRCToStabilize waits till the RC has a matching generation/replica count between spec and status.
  1190. func WaitForRCToStabilize(c *client.Client, ns, name string, timeout time.Duration) error {
  1191. options := api.ListOptions{FieldSelector: fields.Set{
  1192. "metadata.name": name,
  1193. "metadata.namespace": ns,
  1194. }.AsSelector()}
  1195. w, err := c.ReplicationControllers(ns).Watch(options)
  1196. if err != nil {
  1197. return err
  1198. }
  1199. _, err = watch.Until(timeout, w, func(event watch.Event) (bool, error) {
  1200. switch event.Type {
  1201. case watch.Deleted:
  1202. return false, apierrs.NewNotFound(unversioned.GroupResource{Resource: "replicationcontrollers"}, "")
  1203. }
  1204. switch rc := event.Object.(type) {
  1205. case *api.ReplicationController:
  1206. if rc.Name == name && rc.Namespace == ns &&
  1207. rc.Generation <= rc.Status.ObservedGeneration &&
  1208. rc.Spec.Replicas == rc.Status.Replicas {
  1209. return true, nil
  1210. }
  1211. Logf("Waiting for rc %s to stabilize, generation %v observed generation %v spec.replicas %d status.replicas %d",
  1212. name, rc.Generation, rc.Status.ObservedGeneration, rc.Spec.Replicas, rc.Status.Replicas)
  1213. }
  1214. return false, nil
  1215. })
  1216. return err
  1217. }
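// A hedged usage sketch: after scaling a replication controller, wait for its
// observed generation and status replica count to catch up with the spec
// (the name and timeout are illustrative).
//
//	err := WaitForRCToStabilize(c, ns, "example-rc", 2*time.Minute)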
  1218. func WaitForPodToDisappear(c *client.Client, ns, podName string, label labels.Selector, interval, timeout time.Duration) error {
  1219. return wait.PollImmediate(interval, timeout, func() (bool, error) {
  1220. Logf("Waiting for pod %s to disappear", podName)
  1221. options := api.ListOptions{LabelSelector: label}
  1222. pods, err := c.Pods(ns).List(options)
  1223. if err != nil {
  1224. return false, err
  1225. }
  1226. found := false
  1227. for _, pod := range pods.Items {
  1228. if pod.Name == podName {
  1229. Logf("Pod %s still exists", podName)
  1230. found = true
  1231. }
  1232. }
  1233. if !found {
  1234. Logf("Pod %s no longer exists", podName)
  1235. return true, nil
  1236. }