PageRenderTime 58ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/test/e2e/framework/util.go

https://gitlab.com/unofficial-mirrors/kubernetes
Go | 1381 lines | 1071 code | 136 blank | 174 comment | 305 complexity | cc04f4b455a8a2597759e504f06799fa MD5 | raw file
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package framework
  14. import (
  15. "bytes"
  16. "context"
  17. "encoding/json"
  18. "errors"
  19. "fmt"
  20. "io"
  21. "io/ioutil"
  22. "math/rand"
  23. "net"
  24. "net/http"
  25. "net/url"
  26. "os"
  27. "os/exec"
  28. "path"
  29. "path/filepath"
  30. "regexp"
  31. "sort"
  32. "strconv"
  33. "strings"
  34. "sync"
  35. "syscall"
  36. "text/tabwriter"
  37. "time"
  38. "github.com/golang/glog"
  39. "golang.org/x/crypto/ssh"
  40. "golang.org/x/net/websocket"
  41. "google.golang.org/api/googleapi"
  42. . "github.com/onsi/ginkgo"
  43. . "github.com/onsi/gomega"
  44. gomegatypes "github.com/onsi/gomega/types"
  45. apps "k8s.io/api/apps/v1"
  46. batch "k8s.io/api/batch/v1"
  47. "k8s.io/api/core/v1"
  48. extensions "k8s.io/api/extensions/v1beta1"
  49. apierrs "k8s.io/apimachinery/pkg/api/errors"
  50. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  51. "k8s.io/apimachinery/pkg/fields"
  52. "k8s.io/apimachinery/pkg/labels"
  53. "k8s.io/apimachinery/pkg/runtime"
  54. "k8s.io/apimachinery/pkg/runtime/schema"
  55. "k8s.io/apimachinery/pkg/types"
  56. "k8s.io/apimachinery/pkg/util/sets"
  57. "k8s.io/apimachinery/pkg/util/uuid"
  58. "k8s.io/apimachinery/pkg/util/wait"
  59. utilyaml "k8s.io/apimachinery/pkg/util/yaml"
  60. "k8s.io/apimachinery/pkg/watch"
  61. "k8s.io/client-go/discovery"
  62. "k8s.io/client-go/dynamic"
  63. restclient "k8s.io/client-go/rest"
  64. "k8s.io/client-go/tools/clientcmd"
  65. clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
  66. utilfeature "k8s.io/apiserver/pkg/util/feature"
  67. clientset "k8s.io/client-go/kubernetes"
  68. scaleclient "k8s.io/client-go/scale"
  69. "k8s.io/kubernetes/pkg/api/legacyscheme"
  70. podutil "k8s.io/kubernetes/pkg/api/v1/pod"
  71. appsinternal "k8s.io/kubernetes/pkg/apis/apps"
  72. batchinternal "k8s.io/kubernetes/pkg/apis/batch"
  73. api "k8s.io/kubernetes/pkg/apis/core"
  74. extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions"
  75. "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
  76. "k8s.io/kubernetes/pkg/client/conditions"
  77. "k8s.io/kubernetes/pkg/cloudprovider/providers/azure"
  78. gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
  79. "k8s.io/kubernetes/pkg/controller"
  80. nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
  81. "k8s.io/kubernetes/pkg/features"
  82. "k8s.io/kubernetes/pkg/kubectl"
  83. kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
  84. "k8s.io/kubernetes/pkg/kubelet/util/format"
  85. "k8s.io/kubernetes/pkg/master/ports"
  86. "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
  87. "k8s.io/kubernetes/pkg/scheduler/schedulercache"
  88. sshutil "k8s.io/kubernetes/pkg/ssh"
  89. "k8s.io/kubernetes/pkg/util/system"
  90. taintutils "k8s.io/kubernetes/pkg/util/taints"
  91. utilversion "k8s.io/kubernetes/pkg/util/version"
  92. "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper"
  93. testutils "k8s.io/kubernetes/test/utils"
  94. imageutils "k8s.io/kubernetes/test/utils/image"
  95. uexec "k8s.io/utils/exec"
  96. )
  97. const (
  98. // How long to wait for the pod to be listable
  99. PodListTimeout = time.Minute
  100. // Initial pod start can be delayed O(minutes) by slow docker pulls
  101. // TODO: Make this 30 seconds once #4566 is resolved.
  102. PodStartTimeout = 5 * time.Minute
  103. // Same as `PodStartTimeout` to wait for the pod to be started, but shorter.
  104. // Use it case by case when we are sure pod start will not be delayed
  105. // minutes by slow docker pulls or something else.
  106. PodStartShortTimeout = 1 * time.Minute
  107. // How long to wait for a pod to be deleted
  108. PodDeleteTimeout = 5 * time.Minute
  109. // If there are any orphaned namespaces to clean up, this test is running
  110. // on a long lived cluster. A long wait here is preferably to spurious test
  111. // failures caused by leaked resources from a previous test run.
  112. NamespaceCleanupTimeout = 15 * time.Minute
  113. // Some pods can take much longer to get ready due to volume attach/detach latency.
  114. slowPodStartTimeout = 15 * time.Minute
  115. // How long to wait for a service endpoint to be resolvable.
  116. ServiceStartTimeout = 3 * time.Minute
  117. // How often to Poll pods, nodes and claims.
  118. Poll = 2 * time.Second
  119. pollShortTimeout = 1 * time.Minute
  120. pollLongTimeout = 5 * time.Minute
  121. // service accounts are provisioned after namespace creation
  122. // a service account is required to support pod creation in a namespace as part of admission control
  123. ServiceAccountProvisionTimeout = 2 * time.Minute
  124. // How long to try single API calls (like 'get' or 'list'). Used to prevent
  125. // transient failures from failing tests.
  126. // TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
  127. SingleCallTimeout = 5 * time.Minute
  128. // How long nodes have to be "ready" when a test begins. They should already
  129. // be "ready" before the test starts, so this is small.
  130. NodeReadyInitialTimeout = 20 * time.Second
  131. // How long pods have to be "ready" when a test begins.
  132. PodReadyBeforeTimeout = 5 * time.Minute
  133. // How long pods have to become scheduled onto nodes
  134. podScheduledBeforeTimeout = PodListTimeout + (20 * time.Second)
  135. podRespondingTimeout = 15 * time.Minute
  136. ServiceRespondingTimeout = 2 * time.Minute
  137. EndpointRegisterTimeout = time.Minute
  138. // How long claims have to become dynamically provisioned
  139. ClaimProvisionTimeout = 5 * time.Minute
  140. // Same as `ClaimProvisionTimeout` to wait for claim to be dynamically provisioned, but shorter.
  141. // Use it case by case when we are sure this timeout is enough.
  142. ClaimProvisionShortTimeout = 1 * time.Minute
  143. // How long claims have to become bound
  144. ClaimBindingTimeout = 3 * time.Minute
  145. // How long claims have to become deleted
  146. ClaimDeletingTimeout = 3 * time.Minute
  147. // How long PVs have to beome reclaimed
  148. PVReclaimingTimeout = 3 * time.Minute
  149. // How long PVs have to become bound
  150. PVBindingTimeout = 3 * time.Minute
  151. // How long PVs have to become deleted
  152. PVDeletingTimeout = 3 * time.Minute
  153. // How long a node is allowed to become "Ready" after it is restarted before
  154. // the test is considered failed.
  155. RestartNodeReadyAgainTimeout = 5 * time.Minute
  156. // How long a pod is allowed to become "running" and "ready" after a node
  157. // restart before test is considered failed.
  158. RestartPodReadyAgainTimeout = 5 * time.Minute
  159. // Number of objects that gc can delete in a second.
  160. // GC issues 2 requestes for single delete.
  161. gcThroughput = 10
  162. // Minimal number of nodes for the cluster to be considered large.
  163. largeClusterThreshold = 100
  164. // TODO(justinsb): Avoid hardcoding this.
  165. awsMasterIP = "172.20.0.9"
  166. // ssh port
  167. sshPort = "22"
  168. // ImagePrePullingTimeout is the time we wait for the e2e-image-puller
  169. // static pods to pull the list of seeded images. If they don't pull
  170. // images within this time we simply log their output and carry on
  171. // with the tests.
  172. ImagePrePullingTimeout = 5 * time.Minute
  173. )
  174. var (
  175. BusyBoxImage = "busybox"
  176. // Label allocated to the image puller static pod that runs on each node
  177. // before e2es.
  178. ImagePullerLabels = map[string]string{"name": "e2e-image-puller"}
  179. // For parsing Kubectl version for version-skewed testing.
  180. gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"")
  181. // Slice of regexps for names of pods that have to be running to consider a Node "healthy"
  182. requiredPerNodePods = []*regexp.Regexp{
  183. regexp.MustCompile(".*kube-proxy.*"),
  184. regexp.MustCompile(".*fluentd-elasticsearch.*"),
  185. regexp.MustCompile(".*node-problem-detector.*"),
  186. }
  187. // Serve hostname image name
  188. ServeHostnameImage = imageutils.GetE2EImage(imageutils.ServeHostname)
  189. )
  190. type Address struct {
  191. internalIP string
  192. externalIP string
  193. hostname string
  194. }
  195. // GetServerArchitecture fetches the architecture of the cluster's apiserver.
  196. func GetServerArchitecture(c clientset.Interface) string {
  197. arch := ""
  198. sVer, err := c.Discovery().ServerVersion()
  199. if err != nil || sVer.Platform == "" {
  200. // If we failed to get the server version for some reason, default to amd64.
  201. arch = "amd64"
  202. } else {
  203. // Split the platform string into OS and Arch separately.
  204. // The platform string may for example be "linux/amd64", "linux/arm" or "windows/amd64".
  205. osArchArray := strings.Split(sVer.Platform, "/")
  206. arch = osArchArray[1]
  207. }
  208. return arch
  209. }
  210. // GetPauseImageName fetches the pause image name for the same architecture as the apiserver.
  211. func GetPauseImageName(c clientset.Interface) string {
  212. return imageutils.GetE2EImageWithArch(imageutils.Pause, GetServerArchitecture(c))
  213. }
  214. func GetServicesProxyRequest(c clientset.Interface, request *restclient.Request) (*restclient.Request, error) {
  215. return request.Resource("services").SubResource("proxy"), nil
  216. }
  217. // unique identifier of the e2e run
  218. var RunId = uuid.NewUUID()
  219. type CreateTestingNSFn func(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error)
  220. type ContainerFailures struct {
  221. status *v1.ContainerStateTerminated
  222. Restarts int
  223. }
  224. func GetMasterHost() string {
  225. masterUrl, err := url.Parse(TestContext.Host)
  226. ExpectNoError(err)
  227. return masterUrl.Host
  228. }
  229. func nowStamp() string {
  230. return time.Now().Format(time.StampMilli)
  231. }
  232. func log(level string, format string, args ...interface{}) {
  233. fmt.Fprintf(GinkgoWriter, nowStamp()+": "+level+": "+format+"\n", args...)
  234. }
  235. func Logf(format string, args ...interface{}) {
  236. log("INFO", format, args...)
  237. }
  238. func Failf(format string, args ...interface{}) {
  239. FailfWithOffset(1, format, args...)
  240. }
  241. // FailfWithOffset calls "Fail" and logs the error at "offset" levels above its caller
  242. // (for example, for call chain f -> g -> FailfWithOffset(1, ...) error would be logged for "f").
  243. func FailfWithOffset(offset int, format string, args ...interface{}) {
  244. msg := fmt.Sprintf(format, args...)
  245. log("INFO", msg)
  246. ginkgowrapper.Fail(nowStamp()+": "+msg, 1+offset)
  247. }
  248. func Skipf(format string, args ...interface{}) {
  249. msg := fmt.Sprintf(format, args...)
  250. log("INFO", msg)
  251. ginkgowrapper.Skip(nowStamp() + ": " + msg)
  252. }
  253. func SkipUnlessNodeCountIsAtLeast(minNodeCount int) {
  254. if TestContext.CloudConfig.NumNodes < minNodeCount {
  255. Skipf("Requires at least %d nodes (not %d)", minNodeCount, TestContext.CloudConfig.NumNodes)
  256. }
  257. }
  258. func SkipUnlessNodeCountIsAtMost(maxNodeCount int) {
  259. if TestContext.CloudConfig.NumNodes > maxNodeCount {
  260. Skipf("Requires at most %d nodes (not %d)", maxNodeCount, TestContext.CloudConfig.NumNodes)
  261. }
  262. }
  263. func SkipUnlessAtLeast(value int, minValue int, message string) {
  264. if value < minValue {
  265. Skipf(message)
  266. }
  267. }
  268. func SkipIfProviderIs(unsupportedProviders ...string) {
  269. if ProviderIs(unsupportedProviders...) {
  270. Skipf("Not supported for providers %v (found %s)", unsupportedProviders, TestContext.Provider)
  271. }
  272. }
  273. func SkipUnlessLocalEphemeralStorageEnabled() {
  274. if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
  275. Skipf("Only supported when %v feature is enabled", features.LocalStorageCapacityIsolation)
  276. }
  277. }
  278. func SkipUnlessSSHKeyPresent() {
  279. if _, err := GetSigner(TestContext.Provider); err != nil {
  280. Skipf("No SSH Key for provider %s: '%v'", TestContext.Provider, err)
  281. }
  282. }
  283. func SkipUnlessProviderIs(supportedProviders ...string) {
  284. if !ProviderIs(supportedProviders...) {
  285. Skipf("Only supported for providers %v (not %s)", supportedProviders, TestContext.Provider)
  286. }
  287. }
  288. func SkipUnlessMultizone(c clientset.Interface) {
  289. zones, err := GetClusterZones(c)
  290. if err != nil {
  291. Skipf("Error listing cluster zones")
  292. }
  293. if zones.Len() <= 1 {
  294. Skipf("Requires more than one zone")
  295. }
  296. }
  297. func SkipIfMultizone(c clientset.Interface) {
  298. zones, err := GetClusterZones(c)
  299. if err != nil {
  300. Skipf("Error listing cluster zones")
  301. }
  302. if zones.Len() > 1 {
  303. Skipf("Requires more than one zone")
  304. }
  305. }
  306. func SkipUnlessClusterMonitoringModeIs(supportedMonitoring ...string) {
  307. if !ClusterMonitoringModeIs(supportedMonitoring...) {
  308. Skipf("Only next monitoring modes are supported %v (not %s)", supportedMonitoring, TestContext.ClusterMonitoringMode)
  309. }
  310. }
  311. func SkipUnlessPrometheusMonitoringIsEnabled(supportedMonitoring ...string) {
  312. if !TestContext.EnablePrometheusMonitoring {
  313. Skipf("Skipped because prometheus monitoring is not enabled")
  314. }
  315. }
  316. func SkipUnlessMasterOSDistroIs(supportedMasterOsDistros ...string) {
  317. if !MasterOSDistroIs(supportedMasterOsDistros...) {
  318. Skipf("Only supported for master OS distro %v (not %s)", supportedMasterOsDistros, TestContext.MasterOSDistro)
  319. }
  320. }
  321. func SkipUnlessNodeOSDistroIs(supportedNodeOsDistros ...string) {
  322. if !NodeOSDistroIs(supportedNodeOsDistros...) {
  323. Skipf("Only supported for node OS distro %v (not %s)", supportedNodeOsDistros, TestContext.NodeOSDistro)
  324. }
  325. }
  326. func SkipUnlessSecretExistsAfterWait(c clientset.Interface, name, namespace string, timeout time.Duration) {
  327. Logf("Waiting for secret %v in namespace %v to exist in duration %v", name, namespace, timeout)
  328. start := time.Now()
  329. if wait.PollImmediate(15*time.Second, timeout, func() (bool, error) {
  330. _, err := c.CoreV1().Secrets(namespace).Get(name, metav1.GetOptions{})
  331. if err != nil {
  332. Logf("Secret %v in namespace %v still does not exist after duration %v", name, namespace, time.Since(start))
  333. return false, nil
  334. }
  335. return true, nil
  336. }) != nil {
  337. Skipf("Secret %v in namespace %v did not exist after timeout of %v", name, namespace, timeout)
  338. }
  339. Logf("Secret %v in namespace %v found after duration %v", name, namespace, time.Since(start))
  340. }
  341. func SkipIfContainerRuntimeIs(runtimes ...string) {
  342. for _, runtime := range runtimes {
  343. if runtime == TestContext.ContainerRuntime {
  344. Skipf("Not supported under container runtime %s", runtime)
  345. }
  346. }
  347. }
  348. func RunIfContainerRuntimeIs(runtimes ...string) {
  349. for _, runtime := range runtimes {
  350. if runtime == TestContext.ContainerRuntime {
  351. return
  352. }
  353. }
  354. Skipf("Skipped because container runtime %q is not in %s", TestContext.ContainerRuntime, runtimes)
  355. }
  356. func RunIfSystemSpecNameIs(names ...string) {
  357. for _, name := range names {
  358. if name == TestContext.SystemSpecName {
  359. return
  360. }
  361. }
  362. Skipf("Skipped because system spec name %q is not in %v", TestContext.SystemSpecName, names)
  363. }
  364. func ProviderIs(providers ...string) bool {
  365. for _, provider := range providers {
  366. if strings.ToLower(provider) == strings.ToLower(TestContext.Provider) {
  367. return true
  368. }
  369. }
  370. return false
  371. }
  372. func ClusterMonitoringModeIs(monitoringModes ...string) bool {
  373. for _, mode := range monitoringModes {
  374. if strings.ToLower(mode) == strings.ToLower(TestContext.ClusterMonitoringMode) {
  375. return true
  376. }
  377. }
  378. return false
  379. }
  380. func MasterOSDistroIs(supportedMasterOsDistros ...string) bool {
  381. for _, distro := range supportedMasterOsDistros {
  382. if strings.ToLower(distro) == strings.ToLower(TestContext.MasterOSDistro) {
  383. return true
  384. }
  385. }
  386. return false
  387. }
  388. func NodeOSDistroIs(supportedNodeOsDistros ...string) bool {
  389. for _, distro := range supportedNodeOsDistros {
  390. if strings.ToLower(distro) == strings.ToLower(TestContext.NodeOSDistro) {
  391. return true
  392. }
  393. }
  394. return false
  395. }
  396. func ProxyMode(f *Framework) (string, error) {
  397. pod := &v1.Pod{
  398. ObjectMeta: metav1.ObjectMeta{
  399. Name: "kube-proxy-mode-detector",
  400. Namespace: f.Namespace.Name,
  401. },
  402. Spec: v1.PodSpec{
  403. HostNetwork: true,
  404. Containers: []v1.Container{
  405. {
  406. Name: "detector",
  407. Image: imageutils.GetE2EImage(imageutils.Net),
  408. Command: []string{"/bin/sleep", "3600"},
  409. },
  410. },
  411. },
  412. }
  413. f.PodClient().CreateSync(pod)
  414. defer f.PodClient().DeleteSync(pod.Name, &metav1.DeleteOptions{}, DefaultPodDeletionTimeout)
  415. cmd := "curl -q -s --connect-timeout 1 http://localhost:10249/proxyMode"
  416. stdout, err := RunHostCmd(pod.Namespace, pod.Name, cmd)
  417. if err != nil {
  418. return "", err
  419. }
  420. Logf("ProxyMode: %s", stdout)
  421. return stdout, nil
  422. }
  423. func SkipUnlessServerVersionGTE(v *utilversion.Version, c discovery.ServerVersionInterface) {
  424. gte, err := ServerVersionGTE(v, c)
  425. if err != nil {
  426. Failf("Failed to get server version: %v", err)
  427. }
  428. if !gte {
  429. Skipf("Not supported for server versions before %q", v)
  430. }
  431. }
  432. func SkipIfMissingResource(dynamicClient dynamic.Interface, gvr schema.GroupVersionResource, namespace string) {
  433. resourceClient := dynamicClient.Resource(gvr).Namespace(namespace)
  434. _, err := resourceClient.List(metav1.ListOptions{})
  435. if err != nil {
  436. // not all resources support list, so we ignore those
  437. if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
  438. Skipf("Could not find %s resource, skipping test: %#v", gvr, err)
  439. }
  440. Failf("Unexpected error getting %v: %v", gvr, err)
  441. }
  442. }
  443. // ProvidersWithSSH are those providers where each node is accessible with SSH
  444. var ProvidersWithSSH = []string{"gce", "gke", "aws", "local"}
  445. type podCondition func(pod *v1.Pod) (bool, error)
  446. // logPodStates logs basic info of provided pods for debugging.
  447. func logPodStates(pods []v1.Pod) {
  448. // Find maximum widths for pod, node, and phase strings for column printing.
  449. maxPodW, maxNodeW, maxPhaseW, maxGraceW := len("POD"), len("NODE"), len("PHASE"), len("GRACE")
  450. for i := range pods {
  451. pod := &pods[i]
  452. if len(pod.ObjectMeta.Name) > maxPodW {
  453. maxPodW = len(pod.ObjectMeta.Name)
  454. }
  455. if len(pod.Spec.NodeName) > maxNodeW {
  456. maxNodeW = len(pod.Spec.NodeName)
  457. }
  458. if len(pod.Status.Phase) > maxPhaseW {
  459. maxPhaseW = len(pod.Status.Phase)
  460. }
  461. }
  462. // Increase widths by one to separate by a single space.
  463. maxPodW++
  464. maxNodeW++
  465. maxPhaseW++
  466. maxGraceW++
  467. // Log pod info. * does space padding, - makes them left-aligned.
  468. Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
  469. maxPodW, "POD", maxNodeW, "NODE", maxPhaseW, "PHASE", maxGraceW, "GRACE", "CONDITIONS")
  470. for _, pod := range pods {
  471. grace := ""
  472. if pod.DeletionGracePeriodSeconds != nil {
  473. grace = fmt.Sprintf("%ds", *pod.DeletionGracePeriodSeconds)
  474. }
  475. Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
  476. maxPodW, pod.ObjectMeta.Name, maxNodeW, pod.Spec.NodeName, maxPhaseW, pod.Status.Phase, maxGraceW, grace, pod.Status.Conditions)
  477. }
  478. Logf("") // Final empty line helps for readability.
  479. }
  480. // errorBadPodsStates create error message of basic info of bad pods for debugging.
  481. func errorBadPodsStates(badPods []v1.Pod, desiredPods int, ns, desiredState string, timeout time.Duration) string {
  482. errStr := fmt.Sprintf("%d / %d pods in namespace %q are NOT in %s state in %v\n", len(badPods), desiredPods, ns, desiredState, timeout)
  483. // Print bad pods info only if there are fewer than 10 bad pods
  484. if len(badPods) > 10 {
  485. return errStr + "There are too many bad pods. Please check log for details."
  486. }
  487. buf := bytes.NewBuffer(nil)
  488. w := tabwriter.NewWriter(buf, 0, 0, 1, ' ', 0)
  489. fmt.Fprintln(w, "POD\tNODE\tPHASE\tGRACE\tCONDITIONS")
  490. for _, badPod := range badPods {
  491. grace := ""
  492. if badPod.DeletionGracePeriodSeconds != nil {
  493. grace = fmt.Sprintf("%ds", *badPod.DeletionGracePeriodSeconds)
  494. }
  495. podInfo := fmt.Sprintf("%s\t%s\t%s\t%s\t%+v",
  496. badPod.ObjectMeta.Name, badPod.Spec.NodeName, badPod.Status.Phase, grace, badPod.Status.Conditions)
  497. fmt.Fprintln(w, podInfo)
  498. }
  499. w.Flush()
  500. return errStr + buf.String()
  501. }
  502. // WaitForPodsSuccess waits till all labels matching the given selector enter
  503. // the Success state. The caller is expected to only invoke this method once the
  504. // pods have been created.
  505. func WaitForPodsSuccess(c clientset.Interface, ns string, successPodLabels map[string]string, timeout time.Duration) error {
  506. successPodSelector := labels.SelectorFromSet(successPodLabels)
  507. start, badPods, desiredPods := time.Now(), []v1.Pod{}, 0
  508. if wait.PollImmediate(30*time.Second, timeout, func() (bool, error) {
  509. podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: successPodSelector.String()})
  510. if err != nil {
  511. Logf("Error getting pods in namespace %q: %v", ns, err)
  512. if testutils.IsRetryableAPIError(err) {
  513. return false, nil
  514. }
  515. return false, err
  516. }
  517. if len(podList.Items) == 0 {
  518. Logf("Waiting for pods to enter Success, but no pods in %q match label %v", ns, successPodLabels)
  519. return true, nil
  520. }
  521. badPods = []v1.Pod{}
  522. desiredPods = len(podList.Items)
  523. for _, pod := range podList.Items {
  524. if pod.Status.Phase != v1.PodSucceeded {
  525. badPods = append(badPods, pod)
  526. }
  527. }
  528. successPods := len(podList.Items) - len(badPods)
  529. Logf("%d / %d pods in namespace %q are in Success state (%d seconds elapsed)",
  530. successPods, len(podList.Items), ns, int(time.Since(start).Seconds()))
  531. if len(badPods) == 0 {
  532. return true, nil
  533. }
  534. return false, nil
  535. }) != nil {
  536. logPodStates(badPods)
  537. LogPodsWithLabels(c, ns, successPodLabels, Logf)
  538. return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "SUCCESS", timeout))
  539. }
  540. return nil
  541. }
  542. // WaitForPodsRunningReady waits up to timeout to ensure that all pods in
  543. // namespace ns are either running and ready, or failed but controlled by a
  544. // controller. Also, it ensures that at least minPods are running and
  545. // ready. It has separate behavior from other 'wait for' pods functions in
  546. // that it requests the list of pods on every iteration. This is useful, for
  547. // example, in cluster startup, because the number of pods increases while
  548. // waiting. All pods that are in SUCCESS state are not counted.
  549. //
  550. // If ignoreLabels is not empty, pods matching this selector are ignored.
  551. func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
  552. ignoreSelector := labels.SelectorFromSet(ignoreLabels)
  553. start := time.Now()
  554. Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
  555. timeout, minPods, ns)
  556. wg := sync.WaitGroup{}
  557. wg.Add(1)
  558. var ignoreNotReady bool
  559. badPods := []v1.Pod{}
  560. desiredPods := 0
  561. notReady := int32(0)
  562. if wait.PollImmediate(Poll, timeout, func() (bool, error) {
  563. // We get the new list of pods, replication controllers, and
  564. // replica sets in every iteration because more pods come
  565. // online during startup and we want to ensure they are also
  566. // checked.
  567. replicas, replicaOk := int32(0), int32(0)
  568. rcList, err := c.CoreV1().ReplicationControllers(ns).List(metav1.ListOptions{})
  569. if err != nil {
  570. Logf("Error getting replication controllers in namespace '%s': %v", ns, err)
  571. if testutils.IsRetryableAPIError(err) {
  572. return false, nil
  573. }
  574. return false, err
  575. }
  576. for _, rc := range rcList.Items {
  577. replicas += *rc.Spec.Replicas
  578. replicaOk += rc.Status.ReadyReplicas
  579. }
  580. rsList, err := c.ExtensionsV1beta1().ReplicaSets(ns).List(metav1.ListOptions{})
  581. if err != nil {
  582. Logf("Error getting replication sets in namespace %q: %v", ns, err)
  583. if testutils.IsRetryableAPIError(err) {
  584. return false, nil
  585. }
  586. return false, err
  587. }
  588. for _, rs := range rsList.Items {
  589. replicas += *rs.Spec.Replicas
  590. replicaOk += rs.Status.ReadyReplicas
  591. }
  592. podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
  593. if err != nil {
  594. Logf("Error getting pods in namespace '%s': %v", ns, err)
  595. if testutils.IsRetryableAPIError(err) {
  596. return false, nil
  597. }
  598. return false, err
  599. }
  600. nOk := int32(0)
  601. notReady = int32(0)
  602. badPods = []v1.Pod{}
  603. desiredPods = len(podList.Items)
  604. for _, pod := range podList.Items {
  605. if len(ignoreLabels) != 0 && ignoreSelector.Matches(labels.Set(pod.Labels)) {
  606. continue
  607. }
  608. res, err := testutils.PodRunningReady(&pod)
  609. switch {
  610. case res && err == nil:
  611. nOk++
  612. case pod.Status.Phase == v1.PodSucceeded:
  613. Logf("The status of Pod %s is Succeeded, skipping waiting", pod.ObjectMeta.Name)
  614. // it doesn't make sense to wait for this pod
  615. continue
  616. case pod.Status.Phase != v1.PodFailed:
  617. Logf("The status of Pod %s is %s (Ready = false), waiting for it to be either Running (with Ready = true) or Failed", pod.ObjectMeta.Name, pod.Status.Phase)
  618. notReady++
  619. badPods = append(badPods, pod)
  620. default:
  621. if metav1.GetControllerOf(&pod) == nil {
  622. Logf("Pod %s is Failed, but it's not controlled by a controller", pod.ObjectMeta.Name)
  623. badPods = append(badPods, pod)
  624. }
  625. //ignore failed pods that are controlled by some controller
  626. }
  627. }
  628. Logf("%d / %d pods in namespace '%s' are running and ready (%d seconds elapsed)",
  629. nOk, len(podList.Items), ns, int(time.Since(start).Seconds()))
  630. Logf("expected %d pod replicas in namespace '%s', %d are Running and Ready.", replicas, ns, replicaOk)
  631. if replicaOk == replicas && nOk >= minPods && len(badPods) == 0 {
  632. return true, nil
  633. }
  634. ignoreNotReady = (notReady <= allowedNotReadyPods)
  635. logPodStates(badPods)
  636. return false, nil
  637. }) != nil {
  638. if !ignoreNotReady {
  639. return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "RUNNING and READY", timeout))
  640. }
  641. Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
  642. }
  643. return nil
  644. }
  645. func kubectlLogPod(c clientset.Interface, pod v1.Pod, containerNameSubstr string, logFunc func(ftm string, args ...interface{})) {
  646. for _, container := range pod.Spec.Containers {
  647. if strings.Contains(container.Name, containerNameSubstr) {
  648. // Contains() matches all strings if substr is empty
  649. logs, err := GetPodLogs(c, pod.Namespace, pod.Name, container.Name)
  650. if err != nil {
  651. logs, err = getPreviousPodLogs(c, pod.Namespace, pod.Name, container.Name)
  652. if err != nil {
  653. logFunc("Failed to get logs of pod %v, container %v, err: %v", pod.Name, container.Name, err)
  654. }
  655. }
  656. logFunc("Logs of %v/%v:%v on node %v", pod.Namespace, pod.Name, container.Name, pod.Spec.NodeName)
  657. logFunc("%s : STARTLOG\n%s\nENDLOG for container %v:%v:%v", containerNameSubstr, logs, pod.Namespace, pod.Name, container.Name)
  658. }
  659. }
  660. }
  661. func LogFailedContainers(c clientset.Interface, ns string, logFunc func(ftm string, args ...interface{})) {
  662. podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
  663. if err != nil {
  664. logFunc("Error getting pods in namespace '%s': %v", ns, err)
  665. return
  666. }
  667. logFunc("Running kubectl logs on non-ready containers in %v", ns)
  668. for _, pod := range podList.Items {
  669. if res, err := testutils.PodRunningReady(&pod); !res || err != nil {
  670. kubectlLogPod(c, pod, "", Logf)
  671. }
  672. }
  673. }
  674. func LogPodsWithLabels(c clientset.Interface, ns string, match map[string]string, logFunc func(ftm string, args ...interface{})) {
  675. podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: labels.SelectorFromSet(match).String()})
  676. if err != nil {
  677. logFunc("Error getting pods in namespace %q: %v", ns, err)
  678. return
  679. }
  680. logFunc("Running kubectl logs on pods with labels %v in %v", match, ns)
  681. for _, pod := range podList.Items {
  682. kubectlLogPod(c, pod, "", logFunc)
  683. }
  684. }
  685. func LogContainersInPodsWithLabels(c clientset.Interface, ns string, match map[string]string, containerSubstr string, logFunc func(ftm string, args ...interface{})) {
  686. podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: labels.SelectorFromSet(match).String()})
  687. if err != nil {
  688. Logf("Error getting pods in namespace %q: %v", ns, err)
  689. return
  690. }
  691. for _, pod := range podList.Items {
  692. kubectlLogPod(c, pod, containerSubstr, logFunc)
  693. }
  694. }
  695. // DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
  696. // Filter is by simple strings.Contains; first skip filter, then delete filter.
  697. // Returns the list of deleted namespaces or an error.
  698. func DeleteNamespaces(c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) {
  699. By("Deleting namespaces")
  700. nsList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
  701. Expect(err).NotTo(HaveOccurred())
  702. var deleted []string
  703. var wg sync.WaitGroup
  704. OUTER:
  705. for _, item := range nsList.Items {
  706. if skipFilter != nil {
  707. for _, pattern := range skipFilter {
  708. if strings.Contains(item.Name, pattern) {
  709. continue OUTER
  710. }
  711. }
  712. }
  713. if deleteFilter != nil {
  714. var shouldDelete bool
  715. for _, pattern := range deleteFilter {
  716. if strings.Contains(item.Name, pattern) {
  717. shouldDelete = true
  718. break
  719. }
  720. }
  721. if !shouldDelete {
  722. continue OUTER
  723. }
  724. }
  725. wg.Add(1)
  726. deleted = append(deleted, item.Name)
  727. go func(nsName string) {
  728. defer wg.Done()
  729. defer GinkgoRecover()
  730. Expect(c.CoreV1().Namespaces().Delete(nsName, nil)).To(Succeed())
  731. Logf("namespace : %v api call to delete is complete ", nsName)
  732. }(item.Name)
  733. }
  734. wg.Wait()
  735. return deleted, nil
  736. }
  737. func WaitForNamespacesDeleted(c clientset.Interface, namespaces []string, timeout time.Duration) error {
  738. By("Waiting for namespaces to vanish")
  739. nsMap := map[string]bool{}
  740. for _, ns := range namespaces {
  741. nsMap[ns] = true
  742. }
  743. //Now POLL until all namespaces have been eradicated.
  744. return wait.Poll(2*time.Second, timeout,
  745. func() (bool, error) {
  746. nsList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
  747. if err != nil {
  748. return false, err
  749. }
  750. for _, item := range nsList.Items {
  751. if _, ok := nsMap[item.Name]; ok {
  752. return false, nil
  753. }
  754. }
  755. return true, nil
  756. })
  757. }
  758. func waitForServiceAccountInNamespace(c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error {
  759. w, err := c.CoreV1().ServiceAccounts(ns).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: serviceAccountName}))
  760. if err != nil {
  761. return err
  762. }
  763. _, err = watch.Until(timeout, w, conditions.ServiceAccountHasSecrets)
  764. return err
  765. }
  766. func WaitForPodCondition(c clientset.Interface, ns, podName, desc string, timeout time.Duration, condition podCondition) error {
  767. Logf("Waiting up to %v for pod %q in namespace %q to be %q", timeout, podName, ns, desc)
  768. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  769. pod, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
  770. if err != nil {
  771. if apierrs.IsNotFound(err) {
  772. Logf("Pod %q in namespace %q not found. Error: %v", podName, ns, err)
  773. return err
  774. }
  775. Logf("Get pod %q in namespace %q failed, ignoring for %v. Error: %v", podName, ns, Poll, err)
  776. continue
  777. }
  778. // log now so that current pod info is reported before calling `condition()`
  779. Logf("Pod %q: Phase=%q, Reason=%q, readiness=%t. Elapsed: %v",
  780. podName, pod.Status.Phase, pod.Status.Reason, podutil.IsPodReady(pod), time.Since(start))
  781. if done, err := condition(pod); done {
  782. if err == nil {
  783. Logf("Pod %q satisfied condition %q", podName, desc)
  784. }
  785. return err
  786. }
  787. }
  788. return fmt.Errorf("Gave up after waiting %v for pod %q to be %q", timeout, podName, desc)
  789. }
  790. // WaitForMatchPodsCondition finds match pods based on the input ListOptions.
  791. // waits and checks if all match pods are in the given podCondition
  792. func WaitForMatchPodsCondition(c clientset.Interface, opts metav1.ListOptions, desc string, timeout time.Duration, condition podCondition) error {
  793. Logf("Waiting up to %v for matching pods' status to be %s", timeout, desc)
  794. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  795. pods, err := c.CoreV1().Pods(metav1.NamespaceAll).List(opts)
  796. if err != nil {
  797. return err
  798. }
  799. conditionNotMatch := []string{}
  800. for _, pod := range pods.Items {
  801. done, err := condition(&pod)
  802. if done && err != nil {
  803. return fmt.Errorf("Unexpected error: %v", err)
  804. }
  805. if !done {
  806. conditionNotMatch = append(conditionNotMatch, format.Pod(&pod))
  807. }
  808. }
  809. if len(conditionNotMatch) <= 0 {
  810. return err
  811. }
  812. Logf("%d pods are not %s: %v", len(conditionNotMatch), desc, conditionNotMatch)
  813. }
  814. return fmt.Errorf("gave up waiting for matching pods to be '%s' after %v", desc, timeout)
  815. }
  816. // WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned
  817. // the default service account is what is associated with pods when they do not specify a service account
  818. // as a result, pods are not able to be provisioned in a namespace until the service account is provisioned
  819. func WaitForDefaultServiceAccountInNamespace(c clientset.Interface, namespace string) error {
  820. return waitForServiceAccountInNamespace(c, namespace, "default", ServiceAccountProvisionTimeout)
  821. }
  822. // WaitForPersistentVolumePhase waits for a PersistentVolume to be in a specific phase or until timeout occurs, whichever comes first.
  823. func WaitForPersistentVolumePhase(phase v1.PersistentVolumePhase, c clientset.Interface, pvName string, Poll, timeout time.Duration) error {
  824. Logf("Waiting up to %v for PersistentVolume %s to have phase %s", timeout, pvName, phase)
  825. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  826. pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
  827. if err != nil {
  828. Logf("Get persistent volume %s in failed, ignoring for %v: %v", pvName, Poll, err)
  829. continue
  830. } else {
  831. if pv.Status.Phase == phase {
  832. Logf("PersistentVolume %s found and phase=%s (%v)", pvName, phase, time.Since(start))
  833. return nil
  834. } else {
  835. Logf("PersistentVolume %s found but phase is %s instead of %s.", pvName, pv.Status.Phase, phase)
  836. }
  837. }
  838. }
  839. return fmt.Errorf("PersistentVolume %s not in phase %s within %v", pvName, phase, timeout)
  840. }
  841. // WaitForStatefulSetReplicasReady waits for all replicas of a StatefulSet to become ready or until timeout occurs, whichever comes first.
  842. func WaitForStatefulSetReplicasReady(statefulSetName, ns string, c clientset.Interface, Poll, timeout time.Duration) error {
  843. Logf("Waiting up to %v for StatefulSet %s to have all replicas ready", timeout, statefulSetName)
  844. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  845. sts, err := c.AppsV1().StatefulSets(ns).Get(statefulSetName, metav1.GetOptions{})
  846. if err != nil {
  847. Logf("Get StatefulSet %s failed, ignoring for %v: %v", statefulSetName, Poll, err)
  848. continue
  849. } else {
  850. if sts.Status.ReadyReplicas == *sts.Spec.Replicas {
  851. Logf("All %d replicas of StatefulSet %s are ready. (%v)", sts.Status.ReadyReplicas, statefulSetName, time.Since(start))
  852. return nil
  853. } else {
  854. Logf("StatefulSet %s found but there are %d ready replicas and %d total replicas.", statefulSetName, sts.Status.ReadyReplicas, *sts.Spec.Replicas)
  855. }
  856. }
  857. }
  858. return fmt.Errorf("StatefulSet %s still has unready pods within %v", statefulSetName, timeout)
  859. }
  860. // WaitForPersistentVolumeDeleted waits for a PersistentVolume to get deleted or until timeout occurs, whichever comes first.
  861. func WaitForPersistentVolumeDeleted(c clientset.Interface, pvName string, Poll, timeout time.Duration) error {
  862. Logf("Waiting up to %v for PersistentVolume %s to get deleted", timeout, pvName)
  863. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  864. pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
  865. if err == nil {
  866. Logf("PersistentVolume %s found and phase=%s (%v)", pvName, pv.Status.Phase, time.Since(start))
  867. continue
  868. } else {
  869. if apierrs.IsNotFound(err) {
  870. Logf("PersistentVolume %s was removed", pvName)
  871. return nil
  872. } else {
  873. Logf("Get persistent volume %s in failed, ignoring for %v: %v", pvName, Poll, err)
  874. }
  875. }
  876. }
  877. return fmt.Errorf("PersistentVolume %s still exists within %v", pvName, timeout)
  878. }
  879. // WaitForPersistentVolumeClaimPhase waits for a PersistentVolumeClaim to be in a specific phase or until timeout occurs, whichever comes first.
  880. func WaitForPersistentVolumeClaimPhase(phase v1.PersistentVolumeClaimPhase, c clientset.Interface, ns string, pvcName string, Poll, timeout time.Duration) error {
  881. Logf("Waiting up to %v for PersistentVolumeClaim %s to have phase %s", timeout, pvcName, phase)
  882. for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
  883. pvc, err := c.CoreV1().PersistentVolumeClaims(ns).Get(pvcName, metav1.GetOptions{})
  884. if err != nil {
  885. Logf("Failed to get claim %q, retrying in %v. Error: %v", pvcName, Poll, err)
  886. continue
  887. } else {
  888. if pvc.Status.Phase == phase {
  889. Logf("PersistentVolumeClaim %s found and phase=%s (%v)", pvcName, phase, time.Since(start))
  890. return nil
  891. } else {
  892. Logf("PersistentVolumeClaim %s found but phase is %s instead of %s.", pvcName, pvc.Status.Phase, phase)
  893. }
  894. }
  895. }
  896. return fmt.Errorf("PersistentVolumeClaim %s not in phase %s within %v", pvcName, phase, timeout)
  897. }
  898. // CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name.
  899. // Please see NewFramework instead of using this directly.
  900. func CreateTestingNS(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) {
  901. if labels == nil {
  902. labels = map[string]string{}
  903. }
  904. labels["e2e-run"] = string(RunId)
  905. namespaceObj := &v1.Namespace{
  906. ObjectMeta: metav1.ObjectMeta{
  907. GenerateName: fmt.Sprintf("e2e-tests-%v-", baseName),
  908. Namespace: "",
  909. Labels: labels,
  910. },
  911. Status: v1.NamespaceStatus{},
  912. }
  913. // Be robust about making the namespace creation call.
  914. var got *v1.Namespace
  915. if err := wait.PollImmediate(Poll, 30*time.Second, func() (bool, error) {
  916. var err error
  917. got, err = c.CoreV1().Namespaces().Create(namespaceObj)
  918. if err != nil {
  919. Logf("Unexpected error while creating namespace: %v", err)
  920. return false, nil
  921. }
  922. return true, nil
  923. }); err != nil {
  924. return nil, err
  925. }
  926. if TestContext.VerifyServiceAccount {
  927. if err := WaitForDefaultServiceAccountInNamespace(c, got.Name); err != nil {
  928. // Even if we fail to create serviceAccount in the namespace,
  929. // we have successfully create a namespace.
  930. // So, return the created namespace.
  931. return got, err
  932. }
  933. }
  934. return got, nil
  935. }
  936. // CheckTestingNSDeletedExcept checks whether all e2e based existing namespaces are in the Terminating state
  937. // and waits until they are finally deleted. It ignores namespace skip.
  938. func CheckTestingNSDeletedExcept(c clientset.Interface, skip string) error {
  939. // TODO: Since we don't have support for bulk resource deletion in the API,
  940. // while deleting a namespace we are deleting all objects from that namespace
  941. // one by one (one deletion == one API call). This basically exposes us to
  942. // throttling - currently controller-manager has a limit of max 20 QPS.
  943. // Once #10217 is implemented and used in namespace-controller, deleting all
  944. // object from a given namespace should be much faster and we will be able
  945. // to lower this timeout.
  946. // However, now Density test is producing ~26000 events and Load capacity test
  947. // is producing ~35000 events, thus assuming there are no other requests it will
  948. // take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
  949. // minutes to avoid any timeouts here.
  950. timeout := 60 * time.Minute
  951. Logf("Waiting for terminating namespaces to be deleted...")
  952. for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
  953. namespaces, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
  954. if err != nil {
  955. Logf("Listing namespaces failed: %v", err)
  956. continue
  957. }
  958. terminating := 0
  959. for _, ns := range namespaces.Items {
  960. if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
  961. if ns.Status.Phase == v1.NamespaceActive {
  962. return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
  963. }
  964. terminating++
  965. }
  966. }
  967. if terminating == 0 {
  968. return nil
  969. }
  970. }
  971. return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
  972. }
  973. // deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
  974. // whether there are any pods remaining in a non-terminating state.
  975. func deleteNS(c clientset.Interface, dynamicClient dynamic.Interface, namespace string, timeout time.Duration) error {
  976. startTime := time.Now()
  977. if err := c.CoreV1().Namespaces().Delete(namespace, nil); err != nil {
  978. return err
  979. }
  980. // wait for namespace to delete or timeout.
  981. err := wait.PollImmediate(2*time.Second, timeout, func() (bool, error) {
  982. if _, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{}); err != nil {
  983. if apierrs.IsNotFound(err) {
  984. return true, nil
  985. }
  986. Logf("Error while waiting for namespace to be terminated: %v", err)
  987. return false, nil
  988. }
  989. return false, nil
  990. })
  991. // verify there is no more remaining content in the namespace
  992. remainingContent, cerr := hasRemainingContent(c, dynamicClient, namespace)
  993. if cerr != nil {
  994. return cerr
  995. }
  996. // if content remains, let's dump information about the namespace, and system for flake debugging.
  997. remainingPods := 0
  998. missingTimestamp := 0
  999. if remainingContent {
  1000. // log information about namespace, and set of namespaces in api server to help flake detection
  1001. logNamespace(c, namespace)
  1002. logNamespaces(c, namespace)
  1003. // if we can, check if there were pods remaining with no timestamp.
  1004. remainingPods, missingTimestamp, _ = countRemainingPods(c, namespace)
  1005. }
  1006. // a timeout waiting for namespace deletion happened!
  1007. if err != nil {
  1008. // some content remains in the namespace
  1009. if remainingContent {
  1010. // pods remain
  1011. if remainingPods > 0 {
  1012. if missingTimestamp != 0 {
  1013. // pods remained, but were not undergoing deletion (namespace controller is probably culprit)
  1014. return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp)
  1015. }
  1016. // but they were all undergoing deletion (kubelet is probably culprit, check NodeLost)
  1017. return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods)
  1018. }
  1019. // other content remains (namespace controller is probably screwed up)
  1020. return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err)
  1021. }
  1022. // no remaining content, but namespace was not deleted (namespace controller is probably wedged)
  1023. return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err)
  1024. }
  1025. Logf("namespace %v deletion completed in %s", namespace, time.Since(startTime))
  1026. return nil
  1027. }
  1028. // logNamespaces logs the number of namespaces by phase
  1029. // namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs
  1030. func logNamespaces(c clientset.Interface, namespace string) {
  1031. namespaceList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
  1032. if err != nil {
  1033. Logf("namespace: %v, unable to list namespaces: %v", namespace, err)
  1034. return
  1035. }
  1036. numActive := 0
  1037. numTerminating := 0
  1038. for _, namespace := range namespaceList.Items {
  1039. if namespace.Status.Phase == v1.NamespaceActive {
  1040. numActive++
  1041. } else {
  1042. numTerminating++
  1043. }
  1044. }
  1045. Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating)
  1046. }
  1047. // logNamespace logs detail about a namespace
  1048. func logNamespace(c clientset.Interface, namespace string) {
  1049. ns, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
  1050. if err != nil {
  1051. if apierrs.IsNotFound(err) {
  1052. Logf("namespace: %v no longer exists", namespace)
  1053. return
  1054. }
  1055. Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err)
  1056. return
  1057. }
  1058. Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase)
  1059. }
  1060. // countRemainingPods queries the server to count number of remaining pods, and number of pods that had a missing deletion timestamp.
  1061. func countRemainingPods(c clientset.Interface, namespace string) (int, int, error) {
  1062. // check for remaining pods
  1063. pods, err := c.CoreV1().Pods(namespace).List(metav1.ListOptions{})
  1064. if err != nil {
  1065. return 0, 0, err
  1066. }
  1067. // nothing remains!
  1068. if len(pods.Items) == 0 {
  1069. return 0, 0, nil
  1070. }
  1071. // stuff remains, log about it
  1072. logPodStates(pods.Items)
  1073. // check if there were any pods with missing deletion timestamp
  1074. numPods := len(pods.Items)
  1075. missingTimestamp := 0
  1076. for _, pod := range pods.Items {
  1077. if pod.DeletionTimestamp == nil {
  1078. missingTimestamp++
  1079. }
  1080. }
  1081. return numPods, missingTimestamp, nil
  1082. }
  1083. // isDynamicDiscoveryError returns true if the error is a group discovery error
  1084. // only for groups expected to be created/deleted dynamically during e2e tests
  1085. func isDynamicDiscoveryError(err error) bool {
  1086. if !discovery.IsGroupDiscoveryFailedError(err) {
  1087. return false
  1088. }
  1089. discoveryErr := err.(*discovery.ErrGroupDiscoveryFailed)
  1090. for gv := range discoveryErr.Groups {
  1091. switch gv.Group {
  1092. case "mygroup.example.com":
  1093. // custom_resource_definition
  1094. // garbage_collector
  1095. case "wardle.k8s.io":
  1096. // aggregator
  1097. case "metrics.k8s.io":
  1098. // aggregated metrics server add-on, no persisted resources
  1099. default:
  1100. Logf("discovery error for unexpected group: %#v", gv)
  1101. return false
  1102. }
  1103. }
  1104. return true
  1105. }
  1106. // hasRemainingContent checks if there is remaining content in the namespace via API discovery
  1107. func hasRemainingContent(c clientset.Interface, dynamicClient dynamic.Interface, namespace string) (bool, error) {
  1108. // some tests generate their own framework.Client rather than the default
  1109. // TODO: ensure every test call has a configured dynamicClient
  1110. if dynamicClient == nil {
  1111. return false, nil
  1112. }
  1113. // find out what content is supported on the server
  1114. // Since extension apiserver is not always available, e.g. metrics server sometimes goes down,
  1115. // add retry here.
  1116. resources, err := waitForServerPreferredNamespacedResources(c.Discovery(), 30*time.Second)
  1117. if err != nil {
  1118. return false, err
  1119. }
  1120. groupVersionResources, err := discovery.GroupVersionResources(resources)
  1121. if err != nil {
  1122. return false, err
  1123. }
  1124. // TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798
  1125. ignoredResources := sets.NewString("bindings")
  1126. contentRemaining := false
  1127. // dump how many of resource type is on the server in a log.
  1128. for gvr := range groupVersionResources {
  1129. // get a client for this group version...
  1130. dynamicClient := dynamicClient.Resource(gvr).Namespace(namespace)
  1131. if err != nil {
  1132. // not all resource types support list, so some errors here are normal depending on the resource type.
  1133. Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err)
  1134. continue
  1135. }
  1136. // get the api resource
  1137. apiResource := metav1.APIResource{Name: gvr.Resource, Namespaced: true}
  1138. if ignoredResources.Has(gvr.Resource) {
  1139. Logf("namespace: %s, resource: %s, ignored listing per whitelist", namespace, apiResource.Name)
  1140. continue
  1141. }
  1142. unstructuredList, err := dynamicClient.List(metav1.ListOptions{})
  1143. if err != nil {
  1144. // not all resources support list, so we ignore those
  1145. if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
  1146. continue
  1147. }
  1148. // skip unavailable servers
  1149. if apierrs.IsServiceUnavailable(err) {
  1150. continue
  1151. }
  1152. return false, err
  1153. }
  1154. if len(unstructuredList.Items) > 0 {
  1155. Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items))
  1156. contentRemaining = true
  1157. }
  1158. }
  1159. return contentRemaining, nil
  1160. }
  1161. func ContainerInitInvariant(older, newer runtime.Object) error {
  1162. oldPod := older.(*v1.Pod)
  1163. newPod := newer.(*v1.Pod)
  1164. if len(oldPod.Spec.InitContainers) == 0 {
  1165. return nil
  1166. }
  1167. if len(oldPod.Spec.InitContainers) != len(newPod.Spec.InitContainers) {
  1168. return fmt.Errorf("init container list changed")
  1169. }
  1170. if oldPod.UID != newPod.UID {
  1171. return fmt.Errorf("two different pods exist in the condition: %s vs %s", oldPod.UID, newPod.UID)
  1172. }
  1173. if err := initContainersInvariants(oldPod); err != nil {
  1174. return err
  1175. }
  1176. if err := initContainersInvariants(newPod); err != nil {
  1177. return err
  1178. }
  1179. oldInit, _, _ := podInitialized(oldPod)
  1180. newInit, _, _ := podInitialized(newPod)
  1181. if oldInit && !newInit {
  1182. // TODO: we may in the future enable resetting PodInitialized = false if the kubelet needs to restart it
  1183. // from scratch
  1184. return fmt.Errorf("pod cannot be initialized and then regress to not being initialized")
  1185. }
  1186. return nil
  1187. }
  1188. func podInitialized(pod *v1.Pod) (ok bool, failed bool, err error) {
  1189. allInit := true
  1190. initFailed := false
  1191. for _, s := range pod.Status.InitContainerStatuses {
  1192. switch {
  1193. case initFailed && s.State.Waiting == nil:
  1194. return allInit, initFailed, fmt.Errorf("container %s is after a failed container but isn't waiting", s.Name)
  1195. case allInit && s.State.Waiting == nil:
  1196. return allInit, initFailed, fmt.Errorf("container %s is after an initializing container but isn't waiting", s.Name)
  1197. case s.State.Terminated == nil:
  1198. allInit = false
  1199. case s.State.Terminated.ExitCode != 0:
  1200. allInit = false
  1201. initFailed = true
  1202. case !s.Ready:
  1203. return allInit, initFailed, fmt.Errorf("container %s initialized but isn't marked as ready", s.Name)
  1204. }
  1205. }
  1206. return allInit, initFailed, nil
  1207. }
  1208. func initContainersInvariants(pod *v1.Pod) error {
  1209. allInit, initFailed, err := podInitialized(pod)
  1210. if err != nil {
  1211. return err
  1212. }
  1213. if !allInit || initFailed {
  1214. for _, s := range pod.Status.ContainerStatuses {
  1215. if s.State.Waiting == nil || s.RestartCount != 0 {
  1216. return fmt.Errorf("container %s is not waiting but initialization not complete", s.Name)
  1217. }
  1218. if s.State.Waiting.Reason != "PodInitializing" {
  1219. return fmt.Errorf("container %s should have reason PodInitializing: %s", s.Name, s.State.Waiting.Reason)
  1220. }
  1221. }
  1222. }
  1223. _, c := podutil.GetPodCondition(&pod.Status, v1.PodInitialized)
  1224. if c == nil {
  1225. return fmt.Errorf("pod does not have initialized condition")
  1226. }
  1227. if c.LastTransitionTime.IsZero() {
  1228. return fmt.Errorf("PodInitialized condition should always have a transition time")
  1229. }
  1230. switch {
  1231. case c.Status == v1.ConditionUnknown:
  1232. return fmt.Errorf("PodInitialized condition should never be Unknown")
  1233. case c.Status == v1.ConditionTrue && (initFailed || !allInit):
  1234. return fmt.Errorf("PodInitialized condition was True but all not all containers initialized")
  1235. case c.Status == v1.ConditionFalse && (!initFailed && allInit):
  1236. return fmt.Errorf("PodInitialized condition was False but all containers initialized")
  1237. }
  1238. return nil
  1239. }
  // InvariantFunc is the signature of a check that is handed an older and a
  // newer runtime.Object and returns an error, or nil. ContainerInitInvariant
  // in this file has this signature.
  type InvariantFunc func(older, newer runtime.Object) error
  1241. func CheckInvariants(events []watch.Event, fns ...InvariantFunc) error {
  1242. e