/test/e2e/framework/util.go
- /*
- Copyright 2014 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package framework
- import (
- "bytes"
- "context"
- "encoding/json"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "math/rand"
- "net"
- "net/http"
- "net/url"
- "os"
- "os/exec"
- "path"
- "path/filepath"
- "regexp"
- "sort"
- "strconv"
- "strings"
- "sync"
- "syscall"
- "text/tabwriter"
- "time"
- "github.com/golang/glog"
- "golang.org/x/crypto/ssh"
- "golang.org/x/net/websocket"
- "google.golang.org/api/googleapi"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
- gomegatypes "github.com/onsi/gomega/types"
- apps "k8s.io/api/apps/v1"
- batch "k8s.io/api/batch/v1"
- "k8s.io/api/core/v1"
- extensions "k8s.io/api/extensions/v1beta1"
- apierrs "k8s.io/apimachinery/pkg/api/errors"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/fields"
- "k8s.io/apimachinery/pkg/labels"
- "k8s.io/apimachinery/pkg/runtime"
- "k8s.io/apimachinery/pkg/runtime/schema"
- "k8s.io/apimachinery/pkg/types"
- "k8s.io/apimachinery/pkg/util/sets"
- "k8s.io/apimachinery/pkg/util/uuid"
- "k8s.io/apimachinery/pkg/util/wait"
- utilyaml "k8s.io/apimachinery/pkg/util/yaml"
- "k8s.io/apimachinery/pkg/watch"
- "k8s.io/client-go/discovery"
- "k8s.io/client-go/dynamic"
- restclient "k8s.io/client-go/rest"
- "k8s.io/client-go/tools/clientcmd"
- clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
- utilfeature "k8s.io/apiserver/pkg/util/feature"
- clientset "k8s.io/client-go/kubernetes"
- scaleclient "k8s.io/client-go/scale"
- "k8s.io/kubernetes/pkg/api/legacyscheme"
- podutil "k8s.io/kubernetes/pkg/api/v1/pod"
- appsinternal "k8s.io/kubernetes/pkg/apis/apps"
- batchinternal "k8s.io/kubernetes/pkg/apis/batch"
- api "k8s.io/kubernetes/pkg/apis/core"
- extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions"
- "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
- "k8s.io/kubernetes/pkg/client/conditions"
- "k8s.io/kubernetes/pkg/cloudprovider/providers/azure"
- gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
- "k8s.io/kubernetes/pkg/controller"
- nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
- "k8s.io/kubernetes/pkg/features"
- "k8s.io/kubernetes/pkg/kubectl"
- kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
- "k8s.io/kubernetes/pkg/kubelet/util/format"
- "k8s.io/kubernetes/pkg/master/ports"
- "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
- "k8s.io/kubernetes/pkg/scheduler/schedulercache"
- sshutil "k8s.io/kubernetes/pkg/ssh"
- "k8s.io/kubernetes/pkg/util/system"
- taintutils "k8s.io/kubernetes/pkg/util/taints"
- utilversion "k8s.io/kubernetes/pkg/util/version"
- "k8s.io/kubernetes/test/e2e/framework/ginkgowrapper"
- testutils "k8s.io/kubernetes/test/utils"
- imageutils "k8s.io/kubernetes/test/utils/image"
- uexec "k8s.io/utils/exec"
- )
- const (
- // How long to wait for the pod to be listable
- PodListTimeout = time.Minute
- // Initial pod start can be delayed O(minutes) by slow docker pulls
- // TODO: Make this 30 seconds once #4566 is resolved.
- PodStartTimeout = 5 * time.Minute
- // Shorter version of PodStartTimeout. Use it case by case when we are sure
- // pod start will not be delayed by minutes due to slow docker pulls or
- // anything else.
- PodStartShortTimeout = 1 * time.Minute
- // How long to wait for a pod to be deleted
- PodDeleteTimeout = 5 * time.Minute
- // If there are any orphaned namespaces to clean up, this test is running
- // on a long-lived cluster. A long wait here is preferable to spurious test
- // failures caused by leaked resources from a previous test run.
- NamespaceCleanupTimeout = 15 * time.Minute
- // Some pods can take much longer to get ready due to volume attach/detach latency.
- slowPodStartTimeout = 15 * time.Minute
- // How long to wait for a service endpoint to be resolvable.
- ServiceStartTimeout = 3 * time.Minute
- // How often to Poll pods, nodes and claims.
- Poll = 2 * time.Second
- pollShortTimeout = 1 * time.Minute
- pollLongTimeout = 5 * time.Minute
- // service accounts are provisioned after namespace creation
- // a service account is required to support pod creation in a namespace as part of admission control
- ServiceAccountProvisionTimeout = 2 * time.Minute
- // How long to try single API calls (like 'get' or 'list'). Used to prevent
- // transient failures from failing tests.
- // TODO: client should not apply this timeout to Watch calls. Increased from 30s until that is fixed.
- SingleCallTimeout = 5 * time.Minute
- // How long nodes have to be "ready" when a test begins. They should already
- // be "ready" before the test starts, so this is small.
- NodeReadyInitialTimeout = 20 * time.Second
- // How long pods have to be "ready" when a test begins.
- PodReadyBeforeTimeout = 5 * time.Minute
- // How long pods have to become scheduled onto nodes
- podScheduledBeforeTimeout = PodListTimeout + (20 * time.Second)
- podRespondingTimeout = 15 * time.Minute
- ServiceRespondingTimeout = 2 * time.Minute
- EndpointRegisterTimeout = time.Minute
- // How long claims have to become dynamically provisioned
- ClaimProvisionTimeout = 5 * time.Minute
- // Shorter version of ClaimProvisionTimeout. Use it case by case when we are
- // sure this timeout is enough.
- ClaimProvisionShortTimeout = 1 * time.Minute
- // How long claims have to become bound
- ClaimBindingTimeout = 3 * time.Minute
- // How long claims have to become deleted
- ClaimDeletingTimeout = 3 * time.Minute
- // How long PVs have to become reclaimed
- PVReclaimingTimeout = 3 * time.Minute
- // How long PVs have to become bound
- PVBindingTimeout = 3 * time.Minute
- // How long PVs have to become deleted
- PVDeletingTimeout = 3 * time.Minute
- // How long a node is allowed to become "Ready" after it is restarted before
- // the test is considered failed.
- RestartNodeReadyAgainTimeout = 5 * time.Minute
- // How long a pod is allowed to become "running" and "ready" after a node
- // restart before test is considered failed.
- RestartPodReadyAgainTimeout = 5 * time.Minute
- // Number of objects that gc can delete in a second.
- // GC issues 2 requests for a single delete.
- gcThroughput = 10
- // Minimal number of nodes for the cluster to be considered large.
- largeClusterThreshold = 100
- // TODO(justinsb): Avoid hardcoding this.
- awsMasterIP = "172.20.0.9"
- // ssh port
- sshPort = "22"
- // ImagePrePullingTimeout is the time we wait for the e2e-image-puller
- // static pods to pull the list of seeded images. If they don't pull
- // images within this time we simply log their output and carry on
- // with the tests.
- ImagePrePullingTimeout = 5 * time.Minute
- )
- var (
- BusyBoxImage = "busybox"
- // Label allocated to the image puller static pod that runs on each node
- // before e2es.
- ImagePullerLabels = map[string]string{"name": "e2e-image-puller"}
- // For parsing Kubectl version for version-skewed testing.
- gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"")
- // Slice of regexps for names of pods that have to be running to consider a Node "healthy"
- requiredPerNodePods = []*regexp.Regexp{
- regexp.MustCompile(".*kube-proxy.*"),
- regexp.MustCompile(".*fluentd-elasticsearch.*"),
- regexp.MustCompile(".*node-problem-detector.*"),
- }
- // Serve hostname image name
- ServeHostnameImage = imageutils.GetE2EImage(imageutils.ServeHostname)
- )
- type Address struct {
- internalIP string
- externalIP string
- hostname string
- }
- // GetServerArchitecture fetches the architecture of the cluster's apiserver.
- func GetServerArchitecture(c clientset.Interface) string {
- arch := ""
- sVer, err := c.Discovery().ServerVersion()
- if err != nil || sVer.Platform == "" {
- // If we failed to get the server version for some reason, default to amd64.
- arch = "amd64"
- } else {
- // Split the platform string into OS and Arch separately.
- // The platform string may for example be "linux/amd64", "linux/arm" or "windows/amd64".
- osArchArray := strings.Split(sVer.Platform, "/")
- arch = osArchArray[1]
- }
- return arch
- }
- // GetPauseImageName fetches the pause image name for the same architecture as the apiserver.
- func GetPauseImageName(c clientset.Interface) string {
- return imageutils.GetE2EImageWithArch(imageutils.Pause, GetServerArchitecture(c))
- }
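- // GetServicesProxyRequest rewrites the given request to target the
- // "services" resource's "proxy" subresource of the apiserver.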
- func GetServicesProxyRequest(c clientset.Interface, request *restclient.Request) (*restclient.Request, error) {
- return request.Resource("services").SubResource("proxy"), nil
- }
- // RunId is a unique identifier of the e2e run.
- var RunId = uuid.NewUUID()
- type CreateTestingNSFn func(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error)
- type ContainerFailures struct {
- status *v1.ContainerStateTerminated
- Restarts int
- }
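- // GetMasterHost parses TestContext.Host and returns its host portion.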
- func GetMasterHost() string {
- masterUrl, err := url.Parse(TestContext.Host)
- ExpectNoError(err)
- return masterUrl.Host
- }
- func nowStamp() string {
- return time.Now().Format(time.StampMilli)
- }
- func log(level string, format string, args ...interface{}) {
- fmt.Fprintf(GinkgoWriter, nowStamp()+": "+level+": "+format+"\n", args...)
- }
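- // Logf logs an INFO-level message, formatted per fmt.Sprintf, to the Ginkgo writer.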
- func Logf(format string, args ...interface{}) {
- log("INFO", format, args...)
- }
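- // Failf logs the formatted failure message and fails the current test.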
- func Failf(format string, args ...interface{}) {
- FailfWithOffset(1, format, args...)
- }
- // FailfWithOffset calls "Fail" and logs the error at "offset" levels above its caller
- // (for example, for call chain f -> g -> FailfWithOffset(1, ...) error would be logged for "f").
- func FailfWithOffset(offset int, format string, args ...interface{}) {
- msg := fmt.Sprintf(format, args...)
- log("INFO", msg)
- ginkgowrapper.Fail(nowStamp()+": "+msg, 1+offset)
- }
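- // Skipf logs the formatted message and skips the current test.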
- func Skipf(format string, args ...interface{}) {
- msg := fmt.Sprintf(format, args...)
- log("INFO", msg)
- ginkgowrapper.Skip(nowStamp() + ": " + msg)
- }
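- // SkipUnlessNodeCountIsAtLeast skips the test unless the cluster has at least minNodeCount nodes.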
- func SkipUnlessNodeCountIsAtLeast(minNodeCount int) {
- if TestContext.CloudConfig.NumNodes < minNodeCount {
- Skipf("Requires at least %d nodes (not %d)", minNodeCount, TestContext.CloudConfig.NumNodes)
- }
- }
- func SkipUnlessNodeCountIsAtMost(maxNodeCount int) {
- if TestContext.CloudConfig.NumNodes > maxNodeCount {
- Skipf("Requires at most %d nodes (not %d)", maxNodeCount, TestContext.CloudConfig.NumNodes)
- }
- }
- func SkipUnlessAtLeast(value int, minValue int, message string) {
- if value < minValue {
- Skipf(message)
- }
- }
- func SkipIfProviderIs(unsupportedProviders ...string) {
- if ProviderIs(unsupportedProviders...) {
- Skipf("Not supported for providers %v (found %s)", unsupportedProviders, TestContext.Provider)
- }
- }
- func SkipUnlessLocalEphemeralStorageEnabled() {
- if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
- Skipf("Only supported when %v feature is enabled", features.LocalStorageCapacityIsolation)
- }
- }
- func SkipUnlessSSHKeyPresent() {
- if _, err := GetSigner(TestContext.Provider); err != nil {
- Skipf("No SSH Key for provider %s: '%v'", TestContext.Provider, err)
- }
- }
- func SkipUnlessProviderIs(supportedProviders ...string) {
- if !ProviderIs(supportedProviders...) {
- Skipf("Only supported for providers %v (not %s)", supportedProviders, TestContext.Provider)
- }
- }
- func SkipUnlessMultizone(c clientset.Interface) {
- zones, err := GetClusterZones(c)
- if err != nil {
- Skipf("Error listing cluster zones")
- }
- if zones.Len() <= 1 {
- Skipf("Requires more than one zone")
- }
- }
- func SkipIfMultizone(c clientset.Interface) {
- zones, err := GetClusterZones(c)
- if err != nil {
- Skipf("Error listing cluster zones")
- }
- if zones.Len() > 1 {
- Skipf("Requires more than one zone")
- }
- }
- func SkipUnlessClusterMonitoringModeIs(supportedMonitoring ...string) {
- if !ClusterMonitoringModeIs(supportedMonitoring...) {
- Skipf("Only next monitoring modes are supported %v (not %s)", supportedMonitoring, TestContext.ClusterMonitoringMode)
- }
- }
- func SkipUnlessPrometheusMonitoringIsEnabled(supportedMonitoring ...string) {
- if !TestContext.EnablePrometheusMonitoring {
- Skipf("Skipped because prometheus monitoring is not enabled")
- }
- }
- func SkipUnlessMasterOSDistroIs(supportedMasterOsDistros ...string) {
- if !MasterOSDistroIs(supportedMasterOsDistros...) {
- Skipf("Only supported for master OS distro %v (not %s)", supportedMasterOsDistros, TestContext.MasterOSDistro)
- }
- }
- func SkipUnlessNodeOSDistroIs(supportedNodeOsDistros ...string) {
- if !NodeOSDistroIs(supportedNodeOsDistros...) {
- Skipf("Only supported for node OS distro %v (not %s)", supportedNodeOsDistros, TestContext.NodeOSDistro)
- }
- }
- func SkipUnlessSecretExistsAfterWait(c clientset.Interface, name, namespace string, timeout time.Duration) {
- Logf("Waiting for secret %v in namespace %v to exist in duration %v", name, namespace, timeout)
- start := time.Now()
- if wait.PollImmediate(15*time.Second, timeout, func() (bool, error) {
- _, err := c.CoreV1().Secrets(namespace).Get(name, metav1.GetOptions{})
- if err != nil {
- Logf("Secret %v in namespace %v still does not exist after duration %v", name, namespace, time.Since(start))
- return false, nil
- }
- return true, nil
- }) != nil {
- Skipf("Secret %v in namespace %v did not exist after timeout of %v", name, namespace, timeout)
- }
- Logf("Secret %v in namespace %v found after duration %v", name, namespace, time.Since(start))
- }
- func SkipIfContainerRuntimeIs(runtimes ...string) {
- for _, runtime := range runtimes {
- if runtime == TestContext.ContainerRuntime {
- Skipf("Not supported under container runtime %s", runtime)
- }
- }
- }
- func RunIfContainerRuntimeIs(runtimes ...string) {
- for _, runtime := range runtimes {
- if runtime == TestContext.ContainerRuntime {
- return
- }
- }
- Skipf("Skipped because container runtime %q is not in %s", TestContext.ContainerRuntime, runtimes)
- }
- func RunIfSystemSpecNameIs(names ...string) {
- for _, name := range names {
- if name == TestContext.SystemSpecName {
- return
- }
- }
- Skipf("Skipped because system spec name %q is not in %v", TestContext.SystemSpecName, names)
- }
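- // ProviderIs returns true if TestContext.Provider matches any of the given
- // provider names, compared case-insensitively, e.g. ProviderIs("gce", "gke").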
- func ProviderIs(providers ...string) bool {
- for _, provider := range providers {
- if strings.ToLower(provider) == strings.ToLower(TestContext.Provider) {
- return true
- }
- }
- return false
- }
- func ClusterMonitoringModeIs(monitoringModes ...string) bool {
- for _, mode := range monitoringModes {
- if strings.ToLower(mode) == strings.ToLower(TestContext.ClusterMonitoringMode) {
- return true
- }
- }
- return false
- }
- func MasterOSDistroIs(supportedMasterOsDistros ...string) bool {
- for _, distro := range supportedMasterOsDistros {
- if strings.ToLower(distro) == strings.ToLower(TestContext.MasterOSDistro) {
- return true
- }
- }
- return false
- }
- func NodeOSDistroIs(supportedNodeOsDistros ...string) bool {
- for _, distro := range supportedNodeOsDistros {
- if strings.ToLower(distro) == strings.ToLower(TestContext.NodeOSDistro) {
- return true
- }
- }
- return false
- }
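- // ProxyMode curls http://localhost:10249/proxyMode from a temporary
- // host-network pod and returns the proxy mode reported by kube-proxy.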
- func ProxyMode(f *Framework) (string, error) {
- pod := &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "kube-proxy-mode-detector",
- Namespace: f.Namespace.Name,
- },
- Spec: v1.PodSpec{
- HostNetwork: true,
- Containers: []v1.Container{
- {
- Name: "detector",
- Image: imageutils.GetE2EImage(imageutils.Net),
- Command: []string{"/bin/sleep", "3600"},
- },
- },
- },
- }
- f.PodClient().CreateSync(pod)
- defer f.PodClient().DeleteSync(pod.Name, &metav1.DeleteOptions{}, DefaultPodDeletionTimeout)
- cmd := "curl -q -s --connect-timeout 1 http://localhost:10249/proxyMode"
- stdout, err := RunHostCmd(pod.Namespace, pod.Name, cmd)
- if err != nil {
- return "", err
- }
- Logf("ProxyMode: %s", stdout)
- return stdout, nil
- }
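- // SkipUnlessServerVersionGTE skips the test unless the apiserver's version is
- // at least v; it fails the test if the version cannot be determined.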
- func SkipUnlessServerVersionGTE(v *utilversion.Version, c discovery.ServerVersionInterface) {
- gte, err := ServerVersionGTE(v, c)
- if err != nil {
- Failf("Failed to get server version: %v", err)
- }
- if !gte {
- Skipf("Not supported for server versions before %q", v)
- }
- }
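- // SkipIfMissingResource skips the test if listing the given
- // GroupVersionResource in the namespace fails with a not-found,
- // method-not-supported, or forbidden error; any other error fails the test.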
- func SkipIfMissingResource(dynamicClient dynamic.Interface, gvr schema.GroupVersionResource, namespace string) {
- resourceClient := dynamicClient.Resource(gvr).Namespace(namespace)
- _, err := resourceClient.List(metav1.ListOptions{})
- if err != nil {
- // not all resources support list, so we ignore those
- if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
- Skipf("Could not find %s resource, skipping test: %#v", gvr, err)
- }
- Failf("Unexpected error getting %v: %v", gvr, err)
- }
- }
- // ProvidersWithSSH are those providers where each node is accessible with SSH
- var ProvidersWithSSH = []string{"gce", "gke", "aws", "local"}
- type podCondition func(pod *v1.Pod) (bool, error)
- // logPodStates logs basic info of provided pods for debugging.
- func logPodStates(pods []v1.Pod) {
- // Find maximum widths for pod, node, and phase strings for column printing.
- maxPodW, maxNodeW, maxPhaseW, maxGraceW := len("POD"), len("NODE"), len("PHASE"), len("GRACE")
- for i := range pods {
- pod := &pods[i]
- if len(pod.ObjectMeta.Name) > maxPodW {
- maxPodW = len(pod.ObjectMeta.Name)
- }
- if len(pod.Spec.NodeName) > maxNodeW {
- maxNodeW = len(pod.Spec.NodeName)
- }
- if len(pod.Status.Phase) > maxPhaseW {
- maxPhaseW = len(pod.Status.Phase)
- }
- }
- // Increase widths by one to separate by a single space.
- maxPodW++
- maxNodeW++
- maxPhaseW++
- maxGraceW++
- // Log pod info. * does space padding, - makes them left-aligned.
- Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
- maxPodW, "POD", maxNodeW, "NODE", maxPhaseW, "PHASE", maxGraceW, "GRACE", "CONDITIONS")
- for _, pod := range pods {
- grace := ""
- if pod.DeletionGracePeriodSeconds != nil {
- grace = fmt.Sprintf("%ds", *pod.DeletionGracePeriodSeconds)
- }
- Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
- maxPodW, pod.ObjectMeta.Name, maxNodeW, pod.Spec.NodeName, maxPhaseW, pod.Status.Phase, maxGraceW, grace, pod.Status.Conditions)
- }
- Logf("") // Final empty line helps for readability.
- }
- // errorBadPodsStates creates an error message with basic info about bad pods for debugging.
- func errorBadPodsStates(badPods []v1.Pod, desiredPods int, ns, desiredState string, timeout time.Duration) string {
- errStr := fmt.Sprintf("%d / %d pods in namespace %q are NOT in %s state in %v\n", len(badPods), desiredPods, ns, desiredState, timeout)
- // Print bad pods info only if there are at most 10 bad pods
- if len(badPods) > 10 {
- return errStr + "There are too many bad pods. Please check log for details."
- }
- buf := bytes.NewBuffer(nil)
- w := tabwriter.NewWriter(buf, 0, 0, 1, ' ', 0)
- fmt.Fprintln(w, "POD\tNODE\tPHASE\tGRACE\tCONDITIONS")
- for _, badPod := range badPods {
- grace := ""
- if badPod.DeletionGracePeriodSeconds != nil {
- grace = fmt.Sprintf("%ds", *badPod.DeletionGracePeriodSeconds)
- }
- podInfo := fmt.Sprintf("%s\t%s\t%s\t%s\t%+v",
- badPod.ObjectMeta.Name, badPod.Spec.NodeName, badPod.Status.Phase, grace, badPod.Status.Conditions)
- fmt.Fprintln(w, podInfo)
- }
- w.Flush()
- return errStr + buf.String()
- }
- // WaitForPodsSuccess waits until all pods matching the given selector reach
- // the Succeeded state. The caller is expected to only invoke this method once
- // the pods have been created.
- func WaitForPodsSuccess(c clientset.Interface, ns string, successPodLabels map[string]string, timeout time.Duration) error {
- successPodSelector := labels.SelectorFromSet(successPodLabels)
- start, badPods, desiredPods := time.Now(), []v1.Pod{}, 0
- if wait.PollImmediate(30*time.Second, timeout, func() (bool, error) {
- podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: successPodSelector.String()})
- if err != nil {
- Logf("Error getting pods in namespace %q: %v", ns, err)
- if testutils.IsRetryableAPIError(err) {
- return false, nil
- }
- return false, err
- }
- if len(podList.Items) == 0 {
- Logf("Waiting for pods to enter Success, but no pods in %q match label %v", ns, successPodLabels)
- return true, nil
- }
- badPods = []v1.Pod{}
- desiredPods = len(podList.Items)
- for _, pod := range podList.Items {
- if pod.Status.Phase != v1.PodSucceeded {
- badPods = append(badPods, pod)
- }
- }
- successPods := len(podList.Items) - len(badPods)
- Logf("%d / %d pods in namespace %q are in Success state (%d seconds elapsed)",
- successPods, len(podList.Items), ns, int(time.Since(start).Seconds()))
- if len(badPods) == 0 {
- return true, nil
- }
- return false, nil
- }) != nil {
- logPodStates(badPods)
- LogPodsWithLabels(c, ns, successPodLabels, Logf)
- return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "SUCCESS", timeout))
- }
- return nil
- }
- // WaitForPodsRunningReady waits up to timeout to ensure that all pods in
- // namespace ns are either running and ready, or failed but controlled by a
- // controller. Also, it ensures that at least minPods are running and
- // ready. It has separate behavior from other 'wait for' pods functions in
- // that it requests the list of pods on every iteration. This is useful, for
- // example, in cluster startup, because the number of pods increases while
- // waiting. Pods in the Succeeded state are not counted.
- //
- // If ignoreLabels is not empty, pods matching this selector are ignored.
- func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
- ignoreSelector := labels.SelectorFromSet(ignoreLabels)
- start := time.Now()
- Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
- timeout, minPods, ns)
- var ignoreNotReady bool
- badPods := []v1.Pod{}
- desiredPods := 0
- notReady := int32(0)
- if wait.PollImmediate(Poll, timeout, func() (bool, error) {
- // We get the new list of pods, replication controllers, and
- // replica sets in every iteration because more pods come
- // online during startup and we want to ensure they are also
- // checked.
- replicas, replicaOk := int32(0), int32(0)
- rcList, err := c.CoreV1().ReplicationControllers(ns).List(metav1.ListOptions{})
- if err != nil {
- Logf("Error getting replication controllers in namespace '%s': %v", ns, err)
- if testutils.IsRetryableAPIError(err) {
- return false, nil
- }
- return false, err
- }
- for _, rc := range rcList.Items {
- replicas += *rc.Spec.Replicas
- replicaOk += rc.Status.ReadyReplicas
- }
- rsList, err := c.ExtensionsV1beta1().ReplicaSets(ns).List(metav1.ListOptions{})
- if err != nil {
- Logf("Error getting replication sets in namespace %q: %v", ns, err)
- if testutils.IsRetryableAPIError(err) {
- return false, nil
- }
- return false, err
- }
- for _, rs := range rsList.Items {
- replicas += *rs.Spec.Replicas
- replicaOk += rs.Status.ReadyReplicas
- }
- podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
- if err != nil {
- Logf("Error getting pods in namespace '%s': %v", ns, err)
- if testutils.IsRetryableAPIError(err) {
- return false, nil
- }
- return false, err
- }
- nOk := int32(0)
- notReady = int32(0)
- badPods = []v1.Pod{}
- desiredPods = len(podList.Items)
- for _, pod := range podList.Items {
- if len(ignoreLabels) != 0 && ignoreSelector.Matches(labels.Set(pod.Labels)) {
- continue
- }
- res, err := testutils.PodRunningReady(&pod)
- switch {
- case res && err == nil:
- nOk++
- case pod.Status.Phase == v1.PodSucceeded:
- Logf("The status of Pod %s is Succeeded, skipping waiting", pod.ObjectMeta.Name)
- // it doesn't make sense to wait for this pod
- continue
- case pod.Status.Phase != v1.PodFailed:
- Logf("The status of Pod %s is %s (Ready = false), waiting for it to be either Running (with Ready = true) or Failed", pod.ObjectMeta.Name, pod.Status.Phase)
- notReady++
- badPods = append(badPods, pod)
- default:
- if metav1.GetControllerOf(&pod) == nil {
- Logf("Pod %s is Failed, but it's not controlled by a controller", pod.ObjectMeta.Name)
- badPods = append(badPods, pod)
- }
- // ignore failed pods that are controlled by some controller
- }
- }
- Logf("%d / %d pods in namespace '%s' are running and ready (%d seconds elapsed)",
- nOk, len(podList.Items), ns, int(time.Since(start).Seconds()))
- Logf("expected %d pod replicas in namespace '%s', %d are Running and Ready.", replicas, ns, replicaOk)
- if replicaOk == replicas && nOk >= minPods && len(badPods) == 0 {
- return true, nil
- }
- ignoreNotReady = (notReady <= allowedNotReadyPods)
- logPodStates(badPods)
- return false, nil
- }) != nil {
- if !ignoreNotReady {
- return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "RUNNING and READY", timeout))
- }
- Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
- }
- return nil
- }
- func kubectlLogPod(c clientset.Interface, pod v1.Pod, containerNameSubstr string, logFunc func(format string, args ...interface{})) {
- for _, container := range pod.Spec.Containers {
- if strings.Contains(container.Name, containerNameSubstr) {
- // Contains() matches all strings if substr is empty
- logs, err := GetPodLogs(c, pod.Namespace, pod.Name, container.Name)
- if err != nil {
- logs, err = getPreviousPodLogs(c, pod.Namespace, pod.Name, container.Name)
- if err != nil {
- logFunc("Failed to get logs of pod %v, container %v, err: %v", pod.Name, container.Name, err)
- }
- }
- logFunc("Logs of %v/%v:%v on node %v", pod.Namespace, pod.Name, container.Name, pod.Spec.NodeName)
- logFunc("%s : STARTLOG\n%s\nENDLOG for container %v:%v:%v", containerNameSubstr, logs, pod.Namespace, pod.Name, container.Name)
- }
- }
- }
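- // LogFailedContainers lists all pods in the namespace and dumps the logs of
- // containers belonging to pods that are not running and ready.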
- func LogFailedContainers(c clientset.Interface, ns string, logFunc func(format string, args ...interface{})) {
- podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
- if err != nil {
- logFunc("Error getting pods in namespace '%s': %v", ns, err)
- return
- }
- logFunc("Running kubectl logs on non-ready containers in %v", ns)
- for _, pod := range podList.Items {
- if res, err := testutils.PodRunningReady(&pod); !res || err != nil {
- kubectlLogPod(c, pod, "", Logf)
- }
- }
- }
- func LogPodsWithLabels(c clientset.Interface, ns string, match map[string]string, logFunc func(format string, args ...interface{})) {
- podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: labels.SelectorFromSet(match).String()})
- if err != nil {
- logFunc("Error getting pods in namespace %q: %v", ns, err)
- return
- }
- logFunc("Running kubectl logs on pods with labels %v in %v", match, ns)
- for _, pod := range podList.Items {
- kubectlLogPod(c, pod, "", logFunc)
- }
- }
- func LogContainersInPodsWithLabels(c clientset.Interface, ns string, match map[string]string, containerSubstr string, logFunc func(format string, args ...interface{})) {
- podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: labels.SelectorFromSet(match).String()})
- if err != nil {
- Logf("Error getting pods in namespace %q: %v", ns, err)
- return
- }
- for _, pod := range podList.Items {
- kubectlLogPod(c, pod, containerSubstr, logFunc)
- }
- }
- // DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
- // Filter is by simple strings.Contains; first skip filter, then delete filter.
- // Returns the list of deleted namespaces or an error.
- func DeleteNamespaces(c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) {
- By("Deleting namespaces")
- nsList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
- Expect(err).NotTo(HaveOccurred())
- var deleted []string
- var wg sync.WaitGroup
- OUTER:
- for _, item := range nsList.Items {
- if skipFilter != nil {
- for _, pattern := range skipFilter {
- if strings.Contains(item.Name, pattern) {
- continue OUTER
- }
- }
- }
- if deleteFilter != nil {
- var shouldDelete bool
- for _, pattern := range deleteFilter {
- if strings.Contains(item.Name, pattern) {
- shouldDelete = true
- break
- }
- }
- if !shouldDelete {
- continue OUTER
- }
- }
- wg.Add(1)
- deleted = append(deleted, item.Name)
- go func(nsName string) {
- defer wg.Done()
- defer GinkgoRecover()
- Expect(c.CoreV1().Namespaces().Delete(nsName, nil)).To(Succeed())
- Logf("namespace : %v api call to delete is complete ", nsName)
- }(item.Name)
- }
- wg.Wait()
- return deleted, nil
- }
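- // WaitForNamespacesDeleted polls until none of the given namespaces exist any
- // longer, or the timeout expires.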
- func WaitForNamespacesDeleted(c clientset.Interface, namespaces []string, timeout time.Duration) error {
- By("Waiting for namespaces to vanish")
- nsMap := map[string]bool{}
- for _, ns := range namespaces {
- nsMap[ns] = true
- }
- // Now poll until all namespaces have been eradicated.
- return wait.Poll(2*time.Second, timeout,
- func() (bool, error) {
- nsList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
- if err != nil {
- return false, err
- }
- for _, item := range nsList.Items {
- if _, ok := nsMap[item.Name]; ok {
- return false, nil
- }
- }
- return true, nil
- })
- }
- func waitForServiceAccountInNamespace(c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error {
- w, err := c.CoreV1().ServiceAccounts(ns).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: serviceAccountName}))
- if err != nil {
- return err
- }
- _, err = watch.Until(timeout, w, conditions.ServiceAccountHasSecrets)
- return err
- }
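- // WaitForPodCondition polls the named pod every Poll interval until the
- // supplied condition reports done, or the timeout expires. An illustrative
- // call (the condition closure here is just an example) waits for a pod to
- // report the Running phase:
- //
- //	err := WaitForPodCondition(c, ns, podName, "running", PodStartTimeout,
- //		func(pod *v1.Pod) (bool, error) {
- //			return pod.Status.Phase == v1.PodRunning, nil
- //		})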
- func WaitForPodCondition(c clientset.Interface, ns, podName, desc string, timeout time.Duration, condition podCondition) error {
- Logf("Waiting up to %v for pod %q in namespace %q to be %q", timeout, podName, ns, desc)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- pod, err := c.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
- if err != nil {
- if apierrs.IsNotFound(err) {
- Logf("Pod %q in namespace %q not found. Error: %v", podName, ns, err)
- return err
- }
- Logf("Get pod %q in namespace %q failed, ignoring for %v. Error: %v", podName, ns, Poll, err)
- continue
- }
- // log now so that current pod info is reported before calling `condition()`
- Logf("Pod %q: Phase=%q, Reason=%q, readiness=%t. Elapsed: %v",
- podName, pod.Status.Phase, pod.Status.Reason, podutil.IsPodReady(pod), time.Since(start))
- if done, err := condition(pod); done {
- if err == nil {
- Logf("Pod %q satisfied condition %q", podName, desc)
- }
- return err
- }
- }
- return fmt.Errorf("Gave up after waiting %v for pod %q to be %q", timeout, podName, desc)
- }
- // WaitForMatchPodsCondition finds pods matching the input ListOptions, then
- // waits and checks whether all matching pods satisfy the given podCondition.
- func WaitForMatchPodsCondition(c clientset.Interface, opts metav1.ListOptions, desc string, timeout time.Duration, condition podCondition) error {
- Logf("Waiting up to %v for matching pods' status to be %s", timeout, desc)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- pods, err := c.CoreV1().Pods(metav1.NamespaceAll).List(opts)
- if err != nil {
- return err
- }
- conditionNotMatch := []string{}
- for _, pod := range pods.Items {
- done, err := condition(&pod)
- if done && err != nil {
- return fmt.Errorf("Unexpected error: %v", err)
- }
- if !done {
- conditionNotMatch = append(conditionNotMatch, format.Pod(&pod))
- }
- }
- if len(conditionNotMatch) <= 0 {
- return err
- }
- Logf("%d pods are not %s: %v", len(conditionNotMatch), desc, conditionNotMatch)
- }
- return fmt.Errorf("gave up waiting for matching pods to be '%s' after %v", desc, timeout)
- }
- // WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned.
- // The default service account is the one associated with pods that do not specify a service account;
- // as a result, pods cannot be provisioned in a namespace until that service account is provisioned.
- func WaitForDefaultServiceAccountInNamespace(c clientset.Interface, namespace string) error {
- return waitForServiceAccountInNamespace(c, namespace, "default", ServiceAccountProvisionTimeout)
- }
- // WaitForPersistentVolumePhase waits for a PersistentVolume to be in a specific phase or until timeout occurs, whichever comes first.
- func WaitForPersistentVolumePhase(phase v1.PersistentVolumePhase, c clientset.Interface, pvName string, Poll, timeout time.Duration) error {
- Logf("Waiting up to %v for PersistentVolume %s to have phase %s", timeout, pvName, phase)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
- if err != nil {
- Logf("Get persistent volume %s in failed, ignoring for %v: %v", pvName, Poll, err)
- continue
- } else {
- if pv.Status.Phase == phase {
- Logf("PersistentVolume %s found and phase=%s (%v)", pvName, phase, time.Since(start))
- return nil
- } else {
- Logf("PersistentVolume %s found but phase is %s instead of %s.", pvName, pv.Status.Phase, phase)
- }
- }
- }
- return fmt.Errorf("PersistentVolume %s not in phase %s within %v", pvName, phase, timeout)
- }
- // WaitForStatefulSetReplicasReady waits for all replicas of a StatefulSet to become ready or until timeout occurs, whichever comes first.
- func WaitForStatefulSetReplicasReady(statefulSetName, ns string, c clientset.Interface, Poll, timeout time.Duration) error {
- Logf("Waiting up to %v for StatefulSet %s to have all replicas ready", timeout, statefulSetName)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- sts, err := c.AppsV1().StatefulSets(ns).Get(statefulSetName, metav1.GetOptions{})
- if err != nil {
- Logf("Get StatefulSet %s failed, ignoring for %v: %v", statefulSetName, Poll, err)
- continue
- } else {
- if sts.Status.ReadyReplicas == *sts.Spec.Replicas {
- Logf("All %d replicas of StatefulSet %s are ready. (%v)", sts.Status.ReadyReplicas, statefulSetName, time.Since(start))
- return nil
- } else {
- Logf("StatefulSet %s found but there are %d ready replicas and %d total replicas.", statefulSetName, sts.Status.ReadyReplicas, *sts.Spec.Replicas)
- }
- }
- }
- return fmt.Errorf("StatefulSet %s still has unready pods within %v", statefulSetName, timeout)
- }
- // WaitForPersistentVolumeDeleted waits for a PersistentVolume to get deleted or until timeout occurs, whichever comes first.
- func WaitForPersistentVolumeDeleted(c clientset.Interface, pvName string, Poll, timeout time.Duration) error {
- Logf("Waiting up to %v for PersistentVolume %s to get deleted", timeout, pvName)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
- if err == nil {
- Logf("PersistentVolume %s found and phase=%s (%v)", pvName, pv.Status.Phase, time.Since(start))
- continue
- } else {
- if apierrs.IsNotFound(err) {
- Logf("PersistentVolume %s was removed", pvName)
- return nil
- } else {
- Logf("Get persistent volume %s in failed, ignoring for %v: %v", pvName, Poll, err)
- }
- }
- }
- return fmt.Errorf("PersistentVolume %s still exists within %v", pvName, timeout)
- }
- // WaitForPersistentVolumeClaimPhase waits for a PersistentVolumeClaim to be in a specific phase or until timeout occurs, whichever comes first.
- func WaitForPersistentVolumeClaimPhase(phase v1.PersistentVolumeClaimPhase, c clientset.Interface, ns string, pvcName string, Poll, timeout time.Duration) error {
- Logf("Waiting up to %v for PersistentVolumeClaim %s to have phase %s", timeout, pvcName, phase)
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(Poll) {
- pvc, err := c.CoreV1().PersistentVolumeClaims(ns).Get(pvcName, metav1.GetOptions{})
- if err != nil {
- Logf("Failed to get claim %q, retrying in %v. Error: %v", pvcName, Poll, err)
- continue
- } else {
- if pvc.Status.Phase == phase {
- Logf("PersistentVolumeClaim %s found and phase=%s (%v)", pvcName, phase, time.Since(start))
- return nil
- } else {
- Logf("PersistentVolumeClaim %s found but phase is %s instead of %s.", pvcName, pvc.Status.Phase, phase)
- }
- }
- }
- return fmt.Errorf("PersistentVolumeClaim %s not in phase %s within %v", pvcName, phase, timeout)
- }
- // CreateTestingNS should be used by every test; note that we prepend a common
- // prefix ("e2e-tests-") to the provided base name.
- // Please see NewFramework instead of using this directly.
- func CreateTestingNS(baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) {
- if labels == nil {
- labels = map[string]string{}
- }
- labels["e2e-run"] = string(RunId)
- namespaceObj := &v1.Namespace{
- ObjectMeta: metav1.ObjectMeta{
- GenerateName: fmt.Sprintf("e2e-tests-%v-", baseName),
- Namespace: "",
- Labels: labels,
- },
- Status: v1.NamespaceStatus{},
- }
- // Be robust about making the namespace creation call.
- var got *v1.Namespace
- if err := wait.PollImmediate(Poll, 30*time.Second, func() (bool, error) {
- var err error
- got, err = c.CoreV1().Namespaces().Create(namespaceObj)
- if err != nil {
- Logf("Unexpected error while creating namespace: %v", err)
- return false, nil
- }
- return true, nil
- }); err != nil {
- return nil, err
- }
- if TestContext.VerifyServiceAccount {
- if err := WaitForDefaultServiceAccountInNamespace(c, got.Name); err != nil {
- // Even if we fail to create the service account in the namespace,
- // we have successfully created the namespace.
- // So, return the created namespace.
- return got, err
- }
- }
- return got, nil
- }
- // CheckTestingNSDeletedExcept checks whether all existing e2e namespaces are in the Terminating state
- // and waits until they are finally deleted. It ignores the namespace named by skip.
- func CheckTestingNSDeletedExcept(c clientset.Interface, skip string) error {
- // TODO: Since we don't have support for bulk resource deletion in the API,
- // while deleting a namespace we are deleting all objects from that namespace
- // one by one (one deletion == one API call). This basically exposes us to
- // throttling - currently controller-manager has a limit of max 20 QPS.
- // Once #10217 is implemented and used in namespace-controller, deleting all
- // object from a given namespace should be much faster and we will be able
- // to lower this timeout.
- // However, now Density test is producing ~26000 events and Load capacity test
- // is producing ~35000 events, thus assuming there are no other requests it will
- // take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
- // minutes to avoid any timeouts here.
- timeout := 60 * time.Minute
- Logf("Waiting for terminating namespaces to be deleted...")
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
- namespaces, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
- if err != nil {
- Logf("Listing namespaces failed: %v", err)
- continue
- }
- terminating := 0
- for _, ns := range namespaces.Items {
- if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
- if ns.Status.Phase == v1.NamespaceActive {
- return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
- }
- terminating++
- }
- }
- if terminating == 0 {
- return nil
- }
- }
- return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
- }
- // deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
- // whether there are any pods remaining in a non-terminating state.
- func deleteNS(c clientset.Interface, dynamicClient dynamic.Interface, namespace string, timeout time.Duration) error {
- startTime := time.Now()
- if err := c.CoreV1().Namespaces().Delete(namespace, nil); err != nil {
- return err
- }
- // wait for namespace to delete or timeout.
- err := wait.PollImmediate(2*time.Second, timeout, func() (bool, error) {
- if _, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{}); err != nil {
- if apierrs.IsNotFound(err) {
- return true, nil
- }
- Logf("Error while waiting for namespace to be terminated: %v", err)
- return false, nil
- }
- return false, nil
- })
- // verify there is no more remaining content in the namespace
- remainingContent, cerr := hasRemainingContent(c, dynamicClient, namespace)
- if cerr != nil {
- return cerr
- }
- // if content remains, let's dump information about the namespace, and system for flake debugging.
- remainingPods := 0
- missingTimestamp := 0
- if remainingContent {
- // log information about namespace, and set of namespaces in api server to help flake detection
- logNamespace(c, namespace)
- logNamespaces(c, namespace)
- // if we can, check if there were pods remaining with no timestamp.
- remainingPods, missingTimestamp, _ = countRemainingPods(c, namespace)
- }
- // a timeout waiting for namespace deletion happened!
- if err != nil {
- // some content remains in the namespace
- if remainingContent {
- // pods remain
- if remainingPods > 0 {
- if missingTimestamp != 0 {
- // pods remained, but were not undergoing deletion (namespace controller is probably culprit)
- return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v, pods missing deletion timestamp: %v", namespace, err, remainingPods, missingTimestamp)
- }
- // but they were all undergoing deletion (kubelet is probably culprit, check NodeLost)
- return fmt.Errorf("namespace %v was not deleted with limit: %v, pods remaining: %v", namespace, err, remainingPods)
- }
- // other content remains (namespace controller is probably screwed up)
- return fmt.Errorf("namespace %v was not deleted with limit: %v, namespaced content other than pods remain", namespace, err)
- }
- // no remaining content, but namespace was not deleted (namespace controller is probably wedged)
- return fmt.Errorf("namespace %v was not deleted with limit: %v, namespace is empty but is not yet removed", namespace, err)
- }
- Logf("namespace %v deletion completed in %s", namespace, time.Since(startTime))
- return nil
- }
- // logNamespaces logs the number of namespaces by phase
- // namespace is the namespace the test was operating against that failed to delete so it can be grepped in logs
- func logNamespaces(c clientset.Interface, namespace string) {
- namespaceList, err := c.CoreV1().Namespaces().List(metav1.ListOptions{})
- if err != nil {
- Logf("namespace: %v, unable to list namespaces: %v", namespace, err)
- return
- }
- numActive := 0
- numTerminating := 0
- for _, namespace := range namespaceList.Items {
- if namespace.Status.Phase == v1.NamespaceActive {
- numActive++
- } else {
- numTerminating++
- }
- }
- Logf("namespace: %v, total namespaces: %v, active: %v, terminating: %v", namespace, len(namespaceList.Items), numActive, numTerminating)
- }
- // logNamespace logs detail about a namespace
- func logNamespace(c clientset.Interface, namespace string) {
- ns, err := c.CoreV1().Namespaces().Get(namespace, metav1.GetOptions{})
- if err != nil {
- if apierrs.IsNotFound(err) {
- Logf("namespace: %v no longer exists", namespace)
- return
- }
- Logf("namespace: %v, unable to get namespace due to error: %v", namespace, err)
- return
- }
- Logf("namespace: %v, DeletionTimetamp: %v, Finalizers: %v, Phase: %v", ns.Name, ns.DeletionTimestamp, ns.Spec.Finalizers, ns.Status.Phase)
- }
- // countRemainingPods queries the server to count number of remaining pods, and number of pods that had a missing deletion timestamp.
- func countRemainingPods(c clientset.Interface, namespace string) (int, int, error) {
- // check for remaining pods
- pods, err := c.CoreV1().Pods(namespace).List(metav1.ListOptions{})
- if err != nil {
- return 0, 0, err
- }
- // nothing remains!
- if len(pods.Items) == 0 {
- return 0, 0, nil
- }
- // stuff remains, log about it
- logPodStates(pods.Items)
- // check if there were any pods with missing deletion timestamp
- numPods := len(pods.Items)
- missingTimestamp := 0
- for _, pod := range pods.Items {
- if pod.DeletionTimestamp == nil {
- missingTimestamp++
- }
- }
- return numPods, missingTimestamp, nil
- }
- // isDynamicDiscoveryError returns true if the error is a group discovery error
- // only for groups expected to be created/deleted dynamically during e2e tests
- func isDynamicDiscoveryError(err error) bool {
- if !discovery.IsGroupDiscoveryFailedError(err) {
- return false
- }
- discoveryErr := err.(*discovery.ErrGroupDiscoveryFailed)
- for gv := range discoveryErr.Groups {
- switch gv.Group {
- case "mygroup.example.com":
- // custom_resource_definition
- // garbage_collector
- case "wardle.k8s.io":
- // aggregator
- case "metrics.k8s.io":
- // aggregated metrics server add-on, no persisted resources
- default:
- Logf("discovery error for unexpected group: %#v", gv)
- return false
- }
- }
- return true
- }
- // hasRemainingContent checks if there is remaining content in the namespace via API discovery
- func hasRemainingContent(c clientset.Interface, dynamicClient dynamic.Interface, namespace string) (bool, error) {
- // some tests generate their own framework.Client rather than the default
- // TODO: ensure every test call has a configured dynamicClient
- if dynamicClient == nil {
- return false, nil
- }
- // find out what content is supported on the server
- // Since extension apiserver is not always available, e.g. metrics server sometimes goes down,
- // add retry here.
- resources, err := waitForServerPreferredNamespacedResources(c.Discovery(), 30*time.Second)
- if err != nil {
- return false, err
- }
- groupVersionResources, err := discovery.GroupVersionResources(resources)
- if err != nil {
- return false, err
- }
- // TODO: temporary hack for https://github.com/kubernetes/kubernetes/issues/31798
- ignoredResources := sets.NewString("bindings")
- contentRemaining := false
- // dump how many of resource type is on the server in a log.
- for gvr := range groupVersionResources {
- // get a client for this group version...
- dynamicClient := dynamicClient.Resource(gvr).Namespace(namespace)
- if err != nil {
- // not all resource types support list, so some errors here are normal depending on the resource type.
- Logf("namespace: %s, unable to get client - gvr: %v, error: %v", namespace, gvr, err)
- continue
- }
- // get the api resource
- apiResource := metav1.APIResource{Name: gvr.Resource, Namespaced: true}
- if ignoredResources.Has(gvr.Resource) {
- Logf("namespace: %s, resource: %s, ignored listing per whitelist", namespace, apiResource.Name)
- continue
- }
- unstructuredList, err := dynamicClient.List(metav1.ListOptions{})
- if err != nil {
- // not all resources support list, so we ignore those
- if apierrs.IsMethodNotSupported(err) || apierrs.IsNotFound(err) || apierrs.IsForbidden(err) {
- continue
- }
- // skip unavailable servers
- if apierrs.IsServiceUnavailable(err) {
- continue
- }
- return false, err
- }
- if len(unstructuredList.Items) > 0 {
- Logf("namespace: %s, resource: %s, items remaining: %v", namespace, apiResource.Name, len(unstructuredList.Items))
- contentRemaining = true
- }
- }
- return contentRemaining, nil
- }
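- // ContainerInitInvariant verifies that an older and a newer observation of
- // the same pod do not violate init-container invariants: the init container
- // list must not change, the pod UID must not change, and an initialized pod
- // must not regress to being uninitialized.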
- func ContainerInitInvariant(older, newer runtime.Object) error {
- oldPod := older.(*v1.Pod)
- newPod := newer.(*v1.Pod)
- if len(oldPod.Spec.InitContainers) == 0 {
- return nil
- }
- if len(oldPod.Spec.InitContainers) != len(newPod.Spec.InitContainers) {
- return fmt.Errorf("init container list changed")
- }
- if oldPod.UID != newPod.UID {
- return fmt.Errorf("two different pods exist in the condition: %s vs %s", oldPod.UID, newPod.UID)
- }
- if err := initContainersInvariants(oldPod); err != nil {
- return err
- }
- if err := initContainersInvariants(newPod); err != nil {
- return err
- }
- oldInit, _, _ := podInitialized(oldPod)
- newInit, _, _ := podInitialized(newPod)
- if oldInit && !newInit {
- // TODO: we may in the future enable resetting PodInitialized = false if the kubelet needs to restart it
- // from scratch
- return fmt.Errorf("pod cannot be initialized and then regress to not being initialized")
- }
- return nil
- }
- func podInitialized(pod *v1.Pod) (ok bool, failed bool, err error) {
- allInit := true
- initFailed := false
- for _, s := range pod.Status.InitContainerStatuses {
- switch {
- case initFailed && s.State.Waiting == nil:
- return allInit, initFailed, fmt.Errorf("container %s is after a failed container but isn't waiting", s.Name)
- case allInit && s.State.Waiting == nil:
- return allInit, initFailed, fmt.Errorf("container %s is after an initializing container but isn't waiting", s.Name)
- case s.State.Terminated == nil:
- allInit = false
- case s.State.Terminated.ExitCode != 0:
- allInit = false
- initFailed = true
- case !s.Ready:
- return allInit, initFailed, fmt.Errorf("container %s initialized but isn't marked as ready", s.Name)
- }
- }
- return allInit, initFailed, nil
- }
- func initContainersInvariants(pod *v1.Pod) error {
- allInit, initFailed, err := podInitialized(pod)
- if err != nil {
- return err
- }
- if !allInit || initFailed {
- for _, s := range pod.Status.ContainerStatuses {
- if s.State.Waiting == nil || s.RestartCount != 0 {
- return fmt.Errorf("container %s is not waiting but initialization not complete", s.Name)
- }
- if s.State.Waiting.Reason != "PodInitializing" {
- return fmt.Errorf("container %s should have reason PodInitializing: %s", s.Name, s.State.Waiting.Reason)
- }
- }
- }
- _, c := podutil.GetPodCondition(&pod.Status, v1.PodInitialized)
- if c == nil {
- return fmt.Errorf("pod does not have initialized condition")
- }
- if c.LastTransitionTime.IsZero() {
- return fmt.Errorf("PodInitialized condition should always have a transition time")
- }
- switch {
- case c.Status == v1.ConditionUnknown:
- return fmt.Errorf("PodInitialized condition should never be Unknown")
- case c.Status == v1.ConditionTrue && (initFailed || !allInit):
- return fmt.Errorf("PodInitialized condition was True but all not all containers initialized")
- case c.Status == v1.ConditionFalse && (!initFailed && allInit):
- return fmt.Errorf("PodInitialized condition was False but all containers initialized")
- }
- return nil
- }
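- // InvariantFunc compares an older and a newer observation of the same object
- // and returns an error if an invariant between them is violated.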
- type InvariantFunc func(older, newer runtime.Object) error
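- // CheckInvariants applies the given invariant functions to the supplied
- // sequence of watch events and returns an error if any invariant is violated.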
- func CheckInvariants(events []watch.Event, fns ...InvariantFunc) error {
- e