Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C
isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{})
if isUpgrade {
junits = append(junits, testUpgradeOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
junits = append(junits, clusterOperatorIsNotProgressingWhenMachineConfigIs(finalIntervals)...)
} else {
junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@ import (
"strings"
"time"

"github.com/openshift/origin/pkg/monitortestlibrary/utility"
configv1 "github.com/openshift/api/config/v1"
clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer"
"github.com/sirupsen/logrus"

configv1 "github.com/openshift/api/config/v1"
clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
platformidentification2 "github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
"github.com/openshift/origin/pkg/monitortestlibrary/utility"
"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
exutil "github.com/openshift/origin/test/extended/util"
"k8s.io/client-go/rest"
)

// exceptionCallback consumes a suspicious condition and returns an
Expand Down Expand Up @@ -516,9 +516,6 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
for _, conditionType := range conditionTypes {
for _, operatorName := range platformidentification.KnownOperators.List() {
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
if bzComponent == "Unknown" {
bzComponent = operatorName
}
testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should not change condition/%v", bzComponent, operatorName, conditionType)
operatorEvents := eventsByOperator[operatorName]
if len(operatorEvents) == 0 {
Expand Down Expand Up @@ -586,6 +583,9 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
}

if len(fatal) > 0 || len(excepted) > 0 {
// Record a failure for this test name first.
// If len(fatal) == 0, a success with the same name is appended below, so the result is a flake (or pass).
// If len(fatal) > 0, this failure remains the only result for the test, so the test fails.
ret = append(ret, &junitapi.JUnitTestCase{
Name: testName,
Duration: duration,
Expand All @@ -597,7 +597,204 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
}

if len(fatal) == 0 {
if len(excepted) > 0 {
// add a success so we flake (or pass) and don't fail
ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"})
} else {
ret = append(ret, &junitapi.JUnitTestCase{Name: testName})
}
}
}
}

return ret
}

func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
var ret []*junitapi.JUnitTestCase
upgradeWindows := getUpgradeWindows(events)

var machineConfigProgressingStart time.Time
var eventsInUpgradeWindows monitorapi.Intervals

var start, stop time.Time
for _, event := range events {
if !isInUpgradeWindow(upgradeWindows, event) {
continue
}
eventsInUpgradeWindows = append(eventsInUpgradeWindows, event)
if start.IsZero() || event.From.Before(start) {
start = event.From
}
if stop.IsZero() || event.To.After(stop) {
stop = event.To
}
}
duration := stop.Sub(start).Seconds()

eventsByOperator := getEventsByOperator(eventsInUpgradeWindows)
for _, mcEvent := range eventsByOperator["machine-config"] {
condition := monitorapi.GetOperatorConditionStatus(mcEvent)
if condition == nil {
continue // ignore non-condition intervals
}
if condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionTrue {
machineConfigProgressingStart = mcEvent.To
break
}
}

mcTestCase := &junitapi.JUnitTestCase{
Name: fmt.Sprintf("[bz-Machine Config Operator] clusteroperator/machine-config must go Progressing=True during an upgrade test"),
Duration: duration,
}
if machineConfigProgressingStart.IsZero() {
mcTestCase.FailureOutput = &junitapi.FailureOutput{
Output: fmt.Sprintf("machine-config was never Progressing=True during the upgrade window from %s to %s", start.Format(time.RFC3339), stop.Format(time.RFC3339)),
}
return []*junitapi.JUnitTestCase{mcTestCase}
} else {
mcTestCase.SystemOut = fmt.Sprintf("machine-config became Progressing=True at %s during the upgrade window from %s to %s", machineConfigProgressingStart.Format(time.RFC3339), start.Format(time.RFC3339), stop.Format(time.RFC3339))
}
ret = append(ret, mcTestCase)

for _, operatorName := range platformidentification.KnownOperators.Difference(sets.NewString("machine-config")).List() {
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should stay Progressing=False while MCO is Progressing=True", bzComponent, operatorName)
operatorEvents := eventsByOperator[operatorName]
if len(operatorEvents) == 0 {
ret = append(ret, &junitapi.JUnitTestCase{
Name: testName,
Duration: duration,
})
continue
}

// except looks up a known exception for the given cluster operator and
// Progressing reason. It returns the issue URL recorded for that pair, or
// "" when the pair is not covered by any exception.
except := func(co string, reason string) string {
	switch {
	case co == "csi-snapshot-controller" && reason == "CSISnapshotController_Deploying":
		return "https://issues.redhat.com/browse/OCPBUGS-62624"
	case co == "dns" && reason == "DNSReportsProgressingIsTrue":
		return "https://issues.redhat.com/browse/OCPBUGS-62623"
	case co == "image-registry" && (reason == "NodeCADaemonUnavailable::Ready" || reason == "DeploymentNotCompleted"):
		return "https://issues.redhat.com/browse/OCPBUGS-62626"
	case co == "ingress" && reason == "Reconciling":
		return "https://issues.redhat.com/browse/OCPBUGS-62627"
	case co == "kube-storage-version-migrator" && reason == "KubeStorageVersionMigrator_Deploying":
		return "https://issues.redhat.com/browse/OCPBUGS-62629"
	case co == "network" && reason == "Deploying":
		return "https://issues.redhat.com/browse/OCPBUGS-62630"
	case co == "node-tuning" && (reason == "Reconciling" || reason == "ProfileProgressing"):
		return "https://issues.redhat.com/browse/OCPBUGS-62632"
	case co == "openshift-controller-manager" && strings.HasSuffix(reason, "_DesiredStateNotYetAchieved"):
		// Matched by suffix; example reasons:
		//   _DesiredStateNotYetAchieved
		//   RouteControllerManager_DesiredStateNotYetAchieved
		return "https://issues.redhat.com/browse/OCPBUGS-63116"
	case co == "service-ca" && reason == "_ManagedDeploymentsAvailable":
		return "https://issues.redhat.com/browse/OCPBUGS-62633"
	case co == "storage" && strings.HasSuffix(reason, "_Deploying"):
		// Matched by suffix; example reasons:
		//   GCPPDCSIDriverOperatorCR_GCPPDDriverControllerServiceController_Deploying
		//   GCPPDCSIDriverOperatorCR_GCPPDDriverNodeServiceController_Deploying
		//   AWSEBSCSIDriverOperatorCR_AWSEBSDriverNodeServiceController_Deploying
		//   VolumeDataSourceValidatorDeploymentController_Deploying
		//   GCPPD_Deploying
		//   AWSEBS_Deploying
		return "https://issues.redhat.com/browse/OCPBUGS-62634"
	case co == "olm" && strings.HasSuffix(reason, "ControllerManager_Deploying"):
		// Matched by suffix; example reasons:
		//   CatalogdDeploymentCatalogdControllerManager_Deploying
		//   OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying
		return "https://issues.redhat.com/browse/OCPBUGS-62635"
	case co == "operator-lifecycle-manager-packageserver" && reason == "":
		// Only the empty reason is excepted for this operator.
		return "https://issues.redhat.com/browse/OCPBUGS-63672"
	default:
		// No exception is registered for this operator/reason pair.
		return ""
	}
}

var excepted, fatal []string
for _, operatorEvent := range operatorEvents {
if operatorEvent.From.Before(machineConfigProgressingStart) {
continue
}
condition := monitorapi.GetOperatorConditionStatus(operatorEvent)
if condition == nil {
continue // ignore non-condition intervals
}
if condition.Type == "" {
fatal = append(fatal, fmt.Sprintf("failed to convert %v into a condition with a type", operatorEvent))
continue
}

if condition.Type != configv1.OperatorProgressing || condition.Status == configv1.ConditionFalse {
continue
}

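// Anything reaching this point is a Progressing condition whose status is not False,
// from an interval starting at or after machineConfigProgressingStart; treat it as a violation.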
failure := fmt.Sprintf("%v", operatorEvent)

exception := except(operatorName, condition.Reason)
if exception == "" {
fatal = append(fatal, failure)
} else {
excepted = append(excepted, fmt.Sprintf("%s (exception: %s)", failure, exception))
}
}

output := fmt.Sprintf("%d (out of %d) unexpected clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", len(fatal), len(operatorEvents), start.Format(time.RFC3339), stop.Format(time.RFC3339))
if len(fatal) > 0 {
output = fmt.Sprintf("%s. These did not match any known exceptions, so they cause this test-case to fail:\n\n%v\n", output, strings.Join(fatal, "\n"))
} else {
output = fmt.Sprintf("%s, as desired.", output)
}
output = fmt.Sprintf("%s\n%d unwelcome but acceptable clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", output, len(excepted), start.Format(time.RFC3339), stop.Format(time.RFC3339))
if len(excepted) > 0 {
output = fmt.Sprintf("%s. These should not happen, but because they are tied to exceptions, the fact that they did happen is not sufficient to cause this test-case to fail:\n\n%v\n", output, strings.Join(excepted, "\n"))
} else {
output = fmt.Sprintf("%s, as desired.", output)
}

if len(fatal) > 0 || len(excepted) > 0 {
// Record a failure for this test name first.
// If len(fatal) == 0, a success with the same name is appended below, so the result is a flake (or pass).
// If len(fatal) > 0, this failure remains the only result for the test, so the test fails.
ret = append(ret, &junitapi.JUnitTestCase{
Name: testName,
Duration: duration,
SystemOut: output,
FailureOutput: &junitapi.FailureOutput{
Output: output,
},
})
}

if len(fatal) == 0 {
if len(excepted) > 0 {
// add a success so we flake (or pass) and don't fail
ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"})
} else {
ret = append(ret, &junitapi.JUnitTestCase{Name: testName})
}
}
Expand Down