From f97a90d9ed6ee1040e25f1202a97edc4a9340eab Mon Sep 17 00:00:00 2001 From: Vincent Marguerie <24724195+vincentmrg@users.noreply.github.com> Date: Mon, 1 Jul 2024 16:47:09 +0200 Subject: [PATCH] =?UTF-8?q?Fix=20FirewallRules=20concurency=20issues=20usi?= =?UTF-8?q?ng=20a=20status=20known=20LastTransiti=E2=80=A6=20(#124)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix FirewallRules concurency issues using a status known LastTransitionTime * Upgrade chart to 0.11.0 * Set LastTransitionTime required and fix an issue on FirewalRule deletion reconcile --- api/v1alpha1/firewallrule_types.go | 6 ++ api/v1alpha1/zz_generated.deepcopy.go | 1 + .../kubestatic.quortex.io_firewallrules.yaml | 7 ++ controllers/firewallrule_controller.go | 102 +++++++++++++++--- helm/kubestatic/Chart.yaml | 4 +- helm/kubestatic/README.md | 2 +- helm/kubestatic/crds/crds.yaml | 55 +++++++--- helm/kubestatic/templates/manager_role.yaml | 1 - 8 files changed, 147 insertions(+), 31 deletions(-) diff --git a/api/v1alpha1/firewallrule_types.go b/api/v1alpha1/firewallrule_types.go index c17f10b..7ecaf07 100644 --- a/api/v1alpha1/firewallrule_types.go +++ b/api/v1alpha1/firewallrule_types.go @@ -93,6 +93,12 @@ type FirewallRuleStatus struct { // The latest FirewallRule specification applied, used to make API requests to cloud providers only if the resource has been changed to avoid throttling issues. LastApplied *string `json:"lastApplied,omitempty"` + // lastTransitionTime is the last time the status transitioned from one status to another. + // +kubebuilder:validation:Required + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:Format=date-time + LastTransitionTime metav1.Time `json:"lastTransitionTime"` + // The firewall rule identifier FirewallRuleID *string `json:"firewallRuleID,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6020056..71d2418 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -231,6 +231,7 @@ func (in *FirewallRuleStatus) DeepCopyInto(out *FirewallRuleStatus) { *out = new(string) **out = **in } + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) if in.FirewallRuleID != nil { in, out := &in.FirewallRuleID, &out.FirewallRuleID *out = new(string) diff --git a/config/crd/bases/kubestatic.quortex.io_firewallrules.yaml b/config/crd/bases/kubestatic.quortex.io_firewallrules.yaml index b7325de..308db66 100644 --- a/config/crd/bases/kubestatic.quortex.io_firewallrules.yaml +++ b/config/crd/bases/kubestatic.quortex.io_firewallrules.yaml @@ -126,12 +126,19 @@ spec: make API requests to cloud providers only if the resource has been changed to avoid throttling issues. type: string + lastTransitionTime: + description: lastTransitionTime is the last time the status transitioned + from one status to another. + format: date-time + type: string networkInterfaceID: description: The network interface identifier type: string state: description: The current state of the FirewallRule type: string + required: + - lastTransitionTime type: object type: object served: true diff --git a/controllers/firewallrule_controller.go b/controllers/firewallrule_controller.go index f5a0cff..ea31b28 100644 --- a/controllers/firewallrule_controller.go +++ b/controllers/firewallrule_controller.go @@ -27,6 +27,7 @@ import ( "github.com/google/uuid" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" @@ -47,9 +48,10 @@ const ( // FirewallRuleReconciler reconciles a FirewallRule object type FirewallRuleReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme - Provider provider.Provider + Log logr.Logger + Scheme *runtime.Scheme + Provider provider.Provider + frLastTransitionTime map[string]metav1.Time } //+kubebuilder:rbac:groups=kubestatic.quortex.io,resources=firewallrules,verbs=get;list;watch;create;update;patch;delete @@ -82,6 +84,26 @@ func (r *FirewallRuleReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{}, nil } + // LastTransitionTime is not set. This should happen when kubestatic is + // upgraded from a version that does not support this field, we set it with + // the current time. + if firewallRule.Status.LastTransitionTime.IsZero() { + firewallRule.Status.LastTransitionTime = metav1.Now() + if err := r.Status().Update(ctx, firewallRule); err != nil { + log.Error(err, "Failed to update FirewallRule state", "firewallRule", firewallRule.Name) + return ctrl.Result{}, err + } + r.frLastTransitionTime[firewallRule.Name] = firewallRule.Status.LastTransitionTime + } + + // Check for LastTransitionTime consistency, if not, requeueing. + knownLastTransitionTime := r.frLastTransitionTime[firewallRule.Name] + if firewallRule.Status.LastTransitionTime.Before(&knownLastTransitionTime) { + log.V(1).Info("FirewallRule LastTransitionTime inconsistency, requeuing in 1 second") + return ctrl.Result{RequeueAfter: time.Second}, nil + } + r.frLastTransitionTime[firewallRule.Name] = firewallRule.Status.LastTransitionTime + // Lifecycle reconciliation if firewallRule.ObjectMeta.DeletionTimestamp.IsZero() { return r.reconcileFirewallRule(ctx, log, firewallRule) @@ -120,9 +142,18 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log // Check for other rules associated to the node. // If there is already one, we update the group of rules, if not, we create a new group. - rulesAssociated := v1alpha1.FilterFirewallRules(frs.Items, func(fr v1alpha1.FirewallRule) bool { - return fr.Name != rule.Name && fr.Status.State != v1alpha1.FirewallRuleStateNone - }) + rulesAssociated := []v1alpha1.FirewallRule{} + for _, fr := range frs.Items { + knownLastTransitionTime := r.frLastTransitionTime[fr.Name] + if !fr.Status.LastTransitionTime.Equal(&knownLastTransitionTime) { + log.V(1).Info("FirewallRule LastTransitionTime inconsistency, requeuing in 1 second", "firewallRuleName", fr.Name) + return ctrl.Result{RequeueAfter: time.Second}, nil + } + if fr.Name != rule.Name && fr.Status.State != v1alpha1.FirewallRuleStateNone { + rulesAssociated = append(rulesAssociated, fr) + } + } + if len(rulesAssociated) > 0 { firewallRuleID := helper.StringValue(rulesAssociated[0].Status.FirewallRuleID) log.V(1).Info("Updating FirewallRule group", "firewallRuleID", firewallRuleID) @@ -156,6 +187,7 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log log.Info("Created firewall rule", "id", id) // Update status + rule.Status.LastTransitionTime = metav1.Now() rule.Status.State = v1alpha1.FirewallRuleStateReserved rule.Status.FirewallRuleID = &id lastApplied, err := json.Marshal(rule.Spec) @@ -164,7 +196,13 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log } rule.Status.LastApplied = helper.StringPointerOrNil(string(lastApplied)) log.V(1).Info("Updating FirewallRule", "state", rule.Status.State, "firewallRuleID", rule.Status.FirewallRuleID) - return ctrl.Result{RequeueAfter: time.Second * 5}, r.Status().Update(ctx, rule) + if err = r.Status().Update(ctx, rule); err != nil { + log.Error(err, "Failed to update FirewallRule status", "firewallRule", rule.Name, "status", rule.Status.State) + return ctrl.Result{}, err + } + r.frLastTransitionTime[rule.Name] = rule.Status.LastTransitionTime + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } else if rule.Spec.NodeName != nil { lastApplied := &v1alpha1.FirewallRuleSpec{} if err := json.Unmarshal([]byte(helper.StringValue(rule.Status.LastApplied)), lastApplied); err != nil { @@ -185,8 +223,17 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log log.Error(err, "Unable to list FirewallRules") return ctrl.Result{}, err } + rules := []v1alpha1.FirewallRule{} + for _, fr := range frs.Items { + knownLastTransitionTime := r.frLastTransitionTime[fr.Name] + if !fr.Status.LastTransitionTime.Equal(&knownLastTransitionTime) { + log.V(1).Info("FirewallRule LastTransitionTime inconsistency, requeuing in 1 second", "firewallRuleName", fr.Name) + return ctrl.Result{RequeueAfter: time.Second}, nil + } + rules = append(rules, fr) + } log.V(1).Info("Updating FirewallRule group", "firewallRuleID", firewallRuleID) - _, err = r.Provider.UpdateFirewallRuleGroup(ctx, encodeUpdateFirewallRuleGroupRequest(firewallRuleID, frs.Items)) + _, err = r.Provider.UpdateFirewallRuleGroup(ctx, encodeUpdateFirewallRuleGroupRequest(firewallRuleID, rules)) } else { log.V(1).Info("Updating FirewallRule", "firewallRuleID", firewallRuleID) _, err = r.Provider.UpdateFirewallRule(ctx, encodeUpdateFirewallRuleRequest(firewallRuleID, rule)) @@ -203,9 +250,15 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log if err != nil { return ctrl.Result{}, fmt.Errorf("Failed to marshal last applied firewallrule: %w", err) } + rule.Status.LastTransitionTime = metav1.Now() rule.Status.LastApplied = helper.StringPointerOrNil(string(lastApplied)) log.V(1).Info("Updating FirewallRule", "state", rule.Status.State, "firewallRuleID", rule.Status.FirewallRuleID) - return ctrl.Result{RequeueAfter: time.Second * 5}, r.Status().Update(ctx, rule) + if err = r.Status().Update(ctx, rule); err != nil { + log.Error(err, "Failed to update FirewallRule status", "firewallRule", rule.Name, "status", rule.Status.State) + return ctrl.Result{}, err + } + r.frLastTransitionTime[rule.Name] = rule.Status.LastTransitionTime + return ctrl.Result{RequeueAfter: time.Second * 5}, nil } } @@ -262,11 +315,17 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log log.Info("Associated firewall rule", "firewallRuleID", *rule.Status.FirewallRuleID, "instanceID", instanceID, "networkInterfaceID", networkInterface.NetworkInterfaceID) // Update status + rule.Status.LastTransitionTime = metav1.Now() rule.Status.State = v1alpha1.FirewallRuleStateAssociated rule.Status.InstanceID = &instanceID rule.Status.NetworkInterfaceID = &networkInterface.NetworkInterfaceID log.V(1).Info("Updating FirewallRule", "state", rule.Status.State, "instanceID", rule.Status.InstanceID, "networkInterfaceID", rule.Status.NetworkInterfaceID) - return ctrl.Result{RequeueAfter: time.Second * 5}, r.Status().Update(ctx, rule) + if err = r.Status().Update(ctx, rule); err != nil { + log.Error(err, "Failed to update FirewallRule status", "firewallRule", rule.Name, "status", rule.Status.State) + return ctrl.Result{}, err + } + r.frLastTransitionTime[rule.Name] = rule.Status.LastTransitionTime + return ctrl.Result{RequeueAfter: time.Second * 5}, nil } // FirewallRule reliability check @@ -283,12 +342,14 @@ func (r *FirewallRuleReconciler) reconcileFirewallRule(ctx context.Context, log log.Info("Node not found. Set state back to Reserved", "nodeName", rule.Spec.NodeName) // Set status back to Reserved + rule.Status.LastTransitionTime = metav1.Now() rule.Status.State = v1alpha1.FirewallRuleStateReserved log.V(1).Info("Updating FirewallRule", "state", rule.Status.State, "InstanceID", rule.Status.InstanceID) if err = r.Status().Update(ctx, rule); err != nil { log.Error(err, "Failed to update FirewallRule status", "firewallRule", rule.Name, "status", rule.Status.State) return ctrl.Result{}, err } + r.frLastTransitionTime[rule.Name] = rule.Status.LastTransitionTime rule.Spec.NodeName = nil return ctrl.Result{}, r.Update(ctx, rule) @@ -344,7 +405,7 @@ func (r *FirewallRuleReconciler) clearFirewallRule(ctx context.Context, log logr toDelete := false if r.Provider.HasGroupedFirewallRules() { - // List FirewallRules with identical nodeName + // List FirewallRules frs := &v1alpha1.FirewallRuleList{} if err := r.List(ctx, frs); err != nil { log.Error(err, "Unable to list FirewallRules") @@ -353,9 +414,17 @@ func (r *FirewallRuleReconciler) clearFirewallRule(ctx context.Context, log logr // Check for other rules associated to the node. // If there is other ones, we only update the group of rules, if not, we also disassociate the group. - rules := v1alpha1.FilterFirewallRules(frs.Items, func(fr v1alpha1.FirewallRule) bool { - return fr.Name != rule.Name && helper.StringValue(fr.Status.FirewallRuleID) == helper.StringValue(rule.Status.FirewallRuleID) - }) + rules := []v1alpha1.FirewallRule{} + for _, fr := range frs.Items { + knownLastTransitionTime := r.frLastTransitionTime[fr.Name] + if !fr.Status.LastTransitionTime.Equal(&knownLastTransitionTime) { + log.V(1).Info("FirewallRule LastTransitionTime inconsistency, requeuing in 1 second", "firewallRuleName", fr.Name) + return ctrl.Result{RequeueAfter: time.Second}, nil + } + if fr.Name != rule.Name && helper.StringValue(fr.Status.FirewallRuleID) == helper.StringValue(rule.Status.FirewallRuleID) { + rules = append(rules, fr) + } + } if len(rules) > 0 { log.V(1).Info("Updating FirewallRule", "firewallRuleID", firewallRuleID) if _, err := r.Provider.UpdateFirewallRuleGroup(ctx, encodeUpdateFirewallRuleGroupRequest(firewallRuleID, rules)); err != nil { @@ -403,12 +472,13 @@ func (r *FirewallRuleReconciler) clearFirewallRule(ctx context.Context, log logr } // Update status - rule.Status = v1alpha1.FirewallRuleStatus{State: v1alpha1.FirewallRuleStateNone} + rule.Status = v1alpha1.FirewallRuleStatus{State: v1alpha1.FirewallRuleStateNone, LastTransitionTime: metav1.Now()} log.V(1).Info("Updating FirewallRule", "state", rule.Status.State) if err := r.Status().Update(ctx, rule); err != nil { log.Error(err, "Failed to update FirewallRule state", "firewallRule", rule.Name) return ctrl.Result{}, err } + r.frLastTransitionTime[rule.Name] = rule.Status.LastTransitionTime return ctrl.Result{RequeueAfter: time.Second * 5}, nil } @@ -423,6 +493,8 @@ func (r *FirewallRuleReconciler) SetupWithManager(mgr ctrl.Manager) error { return err } + r.frLastTransitionTime = make(map[string]metav1.Time) + return ctrl.NewControllerManagedBy(mgr). For(&v1alpha1.FirewallRule{}). Complete(r) diff --git a/helm/kubestatic/Chart.yaml b/helm/kubestatic/Chart.yaml index 73a20e9..cf19817 100644 --- a/helm/kubestatic/Chart.yaml +++ b/helm/kubestatic/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.10.1 +version: 0.11.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.10.1 +appVersion: 0.11.0 diff --git a/helm/kubestatic/README.md b/helm/kubestatic/README.md index dc8feb9..ff2e13e 100644 --- a/helm/kubestatic/README.md +++ b/helm/kubestatic/README.md @@ -1,6 +1,6 @@ # kubestatic -![Version: 0.10.1](https://img.shields.io/badge/Version-0.10.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.10.1](https://img.shields.io/badge/AppVersion-0.10.1-informational?style=flat-square) +![Version: 0.11.0](https://img.shields.io/badge/Version-0.11.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.11.0](https://img.shields.io/badge/AppVersion-0.11.0-informational?style=flat-square) An operator to manage the lifecycle of public cloud providers resources needed to expose endpoints on public nodes. diff --git a/helm/kubestatic/crds/crds.yaml b/helm/kubestatic/crds/crds.yaml index e81637c..70f7a03 100644 --- a/helm/kubestatic/crds/crds.yaml +++ b/helm/kubestatic/crds/crds.yaml @@ -2,8 +2,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.2 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.15.0 name: externalips.kubestatic.quortex.io spec: group: kubestatic.quortex.io @@ -30,10 +29,19 @@ spec: description: ExternalIP is the Schema for the externalips API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -78,8 +86,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.2 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.15.0 name: firewallrules.kubestatic.quortex.io spec: group: kubestatic.quortex.io @@ -106,10 +113,19 @@ spec: description: FirewallRule is the Schema for the firewallrules API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -130,7 +146,9 @@ spec: description: Whether to disable reconciliation of this resource for development purpose type: boolean fromPort: - description: The start of port range for the TCP and UDP protocols, or an ICMP/ICMPv6 type number. + description: |- + The start of port range for the TCP and UDP protocols, or an ICMP/ICMPv6 + type number. format: int64 type: integer ipRanges: @@ -139,10 +157,14 @@ spec: description: IPRange Describes an IPv4 range. properties: cidr: - description: The IPv4 CIDR range. You can either specify a CIDR range or a source security group, not both. To specify a single IPv4 address, use the /32 prefix length. + description: |- + The IPv4 CIDR range. You can either specify a CIDR range or a source security + group, not both. To specify a single IPv4 address, use the /32 prefix length. type: string description: - description: A description for the rule that references this IPv4 address range. + description: |- + A description for the rule that references this IPv4 address + range. type: string required: - cidr @@ -153,7 +175,10 @@ spec: description: NodeName is the node's instance on which the firewall rule must be attached type: string protocol: - description: The IP protocol name (tcp, udp, icmp, icmpv6) or number (see Protocol Numbers (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)). Use -1 to specify all protocols. + description: |- + The IP protocol name (tcp, udp, icmp, icmpv6) or number (see Protocol Numbers + (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)). + Use -1 to specify all protocols. type: string toPort: description: The end of port range for the TCP and UDP protocols, or an ICMP/ICMPv6 code. @@ -177,12 +202,18 @@ spec: lastApplied: description: The latest FirewallRule specification applied, used to make API requests to cloud providers only if the resource has been changed to avoid throttling issues. type: string + lastTransitionTime: + description: lastTransitionTime is the last time the status transitioned from one status to another. + format: date-time + type: string networkInterfaceID: description: The network interface identifier type: string state: description: The current state of the FirewallRule type: string + required: + - lastTransitionTime type: object type: object served: true diff --git a/helm/kubestatic/templates/manager_role.yaml b/helm/kubestatic/templates/manager_role.yaml index 8fa125c..389a08d 100644 --- a/helm/kubestatic/templates/manager_role.yaml +++ b/helm/kubestatic/templates/manager_role.yaml @@ -2,7 +2,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - creationTimestamp: null name: {{ include "kubestatic.fullname" . }}-manager-role rules: - apiGroups: