Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ spec:
command:
- /manager
env:
- name: GOMEMLIMIT
value: "230MiB"
- name: RELATED_IMAGE_FEATURE_SERVER
value: quay.io/feastdev/feature-server:0.62.0
- name: RELATED_IMAGE_CRON_JOB
Expand Down
45 changes: 45 additions & 0 deletions infra/feast-operator/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,18 @@ import (
// to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth"

appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
Expand Down Expand Up @@ -59,6 +66,29 @@ func init() {
// +kubebuilder:scaffold:scheme
}

// newCacheOptions builds the manager's informer cache configuration.
// Every object kind the operator reconciles is watched through a label
// selector restricted to resources carrying the managed-by label, and
// managedFields metadata is stripped from cached objects to reduce
// the controller's memory footprint.
func newCacheOptions() cache.Options {
	byLabel := cache.ByObject{
		Label: labels.SelectorFromSet(labels.Set{
			services.ManagedByLabelKey: services.ManagedByLabelValue,
		}),
	}

	// Kinds the operator creates and therefore needs in its filtered cache.
	watched := []client.Object{
		&corev1.ConfigMap{},
		&appsv1.Deployment{},
		&corev1.Service{},
		&corev1.ServiceAccount{},
		&corev1.PersistentVolumeClaim{},
		&rbacv1.RoleBinding{},
		&rbacv1.Role{},
		&batchv1.CronJob{},
		&autoscalingv2.HorizontalPodAutoscaler{},
		&policyv1.PodDisruptionBudget{},
	}

	byObject := make(map[client.Object]cache.ByObject, len(watched))
	for _, obj := range watched {
		byObject[obj] = byLabel
	}

	return cache.Options{
		DefaultTransform: cache.TransformStripManagedFields(),
		ByObject:         byObject,
	}
}

func main() {
var metricsAddr string
var enableLeaderElection bool
Expand Down Expand Up @@ -145,11 +175,26 @@ func main() {
// if you are doing or is intended to do any operation such as perform cleanups
// after the manager stops then its usage might be unsafe.
// LeaderElectionReleaseOnCancel: true,
Cache: newCacheOptions(),
Client: client.Options{
Cache: &client.CacheOptions{
// Bypass the label-filtered informer cache for all reads so that
// pre-existing resources without the managed-by label are still
// visible to the reconciler. The ByObject cache filter above still
// restricts the watch to managed-by-labeled objects, limiting
// memory usage while avoiding upgrade deadlocks.
DisableFor: []client.Object{
&corev1.ConfigMap{},
&corev1.Secret{},
&appsv1.Deployment{},
&corev1.Service{},
&corev1.ServiceAccount{},
&corev1.PersistentVolumeClaim{},
&rbacv1.RoleBinding{},
&rbacv1.Role{},
&batchv1.CronJob{},
&autoscalingv2.HorizontalPodAutoscaler{},
&policyv1.PodDisruptionBudget{},
},
},
},
Expand Down
24 changes: 14 additions & 10 deletions infra/feast-operator/config/default/related_image_fs_patch.tmpl
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
- op: replace
path: "/spec/template/spec/containers/0/env/0"
value:
name: RELATED_IMAGE_FEATURE_SERVER
value: ${FS_IMG}
- op: replace
path: "/spec/template/spec/containers/0/env/1"
value:
name: RELATED_IMAGE_CRON_JOB
value: ${CJ_IMG}
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
spec:
template:
spec:
containers:
- name: manager
env:
- name: RELATED_IMAGE_FEATURE_SERVER
value: ${FS_IMG}
- name: RELATED_IMAGE_CRON_JOB
value: ${CJ_IMG}
24 changes: 14 additions & 10 deletions infra/feast-operator/config/default/related_image_fs_patch.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
- op: replace
path: "/spec/template/spec/containers/0/env/0"
value:
name: RELATED_IMAGE_FEATURE_SERVER
value: quay.io/feastdev/feature-server:0.62.0
- op: replace
path: "/spec/template/spec/containers/0/env/1"
value:
name: RELATED_IMAGE_CRON_JOB
value: quay.io/openshift/origin-cli:4.17
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
spec:
template:
spec:
containers:
- name: manager
env:
- name: RELATED_IMAGE_FEATURE_SERVER
value: quay.io/feastdev/feature-server:0.62.0
- name: RELATED_IMAGE_CRON_JOB
value: quay.io/openshift/origin-cli:4.17
2 changes: 2 additions & 0 deletions infra/feast-operator/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ spec:
drop:
- "ALL"
env:
- name: GOMEMLIMIT
value: "230MiB"
- name: RELATED_IMAGE_FEATURE_SERVER
value: feast:latest
- name: RELATED_IMAGE_CRON_JOB
Expand Down
2 changes: 2 additions & 0 deletions infra/feast-operator/dist/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20868,6 +20868,8 @@ spec:
value: quay.io/feastdev/feature-server:0.62.0
- name: RELATED_IMAGE_CRON_JOB
value: quay.io/openshift/origin-cli:4.17
- name: GOMEMLIMIT
  value: "230MiB"
- name: OIDC_ISSUER_URL
value: ""
image: quay.io/feastdev/feast-operator:0.62.0
Expand Down
1 change: 1 addition & 0 deletions infra/feast-operator/internal/controller/authz/authz.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ func (authz *FeastAuthorization) getLabels() map[string]string {
return map[string]string{
services.NameLabelKey: authz.Handler.FeatureStore.Name,
services.ServiceTypeLabelKey: string(services.AuthzFeastType),
services.ManagedByLabelKey: services.ManagedByLabelValue,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 removeOrphanedRoles silently skips pre-upgrade custom auth Roles due to stricter label selector

The authz.getLabels() function now includes ManagedByLabelKey (authz.go:334), and removeOrphanedRoles uses this label set as a list selector (authz.go:85). Pre-upgrade custom auth Roles only have {NameLabelKey, ServiceTypeLabelKey} without ManagedByLabelKey, so the API server's label selector will never match them. These orphaned Roles will never be cleaned up by removeOrphanedRoles.

The main feast Role and RoleBinding are still cleaned up correctly via DeleteOwnedFeastObj (which looks up by name, not labels). Only custom auth roles from KubernetesAuthz.Roles are affected. The practical impact is limited: orphaned Roles have empty rules (no security impact) and have owner references for eventual GC on FeatureStore CR deletion. The window is narrow — it requires changing the Roles list concurrently with or very shortly after the operator upgrade, before the first reconciliation adds the label to existing Roles.

Prompt for agents
In authz.go, the removeOrphanedRoles function at line 81-101 lists Roles using authz.getLabels() as the label selector. Since getLabels() now includes ManagedByLabelKey, pre-upgrade Roles without this label are invisible to this cleanup function.

To fix: either (a) use a separate label set for removeOrphanedRoles that omits ManagedByLabelKey (matching by NameLabelKey and ServiceTypeLabelKey only), or (b) run a one-time migration during reconciliation that adds ManagedByLabelKey to all existing authz Roles before removeOrphanedRoles is called.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"encoding/json"
"fmt"
"os"
"reflect"

corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
Expand Down Expand Up @@ -181,30 +180,17 @@ func (feast *FeastServices) setNamespaceRegistryRoleBinding(rb *rbacv1.RoleBindi
Namespace: rb.Namespace,
},
}
role.Rules = desiredRules
role.SetGroupVersionKind(rbacv1.SchemeGroupVersion.WithKind("Role"))

// Attempt to create; tolerate AlreadyExists so concurrent reconcilers don't fail.
if err := feast.Handler.Client.Create(feast.Handler.Context, role); err != nil && !apierrors.IsAlreadyExists(err) {
return fmt.Errorf("failed to create namespace registry Role: %w", err)
}

// Re-fetch the authoritative copy to compare rules and obtain the latest resourceVersion.
existingRole := &rbacv1.Role{}
if err := feast.Handler.Client.Get(feast.Handler.Context, types.NamespacedName{
Name: roleName,
Namespace: rb.Namespace,
}, existingRole); err != nil {
return fmt.Errorf("failed to get namespace registry Role: %w", err)
}

if !reflect.DeepEqual(existingRole.Rules, desiredRules) {
existingRole.Rules = desiredRules
// On conflict the reconciler will re-queue automatically.
if err := feast.Handler.Client.Update(feast.Handler.Context, existingRole); err != nil {
return fmt.Errorf("failed to update namespace registry Role: %w", err)
}
if _, err := controllerutil.CreateOrUpdate(feast.Handler.Context, feast.Handler.Client, role, func() error {
role.Labels = feast.getLabels()
role.Rules = desiredRules
return nil
}); err != nil {
return fmt.Errorf("failed to reconcile namespace registry Role: %w", err)
}

rb.Labels = feast.getLabels()
rb.RoleRef = rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "Role",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ func (feast *FeastServices) buildPDBApplyConfig() *pdbac.PodDisruptionBudgetAppl
WithBlockOwnerDeletion(true),
).
WithSpec(pdbac.PodDisruptionBudgetSpec().
WithSelector(metaac.LabelSelector().WithMatchLabels(feast.getLabels())),
WithSelector(metaac.LabelSelector().WithMatchLabels(feast.getSelectorLabels())),
)

if pdbConfig.MinAvailable != nil {
Expand All @@ -249,8 +249,7 @@ func (feast *FeastServices) updateScalingStatus(deploy *appsv1.Deployment) {
cr := feast.Handler.FeatureStore

cr.Status.Replicas = deploy.Status.ReadyReplicas
labels := feast.getLabels()
cr.Status.Selector = metav1.FormatLabelSelector(metav1.SetAsLabelSelector(labels))
cr.Status.Selector = metav1.FormatLabelSelector(metav1.SetAsLabelSelector(feast.getSelectorLabels()))

if !isScalingEnabled(cr) {
cr.Status.ScalingStatus = nil
Expand Down
33 changes: 25 additions & 8 deletions infra/feast-operator/internal/controller/services/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,10 +384,13 @@ func (feast *FeastServices) createPVC(pvcCreate *feastdevv1.PvcCreate, feastType
}

// PVCs are immutable, so we only create... we don't update an existing one.
// Treat AlreadyExists as success: a pre-existing PVC without the managed-by label
// won't appear in the filtered cache (Client.Get returns NotFound), but Create
// will hit AlreadyExists on the API server — both cases mean the PVC is present.
err = feast.Handler.Client.Get(feast.Handler.Context, client.ObjectKeyFromObject(pvc), pvc)
if err != nil && apierrors.IsNotFound(err) {
err = feast.Handler.Client.Create(feast.Handler.Context, pvc)
if err != nil {
if err != nil && !apierrors.IsAlreadyExists(err) {
return err
}
logger.Info("Successfully created", "PersistentVolumeClaim", pvc.Name)
Expand All @@ -408,9 +411,10 @@ func (feast *FeastServices) setDeployment(deploy *appsv1.Deployment) error {
}

deploy.Labels = feast.getLabels()
selectorLabels := feast.getSelectorLabels()
deploy.Spec = appsv1.DeploymentSpec{
Replicas: replicas,
Selector: metav1.SetAsLabelSelector(deploy.GetLabels()),
Selector: metav1.SetAsLabelSelector(selectorLabels),
Strategy: feast.getDeploymentStrategy(),
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -818,7 +822,7 @@ func (feast *FeastServices) setService(svc *corev1.Service, feastType FeastServi
}

svc.Spec = corev1.ServiceSpec{
Selector: feast.getLabels(),
Selector: feast.getSelectorLabels(),
Type: corev1.ServiceTypeClusterIP,
Ports: []corev1.ServicePort{
{
Expand Down Expand Up @@ -868,6 +872,7 @@ func (feast *FeastServices) setServiceAccount(sa *corev1.ServiceAccount) error {

func (feast *FeastServices) createNewPVC(pvcCreate *feastdevv1.PvcCreate, feastType FeastServiceType) (*corev1.PersistentVolumeClaim, error) {
pvc := feast.initPVC(feastType)
pvc.Labels = feast.getFeastTypeLabels(feastType)

pvc.Spec = corev1.PersistentVolumeClaimSpec{
AccessModes: pvcCreate.AccessModes,
Expand Down Expand Up @@ -976,7 +981,7 @@ func (feast *FeastServices) applyTopologySpread(podSpec *corev1.PodSpec) {
MaxSkew: 1,
TopologyKey: "topology.kubernetes.io/zone",
WhenUnsatisfiable: corev1.ScheduleAnyway,
LabelSelector: metav1.SetAsLabelSelector(feast.getLabels()),
LabelSelector: metav1.SetAsLabelSelector(feast.getSelectorLabels()),
}}
}

Expand All @@ -999,7 +1004,7 @@ func (feast *FeastServices) applyAffinity(podSpec *corev1.PodSpec) {
Weight: 100,
PodAffinityTerm: corev1.PodAffinityTerm{
TopologyKey: "kubernetes.io/hostname",
LabelSelector: metav1.SetAsLabelSelector(feast.getLabels()),
LabelSelector: metav1.SetAsLabelSelector(feast.getSelectorLabels()),
},
}},
},
Expand Down Expand Up @@ -1060,12 +1065,24 @@ func (feast *FeastServices) getFeastTypeLabels(feastType FeastServiceType) map[s
return labels
}

func (feast *FeastServices) getLabels() map[string]string {
// getSelectorLabels returns the minimal label set used for immutable selectors
// (Deployment spec.selector, Service spec.selector, TopologySpreadConstraints,
// PodAffinity). This set must NOT change after initial resource creation, since
// those selector fields cannot be updated in place.
func (feast *FeastServices) getSelectorLabels() map[string]string {
	selector := make(map[string]string, 1)
	selector[NameLabelKey] = feast.Handler.FeatureStore.Name
	return selector
}

// getLabels returns the full label set applied to mutable metadata
// (ObjectMeta.Labels). It extends the selector labels with the managed-by
// label consumed by the informer cache's watch filter.
func (feast *FeastServices) getLabels() map[string]string {
	lbls := make(map[string]string, 2)
	lbls[NameLabelKey] = feast.Handler.FeatureStore.Name
	lbls[ManagedByLabelKey] = ManagedByLabelValue
	return lbls
}

func (feast *FeastServices) setServiceHostnames() error {
feast.Handler.FeatureStore.Status.ServiceHostnames = feastdevv1.ServiceHostnames{}
domain := svcDomain + ":"
Expand Down Expand Up @@ -1438,10 +1455,10 @@ func IsDeploymentAvailable(conditions []appsv1.DeploymentCondition) bool {
// container that is in a failing state. Returns empty string if no failure found.
func (feast *FeastServices) GetPodContainerFailureMessage(deploy appsv1.Deployment) string {
podList := corev1.PodList{}
labels := feast.getLabels()
selectorLabels := feast.getSelectorLabels()
if err := feast.Handler.Client.List(feast.Handler.Context, &podList,
client.InNamespace(deploy.Namespace),
client.MatchingLabels(labels),
client.MatchingLabels(selectorLabels),
); err != nil {
return ""
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ const (
OidcMissingSecretError string = "missing OIDC secret: %s"
)

// Managed-by label applied to every resource the operator creates; the
// manager's informer cache filters its watches on this key/value pair.
const (
	ManagedByLabelKey = "app.kubernetes.io/managed-by"
	ManagedByLabelValue = "feast-operator"
)

var (
DefaultImage = "quay.io/feastdev/feature-server:" + feastversion.FeastVersion
DefaultCronJobImage = "quay.io/openshift/origin-cli:4.17"
Expand Down
Loading