diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4065f4254..5aa4afc3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,21 +69,9 @@ jobs: uses: actions/setup-go@v5 with: go-version-file: go.mod - - name: Cache dependencies - # ref: https://github.com/actions/cache/blob/main/examples.md#go---module - uses: actions/cache@v4 - with: - # Cache, works only on Linux - path: | - ~/.cache/go-build - ~/go/pkg/mod - # Cache key - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - # An ordered list of keys to use for restoring the cache if no cache hit occurred for key - restore-keys: | - ${{ runner.os }}-go- + cache-dependency-path: go.sum - name: Check Code Format - run: make fmt && git status && [[ -z `git status -s` ]] + run: make fmt && git status && [[ -z $(git status -s) ]] - name: Run Unit Test run: make test # TODO(marsevilspirit): add lint diff --git a/api/mesh/v1alpha1/rule_intent.pb.go b/api/mesh/v1alpha1/rule_intent.pb.go new file mode 100644 index 000000000..4ae61d701 --- /dev/null +++ b/api/mesh/v1alpha1/rule_intent.pb.go @@ -0,0 +1,346 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.36.11 +// protoc v7.34.1 +// source: api/mesh/v1alpha1/rule_intent.proto + +package v1alpha1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// RuleIntent records recovery state for a rule mutation before the registry +// write. COMMITTED and FAILED are usually short-lived because the intent is +// cleaned up immediately after the terminal status is written. RuleVersion is +// the durable version record. +type RuleIntent struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Parent rule identification + ParentRuleKind string `protobuf:"bytes,1,opt,name=parent_rule_kind,json=parentRuleKind,proto3" json:"parent_rule_kind,omitempty"` + ParentRuleMesh string `protobuf:"bytes,2,opt,name=parent_rule_mesh,json=parentRuleMesh,proto3" json:"parent_rule_mesh,omitempty"` + ParentRuleName string `protobuf:"bytes,3,opt,name=parent_rule_name,json=parentRuleName,proto3" json:"parent_rule_name,omitempty"` + // Intent metadata + ContentHash string `protobuf:"bytes,5,opt,name=content_hash,json=contentHash,proto3" json:"content_hash,omitempty"` // Hash of the intended spec + SpecJson string `protobuf:"bytes,6,opt,name=spec_json,json=specJson,proto3" json:"spec_json,omitempty"` // Snapshot of intended spec + Operation string `protobuf:"bytes,7,opt,name=operation,proto3" json:"operation,omitempty"` // CREATE, UPDATE, DELETE + Source string `protobuf:"bytes,8,opt,name=source,proto3" json:"source,omitempty"` // ADMIN, UPSTREAM, BOOTSTRAP, ROLLBACK + Author string 
`protobuf:"bytes,9,opt,name=author,proto3" json:"author,omitempty"` // Who initiated this change + Reason string `protobuf:"bytes,10,opt,name=reason,proto3" json:"reason,omitempty"` // Why this change was made + // rolled_back_from_id records the historical version whose snapshot is being + // re-published. It is audit metadata only and never a current-version pointer. + RolledBackFromId int64 `protobuf:"varint,11,opt,name=rolled_back_from_id,json=rolledBackFromId,proto3" json:"rolled_back_from_id,omitempty"` + // Intent lifecycle. Terminal statuses can remain only when cleanup fails; on + // restart, repair reconciles open intents from observed resource state. + Status string `protobuf:"bytes,12,opt,name=status,proto3" json:"status,omitempty"` // PENDING, APPLIED, OUTCOME_UNKNOWN, FAILED, COMMITTED + FailureReason string `protobuf:"bytes,13,opt,name=failure_reason,json=failureReason,proto3" json:"failure_reason,omitempty"` // Error message if status=FAILED + CreatedAt *timestamppb.Timestamp `protobuf:"bytes,14,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` + AppliedAt *timestamppb.Timestamp `protobuf:"bytes,15,opt,name=applied_at,json=appliedAt,proto3" json:"applied_at,omitempty"` + CommittedAt *timestamppb.Timestamp `protobuf:"bytes,16,opt,name=committed_at,json=committedAt,proto3" json:"committed_at,omitempty"` + // Durable reconcile marker written when a non-matching subscriber event is + // observed while the intent is open. The marker prevents intent cleanup from + // committing an older snapshot after a real upstream change has arrived. 
+ ReconcileRequired bool `protobuf:"varint,17,opt,name=reconcile_required,json=reconcileRequired,proto3" json:"reconcile_required,omitempty"` + ObservedContentHash string `protobuf:"bytes,18,opt,name=observed_content_hash,json=observedContentHash,proto3" json:"observed_content_hash,omitempty"` + ObservedSpecJson string `protobuf:"bytes,19,opt,name=observed_spec_json,json=observedSpecJson,proto3" json:"observed_spec_json,omitempty"` + ObservedOperation string `protobuf:"bytes,20,opt,name=observed_operation,json=observedOperation,proto3" json:"observed_operation,omitempty"` + ObservedAt *timestamppb.Timestamp `protobuf:"bytes,21,opt,name=observed_at,json=observedAt,proto3" json:"observed_at,omitempty"` + // Monotonic resource revision owned by RuleIntent. Status transitions and + // observed-marker writes advance it so finalization can reject stale commits + // instead of overwriting a subscriber reconcile marker. + Revision int64 `protobuf:"varint,22,opt,name=revision,proto3" json:"revision,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *RuleIntent) Reset() { + *x = RuleIntent{} + mi := &file_api_mesh_v1alpha1_rule_intent_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *RuleIntent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RuleIntent) ProtoMessage() {} + +func (x *RuleIntent) ProtoReflect() protoreflect.Message { + mi := &file_api_mesh_v1alpha1_rule_intent_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RuleIntent.ProtoReflect.Descriptor instead. 
+func (*RuleIntent) Descriptor() ([]byte, []int) { + return file_api_mesh_v1alpha1_rule_intent_proto_rawDescGZIP(), []int{0} +} + +func (x *RuleIntent) GetParentRuleKind() string { + if x != nil { + return x.ParentRuleKind + } + return "" +} + +func (x *RuleIntent) GetParentRuleMesh() string { + if x != nil { + return x.ParentRuleMesh + } + return "" +} + +func (x *RuleIntent) GetParentRuleName() string { + if x != nil { + return x.ParentRuleName + } + return "" +} + +func (x *RuleIntent) GetContentHash() string { + if x != nil { + return x.ContentHash + } + return "" +} + +func (x *RuleIntent) GetSpecJson() string { + if x != nil { + return x.SpecJson + } + return "" +} + +func (x *RuleIntent) GetOperation() string { + if x != nil { + return x.Operation + } + return "" +} + +func (x *RuleIntent) GetSource() string { + if x != nil { + return x.Source + } + return "" +} + +func (x *RuleIntent) GetAuthor() string { + if x != nil { + return x.Author + } + return "" +} + +func (x *RuleIntent) GetReason() string { + if x != nil { + return x.Reason + } + return "" +} + +func (x *RuleIntent) GetRolledBackFromId() int64 { + if x != nil { + return x.RolledBackFromId + } + return 0 +} + +func (x *RuleIntent) GetStatus() string { + if x != nil { + return x.Status + } + return "" +} + +func (x *RuleIntent) GetFailureReason() string { + if x != nil { + return x.FailureReason + } + return "" +} + +func (x *RuleIntent) GetCreatedAt() *timestamppb.Timestamp { + if x != nil { + return x.CreatedAt + } + return nil +} + +func (x *RuleIntent) GetAppliedAt() *timestamppb.Timestamp { + if x != nil { + return x.AppliedAt + } + return nil +} + +func (x *RuleIntent) GetCommittedAt() *timestamppb.Timestamp { + if x != nil { + return x.CommittedAt + } + return nil +} + +func (x *RuleIntent) GetReconcileRequired() bool { + if x != nil { + return x.ReconcileRequired + } + return false +} + +func (x *RuleIntent) GetObservedContentHash() string { + if x != nil { + return x.ObservedContentHash + 
} + return "" +} + +func (x *RuleIntent) GetObservedSpecJson() string { + if x != nil { + return x.ObservedSpecJson + } + return "" +} + +func (x *RuleIntent) GetObservedOperation() string { + if x != nil { + return x.ObservedOperation + } + return "" +} + +func (x *RuleIntent) GetObservedAt() *timestamppb.Timestamp { + if x != nil { + return x.ObservedAt + } + return nil +} + +func (x *RuleIntent) GetRevision() int64 { + if x != nil { + return x.Revision + } + return 0 +} + +var File_api_mesh_v1alpha1_rule_intent_proto protoreflect.FileDescriptor + +const file_api_mesh_v1alpha1_rule_intent_proto_rawDesc = "" + + "\n" + + "#api/mesh/v1alpha1/rule_intent.proto\x12\x13dubbo.mesh.v1alpha1\x1a\x1fgoogle/protobuf/timestamp.proto\"\xfe\x06\n" + + "\n" + + "RuleIntent\x12(\n" + + "\x10parent_rule_kind\x18\x01 \x01(\tR\x0eparentRuleKind\x12(\n" + + "\x10parent_rule_mesh\x18\x02 \x01(\tR\x0eparentRuleMesh\x12(\n" + + "\x10parent_rule_name\x18\x03 \x01(\tR\x0eparentRuleName\x12!\n" + + "\fcontent_hash\x18\x05 \x01(\tR\vcontentHash\x12\x1b\n" + + "\tspec_json\x18\x06 \x01(\tR\bspecJson\x12\x1c\n" + + "\toperation\x18\a \x01(\tR\toperation\x12\x16\n" + + "\x06source\x18\b \x01(\tR\x06source\x12\x16\n" + + "\x06author\x18\t \x01(\tR\x06author\x12\x16\n" + + "\x06reason\x18\n" + + " \x01(\tR\x06reason\x12-\n" + + "\x13rolled_back_from_id\x18\v \x01(\x03R\x10rolledBackFromId\x12\x16\n" + + "\x06status\x18\f \x01(\tR\x06status\x12%\n" + + "\x0efailure_reason\x18\r \x01(\tR\rfailureReason\x129\n" + + "\n" + + "created_at\x18\x0e \x01(\v2\x1a.google.protobuf.TimestampR\tcreatedAt\x129\n" + + "\n" + + "applied_at\x18\x0f \x01(\v2\x1a.google.protobuf.TimestampR\tappliedAt\x12=\n" + + "\fcommitted_at\x18\x10 \x01(\v2\x1a.google.protobuf.TimestampR\vcommittedAt\x12-\n" + + "\x12reconcile_required\x18\x11 \x01(\bR\x11reconcileRequired\x122\n" + + "\x15observed_content_hash\x18\x12 \x01(\tR\x13observedContentHash\x12,\n" + + "\x12observed_spec_json\x18\x13 
\x01(\tR\x10observedSpecJson\x12-\n" + + "\x12observed_operation\x18\x14 \x01(\tR\x11observedOperation\x12;\n" + + "\vobserved_at\x18\x15 \x01(\v2\x1a.google.protobuf.TimestampR\n" + + "observedAt\x12\x1a\n" + + "\brevision\x18\x16 \x01(\x03R\brevisionJ\x04\b\x04\x10\x05R\n" + + "version_noB1Z/github.com/apache/dubbo-admin/api/mesh/v1alpha1b\x06proto3" + +var ( + file_api_mesh_v1alpha1_rule_intent_proto_rawDescOnce sync.Once + file_api_mesh_v1alpha1_rule_intent_proto_rawDescData []byte +) + +func file_api_mesh_v1alpha1_rule_intent_proto_rawDescGZIP() []byte { + file_api_mesh_v1alpha1_rule_intent_proto_rawDescOnce.Do(func() { + file_api_mesh_v1alpha1_rule_intent_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_api_mesh_v1alpha1_rule_intent_proto_rawDesc), len(file_api_mesh_v1alpha1_rule_intent_proto_rawDesc))) + }) + return file_api_mesh_v1alpha1_rule_intent_proto_rawDescData +} + +var file_api_mesh_v1alpha1_rule_intent_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_api_mesh_v1alpha1_rule_intent_proto_goTypes = []any{ + (*RuleIntent)(nil), // 0: dubbo.mesh.v1alpha1.RuleIntent + (*timestamppb.Timestamp)(nil), // 1: google.protobuf.Timestamp +} +var file_api_mesh_v1alpha1_rule_intent_proto_depIdxs = []int32{ + 1, // 0: dubbo.mesh.v1alpha1.RuleIntent.created_at:type_name -> google.protobuf.Timestamp + 1, // 1: dubbo.mesh.v1alpha1.RuleIntent.applied_at:type_name -> google.protobuf.Timestamp + 1, // 2: dubbo.mesh.v1alpha1.RuleIntent.committed_at:type_name -> google.protobuf.Timestamp + 1, // 3: dubbo.mesh.v1alpha1.RuleIntent.observed_at:type_name -> google.protobuf.Timestamp + 4, // [4:4] is the sub-list for method output_type + 4, // [4:4] is the sub-list for method input_type + 4, // [4:4] is the sub-list for extension type_name + 4, // [4:4] is the sub-list for extension extendee + 0, // [0:4] is the sub-list for field type_name +} + +func init() { file_api_mesh_v1alpha1_rule_intent_proto_init() } +func 
file_api_mesh_v1alpha1_rule_intent_proto_init() { + if File_api_mesh_v1alpha1_rule_intent_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_api_mesh_v1alpha1_rule_intent_proto_rawDesc), len(file_api_mesh_v1alpha1_rule_intent_proto_rawDesc)), + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_api_mesh_v1alpha1_rule_intent_proto_goTypes, + DependencyIndexes: file_api_mesh_v1alpha1_rule_intent_proto_depIdxs, + MessageInfos: file_api_mesh_v1alpha1_rule_intent_proto_msgTypes, + }.Build() + File_api_mesh_v1alpha1_rule_intent_proto = out.File + file_api_mesh_v1alpha1_rule_intent_proto_goTypes = nil + file_api_mesh_v1alpha1_rule_intent_proto_depIdxs = nil +} diff --git a/api/mesh/v1alpha1/rule_intent.proto b/api/mesh/v1alpha1/rule_intent.proto new file mode 100644 index 000000000..7bc0bf7c3 --- /dev/null +++ b/api/mesh/v1alpha1/rule_intent.proto @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; + +package dubbo.mesh.v1alpha1; + +option go_package = "github.com/apache/dubbo-admin/api/mesh/v1alpha1"; + +import "google/protobuf/timestamp.proto"; + +// RuleIntent records recovery state for a rule mutation before the registry +// write. COMMITTED and FAILED are usually short-lived because the intent is +// cleaned up immediately after the terminal status is written. RuleVersion is +// the durable version record. +message RuleIntent { + reserved 4; + reserved "version_no"; + + // Parent rule identification + string parent_rule_kind = 1; + string parent_rule_mesh = 2; + string parent_rule_name = 3; + + // Intent metadata + string content_hash = 5; // Hash of the intended spec + string spec_json = 6; // Snapshot of intended spec + string operation = 7; // CREATE, UPDATE, DELETE + string source = 8; // ADMIN, UPSTREAM, BOOTSTRAP, ROLLBACK + string author = 9; // Who initiated this change + string reason = 10; // Why this change was made + + // rolled_back_from_id records the historical version whose snapshot is being + // re-published. It is audit metadata only and never a current-version pointer. + int64 rolled_back_from_id = 11; + + // Intent lifecycle. Terminal statuses can remain only when cleanup fails; on + // restart, repair reconciles open intents from observed resource state. + string status = 12; // PENDING, APPLIED, OUTCOME_UNKNOWN, FAILED, COMMITTED + string failure_reason = 13; // Error message if status=FAILED + google.protobuf.Timestamp created_at = 14; + google.protobuf.Timestamp applied_at = 15; + google.protobuf.Timestamp committed_at = 16; + + // Durable reconcile marker written when a non-matching subscriber event is + // observed while the intent is open. The marker prevents intent cleanup from + // committing an older snapshot after a real upstream change has arrived. 
+ bool reconcile_required = 17; + string observed_content_hash = 18; + string observed_spec_json = 19; + string observed_operation = 20; + google.protobuf.Timestamp observed_at = 21; + + // Monotonic resource revision owned by RuleIntent. Status transitions and + // observed-marker writes advance it so finalization can reject stale commits + // instead of overwriting a subscriber reconcile marker. + int64 revision = 22; +} diff --git a/api/mesh/v1alpha1/rule_version.pb.go b/api/mesh/v1alpha1/rule_version.pb.go new file mode 100644 index 000000000..3c44a5aa2 --- /dev/null +++ b/api/mesh/v1alpha1/rule_version.pb.go @@ -0,0 +1,275 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.11 +// protoc v7.34.1 +// source: api/mesh/v1alpha1/rule_version.proto + +package v1alpha1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// RuleVersion is a traffic-rule version entry. Entries are immutable after +// creation. Rollback appends a new RuleVersion, while retention may delete the +// oldest entries. +type RuleVersion struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Parent rule information + ParentRuleKind string `protobuf:"bytes,1,opt,name=parent_rule_kind,json=parentRuleKind,proto3" json:"parent_rule_kind,omitempty"` // e.g., "ConditionRoute" + ParentRuleMesh string `protobuf:"bytes,2,opt,name=parent_rule_mesh,json=parentRuleMesh,proto3" json:"parent_rule_mesh,omitempty"` // Mesh name + ParentRuleName string `protobuf:"bytes,3,opt,name=parent_rule_name,json=parentRuleName,proto3" json:"parent_rule_name,omitempty"` // Rule name + // Version metadata + VersionNo int64 `protobuf:"varint,4,opt,name=version_no,json=versionNo,proto3" json:"version_no,omitempty"` // Sequential version number (1, 2, 3...) 
+ ContentHash string `protobuf:"bytes,5,opt,name=content_hash,json=contentHash,proto3" json:"content_hash,omitempty"` // SHA256 of normalized spec JSON + // Spec snapshot + SpecJson string `protobuf:"bytes,6,opt,name=spec_json,json=specJson,proto3" json:"spec_json,omitempty"` // JSON-serialized rule spec at this version + // Mutation context + Operation string `protobuf:"bytes,7,opt,name=operation,proto3" json:"operation,omitempty"` // CREATE, UPDATE, DELETE + Source string `protobuf:"bytes,8,opt,name=source,proto3" json:"source,omitempty"` // ADMIN, UPSTREAM, BOOTSTRAP, ROLLBACK + Author string `protobuf:"bytes,9,opt,name=author,proto3" json:"author,omitempty"` // User or system identifier + Reason string `protobuf:"bytes,10,opt,name=reason,proto3" json:"reason,omitempty"` // Change description + // rolled_back_from_id records the historical version whose snapshot was + // re-published to produce this version. It is audit metadata only and MUST NOT + // be used as the current-version pointer. + RolledBackFromId int64 `protobuf:"varint,11,opt,name=rolled_back_from_id,json=rolledBackFromId,proto3" json:"rolled_back_from_id,omitempty"` + // Timestamps + CreatedAt *timestamppb.Timestamp `protobuf:"bytes,12,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` + CommittedAt *timestamppb.Timestamp `protobuf:"bytes,13,opt,name=committed_at,json=committedAt,proto3" json:"committed_at,omitempty"` + // intent_id links a RuleVersion to the mutation intent that produced it. + // Non-intent versions such as UPSTREAM and BOOTSTRAP leave this as 0. 
+ IntentId int64 `protobuf:"varint,14,opt,name=intent_id,json=intentId,proto3" json:"intent_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *RuleVersion) Reset() { + *x = RuleVersion{} + mi := &file_api_mesh_v1alpha1_rule_version_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *RuleVersion) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*RuleVersion) ProtoMessage() {} + +func (x *RuleVersion) ProtoReflect() protoreflect.Message { + mi := &file_api_mesh_v1alpha1_rule_version_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use RuleVersion.ProtoReflect.Descriptor instead. +func (*RuleVersion) Descriptor() ([]byte, []int) { + return file_api_mesh_v1alpha1_rule_version_proto_rawDescGZIP(), []int{0} +} + +func (x *RuleVersion) GetParentRuleKind() string { + if x != nil { + return x.ParentRuleKind + } + return "" +} + +func (x *RuleVersion) GetParentRuleMesh() string { + if x != nil { + return x.ParentRuleMesh + } + return "" +} + +func (x *RuleVersion) GetParentRuleName() string { + if x != nil { + return x.ParentRuleName + } + return "" +} + +func (x *RuleVersion) GetVersionNo() int64 { + if x != nil { + return x.VersionNo + } + return 0 +} + +func (x *RuleVersion) GetContentHash() string { + if x != nil { + return x.ContentHash + } + return "" +} + +func (x *RuleVersion) GetSpecJson() string { + if x != nil { + return x.SpecJson + } + return "" +} + +func (x *RuleVersion) GetOperation() string { + if x != nil { + return x.Operation + } + return "" +} + +func (x *RuleVersion) GetSource() string { + if x != nil { + return x.Source + } + return "" +} + +func (x *RuleVersion) GetAuthor() string { + if x != nil { + return x.Author + } + return "" +} + +func (x 
*RuleVersion) GetReason() string { + if x != nil { + return x.Reason + } + return "" +} + +func (x *RuleVersion) GetRolledBackFromId() int64 { + if x != nil { + return x.RolledBackFromId + } + return 0 +} + +func (x *RuleVersion) GetCreatedAt() *timestamppb.Timestamp { + if x != nil { + return x.CreatedAt + } + return nil +} + +func (x *RuleVersion) GetCommittedAt() *timestamppb.Timestamp { + if x != nil { + return x.CommittedAt + } + return nil +} + +func (x *RuleVersion) GetIntentId() int64 { + if x != nil { + return x.IntentId + } + return 0 +} + +var File_api_mesh_v1alpha1_rule_version_proto protoreflect.FileDescriptor + +const file_api_mesh_v1alpha1_rule_version_proto_rawDesc = "" + + "\n" + + "$api/mesh/v1alpha1/rule_version.proto\x12\x13dubbo.mesh.v1alpha1\x1a\x1fgoogle/protobuf/timestamp.proto\"\x96\x04\n" + + "\vRuleVersion\x12(\n" + + "\x10parent_rule_kind\x18\x01 \x01(\tR\x0eparentRuleKind\x12(\n" + + "\x10parent_rule_mesh\x18\x02 \x01(\tR\x0eparentRuleMesh\x12(\n" + + "\x10parent_rule_name\x18\x03 \x01(\tR\x0eparentRuleName\x12\x1d\n" + + "\n" + + "version_no\x18\x04 \x01(\x03R\tversionNo\x12!\n" + + "\fcontent_hash\x18\x05 \x01(\tR\vcontentHash\x12\x1b\n" + + "\tspec_json\x18\x06 \x01(\tR\bspecJson\x12\x1c\n" + + "\toperation\x18\a \x01(\tR\toperation\x12\x16\n" + + "\x06source\x18\b \x01(\tR\x06source\x12\x16\n" + + "\x06author\x18\t \x01(\tR\x06author\x12\x16\n" + + "\x06reason\x18\n" + + " \x01(\tR\x06reason\x12-\n" + + "\x13rolled_back_from_id\x18\v \x01(\x03R\x10rolledBackFromId\x129\n" + + "\n" + + "created_at\x18\f \x01(\v2\x1a.google.protobuf.TimestampR\tcreatedAt\x12=\n" + + "\fcommitted_at\x18\r \x01(\v2\x1a.google.protobuf.TimestampR\vcommittedAt\x12\x1b\n" + + "\tintent_id\x18\x0e \x01(\x03R\bintentIdB1Z/github.com/apache/dubbo-admin/api/mesh/v1alpha1b\x06proto3" + +var ( + file_api_mesh_v1alpha1_rule_version_proto_rawDescOnce sync.Once + file_api_mesh_v1alpha1_rule_version_proto_rawDescData []byte +) + +func 
file_api_mesh_v1alpha1_rule_version_proto_rawDescGZIP() []byte { + file_api_mesh_v1alpha1_rule_version_proto_rawDescOnce.Do(func() { + file_api_mesh_v1alpha1_rule_version_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_api_mesh_v1alpha1_rule_version_proto_rawDesc), len(file_api_mesh_v1alpha1_rule_version_proto_rawDesc))) + }) + return file_api_mesh_v1alpha1_rule_version_proto_rawDescData +} + +var file_api_mesh_v1alpha1_rule_version_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_api_mesh_v1alpha1_rule_version_proto_goTypes = []any{ + (*RuleVersion)(nil), // 0: dubbo.mesh.v1alpha1.RuleVersion + (*timestamppb.Timestamp)(nil), // 1: google.protobuf.Timestamp +} +var file_api_mesh_v1alpha1_rule_version_proto_depIdxs = []int32{ + 1, // 0: dubbo.mesh.v1alpha1.RuleVersion.created_at:type_name -> google.protobuf.Timestamp + 1, // 1: dubbo.mesh.v1alpha1.RuleVersion.committed_at:type_name -> google.protobuf.Timestamp + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_api_mesh_v1alpha1_rule_version_proto_init() } +func file_api_mesh_v1alpha1_rule_version_proto_init() { + if File_api_mesh_v1alpha1_rule_version_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_api_mesh_v1alpha1_rule_version_proto_rawDesc), len(file_api_mesh_v1alpha1_rule_version_proto_rawDesc)), + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_api_mesh_v1alpha1_rule_version_proto_goTypes, + DependencyIndexes: file_api_mesh_v1alpha1_rule_version_proto_depIdxs, + MessageInfos: file_api_mesh_v1alpha1_rule_version_proto_msgTypes, + 
}.Build() + File_api_mesh_v1alpha1_rule_version_proto = out.File + file_api_mesh_v1alpha1_rule_version_proto_goTypes = nil + file_api_mesh_v1alpha1_rule_version_proto_depIdxs = nil +} diff --git a/api/mesh/v1alpha1/rule_version.proto b/api/mesh/v1alpha1/rule_version.proto new file mode 100644 index 000000000..8e5a94dd0 --- /dev/null +++ b/api/mesh/v1alpha1/rule_version.proto @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +package dubbo.mesh.v1alpha1; + +option go_package = "github.com/apache/dubbo-admin/api/mesh/v1alpha1"; + +import "google/protobuf/timestamp.proto"; + +// RuleVersion is a traffic-rule version entry. Entries are immutable after +// creation. Rollback appends a new RuleVersion, while retention may delete the +// oldest entries. +message RuleVersion { + // Parent rule information + string parent_rule_kind = 1; // e.g., "ConditionRoute" + string parent_rule_mesh = 2; // Mesh name + string parent_rule_name = 3; // Rule name + + // Version metadata + int64 version_no = 4; // Sequential version number (1, 2, 3...) 
+ string content_hash = 5; // SHA256 of normalized spec JSON + + // Spec snapshot + string spec_json = 6; // JSON-serialized rule spec at this version + + // Mutation context + string operation = 7; // CREATE, UPDATE, DELETE + string source = 8; // ADMIN, UPSTREAM, BOOTSTRAP, ROLLBACK + string author = 9; // User or system identifier + string reason = 10; // Change description + + // rolled_back_from_id records the historical version whose snapshot was + // re-published to produce this version. It is audit metadata only and MUST NOT + // be used as the current-version pointer. + int64 rolled_back_from_id = 11; + + // Timestamps + google.protobuf.Timestamp created_at = 12; + google.protobuf.Timestamp committed_at = 13; + + // intent_id links a RuleVersion to the mutation intent that produced it. + // Non-intent versions such as UPSTREAM and BOOTSTRAP leave this as 0. + int64 intent_id = 14; +} diff --git a/app/dubbo-admin/dubbo-admin.yaml b/app/dubbo-admin/dubbo-admin.yaml index 1e74b47ea..b155e5582 100644 --- a/app/dubbo-admin/dubbo-admin.yaml +++ b/app/dubbo-admin/dubbo-admin.yaml @@ -63,6 +63,14 @@ store: # type: mysql # address: root:123456@tcp(127.0.0.1:23306)/dubbo-admin?charset=utf8mb4&parseTime=True&loc=Asia%2FShanghai +# [Optional] settings for traffic-rule version history and rollback. +# The capability itself is always on; only this section is optional, and +# defaults apply when it is omitted. Startup requires RuleVersion and +# RuleIntent stores plus an available lock implementation; when those +# prerequisites are missing, startup fails closed instead of recording unsafe history. Set maxVersionsPerRule to 0 to disable retention trimming. 
+ruleVersioning: + maxVersionsPerRule: 20 + # [Necessary] configs for service discovery discovery: # [Necessary] discovery type, options are nacos2, zookeeper, mock(only for dev) diff --git a/docs/server-develop.md b/docs/server-develop.md index 310905734..44ff0b916 100644 --- a/docs/server-develop.md +++ b/docs/server-develop.md @@ -43,8 +43,17 @@ If you're using GoLand, you can run it locally by following steps: 2. Fill the block with the config that screenshot shows below: ![ide_configuration.png](./static/images/ide-config.png) 3. Modify the config file(app/dubbo-admin/dubbo-admin.yaml), make sure that the discovery, engine, store is configured. + Traffic-rule version history is always enabled and writes RuleVersion and RuleIntent resources for history, rollback, and crash recovery. Memory store uses a process-local lock; shared database stores must initialize the lock component, otherwise startup fails closed instead of recording unsafe history. Registry writes are reconciled from actual rule state after mutation because context cancellation is not server-side fencing. 4. Run the application, you can open the browser and visit localhost:8888/admin if everything works. +### Traffic-rule versioning notes + +Traffic-rule versioning is always enabled. On startup it requires stores for `RuleVersion` and `RuleIntent`, plus a lock implementation (the memory store uses a process-local lock; shared database stores must initialize the lock component). Startup first repairs open intents and then bootstraps existing `ConditionRoute`, `TagRoute`, and `DynamicConfig` rules; both steps honor shutdown cancellation. + +Version entries are immutable after creation. Rollback publishes a new version from a historical snapshot, while `maxVersionsPerRule` retention may physically delete the oldest entries. This is a bounded immutable version history, not a permanent compliance audit log; the latest version and version number are derived from the RuleVersion ledger. 
+ +The traffic-form field preservation fix stays with this versioning PR because version history smoke tests depend on round-tripping `priority`, `force`, and `configVersion` without losing fields. The Zookeeper delete nil guard also stays here because versioning subscribers consume delete events through the same discovery event path and require safe old-object handling. These notes document the review boundary instead of splitting the PR. + ### Project catalog We are currently restructuring the entire project, so the directory structure of the project will be changed in the near future. diff --git a/pkg/common/constants/lock.go b/pkg/common/constants/lock.go index 30a9f4049..c7fdb23be 100644 --- a/pkg/common/constants/lock.go +++ b/pkg/common/constants/lock.go @@ -46,6 +46,9 @@ const ( // Lock key prefixes for different resource types const ( + // RuleVersioningKeyPrefix is the canonical prefix for per-rule versioning locks. + RuleVersioningKeyPrefix = "rule_versioning" + // TagRouteKeyPrefix is the prefix for tag route lock keys TagRouteKeyPrefix = "tag_route" diff --git a/pkg/config/app/admin.go b/pkg/config/app/admin.go index 71cf3f95f..5362867ed 100644 --- a/pkg/config/app/admin.go +++ b/pkg/config/app/admin.go @@ -31,6 +31,7 @@ import ( "github.com/apache/dubbo-admin/pkg/config/log" "github.com/apache/dubbo-admin/pkg/config/observability" "github.com/apache/dubbo-admin/pkg/config/store" + "github.com/apache/dubbo-admin/pkg/config/versioning" ) type AdminConfig struct { @@ -51,6 +52,9 @@ type AdminConfig struct { Engine *engine.Config `json:"engine" yaml:"engine"` // EventBus configuration EventBus *eventbus.Config `json:"eventBus,omitempty" yaml:"eventBus,omitempty"` + // RuleVersioning provides version history and optimistic locking for governor-managed traffic rules. + // This applies to ConditionRoute, TagRoute, and Configurator (DynamicConfig). 
+ RuleVersioning *versioning.Config `json:"ruleVersioning,omitempty" yaml:"ruleVersioning,omitempty"` } var _ = &AdminConfig{} @@ -58,17 +62,18 @@ var _ = &AdminConfig{} var DefaultAdminConfig = func() AdminConfig { eventBusCfg := eventbus.Default() return AdminConfig{ - Log: log.DefaultLogConfig(), - Store: store.DefaultStoreConfig(), - Engine: engine.DefaultResourceEngineConfig(), - Observability: observability.DefaultObservabilityConfig(), - Diagnostics: diagnostics.DefaultDiagnosticsConfig(), - Console: console.DefaultConsoleConfig(), - EventBus: &eventBusCfg, + Log: log.DefaultLogConfig(), + Store: store.DefaultStoreConfig(), + Engine: engine.DefaultResourceEngineConfig(), + Observability: observability.DefaultObservabilityConfig(), + Diagnostics: diagnostics.DefaultDiagnosticsConfig(), + Console: console.DefaultConsoleConfig(), + EventBus: &eventBusCfg, + RuleVersioning: versioning.Default(), } } -func (c AdminConfig) Sanitize() { +func (c *AdminConfig) Sanitize() { c.Engine.Sanitize() for _, d := range c.Discovery { d.Sanitize() @@ -78,9 +83,13 @@ func (c AdminConfig) Sanitize() { c.Observability.Sanitize() c.Diagnostics.Sanitize() c.Log.Sanitize() + if c.RuleVersioning == nil { + c.RuleVersioning = versioning.Default() + } + c.RuleVersioning.Sanitize() } -func (c AdminConfig) PreProcess() error { +func (c *AdminConfig) PreProcess() error { discoveryPreProcess := func() error { for _, d := range c.Discovery { if err := d.PreProcess(); err != nil { @@ -89,6 +98,9 @@ func (c AdminConfig) PreProcess() error { } return nil } + if c.RuleVersioning == nil { + c.RuleVersioning = versioning.Default() + } return multierr.Combine( c.Engine.PreProcess(), discoveryPreProcess(), @@ -97,10 +109,11 @@ func (c AdminConfig) PreProcess() error { c.Observability.PreProcess(), c.Diagnostics.PreProcess(), c.Log.PreProcess(), + c.RuleVersioning.PreProcess(), ) } -func (c AdminConfig) PostProcess() error { +func (c *AdminConfig) PostProcess() error { discoveryPostProcess := func() 
error { for _, d := range c.Discovery { if err := d.PostProcess(); err != nil { @@ -109,6 +122,9 @@ func (c AdminConfig) PostProcess() error { } return nil } + if c.RuleVersioning == nil { + c.RuleVersioning = versioning.Default() + } return multierr.Combine( c.Engine.PostProcess(), discoveryPostProcess(), @@ -117,10 +133,11 @@ func (c AdminConfig) PostProcess() error { c.Observability.PostProcess(), c.Diagnostics.PostProcess(), c.Log.PostProcess(), + c.RuleVersioning.PostProcess(), ) } -func (c AdminConfig) Validate() error { +func (c *AdminConfig) Validate() error { if c.Log == nil { c.Log = log.DefaultLogConfig() } else if err := c.Log.Validate(); err != nil { @@ -171,6 +188,11 @@ func (c AdminConfig) Validate() error { } else if err := c.EventBus.Validate(); err != nil { return bizerror.Wrap(err, bizerror.ConfigError, "event bus config validation failed") } + if c.RuleVersioning == nil { + c.RuleVersioning = versioning.Default() + } else if err := c.RuleVersioning.Validate(); err != nil { + return bizerror.Wrap(err, bizerror.ConfigError, "versioning config validation failed") + } return nil } diff --git a/pkg/config/versioning/config.go b/pkg/config/versioning/config.go new file mode 100644 index 000000000..8217d288c --- /dev/null +++ b/pkg/config/versioning/config.go @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "encoding/json" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + "github.com/apache/dubbo-admin/pkg/config" +) + +const ( + // DefaultMaxVersionsPerRule is the retention window used when configuration + // omits maxVersionsPerRule or provides a negative value. + DefaultMaxVersionsPerRule = int64(50) +) + +// Config controls rule-version retention. A zero MaxVersionsPerRule disables +// cleanup; committed mutations are still recorded. +type Config struct { + config.BaseConfig + MaxVersionsPerRule int64 `json:"maxVersionsPerRule" yaml:"maxVersionsPerRule"` +} + +func (c *Config) UnmarshalJSON(data []byte) error { + type config Config + defaults := Default() + *c = *defaults + return json.Unmarshal(data, (*config)(c)) +} + +// Default returns rule-versioning configuration with retention enabled. +func Default() *Config { + return &Config{ + MaxVersionsPerRule: DefaultMaxVersionsPerRule, + } +} + +// Sanitize normalizes invalid retention values to the default window. +func (c *Config) Sanitize() { + if c.MaxVersionsPerRule < 0 { + c.MaxVersionsPerRule = DefaultMaxVersionsPerRule + } +} + +// Validate rejects negative retention values before startup. 
+func (c *Config) Validate() error { + if c.MaxVersionsPerRule < 0 { + return bizerror.New(bizerror.ConfigError, "ruleVersioning.maxVersionsPerRule must be greater than or equal to 0") + } + return nil +} diff --git a/pkg/console/context/context.go b/pkg/console/context/context.go index f4c64ce5b..496011d26 100644 --- a/pkg/console/context/context.go +++ b/pkg/console/context/context.go @@ -25,6 +25,7 @@ import ( "github.com/apache/dubbo-admin/pkg/console/counter" "github.com/apache/dubbo-admin/pkg/core/manager" "github.com/apache/dubbo-admin/pkg/core/runtime" + "github.com/apache/dubbo-admin/pkg/core/versioning" ) type Context interface { @@ -35,6 +36,7 @@ type Context interface { AppContext() ctx.Context LockManager() lock.Lock + RuleVersioning() *versioning.Service } var _ Context = &context{} @@ -81,3 +83,15 @@ func (c *context) LockManager() lock.Lock { } return distributedLock } + +func (c *context) RuleVersioning() *versioning.Service { + comp, err := c.coreRt.GetComponent(versioning.ComponentType) + if err != nil { + return nil + } + versioningComp, ok := comp.(versioning.Component) + if !ok { + return nil + } + return versioningComp.Service() +} diff --git a/pkg/console/handler/condition_rule.go b/pkg/console/handler/condition_rule.go index 653c12e71..626683dc6 100644 --- a/pkg/console/handler/condition_rule.go +++ b/pkg/console/handler/condition_rule.go @@ -94,8 +94,14 @@ func PutConditionRuleWithRuleName(cs consolectx.Context) gin.HandlerFunc { util.HandleArgumentError(c, err) return } - - if err := service.UpdateConditionRule(cs, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err := service.UpdateConditionRuleWithOptions(cs, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } util.HandleServiceError(c, err) return } else { @@ -118,8 +124,14 @@ func PostConditionRuleWithRuleName(cs consolectx.Context) gin.HandlerFunc { util.HandleArgumentError(c, err) return } - - if err := 
service.CreateConditionRule(cs, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err := service.CreateConditionRuleWithOptions(cs, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } else { @@ -137,7 +149,14 @@ func DeleteConditionRuleWithRuleName(cs consolectx.Context) gin.HandlerFunc { fmt.Sprintf("ruleName must end with %s", constants.ConditionRuleDotSuffix)))) return } - if err := service.DeleteConditionRule(cs, ruleName, mesh); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err := service.DeleteConditionRuleWithOptions(cs, ruleName, mesh, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } diff --git a/pkg/console/handler/configurator_rule.go b/pkg/console/handler/configurator_rule.go index 0b806715b..15318a9d4 100644 --- a/pkg/console/handler/configurator_rule.go +++ b/pkg/console/handler/configurator_rule.go @@ -105,7 +105,14 @@ func PutConfiguratorWithRuleName(ctx consolectx.Context) gin.HandlerFunc { c.JSON(http.StatusOK, model.NewBizErrorResp( bizerror.New(bizerror.NotFoundError, fmt.Sprintf("%s not found", ruleName)))) } - if err = service.UpdateConfigurator(ctx, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err = service.UpdateConfiguratorWithOptions(ctx, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } @@ -128,7 +135,14 @@ func PostConfiguratorWithRuleName(ctx consolectx.Context) gin.HandlerFunc { util.HandleArgumentError(c, err) return } - if err = service.CreateConfigurator(ctx, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err = service.CreateConfiguratorWithOptions(ctx, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return 
+ } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } @@ -146,7 +160,14 @@ func DeleteConfiguratorWithRuleName(ctx consolectx.Context) gin.HandlerFunc { fmt.Sprintf("dynamic config name must end with %s", constants.ConfiguratorRuleDotSuffix)))) return } - if err := service.DeleteConfigurator(ctx, ruleName, mesh); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err := service.DeleteConfiguratorWithOptions(ctx, ruleName, mesh, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } diff --git a/pkg/console/handler/rule_version.go b/pkg/console/handler/rule_version.go new file mode 100644 index 000000000..b6c3f8fd8 --- /dev/null +++ b/pkg/console/handler/rule_version.go @@ -0,0 +1,478 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package handler + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + "strconv" + "strings" + "time" + + "github.com/gin-contrib/sessions" + "github.com/gin-gonic/gin" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + consolectx "github.com/apache/dubbo-admin/pkg/console/context" + "github.com/apache/dubbo-admin/pkg/console/model" + "github.com/apache/dubbo-admin/pkg/console/service" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/versioning" +) + +type rollbackReq struct { + Reason string `json:"reason"` + ExpectedVersionID json.RawMessage `json:"expectedVersionId"` +} + +type abandonIntentReq struct { + Reason string `json:"reason"` +} + +const maxRuleVersionReasonLength = 1024 + +func ListRuleVersions(cs consolectx.Context, kind coremodel.ResourceKind) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + resp, err := service.ListRuleVersions(cs, service.RuleKindName{Kind: kind, Mesh: c.Query("mesh"), Name: c.Param("ruleName")}) + writeVersioningResp(c, resp, err) + } +} + +func GetRuleVersion(cs consolectx.Context, kind coremodel.ResourceKind) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + id, ok := parseVersionID(c) + if !ok { + return + } + resp, err := service.GetRuleVersion(cs, service.RuleKindName{Kind: kind, Mesh: c.Query("mesh"), Name: c.Param("ruleName")}, id) + writeVersioningResp(c, resp, err) + } +} + +func DiffRuleVersion(cs consolectx.Context, kind coremodel.ResourceKind) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + id, ok := parseVersionID(c) + if !ok { + return + } + resp, err := service.DiffRuleVersion(cs, service.RuleKindName{Kind: kind, Mesh: c.Query("mesh"), Name: c.Param("ruleName")}, id, c.Query("against")) + writeVersioningResp(c, resp, err) + } +} + +func RollbackRuleVersion(cs 
consolectx.Context, kind coremodel.ResourceKind) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + id, ok := parseVersionID(c) + if !ok { + return + } + req := rollbackReq{} + if err := c.ShouldBindJSON(&req); err != nil { + writeVersioningInvalidArgument(c, err.Error()) + return + } + if !validateRuleVersionReasonLength(c, req.Reason) { + return + } + expectedVersionID, ok := parseJSONInt64(c, req.ExpectedVersionID, "expectedVersionId") + if !ok { + return + } + // Rollback re-publishes a historical snapshot through the same mutation + // path as normal rule updates and returns the newly committed version. + resp, err := service.RollbackRuleVersion(cs, service.RuleKindName{Kind: kind, Mesh: c.Query("mesh"), Name: c.Param("ruleName")}, id, req.Reason, expectedVersionID, currentUser(c)) + writeVersioningResp(c, resp, err) + } +} + +func RepairRuleVersionIntent(cs consolectx.Context) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + id, ok := parseIntentID(c) + if !ok { + return + } + resp, err := service.RepairRuleVersionIntent(cs, id) + writeVersioningResp(c, resp, err) + } +} + +func AbandonRuleVersionIntent(cs consolectx.Context) gin.HandlerFunc { + return func(c *gin.Context) { + if !ensureVersioningEnabled(c, cs) { + return + } + id, ok := parseIntentID(c) + if !ok { + return + } + req := abandonIntentReq{} + if err := c.ShouldBindJSON(&req); err != nil { + writeVersioningInvalidArgument(c, err.Error()) + return + } + if !validateRuleVersionReasonLength(c, req.Reason) { + return + } + err := service.AbandonRuleVersionIntent(cs, id, req.Reason) + writeVersioningResp(c, "", err) + } +} + +func validateRuleVersionReasonLength(c *gin.Context, reason string) bool { + if len([]rune(strings.TrimSpace(reason))) <= maxRuleVersionReasonLength { // count runes, not bytes: the limit is stated in characters + return true + } + writeVersioningResp(c, nil, bizerror.New(bizerror.InvalidArgument, "reason must be at most 1024 characters"))
+ return false +} + +// expectedVersionId is omitted/null for no precondition, "0" for an absent or +// deleted current rule, or a positive version ID. JSON requests carry it as a +// string so browser clients do not lose int64 precision. +func parseExpectedVersionID(c *gin.Context) (*int64, bool) { + raw := strings.TrimSpace(c.Query("expectedVersionId")) + if raw == "" { + return nil, true + } + id, err := parseProtocolInt64(raw, true) + if err != nil { + writeVersioningInvalidArgument(c, "expectedVersionId must be omitted, \"0\", or a positive decimal string") + return nil, false + } + return &id, true +} + +func parseJSONInt64(c *gin.Context, raw json.RawMessage, field string) (*int64, bool) { + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" || trimmed == "null" { + return nil, true + } + var value string + if strings.HasPrefix(trimmed, `"`) { + if err := json.Unmarshal(raw, &value); err != nil { + writeVersioningInvalidArgument(c, fmt.Sprintf("%s must be a decimal string", field)) + return nil, false + } + value = strings.TrimSpace(value) + } else { + writeVersioningInvalidArgument(c, fmt.Sprintf("%s must be a decimal string", field)) + return nil, false + } + if value == "" { + writeVersioningInvalidArgument(c, fmt.Sprintf("%s must not be empty", field)) + return nil, false + } + id, err := parseProtocolInt64(value, true) + if err != nil { + writeVersioningInvalidArgument(c, fmt.Sprintf("%s must be omitted, \"0\", or a positive decimal string", field)) + return nil, false + } + return &id, true +} + +func mutationOptions(c *gin.Context) (service.RuleMutationOptions, bool) { + expected, ok := parseExpectedVersionID(c) + if !ok { + return service.RuleMutationOptions{}, false + } + return service.RuleMutationOptions{ExpectedVersionID: expected, Author: currentUser(c)}, true +} + +func parseVersionID(c *gin.Context) (int64, bool) { + id, err := parseProtocolInt64(c.Param("versionId"), false) + if err != nil { + writeVersioningInvalidArgument(c, "versionId
must be a positive decimal string") + return 0, false + } + return id, true +} + +func parseIntentID(c *gin.Context) (int64, bool) { + id, err := parseProtocolInt64(c.Param("intentId"), false) + if err != nil { + writeVersioningInvalidArgument(c, "intentId must be a positive decimal string") + return 0, false + } + return id, true +} + +func parseProtocolInt64(raw string, allowZero bool) (int64, error) { + if raw == "" { + return 0, fmt.Errorf("empty id") + } + for i := range raw { + if raw[i] < '0' || raw[i] > '9' { + return 0, fmt.Errorf("invalid decimal id") + } + } + if len(raw) > 1 && raw[0] == '0' { + return 0, fmt.Errorf("invalid leading zero") + } + id, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return 0, err + } + if id == 0 && allowZero { + return id, nil + } + if id <= 0 { + return 0, fmt.Errorf("id must be positive") + } + return id, nil +} + +func currentUser(c *gin.Context) string { + session := sessions.Default(c) + if user, ok := session.Get("user").(string); ok && strings.TrimSpace(user) != "" { + return user + } + return "system:unknown" +} + +func ensureVersioningEnabled(c *gin.Context, cs consolectx.Context) bool { + if cs.RuleVersioning() != nil { + return true + } + c.JSON(http.StatusServiceUnavailable, &model.CommonResp{ + Code: "VERSION_LEDGER_UNAVAILABLE", + Message: "rule versioning service is unavailable", + }) + return false +} + +func writeVersioningResp(c *gin.Context, data any, err error) { + if err == nil { + c.JSON(http.StatusOK, model.NewSuccessResp(versioningAPIData(data))) + return + } + var conflict *versioning.ConflictError + var pending *versioning.IntentPendingError + var bizErr bizerror.Error + // Versioning callers need status codes to distinguish validation failures, + // weak-CAS conflicts, pending ledgers, and backend failures. 
+ switch { + case errors.As(err, &conflict): + // Conflict and pending responses intentionally use flat fields because + // the frontend interceptor reads currentVersionId/intentId at top level. + c.JSON(http.StatusConflict, gin.H{ + "code": "VERSION_CONFLICT", + "message": versioning.ErrVersionConflict.Error(), + "currentVersionId": formatOptionalInt64(conflict.CurrentVersionID), + }) + case errors.As(err, &pending): + c.JSON(http.StatusConflict, gin.H{ + "code": "VERSION_LEDGER_PENDING", + "message": versioning.ErrVersionIntentPending.Error(), + "intentId": formatInt64(pending.IntentID), + }) + case errors.Is(err, versioning.ErrVersionIntentPending): + c.JSON(http.StatusConflict, gin.H{ + "code": "VERSION_LEDGER_PENDING", + "message": versioning.ErrVersionIntentPending.Error(), + }) + case errors.Is(err, versioning.ErrIntentOutcomeMismatch): + c.JSON(http.StatusConflict, gin.H{ + "code": "VERSION_LEDGER_OUTCOME_MISMATCH", + "message": err.Error(), + }) + case errors.Is(err, versioning.ErrVersionNotFound), errors.Is(err, versioning.ErrVersionIntentNotFound): + c.JSON(http.StatusNotFound, model.NewBizErrorResp(bizerror.New(bizerror.NotFoundError, err.Error()))) + case errors.Is(err, versioning.ErrRollbackToDelete), errors.Is(err, versioning.ErrRollbackToCurrent), errors.Is(err, versioning.ErrVersionIntentNotOpen): + c.JSON(http.StatusBadRequest, model.NewBizErrorResp(bizerror.New(bizerror.InvalidArgument, err.Error()))) + case errors.As(err, &bizErr) && bizErr.Code() == bizerror.InvalidArgument: + c.JSON(http.StatusBadRequest, model.NewBizErrorResp(bizErr)) + case errors.As(err, &bizErr) && bizErr.Code() == bizerror.NotFoundError: + c.JSON(http.StatusNotFound, model.NewBizErrorResp(bizErr)) + default: + c.JSON(http.StatusInternalServerError, model.NewBizErrorResp(bizerror.New(bizerror.UnknownError, err.Error()))) + } +} + +type ruleVersionAPI struct { + ID string `json:"id"` + RuleKind coremodel.ResourceKind `json:"ruleKind"` + Mesh string `json:"mesh"` + 
ResourceKey string `json:"resourceKey"` + RuleName string `json:"ruleName"` + VersionNo int64 `json:"versionNo"` + ContentHash string `json:"contentHash"` + SpecJSON string `json:"specJson"` + Source versioning.Source `json:"source"` + Operation versioning.Operation `json:"operation"` + Author string `json:"author"` + Reason string `json:"reason,omitempty"` + IntentID string `json:"intentId,omitempty"` + RolledBackFromID *string `json:"rolledBackFromId,omitempty"` + CreatedAt time.Time `json:"createdAt"` + CommittedAt time.Time `json:"committedAt"` + IsCurrent bool `json:"isCurrent"` +} + +type ruleVersionListAPI struct { + Items []ruleVersionAPI `json:"items"` + Total int64 `json:"total"` + CurrentVersionID *string `json:"currentVersionId,omitempty"` + CurrentVersionNo int64 `json:"currentVersionNo,omitempty"` + Deleted bool `json:"deleted"` +} + +type ruleVersionDiffAPI struct { + Left ruleVersionDiffSideAPI `json:"left"` + Right ruleVersionDiffSideAPI `json:"right"` +} + +type ruleVersionDiffSideAPI struct { + ID string `json:"id"` + VersionNo int64 `json:"versionNo"` + SpecJSON string `json:"specJson"` +} + +type rollbackRuleVersionAPI struct { + RolledBackFromID string `json:"rolledBackFromId"` + VersionID string `json:"versionId"` + VersionNo int64 `json:"versionNo"` + Source string `json:"source"` + Committed bool `json:"committed"` +} + +func versioningAPIData(data any) any { + switch v := data.(type) { + case *versioning.ListResult: + if v == nil { + return nil + } + items := make([]ruleVersionAPI, 0, len(v.Items)) + for i := range v.Items { + items = append(items, toRuleVersionAPI(&v.Items[i])) + } + return &ruleVersionListAPI{ + Items: items, + Total: v.Total, + CurrentVersionID: formatOptionalInt64(v.CurrentVersionID), + CurrentVersionNo: v.CurrentVersionNo, + Deleted: v.Deleted, + } + case *versioning.Version: + if v == nil { + return nil + } + return toRuleVersionAPI(v) + case *versioning.DiffResult: + if v == nil { + return nil + } + return 
&ruleVersionDiffAPI{ + Left: toRuleVersionDiffSideAPI(v.Left), + Right: toRuleVersionDiffSideAPI(v.Right), + } + case *service.RollbackResult: + if v == nil { + return nil + } + return &rollbackRuleVersionAPI{ + RolledBackFromID: formatInt64(v.RolledBackFromID), + VersionID: formatInt64(v.VersionID), + VersionNo: v.VersionNo, + Source: v.Source, + Committed: v.Committed, + } + default: + return data + } +} + +func toRuleVersionAPI(v *versioning.Version) ruleVersionAPI { + return ruleVersionAPI{ + ID: formatInt64(v.ID), + RuleKind: v.RuleKind, + Mesh: v.Mesh, + ResourceKey: v.ResourceKey, + RuleName: v.RuleName, + VersionNo: v.VersionNo, + ContentHash: v.ContentHash, + SpecJSON: v.SpecJSON, + Source: v.Source, + Operation: v.Operation, + Author: v.Author, + Reason: v.Reason, + IntentID: formatZeroInt64(v.IntentID), + RolledBackFromID: formatOptionalInt64(v.RolledBackFromID), + CreatedAt: v.CreatedAt, + CommittedAt: v.CommittedAt, + IsCurrent: v.IsCurrent, + } +} + +func toRuleVersionDiffSideAPI(side versioning.DiffSide) ruleVersionDiffSideAPI { + return ruleVersionDiffSideAPI{ + ID: formatInt64(side.ID), + VersionNo: side.VersionNo, + SpecJSON: side.SpecJSON, + } +} + +func formatInt64(id int64) string { + return strconv.FormatInt(id, 10) +} + +func formatZeroInt64(id int64) string { + if id == 0 { + return "" + } + return formatInt64(id) +} + +func formatOptionalInt64(id *int64) *string { + if id == nil { + return nil + } + value := formatInt64(*id) + return &value +} + +func writeVersioningInvalidArgument(c *gin.Context, message string) { + writeVersioningResp(c, nil, bizerror.New(bizerror.InvalidArgument, message)) +} + +func writeVersioningMutationError(c *gin.Context, err error) bool { + var conflict *versioning.ConflictError + var pending *versioning.IntentPendingError + if errors.As(err, &conflict) || errors.As(err, &pending) || + errors.Is(err, versioning.ErrVersionIntentPending) { + writeVersioningResp(c, nil, err) + return true + } + return false +} diff 
--git a/pkg/console/handler/tag_rule.go b/pkg/console/handler/tag_rule.go index a6fe3637c..563b1baa2 100644 --- a/pkg/console/handler/tag_rule.go +++ b/pkg/console/handler/tag_rule.go @@ -103,7 +103,14 @@ func PutTagRuleWithRuleName(ctx consolectx.Context) gin.HandlerFunc { c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } - if err = service.UpdateTagRule(ctx, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err = service.UpdateTagRuleWithOptions(ctx, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } else { @@ -127,7 +134,14 @@ func PostTagRuleWithRuleName(ctx consolectx.Context) gin.HandlerFunc { c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } - if err = service.CreateTagRule(ctx, res); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err = service.CreateTagRuleWithOptions(ctx, res, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } else { @@ -145,7 +159,14 @@ func DeleteTagRuleWithRuleName(ctx consolectx.Context) gin.HandlerFunc { c.JSON(http.StatusBadRequest, model.NewBizErrorResp(err)) return } - if err := service.DeleteTagRule(ctx, ruleName, mesh); err != nil { + opts, ok := mutationOptions(c) + if !ok { + return + } + if err := service.DeleteTagRuleWithOptions(ctx, ruleName, mesh, opts); err != nil { + if writeVersioningMutationError(c, err) { + return + } c.JSON(http.StatusOK, model.NewErrorResp(err.Error())) return } diff --git a/pkg/console/model/condition_rule.go b/pkg/console/model/condition_rule.go index 11b92fe11..a0e479d90 100644 --- a/pkg/console/model/condition_rule.go +++ b/pkg/console/model/condition_rule.go @@ -52,7 +52,9 @@ type ConditionRuleResp struct { Conditions []string `json:"conditions"` ConfigVersion string `json:"configVersion"` Enabled bool `json:"enabled"` + Force 
bool `json:"force"` Key string `json:"key"` + Priority int32 `json:"priority"` Runtime bool `json:"runtime"` Scope string `json:"scope"` } @@ -246,7 +248,9 @@ func GenConditionRuleToResp(data *meshproto.ConditionRoute) *CommonResp { Conditions: data.Conditions, ConfigVersion: data.ConfigVersion, Enabled: data.Enabled, + Force: data.Force, Key: data.Key, + Priority: data.Priority, Runtime: data.Runtime, Scope: data.Scope, }) diff --git a/pkg/console/model/tag_rule.go b/pkg/console/model/tag_rule.go index 9428771ea..b4d733f2e 100644 --- a/pkg/console/model/tag_rule.go +++ b/pkg/console/model/tag_rule.go @@ -31,7 +31,9 @@ type TagRuleSearchResp struct { type TagRuleResp struct { ConfigVersion string `json:"configVersion"` Enabled bool `json:"enabled"` + Force bool `json:"force"` Key string `json:"key"` + Priority int32 `json:"priority"` Runtime bool `json:"runtime"` Scope string `json:"scope"` Tags []RespTagElement `json:"tags"` @@ -50,7 +52,9 @@ func GenTagRouteResp(pb *meshproto.TagRoute) *CommonResp { return NewSuccessResp(TagRuleResp{ ConfigVersion: pb.ConfigVersion, Enabled: pb.Enabled, + Force: pb.Force, Key: pb.Key, + Priority: pb.Priority, Runtime: pb.Runtime, Scope: constants.ScopeApplication, Tags: tagToRespTagElement(pb.Tags), diff --git a/pkg/console/router/router.go b/pkg/console/router/router.go index 24cd7d44c..068fc2187 100644 --- a/pkg/console/router/router.go +++ b/pkg/console/router/router.go @@ -22,6 +22,7 @@ import ( consolectx "github.com/apache/dubbo-admin/pkg/console/context" "github.com/apache/dubbo-admin/pkg/console/handler" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" ) func InitRouter(r *gin.Engine, ctx consolectx.Context) { @@ -112,6 +113,10 @@ func InitRouter(r *gin.Engine, ctx consolectx.Context) { { configuration := router.Group("/configurator") configuration.GET("/search", handler.ConfiguratorSearch(ctx)) + configuration.GET("/:ruleName/versions", handler.ListRuleVersions(ctx, 
meshresource.DynamicConfigKind)) + configuration.GET("/:ruleName/versions/:versionId", handler.GetRuleVersion(ctx, meshresource.DynamicConfigKind)) + configuration.GET("/:ruleName/versions/:versionId/diff", handler.DiffRuleVersion(ctx, meshresource.DynamicConfigKind)) + configuration.POST("/:ruleName/versions/:versionId/rollback", handler.RollbackRuleVersion(ctx, meshresource.DynamicConfigKind)) configuration.GET("/:ruleName", handler.GetConfiguratorWithRuleName(ctx)) configuration.PUT("/:ruleName", handler.PutConfiguratorWithRuleName(ctx)) configuration.POST("/:ruleName", handler.PostConfiguratorWithRuleName(ctx)) @@ -121,6 +126,10 @@ func InitRouter(r *gin.Engine, ctx consolectx.Context) { { conditionRule := router.Group("/condition-rule") conditionRule.GET("/search", handler.ConditionRuleSearch(ctx)) + conditionRule.GET("/:ruleName/versions", handler.ListRuleVersions(ctx, meshresource.ConditionRouteKind)) + conditionRule.GET("/:ruleName/versions/:versionId", handler.GetRuleVersion(ctx, meshresource.ConditionRouteKind)) + conditionRule.GET("/:ruleName/versions/:versionId/diff", handler.DiffRuleVersion(ctx, meshresource.ConditionRouteKind)) + conditionRule.POST("/:ruleName/versions/:versionId/rollback", handler.RollbackRuleVersion(ctx, meshresource.ConditionRouteKind)) conditionRule.GET("/:ruleName", handler.GetConditionRuleWithRuleName(ctx)) conditionRule.PUT("/:ruleName", handler.PutConditionRuleWithRuleName(ctx)) conditionRule.POST("/:ruleName", handler.PostConditionRuleWithRuleName(ctx)) @@ -130,12 +139,24 @@ func InitRouter(r *gin.Engine, ctx consolectx.Context) { { tagRule := router.Group("/tag-rule") tagRule.GET("/search", handler.TagRuleSearch(ctx)) + tagRule.GET("/:ruleName/versions", handler.ListRuleVersions(ctx, meshresource.TagRouteKind)) + tagRule.GET("/:ruleName/versions/:versionId", handler.GetRuleVersion(ctx, meshresource.TagRouteKind)) + tagRule.GET("/:ruleName/versions/:versionId/diff", handler.DiffRuleVersion(ctx, meshresource.TagRouteKind)) + 
tagRule.POST("/:ruleName/versions/:versionId/rollback", handler.RollbackRuleVersion(ctx, meshresource.TagRouteKind)) tagRule.GET("/:ruleName", handler.GetTagRuleWithRuleName(ctx)) tagRule.PUT("/:ruleName", handler.PutTagRuleWithRuleName(ctx)) tagRule.POST("/:ruleName", handler.PostTagRuleWithRuleName(ctx)) tagRule.DELETE("/:ruleName", handler.DeleteTagRuleWithRuleName(ctx)) } + { + // Intent repair reconciles the version ledger from ResourceManager state + // when an admin mutation finishes before the ledger is finalized. + ruleVersionIntent := router.Group("/rule-version-intents") + ruleVersionIntent.POST("/:intentId/repair", handler.RepairRuleVersionIntent(ctx)) + ruleVersionIntent.POST("/:intentId/abandon", handler.AbandonRuleVersionIntent(ctx)) + } + router.GET("/prometheus", handler.GetPrometheus(ctx)) router.GET("/search", handler.BannerGlobalSearch(ctx)) router.GET("/overview", handler.ClusterOverview(ctx)) diff --git a/pkg/console/service/affinity_rule.go b/pkg/console/service/affinity_rule.go index c7d5f2b2d..4b330dbb8 100644 --- a/pkg/console/service/affinity_rule.go +++ b/pkg/console/service/affinity_rule.go @@ -38,7 +38,7 @@ func GetAffinityRule(ctx consolectx.Context, name string, mesh string) (*meshres } func UpdateAffinityRule(ctx consolectx.Context, res *meshresource.AffinityRouteResource) error { - if err := ctx.ResourceManager().Update(res); err != nil { + if err := ctx.ResourceManager().Update(ctx.AppContext(), res); err != nil { logger.Warnf("update %s affinity rule failed with error: %s", res.Name, err.Error()) return err } @@ -46,7 +46,7 @@ func UpdateAffinityRule(ctx consolectx.Context, res *meshresource.AffinityRouteR } func CreateAffinityRule(ctx consolectx.Context, res *meshresource.AffinityRouteResource) error { - if err := ctx.ResourceManager().Add(res); err != nil { + if err := ctx.ResourceManager().Add(ctx.AppContext(), res); err != nil { logger.Warnf("create %s condition failed with error: %s", res.Name, err.Error()) return err } @@ 
-55,6 +55,7 @@ func CreateAffinityRule(ctx consolectx.Context, res *meshresource.AffinityRouteR func DeleteAffinityRule(ctx consolectx.Context, name string, mesh string) error { if err := ctx.ResourceManager().DeleteByKey( + ctx.AppContext(), meshresource.AffinityRouteKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { diff --git a/pkg/console/service/condition_rule.go b/pkg/console/service/condition_rule.go index 9fae15940..f58bf683d 100644 --- a/pkg/console/service/condition_rule.go +++ b/pkg/console/service/condition_rule.go @@ -18,8 +18,6 @@ package service import ( - "github.com/apache/dubbo-admin/pkg/common/constants" - "github.com/apache/dubbo-admin/pkg/core/lock" "github.com/duke-git/lancet/v2/slice" "github.com/duke-git/lancet/v2/strutil" @@ -31,6 +29,7 @@ import ( meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" "github.com/apache/dubbo-admin/pkg/core/store/index" + "github.com/apache/dubbo-admin/pkg/core/versioning" ) func SearchConditionRules(ctx context.Context, req *model.SearchConditionRuleReq) (*model.SearchPaginationResult, error) { @@ -108,57 +107,61 @@ func GetConditionRule(ctx context.Context, name string, mesh string) (*meshresou } func UpdateConditionRule(ctx context.Context, res *meshresource.ConditionRouteResource) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return updateConditionRuleUnsafe(ctx, res) - } - lockKey := lock.BuildConditionRuleLockKey(res.Mesh, res.Name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return updateConditionRuleUnsafe(ctx, res) - }) + return UpdateConditionRuleWithOptions(ctx, res, RuleMutationOptions{}) } -func updateConditionRuleUnsafe(ctx context.Context, res *meshresource.ConditionRouteResource) error { - if err := ctx.ResourceManager().Update(res); err != nil { - logger.Warnf("update %s condition failed with error: %s", 
res.Name, err.Error()) - return err - } - return nil +func UpdateConditionRuleWithOptions(ctx context.Context, res *meshresource.ConditionRouteResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.ConditionRouteKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationUpdate, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().Update(scoped.leaseCtx, res); err != nil { + logger.Warnf("update %s condition failed with error: %s", res.Name, err.Error()) + return err + } + return nil + }) } func CreateConditionRule(ctx context.Context, res *meshresource.ConditionRouteResource) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return createConditionRuleUnsafe(ctx, res) - } - lockKey := lock.BuildConditionRuleLockKey(res.Mesh, res.Name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return createConditionRuleUnsafe(ctx, res) - }) + return CreateConditionRuleWithOptions(ctx, res, RuleMutationOptions{}) } -func createConditionRuleUnsafe(ctx context.Context, res *meshresource.ConditionRouteResource) error { - if err := ctx.ResourceManager().Add(res); err != nil { - logger.Warnf("create %s condition failed with error: %s", res.Name, err.Error()) - return err - } - return nil +func CreateConditionRuleWithOptions(ctx context.Context, res *meshresource.ConditionRouteResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.ConditionRouteKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationCreate, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().Add(scoped.leaseCtx, res); err != nil { + logger.Warnf("create %s condition failed with 
error: %s", res.Name, err.Error()) + return err + } + return nil + }) } func DeleteConditionRule(ctx context.Context, name string, mesh string) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return deleteConditionRuleUnsafe(ctx, name, mesh) - } - lockKey := lock.BuildConditionRuleLockKey(mesh, name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return deleteConditionRuleUnsafe(ctx, name, mesh) - }) + return DeleteConditionRuleWithOptions(ctx, name, mesh, RuleMutationOptions{}) } -func deleteConditionRuleUnsafe(ctx context.Context, name string, mesh string) error { - if err := ctx.ResourceManager().DeleteByKey(meshresource.ConditionRouteKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { - return err - } - return nil +func DeleteConditionRuleWithOptions(ctx context.Context, name string, mesh string, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.ConditionRouteKind, Mesh: mesh, Name: name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return getExistingRule(ctx, kindName) + }, + versioning.OperationDelete, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().DeleteByKey(scoped.leaseCtx, meshresource.ConditionRouteKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { + logger.Warnf("delete %s condition failed with error: %s", name, err.Error()) + return err + } + return nil + }) } diff --git a/pkg/console/service/configurator_rule.go b/pkg/console/service/configurator_rule.go index 13dd2284d..faa98c320 100644 --- a/pkg/console/service/configurator_rule.go +++ b/pkg/console/service/configurator_rule.go @@ -18,8 +18,6 @@ package service import ( - "github.com/apache/dubbo-admin/pkg/common/constants" - "github.com/apache/dubbo-admin/pkg/core/lock" "github.com/duke-git/lancet/v2/slice" "github.com/apache/dubbo-admin/pkg/common/bizerror" @@ -30,6 +28,7 @@ import 
( meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" "github.com/apache/dubbo-admin/pkg/core/store/index" + "github.com/apache/dubbo-admin/pkg/core/versioning" ) func PageListConfiguratorRule(ctx consolectx.Context, req *model.SearchReq) (*model.SearchPaginationResult, error) { @@ -116,58 +115,61 @@ func GetConfigurator(ctx consolectx.Context, name string, mesh string) (*meshres } func UpdateConfigurator(ctx consolectx.Context, res *meshresource.DynamicConfigResource) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return updateConfiguratorUnsafe(ctx, res) - } - lockKey := lock.BuildConfiguratorRuleLockKey(res.Mesh, res.Name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return updateConfiguratorUnsafe(ctx, res) - }) + return UpdateConfiguratorWithOptions(ctx, res, RuleMutationOptions{}) } -func updateConfiguratorUnsafe(ctx consolectx.Context, res *meshresource.DynamicConfigResource) error { - if err := ctx.ResourceManager().Update(res); err != nil { - logger.Warnf("update %s configurator failed with error: %s", res.Name, err.Error()) - return err - } - return nil +func UpdateConfiguratorWithOptions(ctx consolectx.Context, res *meshresource.DynamicConfigResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.DynamicConfigKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationUpdate, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().Update(scoped.leaseCtx, res); err != nil { + logger.Warnf("update %s configurator failed with error: %s", res.Name, err.Error()) + return err + } + return nil + }) } func CreateConfigurator(ctx consolectx.Context, res *meshresource.DynamicConfigResource) error { - lockMgr := 
ctx.LockManager() - if lockMgr == nil { - return createConfiguratorUnsafe(ctx, res) - } - lockKey := lock.BuildConfiguratorRuleLockKey(res.Mesh, res.Name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return createConfiguratorUnsafe(ctx, res) - }) + return CreateConfiguratorWithOptions(ctx, res, RuleMutationOptions{}) } -func createConfiguratorUnsafe(ctx consolectx.Context, res *meshresource.DynamicConfigResource) error { - if err := ctx.ResourceManager().Add(res); err != nil { - logger.Warnf("create %s configurator failed with error: %s", res.Name, err.Error()) - return err - } - return nil +func CreateConfiguratorWithOptions(ctx consolectx.Context, res *meshresource.DynamicConfigResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.DynamicConfigKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationCreate, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().Add(scoped.leaseCtx, res); err != nil { + logger.Warnf("create %s configurator failed with error: %s", res.Name, err.Error()) + return err + } + return nil + }) } func DeleteConfigurator(ctx consolectx.Context, name string, mesh string) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return deleteConfiguratorUnsafe(ctx, name, mesh) - } - lockKey := lock.BuildConfiguratorRuleLockKey(mesh, name) - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return deleteConfiguratorUnsafe(ctx, name, mesh) - }) + return DeleteConfiguratorWithOptions(ctx, name, mesh, RuleMutationOptions{}) } -func deleteConfiguratorUnsafe(ctx consolectx.Context, name string, mesh string) error { - if err := ctx.ResourceManager().DeleteByKey(meshresource.DynamicConfigKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { - 
logger.Warnf("delete %s configurator failed with error: %s", name, err.Error()) - return err - } - return nil +func DeleteConfiguratorWithOptions(ctx consolectx.Context, name string, mesh string, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.DynamicConfigKind, Mesh: mesh, Name: name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return getExistingRule(ctx, kindName) + }, + versioning.OperationDelete, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().DeleteByKey(scoped.leaseCtx, meshresource.DynamicConfigKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { + logger.Warnf("delete %s configurator failed with error: %s", name, err.Error()) + return err + } + return nil + }) } diff --git a/pkg/console/service/rule_version.go b/pkg/console/service/rule_version.go new file mode 100644 index 000000000..f8919f40e --- /dev/null +++ b/pkg/console/service/rule_version.go @@ -0,0 +1,509 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package service + +import ( + "context" + "errors" + "fmt" + "strings" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + consolectx "github.com/apache/dubbo-admin/pkg/console/context" + "github.com/apache/dubbo-admin/pkg/core/lock" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/versioning" +) + +const ruleLockTTL = 30 * time.Second + +// RuleMutationOptions carries version control metadata for rule mutations. +// ExpectedVersionID is a weak CAS guard supplied by the UI; it prevents a user +// from mutating over another user's newer rule change. +type RuleMutationOptions struct { + ExpectedVersionID *int64 + Author string + leaseCtx context.Context +} + +func (o RuleMutationOptions) WithLeaseContext(ctx context.Context) RuleMutationOptions { + o.leaseCtx = ctx + return o +} + +func ensureMutationContext(ctx consolectx.Context, opts RuleMutationOptions) RuleMutationOptions { + if opts.leaseCtx != nil { + return opts + } + if ctx != nil { + opts.leaseCtx = ctx.AppContext() + } + return opts +} + +func ruleVersioning(ctx consolectx.Context) *versioning.Service { + if ctx == nil { + return nil + } + return ctx.RuleVersioning() +} + +func checkExpectedVersion(ctx consolectx.Context, kindName RuleKindName, opts RuleMutationOptions) error { + svc := ruleVersioning(ctx) + if svc == nil { + return versioning.ErrVersionLedgerCorrupt + } + return svc.CheckExpected(kindName.Kind, kindName.Mesh, kindName.Name, opts.ExpectedVersionID) +} + +// prepareRuleMutation repairs stale intents before applying the weak CAS guard, +// so expectedVersionId is compared against the latest committed ledger state. 
+func prepareRuleMutation(ctx consolectx.Context, kindName RuleKindName, opts RuleMutationOptions) error { + if err := checkMutationLease(opts); err != nil { + return err + } + if err := repairPendingIntent(ctx, kindName, opts); err != nil { + return err + } + if err := checkMutationLease(opts); err != nil { + return err + } + return checkExpectedVersion(ctx, kindName, opts) +} + +// repairPendingIntent commits an open intent only when ResourceManager state +// already reflects that mutation; otherwise the pending ledger blocks writes. +func repairPendingIntent(ctx consolectx.Context, kindName RuleKindName, opts RuleMutationOptions) error { + svc := ruleVersioning(ctx) + if svc == nil { + return versioning.ErrVersionLedgerCorrupt + } + resourceKey := coremodel.BuildResourceKey(kindName.Mesh, kindName.Name) + if err := checkMutationLease(opts); err != nil { + return err + } + current, exists, err := ctx.ResourceManager().GetByKey(kindName.Kind, resourceKey) + if err != nil { + return err + } + if err := checkMutationLease(opts); err != nil { + return err + } + _, err = svc.RepairIntent(opts.leaseCtx, kindName.Kind, resourceKey, current, !exists) + return err +} + +type RuleKindName struct { + Kind coremodel.ResourceKind + Mesh string + Name string +} + +func getExistingRule(ctx consolectx.Context, kindName RuleKindName) (coremodel.Resource, error) { + key := coremodel.BuildResourceKey(kindName.Mesh, kindName.Name) + res, exists, err := ctx.ResourceManager().GetByKey(kindName.Kind, key) + if err != nil { + return nil, err + } + if !exists { + return nil, fmt.Errorf("%s %s does not exist", kindName.Kind, key) + } + return res, nil +} + +func withRuleMutation( + ctx consolectx.Context, + kindName RuleKindName, + opts RuleMutationOptions, + loadResource func(RuleMutationOptions) (coremodel.Resource, error), + op versioning.Operation, + mutate func(RuleMutationOptions) error, +) error { + execute := func(scoped RuleMutationOptions) error { + scoped = 
ensureMutationContext(ctx, scoped) + if err := prepareRuleMutation(ctx, kindName, scoped); err != nil { + return err + } + res, err := loadResource(scoped) + if err != nil { + return err + } + return applyAdminMutation(ctx, res, op, scoped, func() error { + if err := checkMutationLease(scoped); err != nil { + return err + } + return mutate(scoped) + }) + } + + lockMgr := ctx.LockManager() + if lockMgr == nil { + return lock.ErrLockUnavailable + } + lockKey, err := ruleLockKey(kindName) + if err != nil { + return err + } + return lock.WithLock(ctx.AppContext(), lockMgr, lockKey, ruleLockTTL, func(leaseCtx context.Context) error { + return execute(opts.WithLeaseContext(leaseCtx)) + }) +} + +// applyAdminMutation is a convenience wrapper for admin-initiated mutations. +func applyAdminMutation(ctx consolectx.Context, res coremodel.Resource, op versioning.Operation, opts RuleMutationOptions, mutate func() error) error { + _, err := applyRuleMutationIntentWithOptions(ctx, res, op, versioning.SourceAdmin, opts, "", nil, mutate) + return err +} + +type MutationCommit struct { + Intent *versioning.Intent + Version *versioning.Version +} + +func applyRuleMutationIntentWithOptions(ctx consolectx.Context, res coremodel.Resource, op versioning.Operation, source versioning.Source, opts RuleMutationOptions, reason string, rolledBackFromID *int64, mutate func() error) (*MutationCommit, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + if err := checkMutationLease(opts); err != nil { + return nil, err + } + intent, err := svc.BeginMutation(opts.leaseCtx, res, op, source, opts.Author, reason, rolledBackFromID) + if err != nil { + return nil, err + } + if intent == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + if err := checkMutationLease(opts); err != nil { + return nil, err + } + if err := mutate(); err != nil { + // A registry error, timeout, or lease loss does not prove the remote + // mutation failed. 
Keep the durable intent and reconcile actual state + // under the same canonical rule lock before reporting the outcome. + if markErr := svc.MarkIntentOutcomeUnknown(opts.leaseCtx, intent, err.Error()); markErr != nil && !errors.Is(markErr, versioning.ErrVersionIntentNotFound) { + return nil, markErr + } + if version, finalizeErr := ensureMutationIntentCommitted(ctx, svc, intent, opts); finalizeErr == nil { + return &MutationCommit{Intent: intent, Version: version}, nil + } + return nil, pendingLedgerError(intent.ID, err) + } + if err := checkMutationLease(opts); err != nil { + return nil, err + } + version, err := ensureMutationIntentCommitted(ctx, svc, intent, opts) + if err != nil { + return nil, pendingLedgerError(intent.ID, err) + } + return &MutationCommit{Intent: intent, Version: version}, nil +} + +func checkMutationLease(opts RuleMutationOptions) error { + return lock.CheckLease(opts.leaseCtx) +} + +func ensureMutationIntentCommitted(ctx consolectx.Context, svc *versioning.Service, intent *versioning.Intent, opts RuleMutationOptions) (*versioning.Version, error) { + if err := checkMutationLease(opts); err != nil { + return nil, err + } + current, exists, err := ctx.ResourceManager().GetByKey(intent.RuleKind, intent.ResourceKey) + if err != nil { + return nil, err + } + return svc.FinalizeMutation(opts.leaseCtx, intent, current, !exists) +} + +func abandonIntentAndReconcile(ctx consolectx.Context, svc *versioning.Service, leaseCtx context.Context, intent *versioning.Intent, reason string) error { + current, exists, err := ctx.ResourceManager().GetByKey(intent.RuleKind, intent.ResourceKey) + if err != nil { + return err + } + if versioning.IntentMatchesResource(intent, current, !exists) { + return bizerror.New(bizerror.InvalidArgument, "rule version intent matches the current resource; repair it instead") + } + if err := lock.CheckLease(leaseCtx); err != nil { + return err + } + if _, err := svc.ReconcileActualState(leaseCtx, intent.RuleKind, 
intent.ResourceKey, current, !exists, "system:reconcile"); err != nil { + return err + } + if err := lock.CheckLease(leaseCtx); err != nil { + return err + } + return svc.AbandonIntent(leaseCtx, intent, reason) +} + +func pendingLedgerError(intentID int64, cause error) error { + if errors.Is(cause, versioning.ErrVersionLedgerCorrupt) || errors.Is(cause, versioning.ErrIntentOutcomeMismatch) { + return cause + } + return fmt.Errorf("%w: %v", &versioning.IntentPendingError{IntentID: intentID}, cause) +} + +func ListRuleVersions(ctx consolectx.Context, kindName RuleKindName) (*versioning.ListResult, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + return svc.List(kindName.Kind, kindName.Mesh, kindName.Name) +} + +func GetRuleVersion(ctx consolectx.Context, kindName RuleKindName, versionID int64) (*versioning.Version, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + return svc.Get(kindName.Kind, kindName.Mesh, kindName.Name, versionID) +} + +func DiffRuleVersion(ctx consolectx.Context, kindName RuleKindName, versionID int64, against string) (*versioning.DiffResult, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + return svc.Diff(kindName.Kind, kindName.Mesh, kindName.Name, versionID, against) +} + +func RepairRuleVersionIntent(ctx consolectx.Context, intentID int64) (*versioning.Version, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + intent, err := svc.GetIntent(intentID) + if err != nil { + return nil, err + } + kindName := ruleKindNameFromIntent(intent) + var repaired *versioning.Version + err = withRuleLock(ctx, kindName, func(leaseCtx context.Context) error { + current, deleted, err := currentResourceForIntent(ctx, intentID) + if err != nil { + return err + } + if err := lock.CheckLease(leaseCtx); err != nil { + return 
err + } + intent, err := svc.GetIntent(intentID) + if err != nil { + return err + } + repaired, err = svc.FinalizeMutation(leaseCtx, intent, current, deleted) + return err + }) + return repaired, err +} + +func AbandonRuleVersionIntent(ctx consolectx.Context, intentID int64, reason string) error { + svc := ruleVersioning(ctx) + if svc == nil { + return versioning.ErrVersionLedgerCorrupt + } + reason = strings.TrimSpace(reason) + if reason == "" { + return bizerror.New(bizerror.InvalidArgument, "abandon reason is required") + } + intent, err := svc.GetIntent(intentID) + if err != nil { + return err + } + return withRuleLock(ctx, ruleKindNameFromIntent(intent), func(leaseCtx context.Context) error { + intent, err := svc.GetIntent(intentID) + if err != nil { + return err + } + if intent.Status == versioning.IntentStatusFailed { + return svc.AbandonIntent(leaseCtx, intent, reason) + } + if intent.Status != versioning.IntentStatusPending && + intent.Status != versioning.IntentStatusApplied && + intent.Status != versioning.IntentStatusOutcomeUnknown { + return bizerror.New(bizerror.InvalidArgument, "only open rule version intent can be abandoned") + } + if err := lock.CheckLease(leaseCtx); err != nil { + return err + } + return abandonIntentAndReconcile(ctx, svc, leaseCtx, intent, reason) + }) +} + +// RollbackResult summarizes a committed rollback for the API response. +type RollbackResult struct { + RolledBackFromID int64 `json:"rolledBackFromId"` + VersionID int64 `json:"versionId"` + VersionNo int64 `json:"versionNo"` + Source string `json:"source"` + Committed bool `json:"committed"` +} + +// RollbackRuleVersion re-publishes the spec of a historical version as a new +// rule mutation. It does not modify historical versions; the resulting rule +// change is observed through the normal versioning flow and recorded as a new +// SourceRollback version. 
+func RollbackRuleVersion(ctx consolectx.Context, kindName RuleKindName, targetVersionID int64, reason string, expectedVersionID *int64, author string) (*RollbackResult, error) { + var result *RollbackResult + err := withRuleLock(ctx, kindName, func(leaseCtx context.Context) error { + var inner error + result, inner = rollbackRuleVersionLocked(ctx, kindName, targetVersionID, reason, expectedVersionID, author, leaseCtx) + return inner + }) + return result, err +} + +func rollbackRuleVersionLocked(ctx consolectx.Context, kindName RuleKindName, targetVersionID int64, reason string, expectedVersionID *int64, author string, leaseCtx context.Context) (*RollbackResult, error) { + svc := ruleVersioning(ctx) + if svc == nil { + return nil, versioning.ErrVersionLedgerCorrupt + } + + reason = strings.TrimSpace(reason) + if reason == "" { + return nil, bizerror.New(bizerror.InvalidArgument, "rollback reason is required") + } + + target, err := svc.Get(kindName.Kind, kindName.Mesh, kindName.Name, targetVersionID) + if err != nil { + return nil, err + } + if target.Operation == versioning.OperationDelete { + // A delete marker represents absence of a rule. Treating it as a + // rollback target would turn rollback into a delete operation, which is + // intentionally kept out of scope for this endpoint. + return nil, versioning.ErrRollbackToDelete + } + + // Repair stale intents and enforce optimistic locking before touching state. 
+ opts := RuleMutationOptions{ExpectedVersionID: expectedVersionID, Author: author}.WithLeaseContext(leaseCtx) + if err := prepareRuleMutation(ctx, kindName, opts); err != nil { + return nil, err + } + + resourceKey := coremodel.BuildResourceKey(kindName.Mesh, kindName.Name) + current, currentDeleted, err := svc.CurrentLedgerHead(kindName.Kind, resourceKey) + if err != nil { + return nil, err + } + if current != nil && !currentDeleted { + if current.ContentHash == target.ContentHash { + return nil, versioning.ErrRollbackToCurrent + } + } + + res, err := versioning.ResourceFromSpecJSON(kindName.Kind, kindName.Mesh, kindName.Name, target.SpecJSON) + if err != nil { + return nil, err + } + + fromID := target.ID + operation := versioning.OperationUpdate + if currentDeleted { + operation = versioning.OperationCreate + } + commit, err := applyRuleMutationIntentWithOptions(ctx, res, operation, versioning.SourceRollback, opts, reason, &fromID, func() error { + if err := checkMutationLease(opts); err != nil { + return err + } + return ctx.ResourceManager().Upsert(opts.leaseCtx, res) + }) + if err != nil { + return nil, err + } + if commit == nil || commit.Intent == nil || commit.Version == nil { + return nil, fmt.Errorf("rollback intent was not created for %s", resourceKey) + } + + committed, err := validateRollbackCommit(commit.Version, kindName.Kind, resourceKey, commit.Intent.ID, fromID, target) + if err != nil { + return nil, err + } + + return &RollbackResult{ + RolledBackFromID: fromID, + VersionID: committed.ID, + VersionNo: committed.VersionNo, + Source: string(committed.Source), + Committed: true, + }, nil +} + +func validateRollbackCommit(current *versioning.Version, kind coremodel.ResourceKind, resourceKey string, intentID, rolledBackFromID int64, target *versioning.Version) (*versioning.Version, error) { + if current == nil || + current.IntentID != intentID || + current.RuleKind != kind || + current.ResourceKey != resourceKey || + current.Source != 
versioning.SourceRollback || + (current.Operation != versioning.OperationUpdate && current.Operation != versioning.OperationCreate) || + current.RolledBackFromID == nil || + *current.RolledBackFromID != rolledBackFromID || + target == nil || + current.ContentHash != target.ContentHash || + current.SpecJSON != target.SpecJSON { + return nil, fmt.Errorf("rollback version commit was not observed for %s", resourceKey) + } + current.IsCurrent = true + return current, nil +} + +func ruleKindNameFromIntent(intent *versioning.Intent) RuleKindName { + if intent == nil { + return RuleKindName{} + } + return RuleKindName{ + Kind: intent.RuleKind, + Mesh: intent.Mesh, + Name: intent.RuleName, + } +} + +func currentResourceForIntent(ctx consolectx.Context, intentID int64) (coremodel.Resource, bool, error) { + intent, err := ctx.RuleVersioning().GetIntent(intentID) + if err != nil { + return nil, false, err + } + current, exists, err := ctx.ResourceManager().GetByKey(intent.RuleKind, intent.ResourceKey) + // Repair APIs pass deleted=true when the resource manager no longer has the rule. 
+ return current, !exists, err +} + +func withRuleLock(ctx consolectx.Context, kindName RuleKindName, fn func(context.Context) error) error { + lockMgr := ctx.LockManager() + if lockMgr == nil { + return lock.ErrLockUnavailable + } + lockKey, err := ruleLockKey(kindName) + if err != nil { + return err + } + return lock.WithLock(ctx.AppContext(), lockMgr, lockKey, ruleLockTTL, fn) +} + +func ruleLockKey(kindName RuleKindName) (string, error) { + switch kindName.Kind { + case meshresource.ConditionRouteKind: + return lock.BuildConditionRuleLockKey(kindName.Mesh, kindName.Name), nil + case meshresource.TagRouteKind: + return lock.BuildTagRouteLockKey(kindName.Mesh, kindName.Name), nil + case meshresource.DynamicConfigKind: + return lock.BuildConfiguratorRuleLockKey(kindName.Mesh, kindName.Name), nil + default: + return "", bizerror.New(bizerror.InvalidArgument, "unsupported rule kind") + } +} diff --git a/pkg/console/service/rule_version_rollback_test.go b/pkg/console/service/rule_version_rollback_test.go new file mode 100644 index 000000000..910a93366 --- /dev/null +++ b/pkg/console/service/rule_version_rollback_test.go @@ -0,0 +1,510 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package service + +import ( + "context" + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/common/bizerror" + appcfg "github.com/apache/dubbo-admin/pkg/config/app" + versioningcfg "github.com/apache/dubbo-admin/pkg/config/versioning" + "github.com/apache/dubbo-admin/pkg/console/counter" + "github.com/apache/dubbo-admin/pkg/core/events" + "github.com/apache/dubbo-admin/pkg/core/governor" + "github.com/apache/dubbo-admin/pkg/core/lock" + "github.com/apache/dubbo-admin/pkg/core/manager" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/store" + "github.com/apache/dubbo-admin/pkg/core/versioning" + locallock "github.com/apache/dubbo-admin/pkg/lock/local" + memoryst "github.com/apache/dubbo-admin/pkg/store/memory" +) + +type testContext struct { + rm manager.ResourceManager + versioningSvc *versioning.Service + adapter *versioning.ResourceStoreAdapter + cfg *appcfg.AdminConfig + bus *simpleBus + lockMgr lock.Lock +} + +func (c *testContext) ResourceManager() manager.ResourceManager { return c.rm } +func (c *testContext) CounterManager() counter.CounterManager { return nil } +func (c *testContext) Config() appcfg.AdminConfig { return *c.cfg } +func (c *testContext) AppContext() context.Context { return context.Background() } +func (c *testContext) LockManager() lock.Lock { return c.lockMgr } +func (c *testContext) RuleVersioning() *versioning.Service { return c.versioningSvc } + +type testRouter struct { + stores map[coremodel.ResourceKind]store.ResourceStore +} + +func (r *testRouter) ResourceRoute(res coremodel.Resource) (store.ResourceStore, error) { + return r.ResourceKindRoute(res.ResourceKind()) +} + +func (r *testRouter) ResourceKindRoute(kind 
coremodel.ResourceKind) (store.ResourceStore, error) { + s, ok := r.stores[kind] + if !ok { + return nil, bizerror.New(bizerror.InvalidArgument, "store not found for kind") + } + return s, nil +} + +type noopGovernor struct { + stores map[coremodel.ResourceKind]store.ResourceStore + emitter events.Emitter +} + +func (g *noopGovernor) CreateRule(ctx context.Context, res coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } + s, ok := g.stores[res.ResourceKind()] + if !ok { + return bizerror.New(bizerror.InvalidArgument, "store not found") + } + if err := s.Add(res); err != nil { + return err + } + g.emitter.Send(events.NewResourceChangedEvent("Added", nil, res)) + return nil +} + +func (g *noopGovernor) UpdateRule(ctx context.Context, res coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } + s, ok := g.stores[res.ResourceKind()] + if !ok { + return bizerror.New(bizerror.InvalidArgument, "store not found") + } + old, exists, _ := s.GetByKey(res.ResourceKey()) + var oldRes coremodel.Resource + if exists { + oldRes, _ = old.(coremodel.Resource) + } + if err := s.Update(res); err != nil { + return err + } + g.emitter.Send(events.NewResourceChangedEvent("Updated", oldRes, res)) + return nil +} + +func (g *noopGovernor) DeleteRule(ctx context.Context, res coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } + s, ok := g.stores[res.ResourceKind()] + if !ok { + return bizerror.New(bizerror.InvalidArgument, "store not found") + } + if err := s.Delete(res); err != nil { + return err + } + g.emitter.Send(events.NewResourceChangedEvent("Deleted", res, nil)) + return nil +} + +type noopGovernorRouter struct { + gov *noopGovernor +} + +func (r *noopGovernorRouter) ResourceRoute(coremodel.Resource) (governor.RuleGovernor, error) { + return r.gov, nil +} + +func (r *noopGovernorRouter) ResourceMeshRoute(string) (governor.RuleGovernor, error) { + return r.gov, nil +} + +type simpleBus struct { + 
subscribers map[coremodel.ResourceKind][]events.Subscriber + muted map[coremodel.ResourceKind]bool +} + +func newSimpleBus() *simpleBus { + return &simpleBus{ + subscribers: make(map[coremodel.ResourceKind][]events.Subscriber), + muted: make(map[coremodel.ResourceKind]bool), + } +} + +func (b *simpleBus) Subscribe(sub events.Subscriber) error { + kind := sub.ResourceKind() + b.subscribers[kind] = append(b.subscribers[kind], sub) + return nil +} + +func (b *simpleBus) Unsubscribe(events.Subscriber) error { return nil } + +func (b *simpleBus) Send(event events.Event) { + obj := event.NewObj() + if obj == nil { + obj = event.OldObj() + } + if obj == nil || b.muted[obj.ResourceKind()] { + return + } + for _, sub := range b.subscribers[obj.ResourceKind()] { + if !sub.AsyncEnabled() { + _ = sub.ProcessEvent(event) + } + } +} + +type failingResourceStore struct { + store.ResourceStore + failNextAdd bool + failNextUpdate bool + failNextDelete bool + err error +} + +func (s *failingResourceStore) Add(obj interface{}) error { + if s.failNextAdd { + s.failNextAdd = false + return s.err + } + return s.ResourceStore.Add(obj) +} + +func (s *failingResourceStore) Update(obj interface{}) error { + if s.failNextUpdate { + s.failNextUpdate = false + return s.err + } + return s.ResourceStore.Update(obj) +} + +func (s *failingResourceStore) UpdateIfUnchanged(expected coremodel.Resource, updated coremodel.Resource) (bool, error) { + if s.failNextUpdate { + s.failNextUpdate = false + return false, s.err + } + cas, ok := s.ResourceStore.(store.ConditionalResourceStore) + if !ok { + return false, fmt.Errorf("wrapped store does not support conditional updates") + } + return cas.UpdateIfUnchanged(expected, updated) +} + +func (s *failingResourceStore) Delete(obj interface{}) error { + if s.failNextDelete { + s.failNextDelete = false + return s.err + } + return s.ResourceStore.Delete(obj) +} + +func setupRollbackTestEnv(t *testing.T) *testContext { + return 
setupRollbackTestEnvWithStoreWrappers(t, nil, nil) +} + +func setupRollbackTestEnvWithStoreWrappers(t *testing.T, wrapVersionStore, wrapIntentStore func(store.ResourceStore) store.ResourceStore) *testContext { + conditionStore := memoryst.NewMemoryResourceStore(meshresource.ConditionRouteKind) + versionStore := memoryst.NewMemoryResourceStore(meshresource.RuleVersionKind) + intentStore := memoryst.NewMemoryResourceStore(meshresource.RuleIntentKind) + for _, s := range []store.ManagedResourceStore{conditionStore, versionStore, intentStore} { + require.NoError(t, s.Init(nil)) + } + + var versioningVersionStore store.ResourceStore = versionStore + if wrapVersionStore != nil { + versioningVersionStore = wrapVersionStore(versionStore) + } + var versioningIntentStore store.ResourceStore = intentStore + if wrapIntentStore != nil { + versioningIntentStore = wrapIntentStore(intentStore) + } + stores := map[coremodel.ResourceKind]store.ResourceStore{ + meshresource.ConditionRouteKind: conditionStore, + meshresource.RuleVersionKind: versioningVersionStore, + meshresource.RuleIntentKind: versioningIntentStore, + } + + bus := newSimpleBus() + gov := &noopGovernor{stores: stores, emitter: bus} + rm := manager.NewResourceManager(&testRouter{stores: stores}, &noopGovernorRouter{gov: gov}) + adapter := versioning.NewResourceStoreAdapter(versioningVersionStore, versioningIntentStore) + lockMgr := locallock.NewLocalLock() + require.NoError(t, bus.Subscribe(versioning.NewSubscriber(meshresource.ConditionRouteKind, adapter, 5, lockMgr, context.Background()))) + + return &testContext{ + rm: rm, + versioningSvc: versioning.NewService(5, adapter), + adapter: adapter, + cfg: &appcfg.AdminConfig{RuleVersioning: &versioningcfg.Config{MaxVersionsPerRule: 5}}, + bus: bus, + lockMgr: lockMgr, + } +} + +func mustVersionStoreForTest(t *testing.T) store.ResourceStore { + s := memoryst.NewMemoryResourceStore(meshresource.RuleVersionKind) + require.NoError(t, s.Init(nil)) + return s +} + +func 
mustIntentStoreForTest(t *testing.T) store.ResourceStore { + s := memoryst.NewMemoryResourceStore(meshresource.RuleIntentKind) + require.NoError(t, s.Init(nil)) + return s +} + +func conditionRule(name, payload string) *meshresource.ConditionRouteResource { + res := meshresource.NewConditionRouteResourceWithAttributes(name, "") + res.Spec = &meshproto.ConditionRoute{Enabled: true, Key: name, Conditions: []string{payload}} + return res +} + +func kindName(name string) RuleKindName { + return RuleKindName{Kind: meshresource.ConditionRouteKind, Name: name} +} + +func beginMutationForTest(ctx *testContext, res coremodel.Resource) (*versioning.Intent, error) { + var intent *versioning.Intent + err := withRuleLock(ctx, RuleKindName{Kind: res.ResourceKind(), Mesh: res.ResourceMesh(), Name: res.ResourceMeta().Name}, func(leaseCtx context.Context) error { + var inner error + intent, inner = ctx.versioningSvc.BeginMutation(leaseCtx, res, versioning.OperationUpdate, versioning.SourceAdmin, "admin", "", nil) + return inner + }) + return intent, err +} + +func TestRuleMutationFailClosedWithoutVersioningService(t *testing.T) { + ctx := setupRollbackTestEnv(t) + ctx.versioningSvc = nil + + res := conditionRule("demo-rule", "v1") + err := CreateConditionRuleWithOptions(ctx, res, RuleMutationOptions{Author: "admin"}) + require.ErrorIs(t, err, versioning.ErrVersionLedgerCorrupt) + + _, exists, getErr := ctx.rm.GetByKey(res.ResourceKind(), res.ResourceKey()) + require.NoError(t, getErr) + assert.False(t, exists) +} + +func TestRuleMutationFailClosedWithoutLockManager(t *testing.T) { + ctx := setupRollbackTestEnv(t) + ctx.lockMgr = nil + + res := conditionRule("demo-rule", "v1") + err := CreateConditionRuleWithOptions(ctx, res, RuleMutationOptions{Author: "admin"}) + require.ErrorIs(t, err, lock.ErrLockUnavailable) + + _, exists, getErr := ctx.rm.GetByKey(res.ResourceKind(), res.ResourceKey()) + require.NoError(t, getErr) + assert.False(t, exists) +} + +func 
TestRuleMutationFailClosedWithoutIntentOrVersionStore(t *testing.T) { + for name, adapter := range map[string]*versioning.ResourceStoreAdapter{ + "intent-store-nil": versioning.NewResourceStoreAdapter(mustVersionStoreForTest(t), nil), + "version-store-nil": versioning.NewResourceStoreAdapter(nil, mustIntentStoreForTest(t)), + } { + t.Run(name, func(t *testing.T) { + ctx := setupRollbackTestEnv(t) + ctx.adapter = adapter + ctx.versioningSvc = versioning.NewService(5, adapter) + + res := conditionRule("demo-rule", "v1") + err := CreateConditionRuleWithOptions(ctx, res, RuleMutationOptions{Author: "admin"}) + require.ErrorIs(t, err, versioning.ErrVersionLedgerCorrupt) + + _, exists, getErr := ctx.rm.GetByKey(res.ResourceKind(), res.ResourceKey()) + require.NoError(t, getErr) + assert.False(t, exists) + }) + } +} + +func TestRollbackRuleVersion_Success(t *testing.T) { + ctx := setupRollbackTestEnv(t) + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + require.NoError(t, ctx.rm.Update(context.Background(), conditionRule("demo-rule", "v2"))) + + versions, err := ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + require.Len(t, versions.Items, 2) + v1ID := versions.Items[1].ID + v2ID := versions.Items[0].ID + + result, err := RollbackRuleVersion(ctx, kindName("demo-rule"), v1ID, "test rollback", &v2ID, "admin") + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, v1ID, result.RolledBackFromID) + assert.True(t, result.Committed) + + versions, err = ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + require.Len(t, versions.Items, 3) + assert.Equal(t, versioning.SourceRollback, versions.Items[0].Source) + require.NotNil(t, versions.Items[0].RolledBackFromID) + assert.Equal(t, v1ID, *versions.Items[0].RolledBackFromID) +} + +func TestRollbackRuleVersion_DeletedStateCASRace(t *testing.T) { + ctx := setupRollbackTestEnv(t) + require.NoError(t, ctx.rm.Add(context.Background(), 
conditionRule("demo-rule", "v1"))) + require.NoError(t, ctx.rm.Update(context.Background(), conditionRule("demo-rule", "v2"))) + + versions, err := ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + v1 := versions.Items[1] + require.NoError(t, ctx.rm.DeleteByKey(context.Background(), meshresource.ConditionRouteKind, "", "/demo-rule")) + + expectedDeleted := int64(0) + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v3"))) + + _, err = RollbackRuleVersion(ctx, kindName("demo-rule"), v1.ID, "restore stale deleted view", &expectedDeleted, "admin") + var conflict *versioning.ConflictError + require.ErrorAs(t, err, &conflict) + require.NotNil(t, conflict.CurrentVersionID) +} + +func TestRollbackRuleVersion_RepairsWhenCommitNotObserved(t *testing.T) { + ctx := setupRollbackTestEnv(t) + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + require.NoError(t, ctx.rm.Update(context.Background(), conditionRule("demo-rule", "v2"))) + + versions, err := ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + v1 := versions.Items[1] + v2 := versions.Items[0] + + ctx.bus.muted[meshresource.ConditionRouteKind] = true + result, err := RollbackRuleVersion(ctx, kindName("demo-rule"), v1.ID, "repair rollback", &v2.ID, "admin") + ctx.bus.muted[meshresource.ConditionRouteKind] = false + + require.NoError(t, err) + require.NotNil(t, result) + assert.True(t, result.Committed) + assert.Equal(t, v1.ID, result.RolledBackFromID) + + versions, err = ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + require.Len(t, versions.Items, 3) + assert.Equal(t, versioning.SourceRollback, versions.Items[0].Source) +} + +func TestRollbackRuleVersion_PendingIntentBlocks(t *testing.T) { + ctx := setupRollbackTestEnv(t) + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + require.NoError(t, ctx.rm.Update(context.Background(), 
conditionRule("demo-rule", "v2"))) + + versions, err := ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + v1ID := versions.Items[1].ID + + _, err = beginMutationForTest(ctx, conditionRule("demo-rule", "phantom-divergent")) + require.NoError(t, err) + + _, err = RollbackRuleVersion(ctx, kindName("demo-rule"), v1ID, "rollback", nil, "admin") + require.ErrorIs(t, err, versioning.ErrVersionIntentPending) +} + +func TestAbandonRuleVersionIntent_CrashBeforeReconcileKeepsIntentOpen(t *testing.T) { + versionErr := errors.New("version add failed before reconcile") + failingVersionStore := &failingResourceStore{err: versionErr} + ctx := setupRollbackTestEnvWithStoreWrappers(t, func(base store.ResourceStore) store.ResourceStore { + failingVersionStore.ResourceStore = base + return failingVersionStore + }, nil) + + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + intent, err := beginMutationForTest(ctx, conditionRule("demo-rule", "admin-pending")) + require.NoError(t, err) + require.NoError(t, ctx.rm.Update(context.Background(), conditionRule("demo-rule", "external-change"))) + + failingVersionStore.failNextAdd = true + err = AbandonRuleVersionIntent(ctx, intent.ID, "operator chose external state") + require.ErrorIs(t, err, versionErr) + + open, err := ctx.versioningSvc.GetIntent(intent.ID) + require.NoError(t, err) + assert.Equal(t, versioning.IntentStatusOutcomeUnknown, open.Status) +} + +func TestAbandonRuleVersionIntent_RuleVersionAddBeforeMarkFailedCrashIsRepairable(t *testing.T) { + intentErr := errors.New("mark failed crash") + failingIntentStore := &failingResourceStore{err: intentErr} + ctx := setupRollbackTestEnvWithStoreWrappers(t, nil, func(base store.ResourceStore) store.ResourceStore { + failingIntentStore.ResourceStore = base + return failingIntentStore + }) + + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + intent, err := beginMutationForTest(ctx, 
conditionRule("demo-rule", "admin-pending")) + require.NoError(t, err) + external := conditionRule("demo-rule", "external-change") + require.NoError(t, ctx.rm.Update(context.Background(), external)) + + failingIntentStore.failNextUpdate = true + err = AbandonRuleVersionIntent(ctx, intent.ID, "operator chose external state") + require.ErrorIs(t, err, intentErr) + + open, err := ctx.versioningSvc.GetIntent(intent.ID) + require.NoError(t, err) + require.True(t, open.ReconcileRequired) + + require.NoError(t, AbandonRuleVersionIntent(ctx, intent.ID, "operator chose external state")) + versions, err := ListRuleVersions(ctx, kindName("demo-rule")) + require.NoError(t, err) + require.Len(t, versions.Items, 2) + hash, _, err := versioning.NormalizeResource(external) + require.NoError(t, err) + assert.Equal(t, hash, versions.Items[0].ContentHash) + _, err = ctx.versioningSvc.GetIntent(intent.ID) + require.ErrorIs(t, err, versioning.ErrVersionIntentNotFound) +} + +func TestAbandonRuleVersionIntent_MarkFailedBeforeCleanupCrashSweepsOnRetry(t *testing.T) { + cleanupErr := errors.New("cleanup failed") + failingIntentStore := &failingResourceStore{err: cleanupErr} + ctx := setupRollbackTestEnvWithStoreWrappers(t, nil, func(base store.ResourceStore) store.ResourceStore { + failingIntentStore.ResourceStore = base + return failingIntentStore + }) + + require.NoError(t, ctx.rm.Add(context.Background(), conditionRule("demo-rule", "v1"))) + intent, err := beginMutationForTest(ctx, conditionRule("demo-rule", "admin-pending")) + require.NoError(t, err) + require.NoError(t, ctx.rm.Update(context.Background(), conditionRule("demo-rule", "external-change"))) + + failingIntentStore.failNextDelete = true + err = AbandonRuleVersionIntent(ctx, intent.ID, "operator chose external state") + require.ErrorIs(t, err, cleanupErr) + + terminal, err := ctx.versioningSvc.GetIntent(intent.ID) + require.NoError(t, err) + require.Equal(t, versioning.IntentStatusFailed, terminal.Status) + + 
require.NoError(t, AbandonRuleVersionIntent(ctx, intent.ID, "operator chose external state")) + _, err = ctx.versioningSvc.GetIntent(intent.ID) + require.ErrorIs(t, err, versioning.ErrVersionIntentNotFound) +} diff --git a/pkg/console/service/tag_rule.go b/pkg/console/service/tag_rule.go index a051117ca..c158669be 100644 --- a/pkg/console/service/tag_rule.go +++ b/pkg/console/service/tag_rule.go @@ -18,8 +18,6 @@ package service import ( - "github.com/apache/dubbo-admin/pkg/common/constants" - "github.com/apache/dubbo-admin/pkg/core/lock" "github.com/duke-git/lancet/v2/slice" "github.com/apache/dubbo-admin/pkg/common/bizerror" @@ -30,6 +28,7 @@ import ( meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" "github.com/apache/dubbo-admin/pkg/core/store/index" + "github.com/apache/dubbo-admin/pkg/core/versioning" ) func PageListTagRule(ctx consolectx.Context, req *model.SearchReq) (*model.SearchPaginationResult, error) { @@ -114,65 +113,63 @@ func GetTagRule(ctx consolectx.Context, name string, mesh string) (*meshresource } func UpdateTagRule(ctx consolectx.Context, res *meshresource.TagRouteResource) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return updateTagRuleUnsafe(ctx, res) - } - - lockKey := lock.BuildTagRouteLockKey(res.Mesh, res.Name) - - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return updateTagRuleUnsafe(ctx, res) - }) + return UpdateTagRuleWithOptions(ctx, res, RuleMutationOptions{}) } -func updateTagRuleUnsafe(ctx consolectx.Context, res *meshresource.TagRouteResource) error { - err := ctx.ResourceManager().Update(res) - if err != nil { - logger.Warnf("update tag rule %s error: %v", res.Name, err) - return err - } - return nil +func UpdateTagRuleWithOptions(ctx consolectx.Context, res *meshresource.TagRouteResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: 
meshresource.TagRouteKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationUpdate, + func(scoped RuleMutationOptions) error { + err := ctx.ResourceManager().Update(scoped.leaseCtx, res) + if err != nil { + logger.Warnf("update tag rule %s error: %v", res.Name, err) + return err + } + return nil + }) } func CreateTagRule(ctx consolectx.Context, res *meshresource.TagRouteResource) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return createTagRuleUnsafe(ctx, res) - } - - lockKey := lock.BuildTagRouteLockKey(res.Mesh, res.Name) - - return lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return createTagRuleUnsafe(ctx, res) - }) + return CreateTagRuleWithOptions(ctx, res, RuleMutationOptions{}) } -func createTagRuleUnsafe(ctx consolectx.Context, res *meshresource.TagRouteResource) error { - err := ctx.ResourceManager().Add(res) - if err != nil { - logger.Warnf("create tag rule %s error: %v", res.Name, err) - return err - } - return nil +func CreateTagRuleWithOptions(ctx consolectx.Context, res *meshresource.TagRouteResource, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.TagRouteKind, Mesh: res.Mesh, Name: res.Name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return res, nil + }, + versioning.OperationCreate, + func(scoped RuleMutationOptions) error { + err := ctx.ResourceManager().Add(scoped.leaseCtx, res) + if err != nil { + logger.Warnf("create tag rule %s error: %v", res.Name, err) + return err + } + return nil + }) } func DeleteTagRule(ctx consolectx.Context, name string, mesh string) error { - lockMgr := ctx.LockManager() - if lockMgr == nil { - return deleteTagRuleUnsafe(ctx, name, mesh) - } - lockKey := lock.BuildTagRouteLockKey(mesh, name) - return 
lockMgr.WithLock(ctx.AppContext(), lockKey, constants.DefaultLockTimeout, func() error { - return deleteTagRuleUnsafe(ctx, name, mesh) - }) + return DeleteTagRuleWithOptions(ctx, name, mesh, RuleMutationOptions{}) } -func deleteTagRuleUnsafe(ctx consolectx.Context, name string, mesh string) error { - err := ctx.ResourceManager().DeleteByKey(meshresource.TagRouteKind, mesh, coremodel.BuildResourceKey(mesh, name)) - if err != nil { - logger.Warnf("delete tag rule %s error: %v", name, err) - return err - } - return nil +func DeleteTagRuleWithOptions(ctx consolectx.Context, name string, mesh string, opts RuleMutationOptions) error { + kindName := RuleKindName{Kind: meshresource.TagRouteKind, Mesh: mesh, Name: name} + return withRuleMutation(ctx, kindName, opts, + func(RuleMutationOptions) (coremodel.Resource, error) { + return getExistingRule(ctx, kindName) + }, + versioning.OperationDelete, + func(scoped RuleMutationOptions) error { + if err := ctx.ResourceManager().DeleteByKey(scoped.leaseCtx, meshresource.TagRouteKind, mesh, coremodel.BuildResourceKey(mesh, name)); err != nil { + logger.Warnf("delete tag rule %s error: %v", name, err) + return err + } + return nil + }) } diff --git a/pkg/core/bootstrap/bootstrap.go b/pkg/core/bootstrap/bootstrap.go index d1ee2c0dc..a7b5439e1 100644 --- a/pkg/core/bootstrap/bootstrap.go +++ b/pkg/core/bootstrap/bootstrap.go @@ -27,6 +27,7 @@ import ( "github.com/apache/dubbo-admin/pkg/core/lock" "github.com/apache/dubbo-admin/pkg/core/logger" "github.com/apache/dubbo-admin/pkg/core/runtime" + "github.com/apache/dubbo-admin/pkg/core/versioning" "github.com/apache/dubbo-admin/pkg/diagnostics" ) @@ -130,6 +131,7 @@ func (sb *SmartBootstrapper) gatherComponents() ([]runtime.Component, error) { {"CounterManager", counter.ComponentType}, {"DiagnosticsServer", diagnostics.DiagnosticsServer}, {"DistributedLock", lock.DistributedLockComponent}, + {"RuleVersioning", versioning.ComponentType}, } for _, comp := range optionalComps { diff --git 
a/pkg/core/bootstrap/init.go b/pkg/core/bootstrap/init.go index c590b84da..6c371b0d5 100644 --- a/pkg/core/bootstrap/init.go +++ b/pkg/core/bootstrap/init.go @@ -35,6 +35,8 @@ import ( _ "github.com/apache/dubbo-admin/pkg/governor/mock" _ "github.com/apache/dubbo-admin/pkg/governor/nacos2" _ "github.com/apache/dubbo-admin/pkg/governor/zk" + _ "github.com/apache/dubbo-admin/pkg/lock/gorm" + _ "github.com/apache/dubbo-admin/pkg/lock/local" _ "github.com/apache/dubbo-admin/pkg/store/memory" _ "github.com/apache/dubbo-admin/pkg/store/mysql" _ "github.com/apache/dubbo-admin/pkg/store/postgres" diff --git a/pkg/core/discovery/subscriber/zk_config.go b/pkg/core/discovery/subscriber/zk_config.go index 07f9a2620..155e9ba6d 100644 --- a/pkg/core/discovery/subscriber/zk_config.go +++ b/pkg/core/discovery/subscriber/zk_config.go @@ -38,6 +38,13 @@ type ZKConfigEventSubscriber struct { storeRouter store.Router } +// sourceRegistryZookeeper labels rule events coming from ZooKeeper so the +// versioning ledger can attribute upstream writes to author "system:zookeeper". +// Other registry subscribers (Nacos, Apollo, ...) should emit the equivalent +// SourceRegistryContextKey on their ResourceChangedEvents — until they do, +// the ledger falls back to "system:upstream" for those sources. 
+const sourceRegistryZookeeper = "zookeeper" + func NewZKConfigEventSubscriber(eventEmitter events.Emitter, storeRouter store.Router) *ZKConfigEventSubscriber { return &ZKConfigEventSubscriber{ emitter: eventEmitter, @@ -127,13 +134,13 @@ func (z *ZKConfigEventSubscriber) processDelete(configRes *meshresource.ZKConfig switch suffix { case constants.TagRuleSuffix: return processConfigDelete[*meshresource.TagRouteResource]( - configRes, meshresource.ToTagRouteResource, z.storeRouter, z.emitter) + configRes, meshresource.TagRouteKind, z.storeRouter, z.emitter) case constants.ConditionRuleSuffix: return processConfigDelete[*meshresource.ConditionRouteResource]( - configRes, meshresource.ToConditionRouteResource, z.storeRouter, z.emitter) + configRes, meshresource.ConditionRouteKind, z.storeRouter, z.emitter) case constants.ConfiguratorsSuffix: return processConfigDelete[*meshresource.DynamicConfigResource]( - configRes, meshresource.ToDynamicConfigResource, z.storeRouter, z.emitter) + configRes, meshresource.DynamicConfigKind, z.storeRouter, z.emitter) default: return bizerror.New(bizerror.UnknownError, fmt.Sprintf("unknown rule type in mesh %s, skipped processing, node: %s", @@ -167,7 +174,9 @@ func processConfigUpsert[T coremodel.Resource]( logger.Errorf("add rule %s to store failed, cause: %s", newRuleRes.ResourceKey(), err.Error()) return err } - emitter.Send(events.NewResourceChangedEvent(cache.Added, nil, newRuleRes)) + emitter.Send(events.NewResourceChangedEventWithContext(cache.Added, nil, newRuleRes, map[string]string{ + events.SourceRegistryContextKey: sourceRegistryZookeeper, + })) return nil } @@ -184,39 +193,43 @@ func processConfigUpsert[T coremodel.Resource]( return bizerror.NewAssertionError(reflect.TypeOf(oldMetadataRes), oldRes) } - emitter.Send(events.NewResourceChangedEvent(cache.Updated, oldMetadataRes, newRuleRes)) + emitter.Send(events.NewResourceChangedEventWithContext(cache.Updated, oldMetadataRes, newRuleRes, map[string]string{ + 
events.SourceRegistryContextKey: sourceRegistryZookeeper, + })) return nil } func processConfigDelete[T coremodel.Resource]( configRes *meshresource.ZKConfigResource, - toRuleRes meshresource.ToRuleResourceFunc, + ruleKind coremodel.ResourceKind, router store.Router, emitter events.Emitter) error { - ruleRes := toRuleRes(configRes.Mesh, configRes.Name, configRes.Spec.NodeData) - st, err := router.ResourceKindRoute(ruleRes.ResourceKind()) + st, err := router.ResourceKindRoute(ruleKind) if err != nil { - logger.Errorf("get %s store failed, cause: %s", ruleRes.ResourceKind(), err.Error()) + logger.Errorf("get %s store failed, cause: %s", ruleKind, err.Error()) return err } - oldRes, exists, err := st.GetByKey(ruleRes.ResourceKey()) + resourceKey := coremodel.BuildResourceKey(configRes.Mesh, configRes.Name) + oldRes, exists, err := st.GetByKey(resourceKey) if err != nil { - logger.Errorf("get rule %s from store failed, cause: %s", ruleRes.ResourceKey(), err.Error()) + logger.Errorf("get rule %s from store failed, cause: %s", resourceKey, err.Error()) return err } if !exists { - logger.Infof("rule %s not exists in store, skipped deleting", ruleRes.ResourceKey()) + logger.Infof("rule %s not exists in store, skipped deleting", resourceKey) return nil } oldRuleRes, ok := oldRes.(T) if !ok { return bizerror.NewAssertionError(reflect.TypeOf(oldRuleRes), oldRes) } - err = st.Delete(ruleRes) + err = st.Delete(oldRuleRes) if err != nil { - logger.Errorf("delete rule %s from store failed, cause: %s", ruleRes.ResourceKey(), err.Error()) + logger.Errorf("delete rule %s from store failed, cause: %s", resourceKey, err.Error()) return err } - emitter.Send(events.NewResourceChangedEvent(cache.Deleted, oldRuleRes, nil)) + emitter.Send(events.NewResourceChangedEventWithContext(cache.Deleted, oldRuleRes, nil, map[string]string{ + events.SourceRegistryContextKey: sourceRegistryZookeeper, + })) return nil } diff --git a/pkg/core/events/eventbus.go b/pkg/core/events/eventbus.go index 
51393c4d2..42c06df00 100644 --- a/pkg/core/events/eventbus.go +++ b/pkg/core/events/eventbus.go @@ -26,6 +26,9 @@ import ( "github.com/apache/dubbo-admin/pkg/core/resource/model" ) +// SourceRegistryContextKey identifies which registry produced a resource event. +const SourceRegistryContextKey = "source-registry" + type Event interface { // Type returns the type of the event, see definitions in cache.DeltaType Type() cache.DeltaType @@ -33,7 +36,7 @@ type Event interface { OldObj() model.Resource // NewObj returns the new object, nil if event type is in [cache.Deleted] NewObj() model.Resource - // Context returns the context of the event, if event provider want to pass extra info to the consumer, just use context + // Context returns read-only event metadata. Subscribers must not mutate the returned map. Context() map[string]string // String returns the string representation of the event String() string diff --git a/pkg/core/governor/governor.go b/pkg/core/governor/governor.go index d29fd156a..c8d1246a7 100644 --- a/pkg/core/governor/governor.go +++ b/pkg/core/governor/governor.go @@ -18,6 +18,8 @@ package governor import ( + "context" + set "github.com/duke-git/lancet/v2/datastructure/set" meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" @@ -29,9 +31,9 @@ var RuleResourceKinds = set.New(meshresource.DynamicConfigKind, meshresource.Con // RuleGovernor makes the rule operations effective type RuleGovernor interface { // CreateRule creates a resource in the registry - CreateRule(model.Resource) error + CreateRule(context.Context, model.Resource) error // UpdateRule updates a resource in the registry - UpdateRule(model.Resource) error + UpdateRule(context.Context, model.Resource) error // DeleteRule deletes a resource from the registry - DeleteRule(model.Resource) error + DeleteRule(context.Context, model.Resource) error } diff --git a/pkg/core/lock/key.go b/pkg/core/lock/key.go index 29994e0b8..ed0e51510 100644 --- 
a/pkg/core/lock/key.go +++ b/pkg/core/lock/key.go @@ -18,31 +18,46 @@ package lock import ( - "fmt" + "encoding/base64" + "strings" "github.com/apache/dubbo-admin/pkg/common/constants" ) -// BuildLockKey constructs a lock key from a prefix and parts +// BuildLockKey constructs a stable lock key from a prefix and escaped identity +// parts. func BuildLockKey(prefix string, parts ...string) string { - key := prefix + segments := make([]string, 0, len(parts)+1) + segments = append(segments, encodeLockPart(prefix)) for _, part := range parts { - key += ":" + part + segments = append(segments, encodeLockPart(part)) } - return key + return strings.Join(segments, ":") } -// BuildTagRouteLockKey constructs a lock key for tag route operations +// BuildRuleVersioningLockKey constructs the canonical per-rule lock key. All +// paths that read the current rule state and then append or repair its ledger +// share this key so the check-then-act sequence observes one parent-rule state. +func BuildRuleVersioningLockKey(kind, mesh, name string) string { + return BuildLockKey(constants.RuleVersioningKeyPrefix, kind, mesh, name) +} + +// BuildTagRouteLockKey constructs the rule-versioning lock key for tag routes. func BuildTagRouteLockKey(mesh, name string) string { - return fmt.Sprintf("%s:%s:%s", constants.TagRouteKeyPrefix, mesh, name) + return BuildRuleVersioningLockKey("TagRoute", mesh, name) } -// BuildConfiguratorRuleLockKey constructs a lock key for configurator rule operations +// BuildConfiguratorRuleLockKey constructs the rule-versioning lock key for +// dynamic configs. func BuildConfiguratorRuleLockKey(mesh, name string) string { - return fmt.Sprintf("%s:%s:%s", constants.ConfiguratorRuleKeyPrefix, mesh, name) + return BuildRuleVersioningLockKey("DynamicConfig", mesh, name) } -// BuildConditionRuleLockKey constructs a lock key for condition rule operations +// BuildConditionRuleLockKey constructs the rule-versioning lock key for condition routes. 
func BuildConditionRuleLockKey(mesh, name string) string { - return fmt.Sprintf("%s:%s:%s", constants.ConditionRuleKeyPrefix, mesh, name) + return BuildRuleVersioningLockKey("ConditionRoute", mesh, name) +} + +func encodeLockPart(part string) string { + return base64.RawURLEncoding.EncodeToString([]byte(part)) } diff --git a/pkg/core/lock/lock.go b/pkg/core/lock/lock.go index 4b3bd781c..d00b574f1 100644 --- a/pkg/core/lock/lock.go +++ b/pkg/core/lock/lock.go @@ -19,32 +19,338 @@ package lock import ( "context" + "crypto/rand" + "encoding/hex" + "errors" + "fmt" + "sync" "time" + + "github.com/apache/dubbo-admin/pkg/common/constants" ) -// Lock defines the distributed lock interface -// This abstraction allows for multiple implementations (GORM, Redis, etcd, etc.) -type Lock interface { - // Lock acquires a distributed lock, blocking until successful or context cancelled - Lock(ctx context.Context, key string, ttl time.Duration) error +var ( + ErrLockLeaseLost = errors.New("lock lease lost") + ErrLockUnavailable = errors.New("lock is required") +) - // TryLock attempts to acquire a lock without blocking - // Returns true if lock was acquired, false otherwise - TryLock(ctx context.Context, key string, ttl time.Duration) (bool, error) +type leaseContextKey struct{} - // Unlock releases a lock held by this instance - Unlock(ctx context.Context, key string) error +// Lease represents one successful lock acquisition. Its token is scoped to this +// acquisition only; delayed Renew or Unlock calls from an older lease must not +// affect a newer lease for the same key. +type Lease interface { + Key() string + Token() string + Lost() <-chan struct{} + Renew(ctx context.Context, ttl time.Duration) error + Unlock(ctx context.Context) error +} - // Renew extends the TTL of a lock held by this instance - Renew(ctx context.Context, key string, ttl time.Duration) error +// Lock defines the lock backend contract used by cross-instance critical +// sections. 
A successful Acquire returns an acquisition-scoped Lease; backends +// must reject Renew and Unlock calls made with an older token. +type Lock interface { + // Acquire blocks until it obtains a lease or the context is cancelled. + Acquire(ctx context.Context, key string, ttl time.Duration) (Lease, error) - // IsLocked checks if a lock is currently held by anyone - IsLocked(ctx context.Context, key string) (bool, error) + // TryAcquire attempts to acquire a lease without blocking. + TryAcquire(ctx context.Context, key string, ttl time.Duration) (Lease, bool, error) - // WithLock executes a function while holding a lock - // Automatically acquires the lock, executes the function, and releases the lock - WithLock(ctx context.Context, key string, ttl time.Duration, fn func() error) error + // IsLocked checks if a lock is currently held by anyone. + IsLocked(ctx context.Context, key string) (bool, error) - // CleanupExpiredLocks removes expired locks (maintenance task) + // CleanupExpiredLocks removes expired locks (maintenance task). CleanupExpiredLocks(ctx context.Context) error } + +type statefulLease interface { + Lease + bindContext(context.Context) + markLost(error) + lostError() error +} + +type LeaseState struct { + key string + token string + + mu sync.RWMutex + ctx context.Context + lostErr error + lost chan struct{} + lostOnce sync.Once +} + +// NewLeaseState creates the shared lease state embedded by lock backends. 
+func NewLeaseState(key, token string) *LeaseState { + s := &LeaseState{ + key: key, + token: token, + ctx: context.Background(), + lost: make(chan struct{}), + } + return s +} + +func (s *LeaseState) Key() string { + return s.key +} + +func (s *LeaseState) Token() string { + return s.token +} + +func (s *LeaseState) context() context.Context { + s.mu.RLock() + defer s.mu.RUnlock() + if s.ctx != nil { + return s.ctx + } + return context.Background() +} + +func (s *LeaseState) bindContext(ctx context.Context) { + if ctx == nil { + ctx = context.Background() + } + s.mu.Lock() + s.ctx = ctx + s.mu.Unlock() +} + +func (s *LeaseState) Lost() <-chan struct{} { + return s.lost +} + +func (s *LeaseState) markLost(err error) { + if err == nil { + err = ErrLockLeaseLost + } + s.mu.Lock() + s.lostErr = err + s.mu.Unlock() + s.lostOnce.Do(func() { + close(s.lost) + }) +} + +func (s *LeaseState) lostError() error { + s.mu.RLock() + err := s.lostErr + s.mu.RUnlock() + if err != nil { + return err + } + select { + case <-s.lost: + return ErrLockLeaseLost + default: + return nil + } +} + +// NewLeaseToken returns an owner token scoped to one lock acquisition. +func NewLeaseToken() (string, error) { + var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + return "", err + } + return hex.EncodeToString(b[:]), nil +} + +// WithLock runs fn while holding key and returns ErrLockLeaseLost if the lease +// expires or renewal fails before the critical section is safely complete. Code +// inside fn should call CheckLease before mutating state after blocking work. 
+func WithLock(ctx context.Context, lockMgr Lock, key string, ttl time.Duration, fn func(context.Context) error) (err error) { + if lockMgr == nil { + return ErrLockUnavailable + } + if ctx == nil { + ctx = context.Background() + } + if fn == nil { + return fmt.Errorf("lock callback is required") + } + + acquireCtx := ctx + cancelAcquire := func() {} + if _, ok := ctx.Deadline(); !ok { + acquireCtx, cancelAcquire = context.WithTimeout(ctx, constants.DefaultLockTimeout) + } + lease, acquireErr := lockMgr.Acquire(acquireCtx, key, ttl) + cancelAcquire() + if acquireErr != nil { + return acquireErr + } + + leaseCtx, cancelLease := context.WithCancel(ctx) + leaseCtx = context.WithValue(leaseCtx, leaseContextKey{}, lease) + if stateful, ok := lease.(statefulLease); ok { + stateful.bindContext(leaseCtx) + } + + stopRenew := make(chan struct{}) + renewDone := make(chan struct{}) + leaseLost := make(chan error, 1) + if ttl > 0 { + go autoRenewLease(leaseCtx, cancelLease, lease, ttl, stopRenew, renewDone, leaseLost) + } else { + close(renewDone) + } + + defer func() { + close(stopRenew) + <-renewDone + + unlockCtx, unlockCancel := context.WithTimeout(context.Background(), constants.DefaultUnlockTimeout) + unlockErr := lease.Unlock(unlockCtx) + unlockCancel() + cancelLease() + + if recovered := recover(); recovered != nil { + panic(recovered) + } + if err != nil { + if lostErr := leaseLostError(lease, leaseLost); lostErr != nil && + (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, ErrLockLeaseLost)) { + err = lostErr + } + return + } + if lostErr := leaseLostError(lease, leaseLost); lostErr != nil { + err = lostErr + return + } + if ctxErr := ctx.Err(); ctxErr != nil { + err = ctxErr + return + } + if unlockErr != nil { + err = unlockErr + } + }() + + err = fn(leaseCtx) + if err != nil { + return err + } + if lostErr := leaseLostError(lease, leaseLost); lostErr != nil { + return lostErr + } + return CheckLease(leaseCtx) +} + +// 
CheckLease fails closed when the context is cancelled or its bound lease has +// been lost. It is intentionally cheap so mutation code can call it between +// ResourceManager writes, intent CAS, and ledger appends. +func CheckLease(ctx context.Context) error { + if ctx == nil { + return nil + } + if lease, ok := ctx.Value(leaseContextKey{}).(Lease); ok { + if lostErr := leaseLostError(lease, nil); lostErr != nil { + return lostErr + } + } + if err := ctx.Err(); err != nil { + return err + } + return nil +} + +// RequireLease returns the lease bound by WithLock or fails when a mutating +// versioning path is entered without the canonical rule lock. +func RequireLease(ctx context.Context) (Lease, error) { + if ctx == nil { + return nil, ErrLockUnavailable + } + lease, ok := ctx.Value(leaseContextKey{}).(Lease) + if !ok || lease == nil { + return nil, ErrLockUnavailable + } + if lostErr := leaseLostError(lease, nil); lostErr != nil { + return nil, lostErr + } + if err := ctx.Err(); err != nil { + return nil, err + } + return lease, nil +} + +// LeaseFromContext exposes the lease bound by WithLock for diagnostics and +// tests; callers must still use CheckLease or RequireLease before writes. 
+func LeaseFromContext(ctx context.Context) (Lease, bool) { + if ctx == nil { + return nil, false + } + lease, ok := ctx.Value(leaseContextKey{}).(Lease) + return lease, ok +} + +func autoRenewLease(leaseCtx context.Context, cancelLease context.CancelFunc, lease Lease, ttl time.Duration, stop <-chan struct{}, done chan<- struct{}, lost chan<- error) { + defer close(done) + interval := ttl / 3 + if interval <= 0 { + interval = ttl + } + if interval < 10*time.Millisecond { + interval = 10 * time.Millisecond + } + + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-stop: + return + case <-leaseCtx.Done(): + return + case <-ticker.C: + renewCtx, cancel := context.WithTimeout(context.Background(), constants.DefaultRenewTimeout) + err := lease.Renew(renewCtx, ttl) + cancel() + if err != nil { + lostErr := fmt.Errorf("%w: renew failed for %s: %v", ErrLockLeaseLost, lease.Key(), err) + if stateful, ok := lease.(statefulLease); ok { + stateful.markLost(lostErr) + } + select { + case lost <- lostErr: + default: + } + cancelLease() + return + } + } + } +} + +func leaseLostError(lease Lease, ch <-chan error) error { + select { + case <-lease.Lost(): + if stateful, ok := lease.(statefulLease); ok { + if err := stateful.lostError(); err != nil { + if errors.Is(err, ErrLockLeaseLost) { + return err + } + return fmt.Errorf("%w: %v", ErrLockLeaseLost, err) + } + } + return ErrLockLeaseLost + default: + } + if ch == nil { + return nil + } + select { + case err := <-ch: + if err == nil { + return ErrLockLeaseLost + } + return err + default: + return nil + } +} diff --git a/pkg/core/manager/manager.go b/pkg/core/manager/manager.go index 3e4ce0942..9f36f89f5 100644 --- a/pkg/core/manager/manager.go +++ b/pkg/core/manager/manager.go @@ -18,11 +18,14 @@ package manager import ( + "context" + "errors" "fmt" "reflect" "github.com/apache/dubbo-admin/pkg/common/bizerror" "github.com/apache/dubbo-admin/pkg/core/governor" + 
"github.com/apache/dubbo-admin/pkg/core/lock" "github.com/apache/dubbo-admin/pkg/core/resource/model" "github.com/apache/dubbo-admin/pkg/core/store" "github.com/apache/dubbo-admin/pkg/core/store/index" @@ -37,17 +40,20 @@ type ReadOnlyResourceManager interface { ListByIndexes(rk model.ResourceKind, indexes []index.IndexCondition) ([]model.Resource, error) // PageListByIndexes page list the resources with the given index conditions PageListByIndexes(rk model.ResourceKind, indexes []index.IndexCondition, pr model.PageReq) (*model.PageData[model.Resource], error) + // GetStore returns the ResourceStore for the given resource kind. + // This is for special cases like bootstrap that need direct store access. + GetStore(rk model.ResourceKind) (store.ResourceStore, error) } type WriteOnlyResourceManager interface { // Add adds the resource - Add(r model.Resource) error + Add(ctx context.Context, r model.Resource) error // Update updates the resource - Update(r model.Resource) error + Update(ctx context.Context, r model.Resource) error // Upsert upserts the resource - Upsert(r model.Resource) error + Upsert(ctx context.Context, r model.Resource) error // DeleteByKey deletes the resource with the given resource key - DeleteByKey(rk model.ResourceKind, mesh string, key string) error + DeleteByKey(ctx context.Context, rk model.ResourceKind, mesh string, key string) error } type ResourceManager interface { @@ -126,7 +132,10 @@ func (rm *resourcesManager) PageListByIndexes( return pageData, nil } -func (rm *resourcesManager) Add(r model.Resource) error { +func (rm *resourcesManager) Add(ctx context.Context, r model.Resource) error { + if err := validateMutationContext(ctx); err != nil { + return err + } if !governor.RuleResourceKinds.Contain(r.ResourceKind()) { return bizerror.New(bizerror.InvalidArgument, "invalid resource kind") } @@ -134,10 +143,13 @@ func (rm *resourcesManager) Add(r model.Resource) error { if err != nil { return err } - return rs.CreateRule(r) + return 
rs.CreateRule(ctx, r) } -func (rm *resourcesManager) Update(r model.Resource) error { +func (rm *resourcesManager) Update(ctx context.Context, r model.Resource) error { + if err := validateMutationContext(ctx); err != nil { + return err + } if !governor.RuleResourceKinds.Contain(r.ResourceKind()) { return bizerror.New(bizerror.InvalidArgument, "invalid resource kind") } @@ -145,21 +157,27 @@ func (rm *resourcesManager) Update(r model.Resource) error { if err != nil { return err } - return rs.UpdateRule(r) + return rs.UpdateRule(ctx, r) } -func (rm *resourcesManager) Upsert(r model.Resource) error { +func (rm *resourcesManager) Upsert(ctx context.Context, r model.Resource) error { + if err := validateMutationContext(ctx); err != nil { + return err + } if !governor.RuleResourceKinds.Contain(r.ResourceKind()) { return bizerror.New(bizerror.InvalidArgument, "invalid resource kind") } if _, exists, _ := rm.GetByKey(r.ResourceKind(), r.ResourceKey()); exists { - return rm.Update(r) + return rm.Update(ctx, r) } else { - return rm.Add(r) + return rm.Add(ctx, r) } } -func (rm *resourcesManager) DeleteByKey(rk model.ResourceKind, mesh string, key string) error { +func (rm *resourcesManager) DeleteByKey(ctx context.Context, rk model.ResourceKind, mesh string, key string) error { + if err := validateMutationContext(ctx); err != nil { + return err + } if !governor.RuleResourceKinds.Contain(rk) { return bizerror.New(bizerror.InvalidArgument, "invalid resource kind") } @@ -174,5 +192,16 @@ func (rm *resourcesManager) DeleteByKey(rk model.ResourceKind, mesh string, key if !exists { return fmt.Errorf("%s %s does not exist", rk, key) } - return gov.DeleteRule(r) + return gov.DeleteRule(ctx, r) +} + +func (rm *resourcesManager) GetStore(rk model.ResourceKind) (store.ResourceStore, error) { + return rm.storeRouter.ResourceKindRoute(rk) +} + +func validateMutationContext(ctx context.Context) error { + if ctx == nil { + return errors.New("resource mutation context is required") + } + 
return lock.CheckLease(ctx) } diff --git a/pkg/core/resource/apis/mesh/v1alpha1/rule_intent_types.go b/pkg/core/resource/apis/mesh/v1alpha1/rule_intent_types.go new file mode 100644 index 000000000..f0ebe55c2 --- /dev/null +++ b/pkg/core/resource/apis/mesh/v1alpha1/rule_intent_types.go @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package v1alpha1 + +import ( + "encoding/json" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/core/resource/model" + "google.golang.org/protobuf/proto" +) + +// RuleIntentResource stores the durable recovery record for one rule mutation. +// Its spec is updated conditionally by the versioning store; callers should not +// treat it as the committed rule-version history. 
+type RuleIntentResource struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Mesh string `json:"mesh,omitempty"` + Spec *meshproto.RuleIntent `json:"spec,omitempty"` +} + +func (r *RuleIntentResource) ResourceKind() model.ResourceKind { + return RuleIntentKind +} + +func (r *RuleIntentResource) ResourceMesh() string { + return r.Mesh +} + +func (r *RuleIntentResource) ResourceMeta() metav1.ObjectMeta { + return r.ObjectMeta +} + +func (r *RuleIntentResource) ResourceSpec() model.ResourceSpec { + return r.Spec +} + +func (r *RuleIntentResource) ResourceKey() string { + return model.BuildResourceKey(r.Mesh, r.Name) +} + +func (r *RuleIntentResource) String() string { + jsonStr, err := json.Marshal(r) + if err != nil { + return "" + } + return string(jsonStr) +} + +func (r *RuleIntentResource) DeepCopyObject() k8sruntime.Object { + out := &RuleIntentResource{ + TypeMeta: r.TypeMeta, + ObjectMeta: *r.ObjectMeta.DeepCopy(), + Mesh: r.Mesh, + } + if r.Spec != nil { + out.Spec = proto.Clone(r.Spec).(*meshproto.RuleIntent) + } + return out +} + +// RuleIntentResourceList contains a list of RuleIntentResource +type RuleIntentResourceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RuleIntentResource `json:"items"` +} + +func (r *RuleIntentResourceList) DeepCopyObject() k8sruntime.Object { + out := &RuleIntentResourceList{ + TypeMeta: r.TypeMeta, + } + r.ListMeta.DeepCopyInto(&out.ListMeta) + if r.Items != nil { + out.Items = make([]RuleIntentResource, len(r.Items)) + for i := range r.Items { + out.Items[i] = *r.Items[i].DeepCopyObject().(*RuleIntentResource) + } + } + return out +} + +func (r *RuleIntentResourceList) SetItems(items []model.Resource) { + r.Items = make([]RuleIntentResource, len(items)) + for i, res := range items { + if typed, ok := res.(*RuleIntentResource); ok { + r.Items[i] = *typed + } + } +} + +// Resource kind constants +const ( + RuleIntentKind 
model.ResourceKind = "RuleIntent" +) + +// NewRuleIntentResource creates an empty RuleIntentResource with only its TypeMeta (APIVersion and Kind) populated +func NewRuleIntentResource() *RuleIntentResource { + return &RuleIntentResource{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1alpha1", + Kind: string(RuleIntentKind), + }, + } +} + +func NewRuleIntentResourceWithAttributes(name, mesh string) *RuleIntentResource { + r := NewRuleIntentResource() + r.Name = name + r.Mesh = mesh + return r +} + +func init() { + model.RegisterResourceSchema(RuleIntentKind, func() model.Resource { + return NewRuleIntentResource() + }, func() model.ResourceList { + return &RuleIntentResourceList{} + }) +} diff --git a/pkg/core/resource/apis/mesh/v1alpha1/rule_version_types.go b/pkg/core/resource/apis/mesh/v1alpha1/rule_version_types.go new file mode 100644 index 000000000..ebd803648 --- /dev/null +++ b/pkg/core/resource/apis/mesh/v1alpha1/rule_version_types.go @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package v1alpha1 + +import ( + "encoding/json" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/core/logger" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "google.golang.org/protobuf/proto" +) + +const ( + RuleVersionKind coremodel.ResourceKind = "RuleVersion" +) + +func init() { + coremodel.RegisterResourceSchema(RuleVersionKind, NewRuleVersionResource, NewRuleVersionResourceList) +} + +// RuleVersionResource stores one committed ledger entry for a parent traffic +// rule. The current rule state is derived from the highest version number, not +// from mutable fields on older resources. +type RuleVersionResource struct { + metav1.TypeMeta `json:",inline"` + + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Mesh is the name of the dubbo mesh this resource belongs to. + Mesh string `json:"mesh,omitempty"` + + // Spec is the specification of the Dubbo RuleVersion resource. 
+ Spec *meshproto.RuleVersion `json:"spec,omitempty"` +} + +func (r *RuleVersionResource) ResourceKind() coremodel.ResourceKind { + return RuleVersionKind +} + +func (r *RuleVersionResource) ResourceMesh() string { + return r.Mesh +} + +func (r *RuleVersionResource) ResourceKey() string { + return coremodel.BuildResourceKey(r.Mesh, r.Name) +} + +func (r *RuleVersionResource) ResourceMeta() metav1.ObjectMeta { + return r.ObjectMeta +} + +func (r *RuleVersionResource) ResourceSpec() coremodel.ResourceSpec { + return r.Spec +} + +func (r *RuleVersionResource) String() string { + jsonStr, err := json.Marshal(r) + if err != nil { + logger.Errorf("failed to encode RuleVersionResource: %s to json, err: %v", r.ResourceKey(), err) + return "" + } + return string(jsonStr) +} + +func (r *RuleVersionResource) DeepCopyObject() k8sruntime.Object { + out := &RuleVersionResource{ + TypeMeta: r.TypeMeta, + ObjectMeta: *r.ObjectMeta.DeepCopy(), + Mesh: r.Mesh, + } + if r.Spec != nil { + out.Spec = proto.Clone(r.Spec).(*meshproto.RuleVersion) + } + return out +} + +func NewRuleVersionResource() coremodel.Resource { + return &RuleVersionResource{} +} + +func NewRuleVersionResourceWithAttributes(name, mesh string) *RuleVersionResource { + return &RuleVersionResource{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Mesh: mesh, + Spec: &meshproto.RuleVersion{}, + } +} + +type RuleVersionResourceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []*RuleVersionResource `json:"items"` +} + +func (r *RuleVersionResourceList) DeepCopyObject() k8sruntime.Object { + out := &RuleVersionResourceList{ + TypeMeta: r.TypeMeta, + } + r.ListMeta.DeepCopyInto(&out.ListMeta) + + if len(r.Items) == 0 { + return out + } + out.Items = make([]*RuleVersionResource, len(r.Items)) + for i := range r.Items { + out.Items[i] = r.Items[i].DeepCopyObject().(*RuleVersionResource) + } + return out +} + +func NewRuleVersionResourceList() coremodel.ResourceList { + 
return &RuleVersionResourceList{ + TypeMeta: metav1.TypeMeta{ + Kind: string(RuleVersionKind), + APIVersion: "v1alpha1", + }, + Items: make([]*RuleVersionResource, 0), + } +} + +func (r *RuleVersionResourceList) GetItems() []coremodel.Resource { + res := make([]coremodel.Resource, len(r.Items)) + for i := range r.Items { + res[i] = r.Items[i] + } + return res +} + +func (r *RuleVersionResourceList) SetItems(items []coremodel.Resource) { + r.Items = make([]*RuleVersionResource, len(items)) + for i, res := range items { + if typed, ok := res.(*RuleVersionResource); ok { + r.Items[i] = typed + } + } +} diff --git a/pkg/core/runtime/builder.go b/pkg/core/runtime/builder.go index 191470b00..8a7933365 100644 --- a/pkg/core/runtime/builder.go +++ b/pkg/core/runtime/builder.go @@ -34,6 +34,7 @@ type BuilderContext interface { Config() app.AdminConfig GetActivatedComponent(typ ComponentType) (Component, error) ActivateComponent(comp Component) error + AppContext() context.Context } var _ BuilderContext = &Builder{} @@ -54,6 +55,10 @@ func (b *Builder) GetActivatedComponent(typ ComponentType) (Component, error) { return comp, nil } +func (b *Builder) AppContext() context.Context { + return b.appCtx +} + func BuilderFor(appCtx context.Context, cfg app.AdminConfig) (*Builder, error) { hostname, err := os.Hostname() if err != nil { diff --git a/pkg/core/store/index/rule_intent.go b/pkg/core/store/index/rule_intent.go new file mode 100644 index 000000000..7822a3062 --- /dev/null +++ b/pkg/core/store/index/rule_intent.go @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package index + +import ( + "fmt" + "strconv" + "strings" + + "k8s.io/client-go/tools/cache" + + "github.com/apache/dubbo-admin/pkg/core/logger" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" +) + +const ( + ByRuleIntentParentAndStatus = "ByRuleIntentParentAndStatus" + ByRuleIntentIDIndexName = "ByRuleIntentID" + ByRuleIntentStatusIndexName = "ByRuleIntentStatus" +) + +func init() { + RegisterIndexers(meshresource.RuleIntentKind, map[string]cache.IndexFunc{ + ByRuleIntentParentAndStatus: byRuleIntentParentAndStatus, + ByRuleIntentIDIndexName: byRuleIntentID, + ByRuleIntentStatusIndexName: byRuleIntentStatus, + }) +} + +// byRuleIntentParentAndStatus indexes RuleIntent resources by parent rule and status. +// Index key format: "<parentRuleKind>/<parentRuleMesh>/<parentRuleName>/<status>" +// +// This enables efficient queries like: +// - Find pending intent for a specific rule: "ConditionRoute/default/my-rule/PENDING" +// - Find applied intents for a rule: "ConditionRoute/default/my-rule/APPLIED" +// +// Used by versioning.ResourceStoreAdapter to avoid full table scans when looking up intents. +func byRuleIntentParentAndStatus(obj interface{}) ([]string, error) { + intent, ok := obj.(*meshresource.RuleIntentResource) + if !ok || intent.Spec == nil { + return nil, nil + } + + key := fmt.Sprintf("%s/%s/%s/%s", + intent.Spec.ParentRuleKind, + intent.Spec.ParentRuleMesh, + intent.Spec.ParentRuleName, + intent.Spec.Status, + ) + return []string{key}, nil +} + +// byRuleIntentID indexes RuleIntent resources by the numeric ID suffix in +// their resource name.
The suffix parsing intentionally matches versioning's +// extractIDFromIntentName behavior. +func byRuleIntentID(obj interface{}) ([]string, error) { + intent, ok := obj.(*meshresource.RuleIntentResource) + if !ok { + return nil, nil + } + idx := strings.LastIndex(intent.Name, "-") + if idx == -1 || idx == len(intent.Name)-1 { + logger.Warnf("skipping malformed RuleIntent name in id index: %s", intent.Name) + return nil, nil + } + id, err := strconv.ParseInt(intent.Name[idx+1:], 10, 64) + if err != nil { + logger.Warnf("skipping malformed RuleIntent name in id index: %s", intent.Name) + return nil, nil + } + return []string{strconv.FormatInt(id, 10)}, nil +} + +func byRuleIntentStatus(obj interface{}) ([]string, error) { + intent, ok := obj.(*meshresource.RuleIntentResource) + if !ok || intent.Spec == nil || intent.Spec.Status == "" { + return nil, nil + } + return []string{intent.Spec.Status}, nil +} diff --git a/pkg/core/store/index/rule_version.go b/pkg/core/store/index/rule_version.go new file mode 100644 index 000000000..1572782a2 --- /dev/null +++ b/pkg/core/store/index/rule_version.go @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package index
+
+import (
+	"fmt"
+	"strconv"
+
+	"k8s.io/client-go/tools/cache"
+
+	meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1"
+)
+
+// Names of the indexers registered for RuleVersion resources.
+const (
+	ByParentRuleIndexName          = "ByParentRule"
+	ByRuleVersionIDIndexName       = "ByRuleVersionID"
+	ByRuleVersionIntentIDIndexName = "ByRuleVersionIntentID"
+)
+
+// init registers the RuleVersion index functions under their index names.
+func init() {
+	RegisterIndexers(meshresource.RuleVersionKind, map[string]cache.IndexFunc{
+		ByParentRuleIndexName:          byParentRule,
+		ByRuleVersionIDIndexName:       byRuleVersionID,
+		ByRuleVersionIntentIDIndexName: byRuleVersionIntentID,
+	})
+}
+
+// byParentRule indexes RuleVersion resources by their parent rule.
+// Index key format: "{parentRuleKind}/{parentRuleMesh}/{parentRuleName}"
+func byParentRule(obj interface{}) ([]string, error) {
+	rv, ok := obj.(*meshresource.RuleVersionResource)
+	// A typed-nil pointer passes the assertion; reject it before reading Spec.
+	if !ok || rv == nil || rv.Spec == nil {
+		return nil, nil
+	}
+	key := fmt.Sprintf("%s/%s/%s",
+		rv.Spec.ParentRuleKind,
+		rv.Spec.ParentRuleMesh,
+		rv.Spec.ParentRuleName,
+	)
+	return []string{key}, nil
+}
+
+// byRuleVersionID indexes RuleVersion resources by version ID. A non-empty
+// "dubbo.apache.org/rule-version-id" annotation is used verbatim; otherwise
+// the key is the numeric suffix of the resource name. Resources with neither
+// are not indexed.
+func byRuleVersionID(obj interface{}) ([]string, error) {
+	rv, ok := obj.(*meshresource.RuleVersionResource)
+	if !ok || rv == nil {
+		return nil, nil
+	}
+	if id := rv.Annotations["dubbo.apache.org/rule-version-id"]; id != "" {
+		return []string{id}, nil
+	}
+	id, err := parseNumericSuffix(rv.Name)
+	if err != nil {
+		// Malformed names are skipped rather than failing the index update.
+		return nil, nil
+	}
+	return []string{id}, nil
+}
+
+// byRuleVersionIntentID indexes RuleVersion resources by the decimal form of
+// Spec.IntentId. Versions with a zero intent ID are not indexed.
+func byRuleVersionIntentID(obj interface{}) ([]string, error) {
+	rv, ok := obj.(*meshresource.RuleVersionResource)
+	if !ok || rv == nil || rv.Spec == nil || rv.Spec.IntentId == 0 {
+		return nil, nil
+	}
+	return []string{fmt.Sprintf("%d", rv.Spec.IntentId)}, nil
+}
+
+// parseNumericSuffix returns the integer that follows the last "-" in name,
+// rendered in canonical decimal form. It returns an error when name has no
+// "-", when nothing follows the last "-", or when the suffix is not a base-10
+// int64.
+//
+// The suffix is re-formatted instead of returned as written so that spellings
+// such as "+7" or "007" produce the key "7", the same normalization
+// byRuleIntentID applies; a lookup by formatted ID then finds them.
+func parseNumericSuffix(name string) (string, error) {
+	for i := len(name) - 1; i >= 0; i-- {
+		if name[i] != '-' {
+			continue
+		}
+		if i == len(name)-1 {
+			return "", fmt.Errorf("missing numeric suffix")
+		}
+		id, err := strconv.ParseInt(name[i+1:], 10, 64)
+		if err != nil {
+			return "", err
+		}
+		return strconv.FormatInt(id, 10), nil
+	}
+	return "", fmt.Errorf("missing numeric suffix")
+}
diff --git a/pkg/core/store/store.go b/pkg/core/store/store.go
index 8923f6577..a55d02eb0 100644
--- a/pkg/core/store/store.go
+++ b/pkg/core/store/store.go
@@ -43,6 +43,13 @@ type ResourceStore interface {
 	PageListByIndexes(indexes []index.IndexCondition, pq model.PageReq) (*model.PageData[model.Resource], error)
 }
 
+// ConditionalResourceStore is a narrow compare-and-swap extension used by
+// RuleIntent recovery records. The expected and updated resources must have the
+// same key; false means another writer changed or removed the resource.
+type ConditionalResourceStore interface {
+	UpdateIfUnchanged(expected model.Resource, updated model.Resource) (bool, error)
+}
+
 // ManagedResourceStore includes both functional interfaces and lifecycle interfaces
 // If there is a new type of ResourceStore, it should implement this interface
 type ManagedResourceStore interface {
@@ -77,7 +84,10 @@ func ErrorResourceNotFound(rt, name, mesh string) error {
 	return fmt.Errorf("resource not found: type=%q name=%q mesh=%q", rt, name, mesh)
 }
 
-var ErrorInvalidOffset = errors.New("invalid offset")
+var (
+	ErrorInvalidOffset        = errors.New("invalid offset")
+	ErrResourceStoreTransient = errors.New("resource store transient error")
+)
 
 func IsResourceNotFound(err error) bool {
 	return err != nil && strings.HasPrefix(err.Error(), "Resource not found")
diff --git a/pkg/core/versioning/component.go b/pkg/core/versioning/component.go
new file mode 100644
index 000000000..41600ff16
--- /dev/null
+++ b/pkg/core/versioning/component.go
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package versioning
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"math"
+	"time"
+
+	versioningcfg "github.com/apache/dubbo-admin/pkg/config/versioning"
+	"github.com/apache/dubbo-admin/pkg/core/events"
+	"github.com/apache/dubbo-admin/pkg/core/governor"
+	"github.com/apache/dubbo-admin/pkg/core/lock"
+	"github.com/apache/dubbo-admin/pkg/core/logger"
+	"github.com/apache/dubbo-admin/pkg/core/manager"
+	meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1"
+	coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model"
+	"github.com/apache/dubbo-admin/pkg/core/runtime"
+)
+
+// ComponentType is the runtime component type reported by the rule
+// versioning component.
+const ComponentType runtime.ComponentType = "rule versioning"
+
+// init registers a zero-value component with the runtime; its fields are
+// populated later by Init.
+func init() {
+	runtime.RegisterComponent(&component{})
+}
+
+// Component is the runtime component that also exposes the versioning Service.
+type Component interface {
+	runtime.Component
+	Service() *Service
+}
+
+// component wires the versioning store, the per-rule lock, and the background
+// reconcile loop together.
+type component struct {
+	// service is built in Init from the configured max versions and store.
+	service *Service
+	// store is the ResourceStoreAdapter over the RuleVersion/RuleIntent stores.
+	store Store
+	// lock is the lock implementation obtained from the lock component.
+	lock lock.Lock
+	// reconcileRequests has capacity 1; a buffered token means a reconcile
+	// pass is already pending.
+	reconcileRequests chan struct{}
+}
+
+// Type returns ComponentType.
+func (c *component) Type() runtime.ComponentType {
+	return ComponentType
+}
+
+// Order returns math.MaxInt - 5.
+// NOTE(review): how the runtime interprets this value (early vs. late start)
+// is not visible here - confirm against the runtime package.
+func (c *component) Order() int {
+	return math.MaxInt - 5
+}
+
+// RequiredDependencies lists the event bus, resource store and resource
+// manager components.
+// NOTE(review): Init also fetches lock.DistributedLockComponent, which is not
+// listed here - confirm whether it should be declared as a dependency.
+func (c *component) RequiredDependencies() []runtime.ComponentType {
+	return []runtime.ComponentType{
+		runtime.EventBus,
+		runtime.ResourceStore,
+		runtime.ResourceManager,
+	}
+}
+
+// Init resolves the RuleVersion and RuleIntent stores and the lock, builds the
+// Service, and subscribes one Subscriber per rule kind to the event bus.
+// Missing RuleVersioning config falls back to versioningcfg.Default(). It
+// returns an error when a store, the lock component, or the event bus is
+// unavailable or of an unexpected type.
+func (c *component) Init(ctx runtime.BuilderContext) error {
+	cfg := ctx.Config().RuleVersioning
+	if cfg == nil {
+		cfg = versioningcfg.Default()
+	}
+
+	rmComponent, err := ctx.GetActivatedComponent(runtime.ResourceManager)
+	if err != nil {
+		return err
+	}
+	// Unchecked assertion: panics if the activated component is of another type.
+	rm := rmComponent.(manager.ResourceManagerComponent).ResourceManager()
+
+	// Get resource stores for each versioning resource kind. ResourceStore is
+	// routed by kind, so RuleVersion and RuleIntent cannot share one store
+	// instance.
+	rvStore, err := rm.GetStore(meshresource.RuleVersionKind)
+	if err != nil {
+		return fmt.Errorf("failed to get RuleVersion store: %w", err)
+	}
+	if rvStore == nil {
+		return fmt.Errorf("RuleVersion store not available - versioning requires resource store")
+	}
+	intentStore, err := rm.GetStore(meshresource.RuleIntentKind)
+	if err != nil {
+		return fmt.Errorf("failed to get RuleIntent store: %w", err)
+	}
+	if intentStore == nil {
+		return fmt.Errorf("RuleIntent store not available - versioning requires resource store")
+	}
+
+	store := NewResourceStoreAdapter(rvStore, intentStore)
+	if err := store.ensureStores(); err != nil {
+		return err
+	}
+	lockComponent, err := ctx.GetActivatedComponent(lock.DistributedLockComponent)
+	if err != nil {
+		return fmt.Errorf("rule versioning requires a lock component: %w", err)
+	}
+	lockComp, ok := lockComponent.(*lock.Component)
+	if !ok {
+		return fmt.Errorf("component %s does not implement lock component", lock.DistributedLockComponent)
+	}
+	lockMgr := lockComp.GetLock()
+	if lockMgr == nil {
+		return fmt.Errorf("rule versioning requires an available lock implementation")
+	}
+	c.store = store
+	c.lock = lockMgr
+	c.reconcileRequests = make(chan struct{}, 1)
+	c.service = NewService(
+		cfg.MaxVersionsPerRule,
+		store,
+	)
+	logger.Infof("Using resource store for rule versioning (RuleVersion, RuleIntent)")
+
+	eventBusComponent, err := ctx.GetActivatedComponent(runtime.EventBus)
+	if err != nil {
+		return err
+	}
+	bus, ok := eventBusComponent.(events.EventBus)
+	if !ok {
+		return fmt.Errorf("component %s does not implement events.EventBus", runtime.EventBus)
+	}
+	// One subscriber per rule kind; each receives c.requestReconcile as its
+	// callback.
+	for _, kind := range governor.RuleResourceKinds.Values() {
+		sub := NewSubscriber(kind, store, cfg.MaxVersionsPerRule, lockMgr, ctx.AppContext(), c.requestReconcile)
+		if err := bus.Subscribe(sub); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Start runs the startup sequence - repair open intents, clean up terminal
+// intents, bootstrap existing rules - and then launches the background
+// reconcile loop. Any error from the three startup steps aborts Start.
+func (c *component) Start(rt runtime.Runtime, stop <-chan struct{}) error {
+	cfg := rt.Config().RuleVersioning
+	if cfg == nil {
+		cfg = versioningcfg.Default()
+	}
+	// startCtx only covers the synchronous startup steps; it is cancelled when
+	// Start returns or stop fires.
+	startCtx, cancel := contextWithStop(rt.AppContext(), stop)
+	defer cancel()
+	rmComp, err := rt.GetComponent(runtime.ResourceManager)
+	if err != nil {
+		return err
+	}
+	rm := rmComp.(manager.ResourceManagerComponent).ResourceManager()
+	// Startup repair resolves durable intents left by crashes before bootstrap
+	// records current rules, so a stale intent cannot fence all later writes.
+	if err := c.repairOpenIntents(startCtx, rm); err != nil {
+		return err
+	}
+	if err := c.cleanupTerminalIntents(startCtx); err != nil {
+		return err
+	}
+	if err := c.bootstrapExistingRules(startCtx, rm, cfg.MaxVersionsPerRule); err != nil {
+		return err
+	}
+	c.startReconcileLoop(rt.AppContext(), stop, rm, cfg.MaxVersionsPerRule)
+	return nil
+}
+
+// Service returns the Service built in Init (nil before Init has run).
+func (c *component) Service() *Service {
+	return c.service
+}
+
+// bootstrapExistingRules calls RecordBootstrapLocked for every rule resource
+// currently in the rule stores, then hands the set of keys it saw to
+// reconcileDeletedRules. Kinds whose store is nil are skipped. It stops at the
+// first error or when ctx is cancelled.
+func (c *component) bootstrapExistingRules(ctx context.Context, rm manager.ResourceManager, maxVersions int64) error {
+	currentKeys := make(map[coremodel.ResourceKind]map[string]struct{})
+	for _, kind := range governor.RuleResourceKinds.Values() {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		rs, err := rm.GetStore(kind)
+		if err != nil {
+			return err
+		}
+		if rs == nil {
+			continue
+		}
+		keys := rs.ListKeys()
+		currentKeys[kind] = make(map[string]struct{}, len(keys))
+		for _, key := range keys {
+			currentKeys[kind][key] = struct{}{}
+		}
+		resources, err := rs.GetByKeys(keys)
+		if err != nil {
+			return err
+		}
+		for _, res := range resources {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			if err := RecordBootstrapLocked(ctx, c.store, maxVersions, res.ResourceKind(), res.ResourceKey(), rm, c.lock); err != nil {
+				return err
+			}
+		}
+	}
+	return c.reconcileDeletedRules(ctx, rm, currentKeys)
+}
+
+// reconcileDeletedRules walks the latest version of every rule in the ledger
+// and, for heads that are not delete markers and whose key is absent from
+// currentKeys, calls ReconcileActualState under the per-rule lock. Under the
+// lock it first skips rules that have an open intent or that turn out to
+// exist in the resource manager after all.
+func (c *component) reconcileDeletedRules(ctx context.Context, rm manager.ResourceManager, currentKeys map[coremodel.ResourceKind]map[string]struct{}) error {
+	for _, kind := range governor.RuleResourceKinds.Values() {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		latest, err := c.store.ListLatestVersions(kind)
+		if err != nil {
+			return err
+		}
+		for _, head := range latest {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			if head.Operation == OperationDelete {
+				continue
+			}
+			// Indexing a missing kind yields a nil map, which reads as "absent".
+			if _, exists := currentKeys[kind][head.ResourceKey]; exists {
+				continue
+			}
+			err := withRuleVersionLock(ctx, c.lock, kind, head.ResourceKey, func(leaseCtx context.Context) error {
+				intent, err := c.store.OpenIntent(kind, head.ResourceKey)
+				if err != nil {
+					return err
+				}
+				if intent != nil {
+					return nil
+				}
+				// Re-read under the lock: the key snapshot may be stale.
+				current, exists, err := rm.GetByKey(kind, head.ResourceKey)
+				if err != nil {
+					return err
+				}
+				if exists {
+					return nil
+				}
+				_, err = c.service.ReconcileActualState(leaseCtx, kind, head.ResourceKey, current, true, "system:reconcile")
+				return err
+			})
+			if err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// requestReconcile asks the reconcile loop for a pass without blocking. If a
+// request is already buffered the call is a no-op; it is also a no-op on a
+// nil component or before the channel exists.
+func (c *component) requestReconcile() {
+	if c == nil || c.reconcileRequests == nil {
+		return
+	}
+	select {
+	case c.reconcileRequests <- struct{}{}:
+	default:
+	}
+}
+
+// startReconcileLoop starts a goroutine that re-runs bootstrapExistingRules
+// once a minute and whenever a reconcile request arrives, until stop fires or
+// parent is done. Failures are logged as warnings and do not stop the loop.
+func (c *component) startReconcileLoop(parent context.Context, stop <-chan struct{}, rm manager.ResourceManager, maxVersions int64) {
+	if c.reconcileRequests == nil {
+		c.reconcileRequests = make(chan struct{}, 1)
+	}
+	go func() {
+		ticker := time.NewTicker(time.Minute)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-stop:
+				return
+			case <-parent.Done():
+				return
+			case <-c.reconcileRequests:
+			case <-ticker.C:
+			}
+			// Fresh context per pass so it can be released as soon as the pass ends.
+			ctx, cancel := contextWithStop(parent, stop)
+			if err := c.bootstrapExistingRules(ctx, rm, maxVersions); err != nil {
+				logger.Warnf("rule version current-state reconcile failed: %v", err)
+			}
+			cancel()
+		}
+	}()
+}
+
+// repairOpenIntents finalizes every open intent against the current registry
+// state. For each intent it takes the per-rule lock, re-reads the intent,
+// looks up the current resource, and calls FinalizeMutation. Intents that no
+// longer exist are skipped; ErrVersionIntentPending and
+// ErrIntentOutcomeMismatch are logged and skipped; any other error aborts.
+func (c *component) repairOpenIntents(ctx context.Context, rm manager.ResourceManager) error {
+	intents, err := c.store.ListOpenIntents()
+	if err != nil {
+		return err
+	}
+	for _, intent := range intents {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		err = withRuleVersionLock(ctx, c.lock, intent.RuleKind, intent.ResourceKey, func(leaseCtx context.Context) error {
+			// Re-read under the lock; the listed copy may be stale.
+			freshIntent, err := c.service.GetIntent(intent.ID)
+			if err != nil {
+				return err
+			}
+			current, exists, err := rm.GetByKey(freshIntent.RuleKind, freshIntent.ResourceKey)
+			if err != nil {
+				return err
+			}
+			_, err = c.service.FinalizeMutation(leaseCtx, freshIntent, current, !exists)
+			return err
+		})
+		if err != nil {
+			if errors.Is(err, ErrVersionIntentNotFound) {
+				continue
+			}
+			if errors.Is(err, ErrVersionIntentPending) || errors.Is(err, ErrIntentOutcomeMismatch) {
+				logger.Warnf("rule version intent %d cannot be repaired automatically for %s: %v", intent.ID, intent.ResourceKey, err)
+				continue
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+// cleanupTerminalIntents removes every terminal intent under its per-rule
+// lock, after checking the lease. Intents that are already gone are skipped.
+// A committed intent whose cleanup reports ErrVersionNotFound is returned as
+// ErrVersionLedgerCorrupt; any other error is returned as is.
+func (c *component) cleanupTerminalIntents(ctx context.Context) error {
+	intents, err := c.store.ListTerminalIntents()
+	if err != nil {
+		return err
+	}
+	for _, intent := range intents {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		err = withRuleVersionLock(ctx, c.lock, intent.RuleKind, intent.ResourceKey, func(leaseCtx context.Context) error {
+			if err := lock.CheckLease(leaseCtx); err != nil {
+				return err
+			}
+			return c.store.CleanupIntent(intent.ID, intent.Status)
+		})
+		if err != nil {
+			if errors.Is(err, ErrVersionIntentNotFound) {
+				continue
+			}
+			if errors.Is(err, ErrVersionNotFound) && intent.Status == IntentStatusCommitted {
+				return fmt.Errorf("%w: terminal committed intent %d has no RuleVersion", ErrVersionLedgerCorrupt, intent.ID)
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+// contextWithStop derives a cancellable context from parent that is also
+// cancelled when stop fires. The caller must call the returned cancel func:
+// it is what lets the watcher goroutine exit when stop never fires.
+func contextWithStop(parent context.Context, stop <-chan struct{}) (context.Context, context.CancelFunc) {
+	ctx, cancel := context.WithCancel(parent)
+	go func() {
+		select {
+		case <-stop:
+			cancel()
+		case <-ctx.Done():
+		}
+	}()
+	return ctx, cancel
+}
diff --git a/pkg/core/versioning/id_generator.go b/pkg/core/versioning/id_generator.go
new file mode 100644
index 000000000..c4d6408e2
--- /dev/null
+++ b/pkg/core/versioning/id_generator.go
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package versioning
+
+import (
+	"crypto/rand"
+	"encoding/binary"
+)
+
+// idGenerator is the ID source used by the store adapter.
+type idGenerator interface {
+	Next() (int64, error)
+}
+
+// IDGenerator creates positive int64 identifiers. VersionNo, not this ID,
+// defines ledger ordering, so randomness plus store-level conflict retry is
+// enough and avoids clock-coupled ID behavior.
+type IDGenerator struct{}
+
+// NewIDGenerator returns a ready-to-use, stateless generator.
+func NewIDGenerator() *IDGenerator {
+	return new(IDGenerator)
+}
+
+// Next draws 8 random bytes from crypto/rand and returns them as a strictly
+// positive int64. It fails only when the random source does.
+func (g *IDGenerator) Next() (int64, error) {
+	var raw [8]byte
+	_, err := rand.Read(raw[:])
+	if err != nil {
+		return 0, err
+	}
+
+	// Clearing the top bit keeps the value within the non-negative int64 range.
+	const signBit = uint64(1) << 63
+	value := binary.BigEndian.Uint64(raw[:]) &^ signBit
+	if value == 0 {
+		// Zero is never handed out; it is mapped to 1.
+		return 1, nil
+	}
+	return int64(value), nil
+}
diff --git a/pkg/core/versioning/normalize.go b/pkg/core/versioning/normalize.go
new file mode 100644
index 000000000..68aa05a30
--- /dev/null
+++ b/pkg/core/versioning/normalize.go
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package versioning
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+
+	"google.golang.org/protobuf/encoding/protojson"
+	"google.golang.org/protobuf/proto"
+
+	meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1"
+	"github.com/apache/dubbo-admin/pkg/common/bizerror"
+	meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1"
+	coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model"
+)
+
+// DeleteSpecJSON is the canonical snapshot stored for a rule delete marker.
+const DeleteSpecJSON = "{}" + +// NormalizeSpec returns canonical JSON and its hash for stable comparisons. +// It does not validate whether the spec is acceptable to the registry. +func NormalizeSpec(spec coremodel.ResourceSpec) (string, string, error) { + if spec == nil { + return HashSpecJSON(DeleteSpecJSON), DeleteSpecJSON, nil + } + var raw []byte + if msg, ok := spec.(proto.Message); ok { + var err error + raw, err = protojson.MarshalOptions{ + UseProtoNames: false, + EmitUnpopulated: false, + }.Marshal(msg) + if err != nil { + return "", "", err + } + } else { + var err error + raw, err = json.Marshal(spec) + if err != nil { + return "", "", err + } + } + var v any + if err := json.Unmarshal(raw, &v); err != nil { + return "", "", err + } + canonical, err := json.Marshal(v) + if err != nil { + return "", "", err + } + specJSON := string(canonical) + return HashSpecJSON(specJSON), specJSON, nil +} + +// HashSpecJSON hashes canonical spec JSON for comparison and dedup filters. +// It is not sufficient on its own to prove operation source or intent ownership. +func HashSpecJSON(specJSON string) string { + sum := sha256.Sum256([]byte(specJSON)) + return hex.EncodeToString(sum[:]) +} + +func NormalizeResource(res coremodel.Resource) (string, string, error) { + if res == nil { + return "", "", fmt.Errorf("resource is nil") + } + return NormalizeSpec(res.ResourceSpec()) +} + +// ResourceFromSpecJSON rebuilds a typed rule Resource from a stored version's +// spec JSON. Used by rollback to re-publish a historical snapshot through the +// normal ResourceManager mutation path. Only the three governor-managed rule +// kinds are supported. protojson is tried first (matching how specs are +// normalized), falling back to plain JSON for resilience. 
+func ResourceFromSpecJSON(kind coremodel.ResourceKind, mesh, ruleName, specJSON string) (coremodel.Resource, error) {
+	// Each branch builds an empty resource of the requested kind and fills its
+	// Spec from specJSON; any other kind is rejected as InvalidArgument.
+	switch kind {
+	case meshresource.ConditionRouteKind:
+		res := meshresource.NewConditionRouteResourceWithAttributes(ruleName, mesh)
+		var spec meshproto.ConditionRoute
+		if err := unmarshalSpec(specJSON, &spec); err != nil {
+			return nil, err
+		}
+		res.Spec = &spec
+		return res, nil
+	case meshresource.TagRouteKind:
+		res := meshresource.NewTagRouteResourceWithAttributes(ruleName, mesh)
+		var spec meshproto.TagRoute
+		if err := unmarshalSpec(specJSON, &spec); err != nil {
+			return nil, err
+		}
+		res.Spec = &spec
+		return res, nil
+	case meshresource.DynamicConfigKind:
+		res := meshresource.NewDynamicConfigResourceWithAttributes(ruleName, mesh)
+		var spec meshproto.DynamicConfig
+		if err := unmarshalSpec(specJSON, &spec); err != nil {
+			return nil, err
+		}
+		res.Spec = &spec
+		return res, nil
+	default:
+		return nil, bizerror.New(bizerror.InvalidArgument, "unsupported rule kind")
+	}
+}
+
+// unmarshalSpec decodes specJSON into spec, trying protojson first and plain
+// encoding/json second. When both fail, the protojson error is returned.
+func unmarshalSpec(specJSON string, spec proto.Message) error {
+	protoErr := protojson.Unmarshal([]byte(specJSON), spec)
+	if protoErr == nil {
+		return nil
+	}
+	// A failed protojson decode can leave spec partially populated; clear it
+	// so the fallback result depends only on specJSON.
+	proto.Reset(spec)
+	if jsonErr := json.Unmarshal([]byte(specJSON), spec); jsonErr != nil {
+		return protoErr
+	}
+	return nil
+}
diff --git a/pkg/core/versioning/resource_store_adapter.go b/pkg/core/versioning/resource_store_adapter.go
new file mode 100644
index 000000000..0a374c037
--- /dev/null
+++ b/pkg/core/versioning/resource_store_adapter.go
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package versioning
+
+import (
+	"fmt"
+	"hash/fnv"
+	"sync"
+
+	coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model"
+	"github.com/apache/dubbo-admin/pkg/core/store"
+)
+
+// Compile-time check that the adapter satisfies Store.
+var _ Store = &ResourceStoreAdapter{}
+
+// parentLockStripes is the number of in-process mutexes that parent rules are
+// hashed onto.
+const parentLockStripes = 256
+const maxIDGenerateAttempts = 16
+const maxIntentCASRetries = 8
+
+// ResourceStoreAdapter routes RuleVersion and RuleIntent resources through the
+// existing resource store. Callers that mutate a parent rule must hold the
+// canonical per-rule lock; the striped mutexes only keep a single adapter
+// instance internally consistent while it derives state from the ledger.
+type ResourceStoreAdapter struct {
+	versionStore store.ResourceStore
+	intentStore  store.ResourceStore
+	idGenerator  idGenerator
+	parentLocks  [parentLockStripes]sync.Mutex
+}
+
+// NewResourceStoreAdapter builds an adapter over the two stores with a random
+// ID generator. It does not validate the stores; see ensureStores.
+func NewResourceStoreAdapter(versionStore, intentStore store.ResourceStore) *ResourceStoreAdapter {
+	return &ResourceStoreAdapter{
+		versionStore: versionStore,
+		intentStore:  intentStore,
+		idGenerator:  NewIDGenerator(),
+	}
+}
+
+// ensureStores reports ErrVersionLedgerCorrupt when the adapter or either
+// store is nil, or when the intent store does not implement
+// store.ConditionalResourceStore.
+func (a *ResourceStoreAdapter) ensureStores() error {
+	if a == nil || a.versionStore == nil || a.intentStore == nil {
+		return fmt.Errorf("%w: RuleVersion and RuleIntent stores are required", ErrVersionLedgerCorrupt)
+	}
+	if _, ok := a.intentStore.(store.ConditionalResourceStore); !ok {
+		return fmt.Errorf("%w: RuleIntent store must support conditional updates", ErrVersionLedgerCorrupt)
+	}
+	return nil
+}
+
+// CheckExpectedVersion compares the caller's expected head version ID with
+// the ledger. A nil expected skips the check. When the rule has no head or
+// its head is a delete, only an expected value of 0 passes and the returned
+// ConflictError carries no current ID. Otherwise expected must equal the head
+// ID, and a mismatch returns a ConflictError carrying that ID.
+func (a *ResourceStoreAdapter) CheckExpectedVersion(kind coremodel.ResourceKind, resourceKey string, expected *int64) error {
+	if err := a.ensureStores(); err != nil {
+		return err
+	}
+	if expected == nil {
+		return nil
+	}
+	snapshot, err := a.LedgerSnapshot(kind, resourceKey)
+	if err != nil {
+		return err
+	}
+	if snapshot.Head == nil || snapshot.Deleted {
+		if *expected == 0 {
+			return nil
+		}
+		return &ConflictError{CurrentVersionID: nil}
+	}
+	currentID := snapshot.Head.ID
+	if *expected != currentID {
+		// Point at the local copy so the error does not alias the snapshot.
+		return &ConflictError{CurrentVersionID: &currentID}
+	}
+	return nil
+}
+
+// LatestVersion returns the head version from the ledger snapshot, or
+// ErrVersionNotFound when the rule has no head.
+func (a *ResourceStoreAdapter) LatestVersion(kind coremodel.ResourceKind, resourceKey string) (*Version, error) {
+	snapshot, err := a.LedgerSnapshot(kind, resourceKey)
+	if err != nil {
+		return nil, err
+	}
+	if snapshot.Head == nil {
+		return nil, ErrVersionNotFound
+	}
+	return snapshot.Head, nil
+}
+
+// withParentLock runs fn while holding the stripe mutex selected by hashing
+// "kind/resourceKey". Different rules may share a stripe.
+func (a *ResourceStoreAdapter) withParentLock(kind coremodel.ResourceKind, resourceKey string, fn func() error) error {
+	key := fmt.Sprintf("%s/%s", kind, resourceKey)
+	mu := &a.parentLocks[parentLockIndex(key)]
+	mu.Lock()
+	defer mu.Unlock()
+	return fn()
+}
+
+// parentLockIndex maps key to a stripe index in [0, parentLockStripes) using
+// 32-bit FNV-1a.
+func parentLockIndex(key string) uint32 {
+	h := fnv.New32a()
+	_, _ = h.Write([]byte(key))
+	return h.Sum32() % parentLockStripes
+}
diff --git a/pkg/core/versioning/resource_store_adapter_test.go b/pkg/core/versioning/resource_store_adapter_test.go
new file mode 100644
index 000000000..fe2991f84
--- /dev/null
+++ b/pkg/core/versioning/resource_store_adapter_test.go
@@ -0,0 +1,485 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package versioning + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gorm.io/driver/sqlite" + "k8s.io/client-go/tools/cache" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + storecfg "github.com/apache/dubbo-admin/pkg/config/store" + "github.com/apache/dubbo-admin/pkg/core/events" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/store" + locallock "github.com/apache/dubbo-admin/pkg/lock/local" + "github.com/apache/dubbo-admin/pkg/store/dbcommon" + memoryst "github.com/apache/dubbo-admin/pkg/store/memory" +) + +func TestResourceStoreAdapter_StaleObservedAndStatusUpdatesConflictOnRevision(t *testing.T) { + versionStore, intentStore := newVersioningStores(t) + adapter := NewResourceStoreAdapter(versionStore, intentStore) + + intent, err := adapter.CreateIntent(context.Background(), testInsertRequest("demo-rule", "hash-a")) + require.NoError(t, err) + stale, _, err := adapter.getIntentResourceByID(intent.ID) + require.NoError(t, err) + + require.NoError(t, adapter.MarkIntentApplied(context.Background(), intent.ID)) + err = updateIntentResourceObserved(intentStore, stale, OperationUpdate, "hash-b", `{"key":"B"}`) + require.ErrorIs(t, err, ErrVersionIntentConflict) + + fresh, _, err := adapter.getIntentResourceByID(intent.ID) + require.NoError(t, err) + require.NoError(t, updateIntentResourceObserved(intentStore, fresh, OperationUpdate, "hash-b", `{"key":"B"}`)) + err = updateIntentResourceStatus(intentStore, fresh, IntentStatusCommitting, "") + require.ErrorIs(t, err, ErrVersionIntentConflict) + + open, err := adapter.GetIntent(intent.ID) + require.NoError(t, err) + assert.Equal(t, IntentStatusApplied, open.Status) + assert.True(t, open.ReconcileRequired) + assert.Equal(t, 
"hash-b", open.ObservedContentHash) +} + +func TestResourceStoreAdapter_GormConditionalUpdateConcurrentCommitAndObservedOnlyOneWins(t *testing.T) { + writerA, writerB, intentStoreA, intentStoreB := newGormVersioningAdapters(t) + key := coremodel.BuildResourceKey("", "demo-rule") + + intent, err := writerA.CreateIntent(context.Background(), testInsertRequest("demo-rule", "hash-a")) + require.NoError(t, err) + require.NoError(t, writerA.MarkIntentApplied(context.Background(), intent.ID)) + + staleForCommit, _, err := writerA.getIntentResourceByID(intent.ID) + require.NoError(t, err) + staleForObserved, _, err := writerB.getIntentResourceByID(intent.ID) + require.NoError(t, err) + require.Equal(t, staleForCommit.Spec.Revision, staleForObserved.Spec.Revision) + + ready := make(chan struct{}, 2) + start := make(chan struct{}) + results := make(chan error, 2) + go func() { + ready <- struct{}{} + <-start + results <- updateIntentResourceStatus(intentStoreA, staleForCommit, IntentStatusCommitting, "") + }() + go func() { + ready <- struct{}{} + <-start + results <- updateIntentResourceObserved(intentStoreB, staleForObserved, OperationUpdate, "hash-b", `{"key":"B"}`) + }() + <-ready + <-ready + close(start) + + winners := 0 + conflicts := 0 + for i := 0; i < 2; i++ { + err := <-results + switch { + case err == nil: + winners++ + case errors.Is(err, ErrVersionIntentConflict): + conflicts++ + default: + require.NoError(t, err) + } + } + require.Equal(t, 1, winners) + require.Equal(t, 1, conflicts) + + finalIntent, err := writerA.GetIntent(intent.ID) + require.NoError(t, err) + switch finalIntent.Status { + case IntentStatusCommitting: + assert.False(t, finalIntent.ReconcileRequired) + require.NoError(t, writerB.MarkIntentObserved(context.Background(), intent.ID, OperationUpdate, "hash-b", `{"key":"B"}`)) + _, err = writerB.CommitIntent(context.Background(), intent.ID, 10) + require.NoError(t, err) + versions, err := writerB.ListVersions(meshresource.ConditionRouteKind, key) 
+ require.NoError(t, err) + require.Len(t, versions, 2) + assert.Equal(t, "hash-b", versions[0].ContentHash) + assert.Equal(t, "hash-a", versions[1].ContentHash) + case IntentStatusApplied: + assert.True(t, finalIntent.ReconcileRequired) + _, err = writerB.CommitIntent(context.Background(), intent.ID, 10) + var pending *IntentPendingError + require.ErrorAs(t, err, &pending) + default: + t.Fatalf("unexpected final intent status after concurrent CAS: %s", finalIntent.Status) + } +} + +func TestResourceStoreAdapter_CommitIntentDirtyMarkerCASRejectsBeforeAppend(t *testing.T) { + versionStore, baseIntentStore := newVersioningStores(t) + commitAtCAS := make(chan struct{}) + releaseCommit := make(chan struct{}) + var once sync.Once + blockingIntentStore := &barrierCASStore{ResourceStore: baseIntentStore} + writerA := NewResourceStoreAdapter(versionStore, blockingIntentStore) + writerB := NewResourceStoreAdapter(versionStore, baseIntentStore) + + intent, err := writerA.CreateIntent(context.Background(), testInsertRequest("demo-rule", "hash-a")) + require.NoError(t, err) + require.NoError(t, writerA.MarkIntentApplied(context.Background(), intent.ID)) + + blockingIntentStore.beforeCAS = func(_, updated coremodel.Resource) { + intentRes, ok := updated.(*meshresource.RuleIntentResource) + if !ok || intentRes.Spec == nil || IntentStatus(intentRes.Spec.Status) != IntentStatusCommitting { + return + } + once.Do(func() { + close(commitAtCAS) + <-releaseCommit + }) + } + + errCh := make(chan error, 1) + go func() { + _, commitErr := writerA.CommitIntent(context.Background(), intent.ID, 10) + errCh <- commitErr + }() + <-commitAtCAS + + require.NoError(t, writerB.MarkIntentObserved(context.Background(), intent.ID, OperationUpdate, "hash-b", `{"key":"B"}`)) + close(releaseCommit) + + err = <-errCh + var pending *IntentPendingError + require.ErrorAs(t, err, &pending) + + versions, err := writerA.ListVersions(meshresource.ConditionRouteKind, coremodel.BuildResourceKey("", "demo-rule")) 
+ require.NoError(t, err) + require.Empty(t, versions) + + open, err := writerA.GetIntent(intent.ID) + require.NoError(t, err) + assert.Equal(t, IntentStatusApplied, open.Status) + assert.True(t, open.ReconcileRequired) + assert.Equal(t, "hash-b", open.ObservedContentHash) +} + +func TestSubscriber_StaleOpenIntentAfterCleanupFallsBackToUpstreamVersion(t *testing.T) { + versionStore, baseIntentStore := newVersioningStores(t) + intentStore := &failOnceStore{ResourceStore: baseIntentStore, err: errors.New("cleanup failed")} + adapter := NewResourceStoreAdapter(versionStore, intentStore) + sub := NewSubscriber(meshresource.ConditionRouteKind, adapter, 10, locallock.NewLocalLock(), context.Background()) + + intended := testConditionRule("demo-rule", "A") + req, err := buildMutationInsertRequest(intended, OperationUpdate, SourceAdmin, "admin", "", nil, time.Unix(100, 0)) + require.NoError(t, err) + intent, err := adapter.CreateIntent(context.Background(), req) + require.NoError(t, err) + require.NoError(t, adapter.MarkIntentApplied(context.Background(), intent.ID)) + staleOpen, err := adapter.GetIntent(intent.ID) + require.NoError(t, err) + + intentStore.failNextDelete = true + _, err = adapter.CommitIntent(context.Background(), intent.ID, 10) + require.ErrorContains(t, err, "cleanup failed") + + external := testConditionRule("demo-rule", "B") + event, err := normalizeRuleEvent(events.NewResourceChangedEvent(cache.Updated, intended, external)) + require.NoError(t, err) + require.NoError(t, sub.handleOpenIntentEvent(staleOpen, *event)) + + versions, err := adapter.ListVersions(meshresource.ConditionRouteKind, external.ResourceKey()) + require.NoError(t, err) + require.NotEmpty(t, versions) + assert.Equal(t, HashSpecForTest(t, external), versions[0].ContentHash) + assert.Equal(t, SourceUpstream, versions[0].Source) +} + +func TestSubscriber_CommittingIntentFinishesAdminBeforeUpstreamSuccessor(t *testing.T) { + baseVersionStore, baseIntentStore := newVersioningStores(t) + 
intentStore := &failOnceStore{ResourceStore: baseIntentStore, err: errors.New("intent update failed")} + adapter := NewResourceStoreAdapter(baseVersionStore, intentStore) + sub := NewSubscriber(meshresource.ConditionRouteKind, adapter, 10, locallock.NewLocalLock(), context.Background()) + + intent, err := adapter.CreateIntent(context.Background(), testInsertRequest("demo-rule", "hash-a")) + require.NoError(t, err) + require.NoError(t, adapter.MarkIntentApplied(context.Background(), intent.ID)) + intentStore.failUpdateAfter = 2 + _, err = adapter.CommitIntent(context.Background(), intent.ID, 10) + require.ErrorContains(t, err, "intent update failed") + + committing, err := adapter.GetIntent(intent.ID) + require.NoError(t, err) + require.Equal(t, IntentStatusCommitting, committing.Status) + + upstream := testConditionRule("demo-rule", "B") + event, err := normalizeRuleEvent(events.NewResourceChangedEvent(cache.Updated, upstream, upstream)) + require.NoError(t, err) + require.NoError(t, sub.ProcessEvent(events.NewResourceChangedEvent(cache.Updated, upstream, upstream))) + + versions, err := adapter.ListVersions(meshresource.ConditionRouteKind, upstream.ResourceKey()) + require.NoError(t, err) + require.Len(t, versions, 2) + assert.Equal(t, event.ContentHash, versions[0].ContentHash) + assert.Equal(t, SourceUpstream, versions[0].Source) + assert.Equal(t, int64(2), versions[0].VersionNo) + assert.Equal(t, "hash-a", versions[1].ContentHash) + assert.Equal(t, SourceAdmin, versions[1].Source) + assert.Equal(t, intent.ID, versions[1].ID) + assert.Equal(t, int64(1), versions[1].VersionNo) +} + +func TestService_OutcomeUnknownActualMatchCommitsFixedIntentID(t *testing.T) { + versionStore, intentStore := newVersioningStores(t) + adapter := NewResourceStoreAdapter(versionStore, intentStore) + svc := NewService(10, adapter) + res := testConditionRule("demo-rule", "v1") + req, err := buildMutationInsertRequest(res, OperationUpdate, SourceAdmin, "admin", "", nil, time.Unix(100, 
0)) + require.NoError(t, err) + intent, err := adapter.CreateIntent(context.Background(), req) + require.NoError(t, err) + require.NoError(t, adapter.MarkIntentOutcomeUnknown(context.Background(), intent.ID, "timeout")) + + committed, err := finalizeMutationForTest(svc, intent, res, false) + require.NoError(t, err) + require.NotNil(t, committed) + assert.Equal(t, intent.ID, committed.ID) + assert.Equal(t, intent.ID, committed.IntentID) + + _, err = adapter.GetIntent(intent.ID) + require.ErrorIs(t, err, ErrVersionIntentNotFound) +} + +func TestResourceStoreAdapter_CheckExpectedVersion(t *testing.T) { + versionStore, intentStore := newVersioningStores(t) + adapter := NewResourceStoreAdapter(versionStore, intentStore) + key := coremodel.BuildResourceKey("", "demo-rule") + + require.NoError(t, adapter.CheckExpectedVersion(meshresource.ConditionRouteKind, key, nil)) + + expectedDeleted := int64(0) + require.NoError(t, adapter.CheckExpectedVersion(meshresource.ConditionRouteKind, key, &expectedDeleted)) + + version, err := adapter.InsertVersion(context.Background(), testInsertRequest("demo-rule", "hash-a"), 10) + require.NoError(t, err) + err = adapter.CheckExpectedVersion(meshresource.ConditionRouteKind, key, &expectedDeleted) + var conflict *ConflictError + require.ErrorAs(t, err, &conflict) + require.NotNil(t, conflict.CurrentVersionID) + + mismatch := version.ID + 1 + err = adapter.CheckExpectedVersion(meshresource.ConditionRouteKind, key, &mismatch) + require.ErrorAs(t, err, &conflict) + require.NotNil(t, conflict.CurrentVersionID) + assert.Equal(t, version.ID, *conflict.CurrentVersionID) + + require.NoError(t, adapter.CheckExpectedVersion(meshresource.ConditionRouteKind, key, &version.ID)) +} + +func TestRuleVersionLockSerializesSharedAdapters(t *testing.T) { + versionStore, intentStore := newVersioningStores(t) + writerA := NewResourceStoreAdapter(versionStore, intentStore) + writerB := NewResourceStoreAdapter(versionStore, intentStore) + lockMgr := 
locallock.NewLocalLock() + key := coremodel.BuildResourceKey("", "demo-rule") + + const writes = 8 + var wg sync.WaitGroup + errCh := make(chan error, writes) + for i := 0; i < writes; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + adapter := writerA + if i%2 == 1 { + adapter = writerB + } + err := withRuleVersionLock(context.Background(), lockMgr, meshresource.ConditionRouteKind, key, func(leaseCtx context.Context) error { + _, err := adapter.InsertVersion(leaseCtx, testInsertRequest("demo-rule", fmt.Sprintf("hash-%02d", i)), 100) + return err + }) + errCh <- err + }(i) + } + wg.Wait() + close(errCh) + for err := range errCh { + require.NoError(t, err) + } + + versions, err := writerA.ListVersions(meshresource.ConditionRouteKind, key) + require.NoError(t, err) + require.Len(t, versions, writes) + seen := map[int64]bool{} + for i, version := range versions { + require.False(t, seen[version.VersionNo], "duplicate VersionNo %d", version.VersionNo) + seen[version.VersionNo] = true + assert.Equal(t, int64(writes-i), version.VersionNo) + } +} + +func TestComponent_CleanupTerminalIntentSweepAfterRestart(t *testing.T) { + versionStore, baseIntentStore := newVersioningStores(t) + intentStore := &failOnceStore{ResourceStore: baseIntentStore, err: errors.New("cleanup failed")} + adapter := NewResourceStoreAdapter(versionStore, intentStore) + + intent, err := adapter.CreateIntent(context.Background(), testInsertRequest("demo-rule", "hash-a")) + require.NoError(t, err) + require.NoError(t, adapter.MarkIntentApplied(context.Background(), intent.ID)) + + intentStore.failNextDelete = true + _, err = adapter.CommitIntent(context.Background(), intent.ID, 10) + require.ErrorContains(t, err, "cleanup failed") + terminal, err := adapter.GetIntent(intent.ID) + require.NoError(t, err) + require.Equal(t, IntentStatusCommitted, terminal.Status) + + c := &component{store: adapter, lock: locallock.NewLocalLock()} + require.NoError(t, c.cleanupTerminalIntents(context.Background())) 
+ + _, err = adapter.GetIntent(intent.ID) + require.ErrorIs(t, err, ErrVersionIntentNotFound) + versions, err := adapter.ListVersions(meshresource.ConditionRouteKind, coremodel.BuildResourceKey("", "demo-rule")) + require.NoError(t, err) + require.Len(t, versions, 1) + assert.Equal(t, intent.ID, versions[0].ID) +} + +func newVersioningStores(t *testing.T) (store.ResourceStore, store.ResourceStore) { + t.Helper() + versionStore := memoryst.NewMemoryResourceStore(meshresource.RuleVersionKind) + intentStore := memoryst.NewMemoryResourceStore(meshresource.RuleIntentKind) + for _, s := range []store.ManagedResourceStore{versionStore, intentStore} { + require.NoError(t, s.Init(nil)) + } + return versionStore, intentStore +} + +func newGormVersioningAdapters(t *testing.T) (*ResourceStoreAdapter, *ResourceStoreAdapter, store.ResourceStore, store.ResourceStore) { + t.Helper() + dbPath := filepath.Join(t.TempDir(), "versioning.db") + dialector := sqlite.Open("file:" + dbPath + "?cache=shared&_journal_mode=WAL&_busy_timeout=5000") + pool, err := dbcommon.NewConnectionPool(dialector, storecfg.MySQL, t.Name(), dbcommon.DefaultConnectionPoolConfig()) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, pool.Close()) + }) + + versionStoreA := dbcommon.NewGormStore(meshresource.RuleVersionKind, t.Name()+"-version-a", pool) + intentStoreA := dbcommon.NewGormStore(meshresource.RuleIntentKind, t.Name()+"-intent-a", pool) + versionStoreB := dbcommon.NewGormStore(meshresource.RuleVersionKind, t.Name()+"-version-b", pool) + intentStoreB := dbcommon.NewGormStore(meshresource.RuleIntentKind, t.Name()+"-intent-b", pool) + for _, s := range []store.ManagedResourceStore{versionStoreA, intentStoreA, versionStoreB, intentStoreB} { + require.NoError(t, s.Init(nil)) + } + return NewResourceStoreAdapter(versionStoreA, intentStoreA), + NewResourceStoreAdapter(versionStoreB, intentStoreB), + intentStoreA, + intentStoreB +} + +func testInsertRequest(ruleName, hash string) 
InsertRequest { + return InsertRequest{ + RuleKind: meshresource.ConditionRouteKind, + Mesh: "", + ResourceKey: coremodel.BuildResourceKey("", ruleName), + RuleName: ruleName, + SpecJSON: `{"key":"` + ruleName + `","hash":"` + hash + `"}`, + ContentHash: hash, + Source: SourceAdmin, + Operation: OperationUpdate, + Author: "admin", + CreatedAt: time.Unix(100, 0), + } +} + +func HashSpecForTest(t *testing.T, res coremodel.Resource) string { + t.Helper() + hash, _, err := NormalizeResource(res) + require.NoError(t, err) + return hash +} + +func testConditionRule(ruleName, payload string) *meshresource.ConditionRouteResource { + res := meshresource.NewConditionRouteResourceWithAttributes(ruleName, "") + res.Spec = &meshproto.ConditionRoute{Enabled: true, Key: ruleName, Conditions: []string{payload}} + return res +} + +func finalizeMutationForTest(svc *Service, intent *Intent, current coremodel.Resource, deleted bool) (*Version, error) { + var version *Version + err := withRuleVersionLock(context.Background(), locallock.NewLocalLock(), intent.RuleKind, intent.ResourceKey, func(leaseCtx context.Context) error { + var inner error + version, inner = svc.FinalizeMutation(leaseCtx, intent, current, deleted) + return inner + }) + return version, err +} + +type barrierCASStore struct { + store.ResourceStore + beforeCAS func(expected coremodel.Resource, updated coremodel.Resource) +} + +func (s *barrierCASStore) UpdateIfUnchanged(expected coremodel.Resource, updated coremodel.Resource) (bool, error) { + if s.beforeCAS != nil { + s.beforeCAS(expected, updated) + } + cas, ok := s.ResourceStore.(store.ConditionalResourceStore) + if !ok { + return false, fmt.Errorf("wrapped store does not support conditional updates") + } + return cas.UpdateIfUnchanged(expected, updated) +} + +type failOnceStore struct { + store.ResourceStore + failNextDelete bool + failUpdateAfter int + err error +} + +func (s *failOnceStore) UpdateIfUnchanged(expected coremodel.Resource, updated 
coremodel.Resource) (bool, error) { + if s.failUpdateAfter > 0 { + s.failUpdateAfter-- + if s.failUpdateAfter == 0 { + return false, s.err + } + } + cas, ok := s.ResourceStore.(store.ConditionalResourceStore) + if !ok { + return false, fmt.Errorf("wrapped store does not support conditional updates") + } + return cas.UpdateIfUnchanged(expected, updated) +} + +func (s *failOnceStore) Delete(obj interface{}) error { + if s.failNextDelete { + s.failNextDelete = false + return s.err + } + return s.ResourceStore.Delete(obj) +} diff --git a/pkg/core/versioning/resource_store_convert.go b/pkg/core/versioning/resource_store_convert.go new file mode 100644 index 000000000..f9f6092b2 --- /dev/null +++ b/pkg/core/versioning/resource_store_convert.go @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package versioning + +import ( + "fmt" + "strconv" + "strings" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/common/bizerror" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" +) + +const ruleVersionIDAnnotation = "dubbo.apache.org/rule-version-id" + +func buildVersionName(kind coremodel.ResourceKind, resourceKey string, id int64) string { + return fmt.Sprintf("%s-%s-%d", kind, extractName(resourceKey), id) +} + +func buildVersionNoName(kind coremodel.ResourceKind, resourceKey string, versionNo int64) string { + return fmt.Sprintf("%s-%s-version-%d", kind, extractName(resourceKey), versionNo) +} + +func buildParentIndexKey(kind coremodel.ResourceKind, resourceKey string) string { + mesh := extractMesh(resourceKey) + name := extractName(resourceKey) + return fmt.Sprintf("%s/%s/%s", kind, mesh, name) +} + +func extractMesh(resourceKey string) string { + // resourceKey format: "mesh/name" or just "name" + for i := 0; i < len(resourceKey); i++ { + if resourceKey[i] == '/' { + return resourceKey[:i] + } + } + return "" +} + +func extractName(resourceKey string) string { + // resourceKey format: "mesh/name" or just "name" + for i := 0; i < len(resourceKey); i++ { + if resourceKey[i] == '/' { + return resourceKey[i+1:] + } + } + return resourceKey +} + +func extractIDFromName(name string) (int64, error) { + idx := strings.LastIndex(name, "-") + if idx == -1 || idx == len(name)-1 { + return 0, fmt.Errorf("invalid version name format: %s", name) + } + id, err := strconv.ParseInt(name[idx+1:], 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid version name format: %s", name) + } + return id, nil +} + +func versionIDFromResource(rv *meshresource.RuleVersionResource) (int64, error) { + if rv == nil { + return 0, fmt.Errorf("RuleVersion 
resource is nil") + } + if rv.Annotations != nil { + if raw := rv.Annotations[ruleVersionIDAnnotation]; raw != "" { + id, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid RuleVersion id annotation for %s: %w", rv.Name, err) + } + return id, nil + } + } + return extractIDFromName(rv.Name) +} + +func protoToVersion(spec *meshproto.RuleVersion, id int64) (*Version, error) { + if spec == nil { + return nil, bizerror.New(bizerror.InvalidArgument, "RuleVersion spec is nil") + } + + var rolledBackFromID *int64 + if spec.RolledBackFromId != 0 { + v := spec.RolledBackFromId + rolledBackFromID = &v + } + + createdAt := timestampAsTime(spec.CreatedAt) + committedAt := timestampAsTime(spec.CommittedAt) + if committedAt.IsZero() { + committedAt = createdAt + } + + return &Version{ + ID: id, + RuleKind: coremodel.ResourceKind(spec.ParentRuleKind), + Mesh: spec.ParentRuleMesh, + ResourceKey: coremodel.BuildResourceKey(spec.ParentRuleMesh, spec.ParentRuleName), + RuleName: spec.ParentRuleName, + VersionNo: spec.VersionNo, + ContentHash: spec.ContentHash, + SpecJSON: spec.SpecJson, + Operation: Operation(spec.Operation), + Source: Source(spec.Source), + Author: spec.Author, + Reason: spec.Reason, + IntentID: spec.IntentId, + RolledBackFromID: rolledBackFromID, + CreatedAt: createdAt, + CommittedAt: committedAt, + IsCurrent: false, + }, nil +} + +func buildIntentName(kind coremodel.ResourceKind, resourceKey string, id int64) string { + return fmt.Sprintf("%s-%s-intent-%d", kind, extractName(resourceKey), id) +} + +func extractIDFromIntentName(name string) (int64, error) { + id, err := extractIDFromName(name) + if err != nil { + return 0, err + } + return id, nil +} + +func intentFromResource(res *meshresource.RuleIntentResource, id int64) *Intent { + spec := res.Spec + + var rolledBackFromID *int64 + if spec.RolledBackFromId != 0 { + v := spec.RolledBackFromId + rolledBackFromID = &v + } + + createdAt := timestampAsTime(spec.CreatedAt) + 
observedAt := timestampAsTime(spec.ObservedAt) + + return &Intent{ + ID: id, + RuleKind: coremodel.ResourceKind(spec.ParentRuleKind), + Mesh: spec.ParentRuleMesh, + ResourceKey: coremodel.BuildResourceKey(spec.ParentRuleMesh, spec.ParentRuleName), + RuleName: spec.ParentRuleName, + ContentHash: spec.ContentHash, + SpecJSON: spec.SpecJson, + Operation: Operation(spec.Operation), + Source: Source(spec.Source), + Author: spec.Author, + Reason: spec.Reason, + RolledBackFromID: rolledBackFromID, + Status: IntentStatus(spec.Status), + LastError: spec.FailureReason, + ReconcileRequired: spec.ReconcileRequired, + ObservedContentHash: spec.ObservedContentHash, + ObservedSpecJSON: spec.ObservedSpecJson, + ObservedOperation: Operation(spec.ObservedOperation), + ObservedAt: observedAt, + Revision: spec.Revision, + CreatedAt: createdAt, + } +} + +func ledgerSnapshotFromState(state *ledgerState) *LedgerSnapshot { + if state == nil { + return &LedgerSnapshot{} + } + snapshot := &LedgerSnapshot{ + Versions: append([]Version(nil), state.Versions...), + } + if len(snapshot.Versions) == 0 { + return snapshot + } + head := snapshot.Versions[0] + snapshot.Head = &head + snapshot.Deleted = head.Operation == OperationDelete + if !snapshot.Deleted { + for i := range snapshot.Versions { + snapshot.Versions[i].IsCurrent = snapshot.Versions[i].ID == head.ID + } + } + return snapshot +} + +func duplicateVersionNoError(kind coremodel.ResourceKind, resourceKey string, versionNo, firstID, secondID int64) error { + return fmt.Errorf("%w: duplicate version number for kind=%s mesh=%s rule=%s versionNo=%d conflictingVersionIDs=%d,%d", + ErrVersionLedgerCorrupt, + kind, + extractMesh(resourceKey), + extractName(resourceKey), + versionNo, + firstID, + secondID, + ) +} + +func timestampAsTime(ts *timestamppb.Timestamp) time.Time { + if ts == nil { + return time.Time{} + } + return ts.AsTime() +} diff --git a/pkg/core/versioning/resource_store_intent.go b/pkg/core/versioning/resource_store_intent.go new 
file mode 100644 index 000000000..fb4ff034c --- /dev/null +++ b/pkg/core/versioning/resource_store_intent.go @@ -0,0 +1,766 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "context" + "errors" + "fmt" + "strconv" + "strings" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/core/lock" + "github.com/apache/dubbo-admin/pkg/core/logger" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/store" + "github.com/apache/dubbo-admin/pkg/core/store/index" +) + +func (a *ResourceStoreAdapter) CreateIntent(ctx context.Context, req InsertRequest) (*Intent, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + var intent *Intent + err := a.withParentLock(req.RuleKind, req.ResourceKey, func() error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + var inner error + intent, inner = a.createIntentLocked(ctx, req) + return inner + }) + return intent, err +} + +func (a *ResourceStoreAdapter) 
createIntentLocked(ctx context.Context, req InsertRequest) (*Intent, error) { + open, err := a.OpenIntent(req.RuleKind, req.ResourceKey) + if err != nil { + return nil, err + } + if open != nil { + return nil, &IntentPendingError{IntentID: open.ID} + } + + var intentRes *meshresource.RuleIntentResource + var id int64 + var addErr error + for attempt := 0; attempt < maxIDGenerateAttempts; attempt++ { + generated, err := a.idGenerator.Next() + if err != nil { + return nil, err + } + id = generated + if _, _, err := a.getIntentResourceByID(id); err == nil { + addErr = store.ErrorResourceAlreadyExists(meshresource.RuleIntentKind.ToString(), buildIntentName(req.RuleKind, req.ResourceKey, id), req.Mesh) + continue + } else if !errors.Is(err, ErrVersionIntentNotFound) { + return nil, err + } + if _, err := a.getVersionResourceByGlobalID(id); err == nil { + addErr = store.ErrorResourceAlreadyExists(meshresource.RuleIntentKind.ToString(), buildIntentName(req.RuleKind, req.ResourceKey, id), req.Mesh) + continue + } else if !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + intentRes = newRuleIntentResource(req, id) + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + addErr = a.intentStore.Add(intentRes) + if addErr == nil { + break + } + if !isAddConflict(addErr) { + return nil, addErr + } + } + if addErr != nil { + return nil, fmt.Errorf("failed to allocate unique rule intent id after %d attempts: %w", maxIDGenerateAttempts, addErr) + } + + return intentFromResource(intentRes, id), nil +} + +func (a *ResourceStoreAdapter) GetIntent(id int64) (*Intent, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + intentRes, parsedID, err := a.getIntentResourceByID(id) + if err != nil { + return nil, err + } + return intentFromResource(intentRes, parsedID), nil +} + +func (a *ResourceStoreAdapter) OpenIntent(kind coremodel.ResourceKind, resourceKey string) (*Intent, error) { + if err := a.ensureStores(); err != nil { + return nil, 
err + } + intents, err := a.openIntentResources(kind, resourceKey) + if err != nil { + return nil, err + } + switch len(intents) { + case 0: + return nil, nil + case 1: + id, err := extractIDFromIntentName(intents[0].Name) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + return intentFromResource(intents[0], id), nil + default: + return nil, a.multipleOpenIntentsError(kind, resourceKey, intents) + } +} + +func (a *ResourceStoreAdapter) MarkIntentApplied(ctx context.Context, id int64) error { + if err := a.ensureStores(); err != nil { + return err + } + return a.updateIntentStatus(ctx, id, IntentStatusApplied, "") +} + +func (a *ResourceStoreAdapter) MarkIntentOutcomeUnknown(ctx context.Context, id int64, message string) error { + if err := a.ensureStores(); err != nil { + return err + } + intentRes, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + return a.withParentLock(coremodel.ResourceKind(intentRes.Spec.ParentRuleKind), intentResourceKey(intentRes), func() error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + fresh, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + return updateIntentResourceStatus(a.intentStore, fresh, IntentStatusOutcomeUnknown, message) + }) +} + +func (a *ResourceStoreAdapter) MarkIntentObserved(ctx context.Context, id int64, op Operation, contentHash, specJSON string) error { + if err := a.ensureStores(); err != nil { + return err + } + if ctx == nil { + return context.Canceled + } + intentRes, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + return a.withParentLock(coremodel.ResourceKind(intentRes.Spec.ParentRuleKind), intentResourceKey(intentRes), func() error { + var lastConflict error + for attempt := 0; attempt < maxIntentCASRetries; attempt++ { + if err := lock.CheckLease(ctx); err != nil { + return err + } + fresh, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + err = 
updateIntentResourceObserved(a.intentStore, fresh, op, contentHash, specJSON) + if errors.Is(err, ErrVersionIntentConflict) { + lastConflict = err + continue + } + return err + } + if lastConflict != nil { + return lastConflict + } + return ErrVersionIntentConflict + }) +} + +func (a *ResourceStoreAdapter) MarkIntentFailed(ctx context.Context, id int64, message string) error { + if err := a.ensureStores(); err != nil { + return err + } + if err := a.updateIntentStatus(ctx, id, IntentStatusFailed, message); err != nil { + return err + } + return a.cleanupIntent(id, IntentStatusFailed) +} + +func (a *ResourceStoreAdapter) CommitIntent(ctx context.Context, id int64, maxVersions int64) (*Version, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + intentRes, _, err := a.getIntentResourceByID(id) + if err != nil { + return nil, err + } + var version *Version + err = a.withParentLock(coremodel.ResourceKind(intentRes.Spec.ParentRuleKind), intentResourceKey(intentRes), func() error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + freshRes, parsedID, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + intent := intentFromResource(freshRes, parsedID) + switch intent.Status { + case IntentStatusCommitted: + committed, err := a.committedVersionForIntentLocked(intent) + if err != nil { + return err + } + version = committed + return a.cleanupIntentLocked(id, IntentStatusCommitted) + case IntentStatusApplied: + if intent.ReconcileRequired { + return &IntentPendingError{IntentID: intent.ID} + } + case IntentStatusCommitting: + committed, err := a.insertVersionLocked(ctx, InsertRequest{ + RuleKind: intent.RuleKind, + Mesh: intent.Mesh, + ResourceKey: intent.ResourceKey, + RuleName: intent.RuleName, + SpecJSON: intent.SpecJSON, + ContentHash: intent.ContentHash, + Source: intent.Source, + Operation: intent.Operation, + Author: intent.Author, + Reason: 
intent.Reason, + IntentID: intent.ID, + RolledBackFromID: intent.RolledBackFromID, + CreatedAt: intent.CreatedAt, + FixedVersionID: &intent.ID, + }, maxVersions) + if err != nil { + return err + } + if err := lock.CheckLease(ctx); err != nil { + return err + } + if intent.ReconcileRequired { + if _, err := a.insertObservedSuccessorLocked(ctx, intent, maxVersions); err != nil { + return err + } + if err := lock.CheckLease(ctx); err != nil { + return err + } + } + refreshed, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + if err := updateIntentResourceStatus(a.intentStore, refreshed, IntentStatusCommitted, ""); err != nil { + return err + } + if err := a.cleanupIntentLocked(id, IntentStatusCommitted); err != nil { + return err + } + version = committed + return nil + default: + return ErrVersionIntentNotOpen + } + + // CommitIntent first wins a storage-level CAS from APPLIED to + // COMMITTING. That CAS is the ownership boundary: a subscriber dirty + // marker racing from the same revision can win instead, but both cannot. + if err := updateIntentResourceStatus(a.intentStore, freshRes, IntentStatusCommitting, ""); err != nil { + if errors.Is(err, ErrVersionIntentConflict) { + return &IntentPendingError{IntentID: intent.ID} + } + return err + } + if err := lock.CheckLease(ctx); err != nil { + return err + } + + // Reuse the intent ID as the version resource ID so a retry after the + // COMMITTING CAS can validate and finish the existing append instead of + // creating another ledger entry. 
+ committed, err := a.insertVersionLocked(ctx, InsertRequest{ + RuleKind: intent.RuleKind, + Mesh: intent.Mesh, + ResourceKey: intent.ResourceKey, + RuleName: intent.RuleName, + SpecJSON: intent.SpecJSON, + ContentHash: intent.ContentHash, + Source: intent.Source, + Operation: intent.Operation, + Author: intent.Author, + Reason: intent.Reason, + IntentID: intent.ID, + RolledBackFromID: intent.RolledBackFromID, + CreatedAt: intent.CreatedAt, + FixedVersionID: &intent.ID, + }, maxVersions) + if err != nil { + return err + } + + if err := lock.CheckLease(ctx); err != nil { + return err + } + if intent.ReconcileRequired { + if _, err := a.insertObservedSuccessorLocked(ctx, intent, maxVersions); err != nil { + return err + } + if err := lock.CheckLease(ctx); err != nil { + return err + } + } + refreshed, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + if err := updateIntentResourceStatus(a.intentStore, refreshed, IntentStatusCommitted, ""); err != nil { + return err + } + if err := a.cleanupIntentLocked(id, IntentStatusCommitted); err != nil { + return err + } + version = committed + return nil + }) + return version, err +} + +func (a *ResourceStoreAdapter) insertObservedSuccessorLocked(ctx context.Context, intent *Intent, maxVersions int64) (*Version, error) { + if intent == nil || !intent.ReconcileRequired || intent.ObservedContentHash == "" { + return nil, nil + } + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + latest, err := a.latestVersionLocked(intent.RuleKind, intent.ResourceKey) + if err != nil && !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + if latest != nil { + if latest.Operation == OperationDelete && intent.ObservedOperation == OperationDelete { + return latest, nil + } + if latest.Operation != OperationDelete && + intent.ObservedOperation != OperationDelete && + latest.ContentHash == intent.ObservedContentHash { + return latest, nil + } + } + operation := intent.ObservedOperation + if 
operation == "" { + operation = OperationUpdate + } + if latest == nil && operation != OperationDelete { + operation = OperationCreate + } else if latest != nil && latest.Operation == OperationDelete && operation != OperationDelete { + operation = OperationCreate + } + // A non-matching event observed while the intent was open represents the + // actual upstream successor. Record it after the owned intent version so the + // ledger reflects both facts in order. + return a.insertVersionLocked(ctx, InsertRequest{ + RuleKind: intent.RuleKind, + Mesh: intent.Mesh, + ResourceKey: intent.ResourceKey, + RuleName: intent.RuleName, + SpecJSON: intent.ObservedSpecJSON, + ContentHash: intent.ObservedContentHash, + Operation: operation, + Source: SourceUpstream, + Author: "system:reconcile", + CreatedAt: time.Now(), + }, maxVersions) +} + +func (a *ResourceStoreAdapter) CleanupIntent(id int64, terminalStatus IntentStatus) error { + if err := a.ensureStores(); err != nil { + return err + } + return a.cleanupIntent(id, terminalStatus) +} + +func (a *ResourceStoreAdapter) ListOpenIntents() ([]Intent, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + return a.listIntentsByStatuses(openIntentStatuses()) +} + +func (a *ResourceStoreAdapter) ListTerminalIntents() ([]Intent, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + return a.listIntentsByStatuses(terminalIntentStatuses()) +} + +func (a *ResourceStoreAdapter) listIntentsByStatuses(statuses []IntentStatus) ([]Intent, error) { + var result []Intent + for _, status := range statuses { + objects, err := a.intentStore.ByIndex(index.ByRuleIntentStatusIndexName, string(status)) + if err != nil { + return nil, err + } + for _, obj := range objects { + intentRes, ok := obj.(*meshresource.RuleIntentResource) + if !ok { + return nil, fmt.Errorf("%w: expected RuleIntentResource, got %T", ErrVersionLedgerCorrupt, obj) + } + if intentRes.Spec == nil { + return nil, fmt.Errorf("%w: 
RuleIntent spec is nil for %s", ErrVersionLedgerCorrupt, intentRes.Name) + } + id, err := extractIDFromIntentName(intentRes.Name) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + result = append(result, *intentFromResource(intentRes, id)) + } + } + + return result, nil +} + +func isOpenIntentStatus(status IntentStatus) bool { + switch status { + case IntentStatusPending, IntentStatusApplied, IntentStatusOutcomeUnknown, IntentStatusCommitting: + return true + default: + return false + } +} + +func openIntentStatuses() []IntentStatus { + return []IntentStatus{IntentStatusPending, IntentStatusApplied, IntentStatusOutcomeUnknown, IntentStatusCommitting} +} + +func terminalIntentStatuses() []IntentStatus { + return []IntentStatus{IntentStatusCommitted, IntentStatusFailed} +} + +func (a *ResourceStoreAdapter) getIntentResourceByID(id int64) (*meshresource.RuleIntentResource, int64, error) { + objects, err := a.intentStore.ByIndex(index.ByRuleIntentIDIndexName, strconv.FormatInt(id, 10)) + if err != nil { + return nil, 0, err + } + switch len(objects) { + case 0: + return nil, 0, ErrVersionIntentNotFound + case 1: + intentRes, ok := objects[0].(*meshresource.RuleIntentResource) + if !ok { + return nil, 0, fmt.Errorf("%w: expected RuleIntentResource, got %T", ErrVersionLedgerCorrupt, objects[0]) + } + if intentRes.Spec == nil { + return nil, 0, fmt.Errorf("%w: RuleIntent spec is nil for id %d", ErrVersionLedgerCorrupt, id) + } + parsedID, err := extractIDFromIntentName(intentRes.Name) + if err != nil { + return nil, 0, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + if parsedID != id { + return nil, 0, fmt.Errorf("%w: RuleIntent id index mismatch: requested %d, got %d", ErrVersionLedgerCorrupt, id, parsedID) + } + return intentRes, parsedID, nil + default: + return nil, 0, fmt.Errorf("%w: multiple RuleIntent resources indexed by id %d", ErrVersionLedgerCorrupt, id) + } +} + +func (a *ResourceStoreAdapter) 
openIntentResources(kind coremodel.ResourceKind, resourceKey string) ([]*meshresource.RuleIntentResource, error) { + mesh := extractMesh(resourceKey) + name := extractName(resourceKey) + var intents []*meshresource.RuleIntentResource + for _, status := range openIntentStatuses() { + indexKey := fmt.Sprintf("%s/%s/%s/%s", kind, mesh, name, status) + objects, err := a.intentStore.ByIndex(index.ByRuleIntentParentAndStatus, indexKey) + if err != nil { + return nil, err + } + for _, obj := range objects { + intentRes, ok := obj.(*meshresource.RuleIntentResource) + if !ok { + return nil, fmt.Errorf("%w: expected RuleIntentResource, got %T", ErrVersionLedgerCorrupt, obj) + } + if intentRes.Spec == nil { + return nil, fmt.Errorf("%w: RuleIntent spec is nil for %s", ErrVersionLedgerCorrupt, intentRes.Name) + } + if isOpenIntentStatus(IntentStatus(intentRes.Spec.Status)) { + intents = append(intents, intentRes) + } + } + } + return intents, nil +} + +func (a *ResourceStoreAdapter) updateIntentStatus(ctx context.Context, id int64, status IntentStatus, failureReason string) error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + intentRes, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + return a.withParentLock(coremodel.ResourceKind(intentRes.Spec.ParentRuleKind), intentResourceKey(intentRes), func() error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + fresh, _, err := a.getIntentResourceByID(id) + if err != nil { + return err + } + if err := lock.CheckLease(ctx); err != nil { + return err + } + return updateIntentResourceStatus(a.intentStore, fresh, status, failureReason) + }) +} + +func updateIntentResourceStatus(intentStore store.ResourceStore, intentRes *meshresource.RuleIntentResource, status IntentStatus, failureReason string) error { + currentStatus := IntentStatus(intentRes.Spec.Status) + switch status { + case IntentStatusApplied: + if currentStatus == IntentStatusCommitted { + return nil + } + if 
currentStatus != IntentStatusPending && + currentStatus != IntentStatusApplied && + currentStatus != IntentStatusOutcomeUnknown { + return ErrVersionIntentNotOpen + } + case IntentStatusOutcomeUnknown: + if currentStatus == IntentStatusCommitted { + return ErrVersionIntentNotOpen + } + if currentStatus == IntentStatusCommitting || !isOpenIntentStatus(currentStatus) { + return ErrVersionIntentNotOpen + } + case IntentStatusCommitting: + if currentStatus == IntentStatusCommitting { + return nil + } + if currentStatus != IntentStatusApplied || intentRes.Spec.ReconcileRequired { + return ErrVersionIntentNotOpen + } + case IntentStatusCommitted: + if currentStatus == IntentStatusCommitted { + return nil + } + if currentStatus != IntentStatusCommitting { + return ErrVersionIntentNotOpen + } + case IntentStatusFailed: + if currentStatus == IntentStatusCommitted { + return ErrVersionIntentNotOpen + } + } + + updated, err := prepareIntentUpdate(intentRes) + if err != nil { + return err + } + updated.Spec.Status = string(status) + if failureReason != "" { + updated.Spec.FailureReason = failureReason + } + + now := timestamppb.New(time.Now()) + switch status { + case IntentStatusApplied: + updated.Spec.AppliedAt = now + case IntentStatusCommitting: + updated.Spec.AppliedAt = now + case IntentStatusCommitted: + updated.Spec.CommittedAt = now + } + + changed, err := conditionalIntentUpdate(intentStore, intentRes, updated) + if err != nil { + return err + } + if !changed { + return ErrVersionIntentConflict + } + + return nil +} + +func updateIntentResourceObserved(intentStore store.ResourceStore, intentRes *meshresource.RuleIntentResource, op Operation, contentHash, specJSON string) error { + currentStatus := IntentStatus(intentRes.Spec.Status) + if !isOpenIntentStatus(currentStatus) { + return ErrVersionIntentNotOpen + } + + updated, err := prepareIntentUpdate(intentRes) + if err != nil { + return err + } + updated.Spec.ReconcileRequired = true + updated.Spec.ObservedOperation 
= string(op) + updated.Spec.ObservedContentHash = contentHash + updated.Spec.ObservedSpecJson = specJSON + updated.Spec.ObservedAt = timestamppb.New(time.Now()) + if currentStatus == IntentStatusPending { + updated.Spec.Status = string(IntentStatusOutcomeUnknown) + } + changed, err := conditionalIntentUpdate(intentStore, intentRes, updated) + if err != nil { + return err + } + if !changed { + return ErrVersionIntentConflict + } + return nil +} + +func prepareIntentUpdate(intentRes *meshresource.RuleIntentResource) (*meshresource.RuleIntentResource, error) { + if intentRes == nil || intentRes.Spec == nil { + return nil, ErrVersionLedgerCorrupt + } + updated := intentRes.DeepCopyObject().(*meshresource.RuleIntentResource) + updated.Spec.Revision++ + return updated, nil +} + +func conditionalIntentUpdate(intentStore store.ResourceStore, expected *meshresource.RuleIntentResource, updated *meshresource.RuleIntentResource) (bool, error) { + cas, ok := intentStore.(store.ConditionalResourceStore) + if !ok { + return false, fmt.Errorf("%w: RuleIntent store must support conditional updates", ErrVersionLedgerCorrupt) + } + var lastErr error + for attempt := 0; attempt < maxIntentCASRetries; attempt++ { + changed, err := cas.UpdateIfUnchanged(expected, updated) + if err == nil || !errors.Is(err, store.ErrResourceStoreTransient) { + return changed, err + } + lastErr = err + } + return false, lastErr +} + +func intentResourceKey(intentRes *meshresource.RuleIntentResource) string { + if intentRes == nil || intentRes.Spec == nil { + return "" + } + return coremodel.BuildResourceKey(intentRes.Spec.ParentRuleMesh, intentRes.Spec.ParentRuleName) +} + +func (a *ResourceStoreAdapter) cleanupIntent(id int64, terminalStatus IntentStatus) error { + intentRes, _, err := a.getIntentResourceByID(id) + if errors.Is(err, ErrVersionIntentNotFound) { + return nil + } + if err != nil { + return fmt.Errorf("failed to read terminal rule version intent %d for cleanup: %w", id, err) + } + return 
a.withParentLock(coremodel.ResourceKind(intentRes.Spec.ParentRuleKind), intentResourceKey(intentRes), func() error { + return a.cleanupIntentLocked(id, terminalStatus) + }) +} + +func (a *ResourceStoreAdapter) cleanupIntentLocked(id int64, terminalStatus IntentStatus) error { + intentRes, _, err := a.getIntentResourceByID(id) + if errors.Is(err, ErrVersionIntentNotFound) { + return nil + } + if err != nil { + return fmt.Errorf("failed to read terminal rule version intent %d for cleanup: %w", id, err) + } + if intentRes.Spec == nil || IntentStatus(intentRes.Spec.Status) != terminalStatus { + return nil + } + if terminalStatus == IntentStatusCommitted { + intent := intentFromResource(intentRes, id) + if _, err := a.committedVersionForIntentLocked(intent); err != nil { + return err + } + } + if err := a.intentStore.Delete(intentRes); err != nil { + return fmt.Errorf("failed to cleanup terminal rule version intent %d: %w", id, err) + } + return nil +} + +func (a *ResourceStoreAdapter) committedVersionForIntentLocked(intent *Intent) (*Version, error) { + if intent == nil { + return nil, ErrVersionIntentNotFound + } + versions, err := a.ledgerState(intent.RuleKind, intent.ResourceKey) + if err != nil { + return nil, err + } + var found *Version + for i := range versions.Versions { + if versions.Versions[i].IntentID != intent.ID { + continue + } + if found != nil && found.ID != versions.Versions[i].ID { + return nil, fmt.Errorf("%w: multiple RuleVersion resources committed for intent %d", ErrVersionLedgerCorrupt, intent.ID) + } + v := versions.Versions[i] + found = &v + } + if found == nil { + return nil, ErrVersionNotFound + } + return validateCommittedIntentVersion(found, intent) +} + +func (a *ResourceStoreAdapter) multipleOpenIntentsError(kind coremodel.ResourceKind, resourceKey string, intents []*meshresource.RuleIntentResource) error { + ids := make([]string, 0, len(intents)) + for _, intentRes := range intents { + id, err := extractIDFromIntentName(intentRes.Name) 
+ if err != nil { + ids = append(ids, fmt.Sprintf("%s(parse-error:%v)", intentRes.Name, err)) + continue + } + ids = append(ids, strconv.FormatInt(id, 10)) + } + msg := fmt.Sprintf("multiple open intents for parentKind=%s resourceKey=%s intentIDs=%s", kind, resourceKey, strings.Join(ids, ",")) + logger.Warnf("%s", msg) + return fmt.Errorf("%w: %s", ErrVersionLedgerCorrupt, msg) +} + +func newRuleIntentResource(req InsertRequest, id int64) *meshresource.RuleIntentResource { + intentRes := meshresource.NewRuleIntentResourceWithAttributes(buildIntentName(req.RuleKind, req.ResourceKey, id), req.Mesh) + intentRes.Spec = &meshproto.RuleIntent{ + ParentRuleKind: string(req.RuleKind), + ParentRuleMesh: req.Mesh, + ParentRuleName: req.RuleName, + ContentHash: req.ContentHash, + SpecJson: req.SpecJSON, + Operation: string(req.Operation), + Source: string(req.Source), + Author: req.Author, + Reason: req.Reason, + Status: string(IntentStatusPending), + CreatedAt: timestamppb.New(req.CreatedAt), + Revision: 1, + } + if req.RolledBackFromID != nil { + intentRes.Spec.RolledBackFromId = *req.RolledBackFromID + } + return intentRes +} diff --git a/pkg/core/versioning/resource_store_version.go b/pkg/core/versioning/resource_store_version.go new file mode 100644 index 000000000..66bb4396b --- /dev/null +++ b/pkg/core/versioning/resource_store_version.go @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "context" + "errors" + "fmt" + "sort" + "strconv" + "time" + + "google.golang.org/protobuf/types/known/timestamppb" + + meshproto "github.com/apache/dubbo-admin/api/mesh/v1alpha1" + "github.com/apache/dubbo-admin/pkg/common/bizerror" + "github.com/apache/dubbo-admin/pkg/core/lock" + "github.com/apache/dubbo-admin/pkg/core/logger" + meshresource "github.com/apache/dubbo-admin/pkg/core/resource/apis/mesh/v1alpha1" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" + "github.com/apache/dubbo-admin/pkg/core/store" + "github.com/apache/dubbo-admin/pkg/core/store/index" +) + +func (a *ResourceStoreAdapter) GetVersion(kind coremodel.ResourceKind, resourceKey string, id int64) (*Version, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + rv, err := a.getVersionResourceForRule(kind, resourceKey, id) + if err != nil { + return nil, err + } + return protoToVersion(rv.Spec, id) +} + +func (a *ResourceStoreAdapter) ListVersions(kind coremodel.ResourceKind, resourceKey string) ([]Version, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + snapshot, err := a.LedgerSnapshot(kind, resourceKey) + if err != nil { + return nil, err + } + return snapshot.Versions, nil +} + +func (a *ResourceStoreAdapter) ListLatestVersions(kind coremodel.ResourceKind) ([]Version, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + keys := a.versionStore.ListKeys() + objs, err := a.versionStore.GetByKeys(keys) + if err != nil { + return nil, err + } + byParent 
:= make(map[string][]Version) + for _, obj := range objs { + rv, ok := obj.(*meshresource.RuleVersionResource) + if !ok { + return nil, fmt.Errorf("%w: expected RuleVersionResource, got %T", ErrVersionLedgerCorrupt, obj) + } + if rv.Spec == nil { + return nil, fmt.Errorf("%w: RuleVersion spec is nil for %s", ErrVersionLedgerCorrupt, rv.ResourceKey()) + } + if rv.Spec.ParentRuleKind != string(kind) { + continue + } + id, err := versionIDFromResource(rv) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + v, err := protoToVersion(rv.Spec, id) + if err != nil { + return nil, err + } + byParent[v.ResourceKey] = append(byParent[v.ResourceKey], *v) + } + + latest := make([]Version, 0, len(byParent)) + for resourceKey, versions := range byParent { + seenVersionNo := make(map[int64]int64, len(versions)) + for _, version := range versions { + if previousID, ok := seenVersionNo[version.VersionNo]; ok && previousID != version.ID { + return nil, duplicateVersionNoError(kind, resourceKey, version.VersionNo, previousID, version.ID) + } + seenVersionNo[version.VersionNo] = version.ID + } + sort.Slice(versions, func(i, j int) bool { + return versions[i].VersionNo > versions[j].VersionNo + }) + if len(versions) > 0 { + latest = append(latest, versions[0]) + } + } + sort.Slice(latest, func(i, j int) bool { + if latest[i].ResourceKey == latest[j].ResourceKey { + return latest[i].VersionNo > latest[j].VersionNo + } + return latest[i].ResourceKey < latest[j].ResourceKey + }) + return latest, nil +} + +func (a *ResourceStoreAdapter) LedgerSnapshot(kind coremodel.ResourceKind, resourceKey string) (*LedgerSnapshot, error) { + if err := a.ensureStores(); err != nil { + return nil, err + } + var snapshot *LedgerSnapshot + err := a.withParentLock(kind, resourceKey, func() error { + state, err := a.ledgerState(kind, resourceKey) + if err != nil { + return err + } + snapshot = ledgerSnapshotFromState(state) + return nil + }) + return snapshot, err +} + 
+func (a *ResourceStoreAdapter) latestVersionLocked(kind coremodel.ResourceKind, resourceKey string) (*Version, error) { + state, err := a.ledgerState(kind, resourceKey) + if err != nil { + return nil, err + } + if state.Latest == nil { + return nil, ErrVersionNotFound + } + return state.Latest, nil +} + +func (a *ResourceStoreAdapter) ledgerState(kind coremodel.ResourceKind, resourceKey string) (*ledgerState, error) { + parentKey := buildParentIndexKey(kind, resourceKey) + objs, err := a.versionStore.ByIndex(index.ByParentRuleIndexName, parentKey) + if err != nil { + return nil, err + } + + versions := make([]Version, 0, len(objs)) + for _, obj := range objs { + rv, ok := obj.(*meshresource.RuleVersionResource) + if !ok { + return nil, fmt.Errorf("%w: expected RuleVersionResource, got %T", ErrVersionLedgerCorrupt, obj) + } + if rv.Spec == nil { + return nil, fmt.Errorf("%w: RuleVersion spec is nil for parent %s", ErrVersionLedgerCorrupt, parentKey) + } + id, err := versionIDFromResource(rv) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + v, err := protoToVersion(rv.Spec, id) + if err != nil { + return nil, err + } + versions = append(versions, *v) + } + + seenVersionNo := make(map[int64]int64, len(versions)) + for _, version := range versions { + if previousID, ok := seenVersionNo[version.VersionNo]; ok && previousID != version.ID { + return nil, duplicateVersionNoError(kind, resourceKey, version.VersionNo, previousID, version.ID) + } + seenVersionNo[version.VersionNo] = version.ID + } + + sort.Slice(versions, func(i, j int) bool { + return versions[i].VersionNo > versions[j].VersionNo + }) + + state := &ledgerState{Versions: versions} + if len(versions) > 0 { + state.Latest = &versions[0] + state.MaxVersionNo = versions[0].VersionNo + } + return state, nil +} + +func (a *ResourceStoreAdapter) InsertVersion(ctx context.Context, req InsertRequest, maxVersions int64) (*Version, error) { + if err := a.ensureStores(); err != 
nil { + return nil, err + } + var version *Version + err := a.withParentLock(req.RuleKind, req.ResourceKey, func() error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + var inner error + version, inner = a.insertVersionLocked(ctx, req, maxVersions) + return inner + }) + return version, err +} + +func (a *ResourceStoreAdapter) insertVersionLocked(ctx context.Context, req InsertRequest, maxVersions int64) (*Version, error) { + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + state, err := a.ledgerState(req.RuleKind, req.ResourceKey) + if err != nil { + return nil, err + } + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + + versionNo := state.MaxVersionNo + 1 + + createdAt := req.CreatedAt + if createdAt.IsZero() { + createdAt = time.Now() + } + committedAt := time.Now() + + versionExists := false + var rv *meshresource.RuleVersionResource + var id int64 + if req.IntentID != 0 { + existing, existingID, err := a.getVersionResourceByIntentID(req.IntentID) + if err == nil { + if validateErr := validateExistingVersionForRequest(existing, existingID, req); validateErr != nil { + return nil, validateErr + } + rv = existing + id = existingID + versionNo = existing.Spec.VersionNo + versionExists = true + } else if !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + } + if req.FixedVersionID != nil { + if *req.FixedVersionID <= 0 { + return nil, bizerror.New(bizerror.InvalidArgument, "fixed version ID must be positive") + } + id = *req.FixedVersionID + existing, err := a.getVersionResourceByGlobalID(id) + if err == nil { + if validateErr := validateExistingVersionForRequest(existing, id, req); validateErr != nil { + return nil, validateErr + } + rv = existing + versionNo = existing.Spec.VersionNo + versionExists = true + } else if !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + } + + if !versionExists { + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + + attempts := 
maxIDGenerateAttempts + var addErr error + for attempt := 0; attempt < attempts; attempt++ { + if req.FixedVersionID == nil { + generated, err := a.idGenerator.Next() + if err != nil { + return nil, err + } + id = generated + if _, err := a.getVersionResourceByGlobalID(id); err == nil { + addErr = store.ErrorResourceAlreadyExists(meshresource.RuleVersionKind.ToString(), buildVersionName(req.RuleKind, req.ResourceKey, id), extractMesh(req.ResourceKey)) + continue + } else if !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + } + rv = newRuleVersionResource(req, id, versionNo, createdAt, committedAt) + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + addErr = a.versionStore.Add(rv) + if addErr == nil { + break + } + if req.FixedVersionID != nil { + existing, getErr := a.getVersionResourceByGlobalID(id) + if getErr == nil { + if validateErr := validateExistingVersionForRequest(existing, id, req); validateErr != nil { + return nil, validateErr + } + rv = existing + addErr = nil + break + } + if !errors.Is(getErr, ErrVersionNotFound) { + return nil, fmt.Errorf("failed to add version resource with fixed id %d: %w", id, addErr) + } + } + if !isAddConflict(addErr) { + return nil, fmt.Errorf("failed to add version resource id=%d: %w", id, addErr) + } + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + state, err = a.ledgerState(req.RuleKind, req.ResourceKey) + if err != nil { + return nil, err + } + if state.MaxVersionNo+1 <= versionNo { + return nil, fmt.Errorf("failed to allocate unique rule version number %d: %w", versionNo, addErr) + } + versionNo = state.MaxVersionNo + 1 + } + if addErr != nil { + return nil, fmt.Errorf("failed to allocate unique rule version id after %d attempts: %w", attempts, addErr) + } + } + + if err := lock.CheckLease(ctx); err != nil { + return nil, err + } + if maxVersions > 0 { + if err := a.trimVersionsLocked(ctx, req.RuleKind, req.ResourceKey, maxVersions); err != nil { + logger.Warnf("rule 
version retention cleanup failed for kind=%s resourceKey=%s committedVersion=%d: %v", req.RuleKind, req.ResourceKey, id, err) + } + } + + return protoToVersion(rv.Spec, id) +} + +func (a *ResourceStoreAdapter) trimVersionsLocked(ctx context.Context, kind coremodel.ResourceKind, resourceKey string, keep int64) error { + state, err := a.ledgerState(kind, resourceKey) + if err != nil { + return err + } + versions := state.Versions + if int64(len(versions)) <= keep { + return nil + } + + // Retention runs after the new version is durable and only removes entries + // beyond the configured window. Cleanup failure is reported to logs by the + // caller and does not roll back the already-committed mutation. + toDelete := versions[int(keep):] + for _, v := range toDelete { + if err := lock.CheckLease(ctx); err != nil { + return err + } + rv, err := a.getVersionResourceForRule(kind, resourceKey, v.ID) + if err != nil { + return err + } + if err := a.versionStore.Delete(rv); err != nil { + return err + } + } + + return nil +} + +func (a *ResourceStoreAdapter) getVersionResourceByGlobalID(id int64) (*meshresource.RuleVersionResource, error) { + objects, err := a.versionStore.ByIndex(index.ByRuleVersionIDIndexName, strconv.FormatInt(id, 10)) + if err != nil { + return nil, err + } + if len(objects) == 0 { + return nil, ErrVersionNotFound + } + if len(objects) > 1 { + return nil, fmt.Errorf("%w: multiple RuleVersion resources indexed by id %d", ErrVersionLedgerCorrupt, id) + } + rv, ok := objects[0].(*meshresource.RuleVersionResource) + if !ok { + return nil, fmt.Errorf("%w: expected RuleVersionResource, got %T", ErrVersionLedgerCorrupt, objects[0]) + } + if rv.Spec == nil { + return nil, fmt.Errorf("%w: RuleVersion spec is nil for id %d", ErrVersionLedgerCorrupt, id) + } + indexedID, err := versionIDFromResource(rv) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + if indexedID != id { + return nil, fmt.Errorf("%w: RuleVersion id index 
mismatch: requested %d, got %d", ErrVersionLedgerCorrupt, id, indexedID) + } + return rv, nil +} + +func (a *ResourceStoreAdapter) getVersionResourceForRule(kind coremodel.ResourceKind, resourceKey string, id int64) (*meshresource.RuleVersionResource, error) { + rv, err := a.getVersionResourceByGlobalID(id) + if err != nil { + return nil, err + } + if !versionResourceMatchesParent(rv, kind, resourceKey) { + return nil, ErrVersionNotFound + } + return rv, nil +} + +func versionResourceMatchesParent(rv *meshresource.RuleVersionResource, kind coremodel.ResourceKind, resourceKey string) bool { + return rv != nil && + rv.Spec != nil && + rv.Spec.ParentRuleKind == string(kind) && + rv.Spec.ParentRuleMesh == extractMesh(resourceKey) && + rv.Spec.ParentRuleName == extractName(resourceKey) +} + +func (a *ResourceStoreAdapter) getVersionResourceByIntentID(intentID int64) (*meshresource.RuleVersionResource, int64, error) { + objects, err := a.versionStore.ByIndex(index.ByRuleVersionIntentIDIndexName, strconv.FormatInt(intentID, 10)) + if err != nil { + return nil, 0, err + } + switch len(objects) { + case 0: + return nil, 0, ErrVersionNotFound + case 1: + rv, ok := objects[0].(*meshresource.RuleVersionResource) + if !ok { + return nil, 0, fmt.Errorf("%w: expected RuleVersionResource, got %T", ErrVersionLedgerCorrupt, objects[0]) + } + if rv.Spec == nil { + return nil, 0, fmt.Errorf("%w: RuleVersion spec is nil for intent %d", ErrVersionLedgerCorrupt, intentID) + } + id, err := versionIDFromResource(rv) + if err != nil { + return nil, 0, fmt.Errorf("%w: %v", ErrVersionLedgerCorrupt, err) + } + return rv, id, nil + default: + return nil, 0, fmt.Errorf("%w: multiple RuleVersion resources indexed by intent id %d", ErrVersionLedgerCorrupt, intentID) + } +} + +func validateExistingVersionForRequest(existing *meshresource.RuleVersionResource, existingID int64, req InsertRequest) error { + spec := existing.Spec + if req.FixedVersionID != nil && existingID != *req.FixedVersionID { + 
return fmt.Errorf("%w: RuleVersion intent id %d maps to version id %d, expected %d", ErrVersionLedgerCorrupt, req.IntentID, existingID, *req.FixedVersionID) + } + if spec.ParentRuleKind != string(req.RuleKind) || + spec.ParentRuleMesh != extractMesh(req.ResourceKey) || + spec.ParentRuleName != extractName(req.ResourceKey) || + spec.ContentHash != req.ContentHash || + spec.SpecJson != req.SpecJSON || + spec.Source != string(req.Source) || + spec.Operation != string(req.Operation) || + spec.Author != req.Author || + spec.Reason != req.Reason || + spec.IntentId != req.IntentID || + spec.RolledBackFromId != rolledBackFromIDValue(req.RolledBackFromID) { + return fmt.Errorf("%w: RuleVersion id %d already exists with different content", ErrVersionLedgerCorrupt, existingID) + } + if !req.CreatedAt.IsZero() && !timestampAsTime(spec.CreatedAt).Equal(req.CreatedAt) { + return fmt.Errorf("%w: RuleVersion id %d already exists with different content", ErrVersionLedgerCorrupt, existingID) + } + return nil +} + +func newRuleVersionResource(req InsertRequest, id, versionNo int64, createdAt, committedAt time.Time) *meshresource.RuleVersionResource { + rv := meshresource.NewRuleVersionResourceWithAttributes( + buildVersionNoName(req.RuleKind, req.ResourceKey, versionNo), + extractMesh(req.ResourceKey), + ) + rv.Annotations = map[string]string{ + ruleVersionIDAnnotation: strconv.FormatInt(id, 10), + } + rv.Spec = &meshproto.RuleVersion{ + ParentRuleKind: string(req.RuleKind), + ParentRuleMesh: extractMesh(req.ResourceKey), + ParentRuleName: extractName(req.ResourceKey), + VersionNo: versionNo, + ContentHash: req.ContentHash, + SpecJson: req.SpecJSON, + Operation: string(req.Operation), + Source: string(req.Source), + Author: req.Author, + Reason: req.Reason, + IntentId: req.IntentID, + CreatedAt: timestamppb.New(createdAt), + CommittedAt: timestamppb.New(committedAt), + } + if req.RolledBackFromID != nil { + rv.Spec.RolledBackFromId = *req.RolledBackFromID + } + return rv +} + +func 
isAddConflict(err error) bool { + var conflict *store.ResourceConflictError + return errors.As(err, &conflict) +} + +func rolledBackFromIDValue(id *int64) int64 { + if id == nil { + return 0 + } + return *id +} diff --git a/pkg/core/versioning/service.go b/pkg/core/versioning/service.go new file mode 100644 index 000000000..a4db962f8 --- /dev/null +++ b/pkg/core/versioning/service.go @@ -0,0 +1,598 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "context" + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + "github.com/apache/dubbo-admin/pkg/common/constants" + "github.com/apache/dubbo-admin/pkg/core/lock" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" +) + +// Service coordinates rule-version reads, mutation intents, repair, and +// rollback. Mutating calls require a lock lease in the context; callers choose +// the canonical per-rule lock before entering the service. 
+type Service struct { + maxVersions int64 + store Store +} + +func NewService(maxVersions int64, store Store) *Service { + return &Service{ + maxVersions: maxVersions, + store: store, + } +} + +func (s *Service) ensureEnabled() error { + if s == nil || s.store == nil { + return ErrVersionLedgerCorrupt + } + return nil +} + +func (s *Service) List(kind coremodel.ResourceKind, mesh, ruleName string) (*ListResult, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + resourceKey := coremodel.BuildResourceKey(mesh, ruleName) + snapshot, err := s.store.LedgerSnapshot(kind, resourceKey) + if err != nil { + return nil, err + } + result := &ListResult{Items: snapshot.Versions, Total: int64(len(snapshot.Versions)), Deleted: snapshot.Deleted} + if snapshot.Head != nil && !snapshot.Deleted { + currentID := snapshot.Head.ID + result.CurrentVersionID = &currentID + result.CurrentVersionNo = snapshot.Head.VersionNo + } + return result, nil +} + +func (s *Service) Get(kind coremodel.ResourceKind, mesh, ruleName string, id int64) (*Version, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + resourceKey := coremodel.BuildResourceKey(mesh, ruleName) + version, err := s.store.GetVersion(kind, resourceKey, id) + if err != nil { + return nil, err + } + snapshot, err := s.store.LedgerSnapshot(kind, resourceKey) + if err != nil { + return nil, err + } + if snapshot.Head != nil && !snapshot.Deleted { + version.IsCurrent = version.ID == snapshot.Head.ID + } + return version, nil +} + +func (s *Service) Diff(kind coremodel.ResourceKind, mesh, ruleName string, id int64, against string) (*DiffResult, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + left, err := s.Get(kind, mesh, ruleName, id) + if err != nil { + return nil, err + } + var right *Version + switch against { + case "", "current": + resourceKey := coremodel.BuildResourceKey(mesh, ruleName) + snapshot, err := s.store.LedgerSnapshot(kind, resourceKey) + if err
!= nil { + return nil, err + } + if snapshot.Head == nil { + return nil, ErrVersionNotFound + } + right = snapshot.Head + case "previous": + list, err := s.store.ListVersions(kind, coremodel.BuildResourceKey(mesh, ruleName)) + if err != nil { + return nil, err + } + for i := range list { + if list[i].ID != id { + continue + } + if i+1 >= len(list) { + return nil, ErrVersionNotFound + } + right = &list[i+1] + break + } + if right == nil { + return nil, ErrVersionNotFound + } + default: + var againstID int64 + if parsed, err := strconv.ParseInt(against, 10, 64); err != nil { + return nil, bizerror.New(bizerror.InvalidArgument, "against must be 'current', 'previous', or a version ID") + } else { + againstID = parsed + } + right, err = s.Get(kind, mesh, ruleName, againstID) + if err != nil { + return nil, err + } + } + return &DiffResult{ + Left: DiffSide{ID: left.ID, VersionNo: left.VersionNo, SpecJSON: left.SpecJSON}, + Right: DiffSide{ID: right.ID, VersionNo: right.VersionNo, SpecJSON: right.SpecJSON}, + }, nil +} + +// CheckExpected applies the UI-supplied expectedVersionId as a weak +// compare-and-set guard. It prevents a mutation from proceeding over a newer +// ledger entry, but it is not a transactional lock by itself. +func (s *Service) CheckExpected(kind coremodel.ResourceKind, mesh, ruleName string, expected *int64) error { + if err := s.ensureEnabled(); err != nil { + return err + } + resourceKey := coremodel.BuildResourceKey(mesh, ruleName) + // Check for open intents first before checking version mismatch. + // Why: If Writer A created an intent at T1, and Writer B checks expected + // version at T2 (before A's subscriber commits), the ledger head still + // reflects the old version. Without this guard, B would get VersionConflict + // instead of IntentPending, masking the real issue (concurrent write). 
+ intent, err := s.store.OpenIntent(kind, resourceKey) + if err != nil { + return err + } + if intent != nil { + return &IntentPendingError{IntentID: intent.ID} + } + return s.store.CheckExpectedVersion(kind, resourceKey, expected) +} + +// BeginMutation records a user's mutation before the rule is written. +// The immutable Version is created later from the observed rule state, not from +// the request alone. rolledBackFromID is audit metadata for rollback intents. +func (s *Service) BeginMutation(ctx context.Context, res coremodel.Resource, op Operation, source Source, author, reason string, rolledBackFromID *int64) (*Intent, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + req, err := buildMutationInsertRequest(res, op, source, author, reason, rolledBackFromID, time.Now()) + if err != nil { + return nil, err + } + return s.store.CreateIntent(ctx, req) +} + +func (s *Service) AbandonIntent(ctx context.Context, intent *Intent, reason string) error { + if err := s.ensureEnabled(); err != nil { + return err + } + if _, err := lock.RequireLease(ctx); err != nil { + return err + } + if intent == nil { + return bizerror.New(bizerror.InvalidArgument, "rule version intent is required") + } + fresh, err := s.store.GetIntent(intent.ID) + if err != nil { + return err + } + if fresh.Status == IntentStatusFailed { + return s.store.CleanupIntent(fresh.ID, IntentStatusFailed) + } + if fresh.Status != IntentStatusPending && + fresh.Status != IntentStatusApplied && + fresh.Status != IntentStatusOutcomeUnknown { + return ErrVersionIntentNotOpen + } + return s.store.MarkIntentFailed(ctx, intent.ID, reason) +} + +func (s *Service) MarkIntentOutcomeUnknown(ctx context.Context, intent *Intent, reason string) error { + if err := s.ensureEnabled(); err != nil { + return err + } + if intent == nil { + return bizerror.New(bizerror.InvalidArgument, "rule version intent is required") + } + 
return s.store.MarkIntentOutcomeUnknown(ctx, intent.ID, reason) +} + +// RepairIntent reconciles an open intent when the rule mutation reached +// ResourceManager but the subscriber did not commit the corresponding version. +// It derives the version from current rule state instead of trusting payloads. +func (s *Service) RepairIntent(ctx context.Context, kind coremodel.ResourceKind, resourceKey string, current coremodel.Resource, deleted bool) (*Version, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + intent, err := s.store.OpenIntent(kind, resourceKey) + if err != nil || intent == nil { + return nil, err + } + return s.repairIntent(ctx, intent, current, deleted) +} + +func (s *Service) FinalizeMutation(ctx context.Context, intent *Intent, current coremodel.Resource, deleted bool) (*Version, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if intent == nil { + return nil, bizerror.New(bizerror.InvalidArgument, "rule version intent is required") + } + fresh, err := s.store.GetIntent(intent.ID) + if err != nil { + if errors.Is(err, ErrVersionIntentNotFound) { + return s.committedVersionForClosedIntent(intent) + } + return nil, err + } + switch fresh.Status { + case IntentStatusCommitted: + committed, err := s.committedVersionForIntent(fresh) + if err == nil { + if cleanupErr := s.store.CleanupIntent(fresh.ID, IntentStatusCommitted); cleanupErr != nil { + return nil, cleanupErr + } + } + return committed, err + case IntentStatusFailed: + if cleanupErr := s.store.CleanupIntent(fresh.ID, IntentStatusFailed); cleanupErr != nil { + return nil, cleanupErr + } + if fresh.LastError != "" { + return nil, fmt.Errorf("%w: %s", ErrVersionIntentNotOpen, fresh.LastError) + } + return nil, ErrVersionIntentNotOpen + } + committed, err := s.repairIntent(ctx, fresh, current, 
deleted) + if err != nil { + if errors.Is(err, ErrVersionIntentNotFound) { + return s.committedVersionForClosedIntent(fresh) + } + return nil, err + } + if committed != nil { + return validateCommittedIntentVersion(committed, fresh) + } + return s.committedVersionForIntent(fresh) +} + +func (s *Service) GetIntent(id int64) (*Intent, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + return s.store.GetIntent(id) +} + +func (s *Service) committedVersionForIntent(intent *Intent) (*Version, error) { + versions, err := s.store.ListVersions(intent.RuleKind, intent.ResourceKey) + if err != nil { + return nil, err + } + var found *Version + for i := range versions { + if versions[i].IntentID != intent.ID { + continue + } + if found != nil && found.ID != versions[i].ID { + return nil, fmt.Errorf("%w: multiple RuleVersion resources committed for intent %d", ErrVersionLedgerCorrupt, intent.ID) + } + v := versions[i] + found = &v + } + if found == nil { + return nil, ErrVersionNotFound + } + return validateCommittedIntentVersion(found, intent) +} + +func (s *Service) committedVersionForClosedIntent(intent *Intent) (*Version, error) { + version, err := s.committedVersionForIntent(intent) + if errors.Is(err, ErrVersionNotFound) { + return nil, fmt.Errorf("%w: terminal intent %d has no committed RuleVersion", ErrVersionLedgerCorrupt, intent.ID) + } + return version, err +} + +func validateCommittedIntentVersion(version *Version, intent *Intent) (*Version, error) { + if version == nil || intent == nil || + version.RuleKind != intent.RuleKind || + version.ResourceKey != intent.ResourceKey || + version.ContentHash != intent.ContentHash || + version.SpecJSON != intent.SpecJSON || + version.Operation != intent.Operation || + version.Source != intent.Source || + version.Author != intent.Author || + version.Reason != intent.Reason || + version.IntentID != intent.ID || + rolledBackFromIDValue(version.RolledBackFromID) != 
rolledBackFromIDValue(intent.RolledBackFromID) { + return nil, fmt.Errorf("%w: committed RuleVersion does not match intent %d", ErrVersionLedgerCorrupt, intent.ID) + } + return version, nil +} + +func (s *Service) ReconcileActualState(ctx context.Context, kind coremodel.ResourceKind, resourceKey string, current coremodel.Resource, deleted bool, author string) (*Version, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + + operation := OperationUpdate + mesh := extractMesh(resourceKey) + ruleName := extractName(resourceKey) + specJSON := string(DeleteSpecJSON) + contentHash := HashSpecJSON(DeleteSpecJSON) + if deleted || current == nil { + operation = OperationDelete + } else { + mesh = current.ResourceMesh() + ruleName = current.ResourceMeta().Name + hash, normalized, err := NormalizeResource(current) + if err != nil { + return nil, err + } + contentHash = hash + specJSON = normalized + } + + latest, err := s.store.LatestVersion(kind, resourceKey) + if err != nil && !errors.Is(err, ErrVersionNotFound) { + return nil, err + } + if latest != nil { + if latest.Operation == OperationDelete && operation == OperationDelete { + return nil, nil + } + if latest.Operation == OperationDelete && operation != OperationDelete { + operation = OperationCreate + } + if latest.Operation != OperationDelete && operation != OperationDelete && latest.ContentHash == contentHash { + return nil, nil + } + } else if operation != OperationDelete { + operation = OperationCreate + } + + if strings.TrimSpace(author) == "" { + author = "system:reconcile" + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + return s.store.InsertVersion(ctx, InsertRequest{ + RuleKind: kind, + Mesh: mesh, + ResourceKey: resourceKey, + RuleName: ruleName, + SpecJSON: specJSON, + ContentHash: contentHash, + Operation: operation, + Source: SourceUpstream, + Author: author, + CreatedAt: 
time.Now(), + }, s.maxVersions) +} + +func (s *Service) CurrentLedgerHead(kind coremodel.ResourceKind, resourceKey string) (*Version, bool, error) { + if err := s.ensureEnabled(); err != nil { + return nil, false, err + } + snapshot, err := s.store.LedgerSnapshot(kind, resourceKey) + if err != nil { + return nil, false, err + } + return snapshot.Head, snapshot.Deleted, nil +} + +func (s *Service) GetVersion(kind coremodel.ResourceKind, resourceKey string, id int64) (*Version, error) { + if err := s.ensureEnabled(); err != nil { + return nil, err + } + return s.store.GetVersion(kind, resourceKey, id) +} + +// repairIntent reconciles an open intent against ResourceManager state. It only +// commits the intent when the observed rule state proves that the mutation +// happened; otherwise the open intent continues to fence later writes or is +// failed after the actual state has been recorded. +func (s *Service) repairIntent(ctx context.Context, intent *Intent, current coremodel.Resource, deleted bool) (*Version, error) { + if intent == nil { + return nil, nil + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if intent.Status == IntentStatusCommitted || intent.Status == IntentStatusFailed { + return nil, ErrVersionIntentNotOpen + } + if !isOpenIntentStatus(intent.Status) { + return nil, ErrVersionIntentNotOpen + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if intent.ReconcileRequired { + return s.resolveObservedIntent(ctx, intent, current, deleted) + } + if intent.Status == IntentStatusCommitting { + return s.store.CommitIntent(ctx, intent.ID, s.maxVersions) + } + matches := IntentMatchesResource(intent, current, deleted) + if intent.Status == IntentStatusPending || intent.Status == IntentStatusOutcomeUnknown { + if !matches { + if intent.Status == IntentStatusOutcomeUnknown { + return s.failIntentAfterActualReconcile(ctx, intent, current, deleted, "registry mutation outcome did not match intended state") + 
} + return nil, &IntentPendingError{IntentID: intent.ID} + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if err := s.store.MarkIntentApplied(ctx, intent.ID); err != nil { + return nil, err + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + return s.store.CommitIntent(ctx, intent.ID, s.maxVersions) + } + if !matches { + return nil, ErrIntentOutcomeMismatch + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + return s.store.CommitIntent(ctx, intent.ID, s.maxVersions) +} + +func (s *Service) resolveObservedIntent(ctx context.Context, intent *Intent, current coremodel.Resource, deleted bool) (*Version, error) { + visible, err := observedStateVisible(intent, current, deleted) + if err != nil { + return nil, err + } + if !visible { + return nil, &IntentPendingError{IntentID: intent.ID} + } + if IntentMatchesResource(intent, current, deleted) { + if intent.Status == IntentStatusPending || intent.Status == IntentStatusOutcomeUnknown { + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if err := s.store.MarkIntentApplied(ctx, intent.ID); err != nil { + return nil, err + } + } + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + return s.store.CommitIntent(ctx, intent.ID, s.maxVersions) + } + if intent.Status == IntentStatusCommitting { + return s.store.CommitIntent(ctx, intent.ID, s.maxVersions) + } + return s.failIntentAfterActualReconcile(ctx, intent, current, deleted, "non-matching rule event superseded the open intent") +} + +func (s *Service) failIntentAfterActualReconcile(ctx context.Context, intent *Intent, current coremodel.Resource, deleted bool, reason string) (*Version, error) { + if _, err := lock.RequireLease(ctx); err != nil { + return nil, err + } + if _, err := s.ReconcileActualState(ctx, intent.RuleKind, intent.ResourceKey, current, deleted, "system:reconcile"); err != nil { + return nil, err + } + if _, err := 
lock.RequireLease(ctx); err != nil { + return nil, err + } + if err := s.store.MarkIntentFailed(ctx, intent.ID, reason); err != nil { + return nil, err + } + return nil, ErrIntentOutcomeMismatch +} + +func observedStateVisible(intent *Intent, current coremodel.Resource, deleted bool) (bool, error) { + if intent == nil || !intent.ReconcileRequired || intent.ObservedContentHash == "" { + return true, nil + } + op := OperationUpdate + hash := HashSpecJSON(DeleteSpecJSON) + if deleted || current == nil { + op = OperationDelete + } else { + normalizedHash, _, err := NormalizeResource(current) + if err != nil { + return false, err + } + hash = normalizedHash + } + return op == intent.ObservedOperation && hash == intent.ObservedContentHash, nil +} + +func buildMutationInsertRequest(res coremodel.Resource, op Operation, source Source, author, reason string, rolledBackFromID *int64, createdAt time.Time) (InsertRequest, error) { + if res == nil { + return InsertRequest{}, bizerror.New(bizerror.InvalidArgument, "rule resource is required") + } + hash, specJSON, err := NormalizeResource(res) + if op == OperationDelete { + hash = HashSpecJSON(DeleteSpecJSON) + specJSON = DeleteSpecJSON + err = nil + } + if err != nil { + return InsertRequest{}, err + } + if strings.TrimSpace(author) == "" { + author = "system:unknown" + } else { + author = strings.TrimSpace(author) + } + if source == "" { + source = SourceAdmin + } + return InsertRequest{ + RuleKind: res.ResourceKind(), + Mesh: res.ResourceMesh(), + ResourceKey: res.ResourceKey(), + RuleName: res.ResourceMeta().Name, + SpecJSON: specJSON, + ContentHash: hash, + Source: source, + Operation: op, + Author: author, + Reason: reason, + RolledBackFromID: rolledBackFromID, + CreatedAt: createdAt, + }, nil +} + +// IntentMatchesResource reports whether ResourceManager currently exposes the +// state requested by the intent. 
It is a recovery check, not event attribution; +// callers still validate intent identity before accepting an existing version. +func IntentMatchesResource(intent *Intent, current coremodel.Resource, deleted bool) bool { + if intent == nil { + return false + } + if deleted || current == nil { + return intent.Operation == OperationDelete && intent.ContentHash == HashSpecJSON(DeleteSpecJSON) + } + hash, _, err := NormalizeResource(current) + return err == nil && hash == intent.ContentHash +} + +func withRuleVersionLock(ctx context.Context, lockMgr lock.Lock, kind coremodel.ResourceKind, resourceKey string, fn func(context.Context) error) error { + if lockMgr == nil { + return lock.ErrLockUnavailable + } + key := lock.BuildRuleVersioningLockKey(string(kind), extractMesh(resourceKey), extractName(resourceKey)) + return lock.WithLock(ctx, lockMgr, key, constants.DefaultLockTimeout, fn) +} diff --git a/pkg/core/versioning/store.go b/pkg/core/versioning/store.go new file mode 100644 index 000000000..07a8bdddb --- /dev/null +++ b/pkg/core/versioning/store.go @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package versioning + +import ( + "context" + + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" +) + +// Store persists committed rule-version ledgers and unfinished mutation +// intents. Implementations must keep intent state transitions conditional so +// competing console/subscriber paths cannot both own the same commit. +type Store interface { + InsertVersion(ctx context.Context, req InsertRequest, maxVersions int64) (*Version, error) + CreateIntent(ctx context.Context, req InsertRequest) (*Intent, error) + GetIntent(id int64) (*Intent, error) + OpenIntent(kind coremodel.ResourceKind, resourceKey string) (*Intent, error) + MarkIntentApplied(ctx context.Context, id int64) error + MarkIntentOutcomeUnknown(ctx context.Context, id int64, message string) error + MarkIntentObserved(ctx context.Context, id int64, op Operation, contentHash, specJSON string) error + MarkIntentFailed(ctx context.Context, id int64, message string) error + CommitIntent(ctx context.Context, id int64, maxVersions int64) (*Version, error) + CleanupIntent(id int64, terminalStatus IntentStatus) error + ListOpenIntents() ([]Intent, error) + ListTerminalIntents() ([]Intent, error) + ListLatestVersions(kind coremodel.ResourceKind) ([]Version, error) + ListVersions(kind coremodel.ResourceKind, resourceKey string) ([]Version, error) + LedgerSnapshot(kind coremodel.ResourceKind, resourceKey string) (*LedgerSnapshot, error) + GetVersion(kind coremodel.ResourceKind, resourceKey string, id int64) (*Version, error) + LatestVersion(kind coremodel.ResourceKind, resourceKey string) (*Version, error) + CheckExpectedVersion(kind coremodel.ResourceKind, resourceKey string, expected *int64) error +} diff --git a/pkg/core/versioning/subscriber.go b/pkg/core/versioning/subscriber.go new file mode 100644 index 000000000..0d520b271 --- /dev/null +++ b/pkg/core/versioning/subscriber.go @@ -0,0 +1,426 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "context" + "errors" + "fmt" + "time" + + "k8s.io/client-go/tools/cache" + + "github.com/apache/dubbo-admin/pkg/core/events" + "github.com/apache/dubbo-admin/pkg/core/lock" + "github.com/apache/dubbo-admin/pkg/core/logger" + "github.com/apache/dubbo-admin/pkg/core/manager" + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" +) + +type Subscriber struct { + kind coremodel.ResourceKind + store Store + maxVersions int64 + lockMgr lock.Lock + appCtx context.Context + onError func() +} + +type ParentRef struct { + Kind coremodel.ResourceKind + Mesh string + Name string + ResourceKey string +} + +type normalizedRuleEvent struct { + Resource coremodel.Resource + Parent ParentRef + Operation Operation + SpecJSON []byte + ContentHash string + Context map[string]string +} + +func NewSubscriber(kind coremodel.ResourceKind, store Store, maxVersions int64, lockMgr lock.Lock, appCtx context.Context, onError ...func()) *Subscriber { + var trigger func() + if len(onError) > 0 { + trigger = onError[0] + } + return &Subscriber{ + kind: kind, + store: store, + maxVersions: maxVersions, + lockMgr: lockMgr, + appCtx: appCtx, + onError: trigger, + } +} + +func (s *Subscriber) ResourceKind() coremodel.ResourceKind { + 
return s.kind +} + +func (s *Subscriber) Name() string { + return "rule-version-" + s.kind.ToString() +} + +func (s *Subscriber) AsyncEnabled() bool { + return false +} + +func normalizeRuleEvent(event events.Event) (*normalizedRuleEvent, error) { + var res coremodel.Resource + var op Operation + switch event.Type() { + case cache.Added: + res = event.NewObj() + op = OperationCreate + case cache.Updated: + res = event.NewObj() + op = OperationUpdate + case cache.Deleted: + res = event.OldObj() + op = OperationDelete + default: + return nil, nil + } + if res == nil { + return nil, nil + } + + hash, specJSON, err := NormalizeResource(res) + if op == OperationDelete { + hash = HashSpecJSON(DeleteSpecJSON) + specJSON = DeleteSpecJSON + err = nil + } + if err != nil { + return nil, err + } + + return &normalizedRuleEvent{ + Resource: res, + Parent: ParentRef{ + Kind: res.ResourceKind(), + Mesh: res.ResourceMesh(), + Name: res.ResourceMeta().Name, + ResourceKey: res.ResourceKey(), + }, + Operation: op, + SpecJSON: []byte(specJSON), + ContentHash: hash, + Context: event.Context(), + }, nil +} + +func (s *Subscriber) ProcessEvent(event events.Event) error { + normalized, err := normalizeRuleEvent(event) + if err != nil { + return err + } + if normalized == nil { + return nil + } + err = s.processNormalizedEvent(normalized) + if err != nil && s.onError != nil { + s.onError() + } + return err +} + +func (s *Subscriber) processNormalizedEvent(normalized *normalizedRuleEvent) error { + openIntent, err := s.store.OpenIntent(normalized.Parent.Kind, normalized.Parent.ResourceKey) + if err != nil { + return err + } + if openIntent != nil { + return s.handleOpenIntentEvent(openIntent, *normalized) + } + if s.lockMgr == nil { + return lock.ErrLockUnavailable + } + if s.appCtx == nil { + return context.Canceled + } + return withRuleVersionLock(s.appCtx, s.lockMgr, normalized.Parent.Kind, normalized.Parent.ResourceKey, func(leaseCtx context.Context) error { + return s.record(leaseCtx, 
*normalized) + }) +} + +func (s *Subscriber) record(ctx context.Context, event normalizedRuleEvent) error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + openIntent, err := s.store.OpenIntent(event.Parent.Kind, event.Parent.ResourceKey) + if err != nil { + return err + } + if openIntent != nil { + return s.handleOpenIntentEvent(openIntent, event) + } + return s.recordVersion(ctx, event) +} + +func (s *Subscriber) recordVersion(ctx context.Context, event normalizedRuleEvent) error { + source := SourceUpstream + author := "system:upstream" + if event.Context != nil { + if registry := event.Context[events.SourceRegistryContextKey]; registry != "" { + author = "system:" + registry + } + } + + if exists, err := s.checkDuplicate(event.Parent.Kind, event.Parent.ResourceKey, event.Operation, event.ContentHash); err != nil { + return fmt.Errorf("failed to check duplicate hash: %w", err) + } else if exists { + logger.Infof("skipping duplicate version for %s (operation=%s hash=%s)", event.Parent.ResourceKey, event.Operation, shortHash(event.ContentHash)) + return nil + } + + if err := lock.CheckLease(ctx); err != nil { + return err + } + req := InsertRequest{ + RuleKind: event.Parent.Kind, + Mesh: event.Parent.Mesh, + ResourceKey: event.Parent.ResourceKey, + RuleName: event.Parent.Name, + SpecJSON: string(event.SpecJSON), + ContentHash: event.ContentHash, + Operation: event.Operation, + Source: source, + Author: author, + CreatedAt: time.Now(), + } + + _, err := s.store.InsertVersion(ctx, req, s.maxVersions) + if err != nil { + return fmt.Errorf("failed to insert version: %w", err) + } + + return nil +} + +func (s *Subscriber) handleOpenIntentEvent(openIntent *Intent, event normalizedRuleEvent) error { + if intentMatchesEvent(openIntent, event) { + // The synchronous registry echo for an admin mutation is finalized by + // the console path that owns the intent; recording it here would append + // a duplicate version for the same mutation. 
+ logger.Infof("skipping admin echo rule event for %s while rule version intent %d is open", event.Parent.ResourceKey, openIntent.ID) + return nil + } + logger.Infof("recording non-matching rule event for %s while rule version intent %d is open; intent close will reconcile actual state", event.Parent.ResourceKey, openIntent.ID) + return s.markIntentObservedOrRecord(openIntent, event) +} + +func (s *Subscriber) markIntentObservedOrRecord(openIntent *Intent, event normalizedRuleEvent) error { + if s.appCtx == nil { + return context.Canceled + } + current := openIntent + for attempt := 0; attempt < maxIntentCASRetries; attempt++ { + if err := s.appCtx.Err(); err != nil { + return err + } + if current == nil { + return s.recordAfterIntentClosed(event) + } + err := s.store.MarkIntentObserved(s.appCtx, current.ID, event.Operation, event.ContentHash, string(event.SpecJSON)) + if err == nil { + if current.Status == IntentStatusCommitting { + _, err = s.store.CommitIntent(s.appCtx, current.ID, s.maxVersions) + return err + } + return nil + } + if errors.Is(err, ErrVersionIntentNotFound) || errors.Is(err, ErrVersionIntentNotOpen) { + return s.recordAfterIntentClosed(event) + } + if !errors.Is(err, ErrVersionIntentConflict) { + return err + } + refreshed, refreshErr := s.store.OpenIntent(event.Parent.Kind, event.Parent.ResourceKey) + if refreshErr != nil { + return refreshErr + } + if refreshed != nil && intentMatchesEvent(refreshed, event) { + logger.Infof("skipping admin echo rule event for %s after intent refresh; rule version intent %d is open", event.Parent.ResourceKey, refreshed.ID) + return nil + } + current = refreshed + } + return s.recordAfterIntentClosed(event) +} + +func (s *Subscriber) recordAfterIntentClosed(event normalizedRuleEvent) error { + if s.lockMgr == nil { + return lock.ErrLockUnavailable + } + if s.appCtx == nil { + return context.Canceled + } + return withRuleVersionLock(s.appCtx, s.lockMgr, event.Parent.Kind, event.Parent.ResourceKey, func(leaseCtx 
context.Context) error { + // The intent can close between the first subscriber read and the dirty + // marker CAS. Re-read under the canonical rule lock before deciding + // whether this event belongs to the console intent or to upstream state. + for attempt := 0; attempt < maxIntentCASRetries; attempt++ { + if err := lock.CheckLease(leaseCtx); err != nil { + return err + } + openIntent, err := s.store.OpenIntent(event.Parent.Kind, event.Parent.ResourceKey) + if err != nil { + return err + } + if openIntent == nil { + return s.recordVersion(leaseCtx, event) + } + if intentMatchesEvent(openIntent, event) { + logger.Infof("skipping admin echo rule event for %s after lock reacquire; rule version intent %d is open", event.Parent.ResourceKey, openIntent.ID) + return nil + } + err = s.store.MarkIntentObserved(leaseCtx, openIntent.ID, event.Operation, event.ContentHash, string(event.SpecJSON)) + if err == nil { + if openIntent.Status == IntentStatusCommitting { + _, err = s.store.CommitIntent(leaseCtx, openIntent.ID, s.maxVersions) + return err + } + return nil + } + if errors.Is(err, ErrVersionIntentNotFound) || errors.Is(err, ErrVersionIntentNotOpen) { + continue + } + if errors.Is(err, ErrVersionIntentConflict) { + continue + } + return err + } + openIntent, err := s.store.OpenIntent(event.Parent.Kind, event.Parent.ResourceKey) + if err != nil { + return err + } + if openIntent == nil { + return s.recordVersion(leaseCtx, event) + } + return &IntentPendingError{IntentID: openIntent.ID} + }) +} + +func intentMatchesEvent(intent *Intent, event normalizedRuleEvent) bool { + return intent != nil && + intent.RuleKind == event.Parent.Kind && + intent.ResourceKey == event.Parent.ResourceKey && + intent.Operation == event.Operation && + intent.ContentHash == event.ContentHash && + intent.SpecJSON == string(event.SpecJSON) +} + +func (s *Subscriber) checkDuplicate(kind coremodel.ResourceKind, resourceKey string, op Operation, hash string) (bool, error) { + latest, err := 
s.store.LatestVersion(kind, resourceKey) + if errors.Is(err, ErrVersionNotFound) { + return false, nil + } + if err != nil { + return false, err + } + if latest.Operation == OperationDelete && op == OperationDelete { + return true, nil + } + if latest.Operation == OperationDelete { + return false, nil + } + // Hash dedup is only a projection filter for adjacent upstream echoes. + // Operation and parent identity are checked separately, and admin/rollback + // attribution is handled through intents rather than inferred from content. + return op != OperationDelete && latest.ContentHash == hash, nil +} + +// recordBootstrapState creates a baseline version for a rule during bootstrap. +func recordBootstrapState(ctx context.Context, store Store, maxVersions int64, res coremodel.Resource) error { + kind := res.ResourceKind() + hash, specJSON, err := NormalizeResource(res) + if err != nil { + return err + } + + operation := OperationCreate + source := SourceBootstrap + author := "system:bootstrap" + latest, err := store.LatestVersion(kind, res.ResourceKey()) + if err != nil && !errors.Is(err, ErrVersionNotFound) { + return err + } + if latest != nil { + if latest.Operation != OperationDelete && latest.ContentHash == hash { + return nil + } + source = SourceUpstream + author = "system:reconcile" + operation = OperationUpdate + if latest.Operation == OperationDelete { + operation = OperationCreate + } + } + + req := InsertRequest{ + RuleKind: kind, + Mesh: res.ResourceMesh(), + ResourceKey: res.ResourceKey(), + RuleName: res.ResourceMeta().Name, + SpecJSON: specJSON, + ContentHash: hash, + Source: source, + Operation: operation, + Author: author, + CreatedAt: time.Now(), + } + if _, err := store.InsertVersion(ctx, req, maxVersions); err != nil { + return fmt.Errorf("current-state version for %s failed: %w", res.ResourceKey(), err) + } + return nil +} + +func RecordBootstrapLocked(ctx context.Context, store Store, maxVersions int64, kind coremodel.ResourceKind, resourceKey 
string, rm manager.ResourceManager, lockMgr lock.Lock) error { + return withRuleVersionLock(ctx, lockMgr, kind, resourceKey, func(ctx context.Context) error { + if err := lock.CheckLease(ctx); err != nil { + return err + } + openIntent, err := store.OpenIntent(kind, resourceKey) + if err != nil { + return err + } + if openIntent != nil { + return nil + } + current, exists, err := rm.GetByKey(kind, resourceKey) + if err != nil { + return err + } + if !exists { + return nil + } + return recordBootstrapState(ctx, store, maxVersions, current) + }) +} + +func shortHash(hash string) string { + if len(hash) <= 8 { + return hash + } + return hash[:8] +} diff --git a/pkg/core/versioning/types.go b/pkg/core/versioning/types.go new file mode 100644 index 000000000..89bc84cc4 --- /dev/null +++ b/pkg/core/versioning/types.go @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package versioning + +import ( + "errors" + "time" + + coremodel "github.com/apache/dubbo-admin/pkg/core/resource/model" +) + +// Source identifies where a rule mutation originated. +// Used for auditing and distinguishing user-initiated changes from system-generated ones. 
+type Source string + +const ( + SourceAdmin Source = "ADMIN" // User edit via Admin UI/API + SourceUpstream Source = "UPSTREAM" // Registry change detected by subscriber + SourceBootstrap Source = "BOOTSTRAP" // Initial version recorded at startup + // SourceRollback marks a version produced by re-publishing a historical + // snapshot. Rollback records a new version and does not rewrite history. + SourceRollback Source = "ROLLBACK" +) + +type Operation string + +const ( + OperationCreate Operation = "CREATE" + OperationUpdate Operation = "UPDATE" + OperationDelete Operation = "DELETE" +) + +// IntentStatus tracks the lifecycle of a mutation intent. +// Intent workflow: +// - PENDING: intent is durable, registry mutation is not yet proven. +// - APPLIED: intended state was observed, but RuleVersion may not be durable. +// - OUTCOME_UNKNOWN: registry returned an uncertain result or a conflicting +// event was observed; repair must read actual state before cleanup. +// - COMMITTING: a clean APPLIED intent won the storage-level CAS for commit; +// retries must finish the fixed-ID RuleVersion or reconcile actual state. +// - COMMITTED/FAILED: terminal states, cleaned up after the durable outcome. +// +// RPC errors and context cancellation are not registry-side fencing. They move +// the intent to OUTCOME_UNKNOWN so repair can reconcile actual state later. 
+type IntentStatus string + +const ( + IntentStatusPending IntentStatus = "PENDING" // Intent created, mutation not yet applied + IntentStatusApplied IntentStatus = "APPLIED" // Intended state observed, awaiting version commit + IntentStatusOutcomeUnknown IntentStatus = "OUTCOME_UNKNOWN" // Actual registry outcome must be reconciled + IntentStatusCommitting IntentStatus = "COMMITTING" // Commit ownership acquired before fixed-ID version append + IntentStatusCommitted IntentStatus = "COMMITTED" // Version successfully recorded, intent closed + IntentStatusFailed IntentStatus = "FAILED" // Mutation failed or was rejected +) + +var ( + ErrVersionConflict = errors.New("rule version conflict") // ExpectedVersionID mismatch + ErrVersionNotFound = errors.New("rule version not found") + ErrVersionIntentNotFound = errors.New("rule version intent not found") + ErrVersionIntentNotOpen = errors.New("rule version intent is not open") // Intent already committed or failed + ErrVersionIntentPending = errors.New("rule version intent is pending") // Another mutation in progress + ErrVersionLedgerCorrupt = errors.New("rule version ledger corruption") + ErrVersionIntentConflict = errors.New("rule version intent revision conflict") + ErrIntentOutcomeMismatch = errors.New("rule version intent outcome does not match current resource") + ErrRollbackToDelete = errors.New("cannot roll back to a deleted rule version") + ErrRollbackToCurrent = errors.New("cannot roll back to a version identical to current") +) + +// Version represents a snapshot of a rule's spec at a point in time. Version +// entries are immutable after creation. Rollback appends a new version, while +// retention may delete the oldest entries. IsCurrent is derived from the ledger +// head at query time. 
+type Version struct { + ID int64 `json:"id"` + RuleKind coremodel.ResourceKind `json:"ruleKind"` + Mesh string `json:"mesh"` + ResourceKey string `json:"resourceKey"` + RuleName string `json:"ruleName"` + VersionNo int64 `json:"versionNo"` + ContentHash string `json:"contentHash"` + SpecJSON string `json:"specJson"` + Source Source `json:"source"` + Operation Operation `json:"operation"` + Author string `json:"author"` + Reason string `json:"reason,omitempty"` + IntentID int64 `json:"intentId,omitempty"` + // RolledBackFromID records the historical version whose snapshot was + // re-published to produce this version. It is audit metadata only and must + // not be used to decide the current version. + RolledBackFromID *int64 `json:"rolledBackFromId,omitempty"` + CreatedAt time.Time `json:"createdAt"` + CommittedAt time.Time `json:"committedAt"` + IsCurrent bool `json:"isCurrent"` +} + +// Intent represents a pending mutation to a rule. It records the user's +// mutation before the rule is written; the Version is created only after the +// resulting rule state is observed or repaired from ResourceManager. +type Intent struct { + ID int64 `json:"id"` + RuleKind coremodel.ResourceKind `json:"ruleKind"` + Mesh string `json:"mesh"` + ResourceKey string `json:"resourceKey"` + RuleName string `json:"ruleName"` + ContentHash string `json:"contentHash"` + SpecJSON string `json:"specJson"` + Source Source `json:"source"` + Operation Operation `json:"operation"` + Author string `json:"author"` + Reason string `json:"reason,omitempty"` + // RolledBackFromID is carried from a rollback intent to the version it + // commits. It is audit metadata only, never the current-version pointer. + RolledBackFromID *int64 `json:"rolledBackFromId,omitempty"` + Status IntentStatus `json:"status"` + LastError string `json:"lastError,omitempty"` + // ReconcileRequired is durable evidence that a non-matching event arrived + // while the intent was open. 
The observed fields let repair wait until the + // ResourceManager snapshot has caught up before deciding the final outcome. + ReconcileRequired bool `json:"reconcileRequired,omitempty"` + ObservedContentHash string `json:"observedContentHash,omitempty"` + ObservedSpecJSON string `json:"observedSpecJson,omitempty"` + ObservedOperation Operation `json:"observedOperation,omitempty"` + ObservedAt time.Time `json:"observedAt,omitempty"` + Revision int64 `json:"revision"` + CreatedAt time.Time `json:"createdAt"` +} + +type ledgerState struct { + Versions []Version + Latest *Version + MaxVersionNo int64 +} + +type LedgerSnapshot struct { + Versions []Version + Head *Version + Deleted bool +} + +type InsertRequest struct { + RuleKind coremodel.ResourceKind + Mesh string + ResourceKey string + RuleName string + SpecJSON string + ContentHash string + Source Source + Operation Operation + Author string + Reason string + IntentID int64 + RolledBackFromID *int64 + CreatedAt time.Time + FixedVersionID *int64 +} + +type ListResult struct { + Items []Version `json:"items"` + Total int64 `json:"total"` + CurrentVersionID *int64 `json:"currentVersionId,omitempty"` + CurrentVersionNo int64 `json:"currentVersionNo,omitempty"` + Deleted bool `json:"deleted"` +} + +type DiffResult struct { + Left DiffSide `json:"left"` + Right DiffSide `json:"right"` +} + +type DiffSide struct { + ID int64 `json:"id"` + VersionNo int64 `json:"versionNo"` + SpecJSON string `json:"specJson"` +} + +type ConflictError struct { + CurrentVersionID *int64 +} + +func (e *ConflictError) Error() string { + return ErrVersionConflict.Error() +} + +type IntentPendingError struct { + IntentID int64 +} + +func (e *IntentPendingError) Error() string { + return ErrVersionIntentPending.Error() +} + +func (e *IntentPendingError) Is(target error) bool { + return target == ErrVersionIntentPending +} diff --git a/pkg/governor/mock/factory.go b/pkg/governor/mock/factory.go index 4c7b282dd..a60732aa0 100644 --- 
a/pkg/governor/mock/factory.go +++ b/pkg/governor/mock/factory.go @@ -18,6 +18,8 @@ package mock import ( + "context" + discoverycfg "github.com/apache/dubbo-admin/pkg/config/discovery" "github.com/apache/dubbo-admin/pkg/core/events" "github.com/apache/dubbo-admin/pkg/core/governor" @@ -45,6 +47,14 @@ type mockGovernor struct{} var _ governor.RuleGovernor = &mockGovernor{} -func (g *mockGovernor) CreateRule(_ coremodel.Resource) error { return nil } -func (g *mockGovernor) UpdateRule(_ coremodel.Resource) error { return nil } -func (g *mockGovernor) DeleteRule(_ coremodel.Resource) error { return nil } +func (g *mockGovernor) CreateRule(ctx context.Context, _ coremodel.Resource) error { + return ctx.Err() +} + +func (g *mockGovernor) UpdateRule(ctx context.Context, _ coremodel.Resource) error { + return ctx.Err() +} + +func (g *mockGovernor) DeleteRule(ctx context.Context, _ coremodel.Resource) error { + return ctx.Err() +} diff --git a/pkg/governor/nacos2/governor.go b/pkg/governor/nacos2/governor.go index 626f665f4..73c62498b 100644 --- a/pkg/governor/nacos2/governor.go +++ b/pkg/governor/nacos2/governor.go @@ -18,12 +18,14 @@ package nacos2 import ( + "context" "fmt" "reflect" "time" nacosconfigclient "github.com/nacos-group/nacos-sdk-go/v2/clients/config_client" nacosnamingclient "github.com/nacos-group/nacos-sdk-go/v2/clients/naming_client" + nacosutil "github.com/nacos-group/nacos-sdk-go/v2/util" nacosvo "github.com/nacos-group/nacos-sdk-go/v2/vo" "k8s.io/client-go/tools/cache" "sigs.k8s.io/yaml" @@ -64,33 +66,79 @@ func NewNacos2Governor( }, nil } -func (g *RuleGovernor) CreateRule(r coremodel.Resource) error { +func (g *RuleGovernor) CreateRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } + rawContent, err := yaml.Marshal(r.ResourceSpec()) + if err != nil { + return bizerror.Wrap(err, bizerror.NacosError, + fmt.Sprintf("failed to marshal resource spec, res: %s", r.String())) + } + if err := 
g.publishRuleConfig(ctx, r, string(rawContent), ""); err != nil { + return err + } + // wait for the config to be published indeed + if err := waitForRulePropagation(ctx, 2*time.Second); err != nil { + return err + } + g.GetConfigAndUpdateStore(r) + return nil +} + +func (g *RuleGovernor) UpdateRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } + currentContent, err := g.configClient.GetConfig(nacosvo.ConfigParam{ + DataId: r.ResourceMeta().Name, + Group: constants.NacosConfigGroup, + }) + if err != nil { + return bizerror.Wrap(err, bizerror.NacosError, + fmt.Sprintf("failed to read config before publish, res: %s", r.String())) + } + if err := ctx.Err(); err != nil { + return err + } rawContent, err := yaml.Marshal(r.ResourceSpec()) if err != nil { return bizerror.Wrap(err, bizerror.NacosError, fmt.Sprintf("failed to marshal resource spec, res: %s", r.String())) } + if err := g.publishRuleConfig(ctx, r, string(rawContent), nacosutil.Md5(currentContent)); err != nil { + return err + } + // wait for the config to be published indeed + if err := waitForRulePropagation(ctx, 2*time.Second); err != nil { + return err + } + g.GetConfigAndUpdateStore(r) + return nil +} + +func (g *RuleGovernor) publishRuleConfig(ctx context.Context, r coremodel.Resource, content, casMd5 string) error { + if err := ctx.Err(); err != nil { + return err + } ok, err := g.configClient.PublishConfig(nacosvo.ConfigParam{ DataId: r.ResourceMeta().Name, Group: constants.NacosConfigGroup, - Content: string(rawContent), + Content: content, + CasMd5: casMd5, }) if err != nil || !ok { logger.Errorf("failed to publish config in %s, res: %s", r.String(), r.ResourceMesh()) return bizerror.Wrap(err, bizerror.NacosError, fmt.Sprintf("failed to publish config, res: %s", r.String())) } - // wait for the config to be published indeed - <-time.After(2 * time.Second) - g.GetConfigAndUpdateStore(r) - return nil + return ctx.Err() } -func (g *RuleGovernor) 
UpdateRule(r coremodel.Resource) error { - return g.CreateRule(r) -} - -func (g *RuleGovernor) DeleteRule(r coremodel.Resource) error { +func (g *RuleGovernor) DeleteRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } ok, err := g.configClient.DeleteConfig(nacosvo.ConfigParam{ DataId: r.ResourceMeta().Name, Group: constants.NacosConfigGroup, @@ -108,7 +156,9 @@ func (g *RuleGovernor) DeleteRule(r coremodel.Resource) error { return nil } // wait for the config to be deleted indeed - <-time.After(2 * time.Second) + if err := waitForRulePropagation(ctx, 2*time.Second); err != nil { + return err + } if err := st.Delete(r); err != nil { logger.Errorf("failed to delete resource in %s, res: %s, cause: %s", r.String(), r.ResourceMesh(), err) return nil @@ -116,6 +166,17 @@ func (g *RuleGovernor) DeleteRule(r coremodel.Resource) error { return nil } +func waitForRulePropagation(ctx context.Context, d time.Duration) error { + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} + // GetConfigAndUpdateStore get resource from nacos, and update resource in store, if failed, just log an error message, // the lister-watcher will sync the event finally func (g *RuleGovernor) GetConfigAndUpdateStore(r coremodel.Resource) { diff --git a/pkg/governor/zk/governor.go b/pkg/governor/zk/governor.go index a811557b2..7633ceff7 100644 --- a/pkg/governor/zk/governor.go +++ b/pkg/governor/zk/governor.go @@ -18,6 +18,7 @@ package zk import ( + "context" "fmt" "github.com/dubbogo/go-zookeeper/zk" @@ -53,7 +54,10 @@ func NewZKRuleGovernor(cfg *discoverycfg.Config, router store.Router, emitter ev }, nil } -func (g *RuleGovernor) CreateRule(r coremodel.Resource) error { +func (g *RuleGovernor) CreateRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } path := "/dubbo/config/" + r.ResourceMeta().Name 
content, err := yaml.Marshal(r.ResourceSpec()) if err != nil { @@ -65,6 +69,9 @@ func (g *RuleGovernor) CreateRule(r coremodel.Resource) error { return bizerror.Wrap(err, bizerror.ZKError, fmt.Sprintf("failed to create zk node, path: %s", path)) } + if err := ctx.Err(); err != nil { + return err + } // save to store once znode is created in zk to insure local store is consistent to zk timely. // if save to store failed, the discovery will watch and update the store finally. st, err := g.storeRouter.ResourceRoute(r) @@ -80,18 +87,32 @@ func (g *RuleGovernor) CreateRule(r coremodel.Resource) error { return nil } -func (g *RuleGovernor) UpdateRule(r coremodel.Resource) error { +func (g *RuleGovernor) UpdateRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } path := "/dubbo/config/" + r.ResourceMeta().Name content, err := yaml.Marshal(r.ResourceSpec()) if err != nil { return bizerror.Wrap(err, bizerror.YamlError, fmt.Sprintf("failed to marshal resource spec, res: %s", r.String())) } - _, err = g.conn.Set(path, content, -1) + _, stat, err := g.conn.Get(path) + if err != nil { + return bizerror.Wrap(err, bizerror.ZKError, + fmt.Sprintf("failed to read zk node version before update, path: %s", path)) + } + if err := ctx.Err(); err != nil { + return err + } + _, err = g.conn.Set(path, content, stat.Version) if err != nil { return bizerror.Wrap(err, bizerror.ZKError, fmt.Sprintf("failed to update zk node, path: %s", path)) } + if err := ctx.Err(); err != nil { + return err + } st, err := g.storeRouter.ResourceRoute(r) if err != nil { logger.Warnf("cannot find store for rk: %s, cause: %v", r.ResourceKind(), err) @@ -104,13 +125,31 @@ func (g *RuleGovernor) UpdateRule(r coremodel.Resource) error { return nil } -func (g *RuleGovernor) DeleteRule(r coremodel.Resource) error { +func (g *RuleGovernor) DeleteRule(ctx context.Context, r coremodel.Resource) error { + if err := ctx.Err(); err != nil { + return err + } path := 
"/dubbo/config/" + r.ResourceMeta().Name - err := g.conn.Delete(path, -1) + exists, stat, err := g.conn.Exists(path) + if err != nil { + return bizerror.Wrap(err, bizerror.ZKError, + fmt.Sprintf("failed to read zk node version before delete, path: %s", path)) + } + if !exists { + return bizerror.Wrap(zk.ErrNoNode, bizerror.ZKError, + fmt.Sprintf("failed to delete zk node, path: %s", path)) + } + if err := ctx.Err(); err != nil { + return err + } + err = g.conn.Delete(path, stat.Version) if err != nil { return bizerror.Wrap(err, bizerror.ZKError, fmt.Sprintf("failed to delete zk node, path: %s", path)) } + if err := ctx.Err(); err != nil { + return err + } st, err := g.storeRouter.ResourceRoute(r) if err != nil { logger.Warnf("cannot find store for rk: %s, cause: %v", r.ResourceKind(), err) diff --git a/pkg/lock/gorm/lock.go b/pkg/lock/gorm/lock.go index 83e1e658d..4a6f98dc2 100644 --- a/pkg/lock/gorm/lock.go +++ b/pkg/lock/gorm/lock.go @@ -22,48 +22,41 @@ import ( "fmt" "time" - "github.com/google/uuid" "gorm.io/gorm" "gorm.io/gorm/clause" "github.com/apache/dubbo-admin/pkg/common/bizerror" "github.com/apache/dubbo-admin/pkg/common/constants" "github.com/apache/dubbo-admin/pkg/core/lock" - "github.com/apache/dubbo-admin/pkg/core/logger" "github.com/apache/dubbo-admin/pkg/store/dbcommon" ) -// Ensure GormLock implements Lock interface var _ lock.Lock = (*GormLock)(nil) -// GormLock provides distributed locking using database as backend -// It uses GORM for database operations and supports MySQL, PostgreSQL, etc. +// GormLock provides cross-instance locking through a shared SQL database. +// Lease ownership is scoped by token so delayed renew/unlock calls from an old +// holder cannot release a newer holder's lease. 
type GormLock struct { - pool *dbcommon.ConnectionPool - db *gorm.DB // Direct DB reference to avoid circular dependency - owner string // Unique identifier for this lock instance + pool *dbcommon.ConnectionPool + db *gorm.DB } -// NewGormLock creates a new GORM-based distributed lock instance -// Deprecated: Use NewGormLockFromDB to avoid circular dependencies +// NewGormLock creates a GORM-based distributed lock instance. +// Deprecated: Use NewGormLockFromDB to avoid circular dependencies. func NewGormLock(pool *dbcommon.ConnectionPool) lock.Lock { return &GormLock{ - pool: pool, - db: pool.GetDB(), - owner: uuid.New().String(), + pool: pool, + db: pool.GetDB(), } } -// NewGormLockFromDB creates a new GORM-based distributed lock instance from a DB connection -// This is the preferred constructor to avoid circular dependencies +// NewGormLockFromDB creates a GORM-based distributed lock from a DB connection. func NewGormLockFromDB(db *gorm.DB) lock.Lock { return &GormLock{ - db: db, - owner: uuid.New().String(), + db: db, } } -// getDB returns the database instance, to prefer direct DB to pool func (g *GormLock) getDB() *gorm.DB { if g.db != nil { return g.db @@ -74,85 +67,91 @@ func (g *GormLock) getDB() *gorm.DB { return nil } -// Lock acquires a lock with the specified key and TTL -// It blocks until the lock is acquired or context is cancelled -func (g *GormLock) Lock(ctx context.Context, key string, ttl time.Duration) error { +type lease struct { + *lock.LeaseState + lock *GormLock +} + +func (g *GormLock) Acquire(ctx context.Context, key string, ttl time.Duration) (lock.Lease, error) { ticker := time.NewTicker(constants.DefaultLockRetryInterval) defer ticker.Stop() for { - acquired, err := g.TryLock(ctx, key, ttl) + lease, acquired, err := g.TryAcquire(ctx, key, ttl) if err != nil { - return fmt.Errorf("failed to try lock: %w", err) + return nil, fmt.Errorf("failed to try lock: %w", err) } if acquired { - return nil + return lease, nil } select { case 
<-ctx.Done(): - return ctx.Err() + return nil, ctx.Err() case <-ticker.C: } } } -// TryLock attempts to acquire a lock without blocking -// Returns true if lock was acquired, false otherwise -func (g *GormLock) TryLock(ctx context.Context, key string, ttl time.Duration) (bool, error) { +func (g *GormLock) TryAcquire(ctx context.Context, key string, ttl time.Duration) (lock.Lease, bool, error) { db := g.getDB().WithContext(ctx) + token, err := lock.NewLeaseToken() + if err != nil { + return nil, false, err + } expireAt := time.Now().Add(ttl) var acquired bool - err := db.Transaction(func(tx *gorm.DB) error { - // Clean up only this key's expired lock to improve performance + err = db.Transaction(func(tx *gorm.DB) error { + // Only the requested key is cleaned here so acquisition does not scan + // unrelated locks on every retry. now := time.Now() - if err := tx.Where("lock_key = ? AND expire_at < ?", key, now). + if err := tx.Where("lock_key = ? AND expire_at <= ?", key, now). Delete(&LockRecord{}).Error; err != nil { return fmt.Errorf("failed to clean expired lock for key %s: %w", key, err) } - // Try to acquire lock using INSERT ... 
ON CONFLICT lock := &LockRecord{ LockKey: key, - Owner: g.owner, + Owner: token, ExpireAt: expireAt, } - // Try to insert the lock record result := tx.Clauses(clause.OnConflict{ Columns: []clause.Column{{Name: "lock_key"}}, - DoNothing: true, // If conflict, do nothing + DoNothing: true, }).Create(lock) if result.Error != nil { return fmt.Errorf("failed to insert lock record: %w", result.Error) } - // Check if the insertion was successful if result.RowsAffected == 0 { - // The lock already exists acquired = false return nil } - // New row inserted successfully, lock acquired successfully acquired = true return nil }) if err != nil { - return false, err + return nil, false, err } - return acquired, nil + if !acquired { + return nil, false, nil + } + return &lease{ + LeaseState: lock.NewLeaseState(key, token), + lock: g, + }, true, nil } -// Unlock releases a lock held by this instance -func (g *GormLock) Unlock(ctx context.Context, key string) error { - db := g.getDB().WithContext(ctx) +func (l *lease) Unlock(ctx context.Context) error { + db := l.lock.getDB().WithContext(ctx) - result := db.Where("lock_key = ? AND owner = ?", key, g.owner). + result := db.Where("lock_key = ? AND owner = ?", l.Key(), l.Token()). Delete(&LockRecord{}) if result.Error != nil { @@ -166,13 +165,13 @@ func (g *GormLock) Unlock(ctx context.Context, key string) error { return nil } -// Renew extends the TTL of a lock held by this instance -func (g *GormLock) Renew(ctx context.Context, key string, ttl time.Duration) error { - db := g.getDB().WithContext(ctx) +func (l *lease) Renew(ctx context.Context, ttl time.Duration) error { + db := l.lock.getDB().WithContext(ctx) + now := time.Now() newExpireAt := time.Now().Add(ttl) result := db.Model(&LockRecord{}). - Where("lock_key = ? AND owner = ?", key, g.owner). + Where("lock_key = ? AND owner = ? AND expire_at > ?", l.Key(), l.Token(), now). 
Update("expire_at", newExpireAt) if result.Error != nil { @@ -186,7 +185,7 @@ func (g *GormLock) Renew(ctx context.Context, key string, ttl time.Duration) err return nil } -// IsLocked checks if a lock is currently held (by anyone) +// IsLocked reports whether a non-expired lease exists for key. func (g *GormLock) IsLocked(ctx context.Context, key string) (bool, error) { db := g.getDB().WithContext(ctx) @@ -202,70 +201,7 @@ func (g *GormLock) IsLocked(ctx context.Context, key string) (bool, error) { return count > 0, nil } -// WithLock executes a function while holding a lock -func (g *GormLock) WithLock(ctx context.Context, key string, ttl time.Duration, fn func() error) error { - // Acquire lock - if err := g.Lock(ctx, key, ttl); err != nil { - return fmt.Errorf("failed to acquire lock: %w", err) - } - - // Ensure lock is released - defer func() { - // Use background context for unlock to ensure it completes even if ctx is cancelled - unlockCtx, cancel := context.WithTimeout(context.Background(), constants.DefaultUnlockTimeout) - defer cancel() - - if err := g.Unlock(unlockCtx, key); err != nil { - logger.Errorf("Failed to release lock %s: %v", key, err) - } - }() - - // Start auto-renewal if TTL is long enough - var renewDone chan struct{} - if ttl > constants.DefaultAutoRenewThreshold { - renewDone = make(chan struct{}) - go g.autoRenew(ctx, key, ttl, renewDone) - defer close(renewDone) - } - - // Execute the function - return fn() -} - -// autoRenew periodically renews the lock until done channel is closed -func (g *GormLock) autoRenew(ctx context.Context, key string, ttl time.Duration, done <-chan struct{}) { - // Renew at 1/3 of TTL to ensure lock doesn't expire - renewInterval := ttl / 3 - ticker := time.NewTicker(renewInterval) - defer ticker.Stop() - - for { - select { - case <-done: - return - case <-ctx.Done(): - return - case <-ticker.C: - // Double-check done channel before renewing to avoid unnecessary renewal - select { - case <-done: - return - 
default: - } - - renewCtx, cancel := context.WithTimeout(context.Background(), constants.DefaultRenewTimeout) - if err := g.Renew(renewCtx, key, ttl); err != nil { - logger.Warnf("Failed to renew lock %s: %v", key, err) - cancel() - return - } - cancel() - } - } -} - -// CleanupExpiredLocks removes all expired locks from the database -// This should be called periodically as a maintenance task +// CleanupExpiredLocks removes expired leases from the database. func (g *GormLock) CleanupExpiredLocks(ctx context.Context) error { db := g.getDB().WithContext(ctx) diff --git a/pkg/lock/gorm/lock_test.go b/pkg/lock/gorm/lock_test.go index 480579945..cc49b30fd 100644 --- a/pkg/lock/gorm/lock_test.go +++ b/pkg/lock/gorm/lock_test.go @@ -19,6 +19,7 @@ package gorm_test import ( "context" + "errors" "sync" "sync/atomic" "testing" @@ -30,6 +31,7 @@ import ( "gorm.io/gorm" "github.com/apache/dubbo-admin/pkg/common/bizerror" + corelock "github.com/apache/dubbo-admin/pkg/core/lock" gormlock "github.com/apache/dubbo-admin/pkg/lock/gorm" ) @@ -61,14 +63,14 @@ func TestBasicLockUnlock(t *testing.T) { lockInstance := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err := lockInstance.Lock(ctx, "test-key", 5*time.Second) + lease, err := lockInstance.Acquire(ctx, "test-key", 5*time.Second) assert.NoError(t, err, "should acquire lock successfully") isLocked, err := lockInstance.IsLocked(ctx, "test-key") assert.NoError(t, err) assert.True(t, isLocked, "lock should be held") - err = lockInstance.Unlock(ctx, "test-key") + err = lease.Unlock(ctx) assert.NoError(t, err, "should release lock successfully") isLocked, err = lockInstance.IsLocked(ctx, "test-key") @@ -82,22 +84,22 @@ func TestTryLock(t *testing.T) { lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - acquired, err := lock1.TryLock(ctx, "test-key", 5*time.Second) + lease1, acquired, err := lock1.TryAcquire(ctx, "test-key", 5*time.Second) assert.NoError(t, err) assert.True(t, acquired, "first lock 
should be acquired") - acquired, err = lock2.TryLock(ctx, "test-key", 5*time.Second) + _, acquired, err = lock2.TryAcquire(ctx, "test-key", 5*time.Second) assert.NoError(t, err) assert.False(t, acquired, "second lock should not be acquired") - err = lock1.Unlock(ctx, "test-key") + err = lease1.Unlock(ctx) assert.NoError(t, err) - acquired, err = lock2.TryLock(ctx, "test-key", 5*time.Second) + lease2, acquired, err := lock2.TryAcquire(ctx, "test-key", 5*time.Second) assert.NoError(t, err) assert.True(t, acquired, "second lock should be acquired after first is released") - _ = lock2.Unlock(ctx, "test-key") + _ = lease2.Unlock(ctx) } func TestConcurrentLockAttempts(t *testing.T) { @@ -113,11 +115,11 @@ func TestConcurrentLockAttempts(t *testing.T) { go func() { defer wg.Done() lockInstance := gormlock.NewGormLockFromDB(db) - acquired, err := lockInstance.TryLock(ctx, "concurrent-key", 1*time.Second) + lease, acquired, err := lockInstance.TryAcquire(ctx, "concurrent-key", 1*time.Second) if err == nil && acquired { successCount.Add(1) time.Sleep(100 * time.Millisecond) // Hold lock briefly - _ = lockInstance.Unlock(ctx, "concurrent-key") + _ = lease.Unlock(ctx) } }() } @@ -133,21 +135,21 @@ func TestLockExpiration(t *testing.T) { lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - acquired, err := lock1.TryLock(ctx, "expire-key", 100*time.Millisecond) + _, acquired, err := lock1.TryAcquire(ctx, "expire-key", 100*time.Millisecond) assert.NoError(t, err) assert.True(t, acquired) - acquired, err = lock2.TryLock(ctx, "expire-key", 1*time.Second) + _, acquired, err = lock2.TryAcquire(ctx, "expire-key", 1*time.Second) assert.NoError(t, err) assert.False(t, acquired, "lock should still be held") time.Sleep(200 * time.Millisecond) - acquired, err = lock2.TryLock(ctx, "expire-key", 1*time.Second) + lease2, acquired, err := lock2.TryAcquire(ctx, "expire-key", 1*time.Second) assert.NoError(t, err) assert.True(t, acquired, "lock should be acquired after 
expiration") - _ = lock2.Unlock(ctx, "expire-key") + _ = lease2.Unlock(ctx) } func TestLockRenewal(t *testing.T) { @@ -155,19 +157,19 @@ func TestLockRenewal(t *testing.T) { lockInstance := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err := lockInstance.Lock(ctx, "renew-key", 1*time.Second) + lease, err := lockInstance.Acquire(ctx, "renew-key", 1*time.Second) require.NoError(t, err) time.Sleep(500 * time.Millisecond) - err = lockInstance.Renew(ctx, "renew-key", 2*time.Second) + err = lease.Renew(ctx, 2*time.Second) assert.NoError(t, err, "should renew lock successfully") isLocked, err := lockInstance.IsLocked(ctx, "renew-key") assert.NoError(t, err) assert.True(t, isLocked, "lock should still be held after renewal") - _ = lockInstance.Unlock(ctx, "renew-key") + _ = lease.Unlock(ctx) } func TestUnlockNotHeld(t *testing.T) { @@ -176,10 +178,14 @@ func TestUnlockNotHeld(t *testing.T) { lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err := lock1.Lock(ctx, "test-key", 5*time.Second) + lease1, err := lock1.Acquire(ctx, "test-key", 20*time.Millisecond) require.NoError(t, err) + time.Sleep(30 * time.Millisecond) - err = lock2.Unlock(ctx, "test-key") + lease2, acquired, err := lock2.TryAcquire(ctx, "test-key", 5*time.Second) + require.NoError(t, err) + require.True(t, acquired) + err = lease1.Unlock(ctx) assert.Error(t, err, "should return error") // 检查错误类型和错误码 @@ -188,7 +194,7 @@ func TestUnlockNotHeld(t *testing.T) { assert.Equal(t, bizerror.LockNotHeld, bizErr.Code(), "should return LockNotHeld error code") } - _ = lock1.Unlock(ctx, "test-key") + _ = lease2.Unlock(ctx) } func TestRenewNotHeld(t *testing.T) { @@ -197,10 +203,14 @@ func TestRenewNotHeld(t *testing.T) { lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err := lock1.Lock(ctx, "test-key", 5*time.Second) + lease1, err := lock1.Acquire(ctx, "test-key", 20*time.Millisecond) require.NoError(t, err) + time.Sleep(30 * time.Millisecond) - err = 
lock2.Renew(ctx, "test-key", 10*time.Second) + lease2, acquired, err := lock2.TryAcquire(ctx, "test-key", 5*time.Second) + require.NoError(t, err) + require.True(t, acquired) + err = lease1.Renew(ctx, 10*time.Second) assert.Error(t, err, "should return error") var bizErr bizerror.Error @@ -208,7 +218,7 @@ func TestRenewNotHeld(t *testing.T) { assert.Equal(t, bizerror.LockNotHeld, bizErr.Code(), "should return LockNotHeld error code") } - _ = lock1.Unlock(ctx, "test-key") + _ = lease2.Unlock(ctx) } func TestWithLock(t *testing.T) { @@ -217,7 +227,7 @@ func TestWithLock(t *testing.T) { ctx := context.Background() executed := false - err := lockInstance.WithLock(ctx, "with-lock-key", 2*time.Second, func() error { + err := corelock.WithLock(ctx, lockInstance, "with-lock-key", 2*time.Second, func(context.Context) error { executed = true isLocked, err := lockInstance.IsLocked(ctx, "with-lock-key") assert.NoError(t, err) @@ -240,8 +250,8 @@ func TestWithLockAutoRenewal(t *testing.T) { ctx := context.Background() executed := false - err := lockInstance.WithLock(ctx, "auto-renew-key", 15*time.Second, func() error { - time.Sleep(6 * time.Second) + err := corelock.WithLock(ctx, lockInstance, "auto-renew-key", 30*time.Millisecond, func(context.Context) error { + time.Sleep(80 * time.Millisecond) executed = true return nil }) @@ -262,7 +272,7 @@ func TestWithLockContextCancellation(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) started := make(chan struct{}) - err := lockInstance.WithLock(ctx, "cancel-key", 5*time.Second, func() error { + err := corelock.WithLock(ctx, lockInstance, "cancel-key", 5*time.Second, func(context.Context) error { close(started) cancel() time.Sleep(100 * time.Millisecond) @@ -271,7 +281,7 @@ func TestWithLockContextCancellation(t *testing.T) { <-started - assert.NoError(t, err, "function should complete even if context is cancelled during execution") + assert.ErrorIs(t, err, context.Canceled) time.Sleep(100 * time.Millisecond) 
isLocked, err := lockInstance.IsLocked(context.Background(), "cancel-key") @@ -279,14 +289,33 @@ func TestWithLockContextCancellation(t *testing.T) { assert.False(t, isLocked, "lock should be released even after context cancellation") } +func TestWithLockAcquisitionTimeout(t *testing.T) { + db := setupTestDB(t) + lock1 := gormlock.NewGormLockFromDB(db) + lock2 := gormlock.NewGormLockFromDB(db) + + lease1, err := lock1.Acquire(context.Background(), "timeout-key", time.Second) + require.NoError(t, err) + defer func() { _ = lease1.Unlock(context.Background()) }() + + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Millisecond) + defer cancel() + err = corelock.WithLock(ctx, lock2, "timeout-key", time.Second, func(context.Context) error { + t.Fatal("second owner must not enter while lock is held") + return nil + }) + require.Error(t, err) + assert.ErrorIs(t, err, context.DeadlineExceeded) +} + func TestCleanupExpiredLocks(t *testing.T) { db := setupTestDB(t) lock1 := gormlock.NewGormLockFromDB(db) lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - _, _ = lock1.TryLock(ctx, "cleanup-key-1", 100*time.Millisecond) - _, _ = lock2.TryLock(ctx, "cleanup-key-2", 100*time.Millisecond) + _, _, _ = lock1.TryAcquire(ctx, "cleanup-key-1", 100*time.Millisecond) + _, _, _ = lock2.TryAcquire(ctx, "cleanup-key-2", 100*time.Millisecond) time.Sleep(200 * time.Millisecond) @@ -303,9 +332,9 @@ func TestMultipleDifferentLocks(t *testing.T) { lockInstance := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err1 := lockInstance.Lock(ctx, "key-1", 5*time.Second) - err2 := lockInstance.Lock(ctx, "key-2", 5*time.Second) - err3 := lockInstance.Lock(ctx, "key-3", 5*time.Second) + lease1, err1 := lockInstance.Acquire(ctx, "key-1", 5*time.Second) + lease2, err2 := lockInstance.Acquire(ctx, "key-2", 5*time.Second) + lease3, err3 := lockInstance.Acquire(ctx, "key-3", 5*time.Second) assert.NoError(t, err1) assert.NoError(t, err2) @@ -319,9 +348,9 @@ func 
TestMultipleDifferentLocks(t *testing.T) { assert.True(t, isLocked2) assert.True(t, isLocked3) - _ = lockInstance.Unlock(ctx, "key-1") - _ = lockInstance.Unlock(ctx, "key-2") - _ = lockInstance.Unlock(ctx, "key-3") + _ = lease1.Unlock(ctx) + _ = lease2.Unlock(ctx) + _ = lease3.Unlock(ctx) } func TestLockBlockingBehavior(t *testing.T) { @@ -330,7 +359,7 @@ func TestLockBlockingBehavior(t *testing.T) { lock2 := gormlock.NewGormLockFromDB(db) ctx := context.Background() - err := lock1.Lock(ctx, "blocking-key", 10*time.Second) + lease1, err := lock1.Acquire(ctx, "blocking-key", 10*time.Second) require.NoError(t, err) isLocked, err := lock1.IsLocked(ctx, "blocking-key") @@ -341,16 +370,21 @@ func TestLockBlockingBehavior(t *testing.T) { done := make(chan time.Time) go func() { - _ = lock2.Lock(ctx, "blocking-key", 10*time.Second) + lease2, _ := lock2.Acquire(ctx, "blocking-key", 10*time.Second) + defer func() { + if lease2 != nil { + _ = lease2.Unlock(ctx) + } + }() done <- time.Now() }() time.Sleep(500 * time.Millisecond) - unlockErr := lock1.Unlock(ctx, "blocking-key") + unlockErr := lease1.Unlock(ctx) require.NoError(t, unlockErr, "unlock should succeed") - isLocked, err = lock1.IsLocked(ctx, "blocking-key") + _, err = lock1.IsLocked(ctx, "blocking-key") require.NoError(t, err) lock2AcquiredTime := <-done @@ -359,6 +393,40 @@ func TestLockBlockingBehavior(t *testing.T) { assert.GreaterOrEqual(t, duration, 500*time.Millisecond, "lock2 should acquire after lock1 releases") assert.Less(t, duration, 1500*time.Millisecond, "lock2 should acquire shortly after lock1 releases") +} - _ = lock2.Unlock(ctx, "blocking-key") +func TestGormLockSameInstanceABADelayedUnlockAndRenew(t *testing.T) { + db := setupTestDB(t) + lockInstance := gormlock.NewGormLockFromDB(db) + + leaseA, err := lockInstance.Acquire(context.Background(), "same-instance-aba", 20*time.Millisecond) + require.NoError(t, err) + time.Sleep(30 * time.Millisecond) + + leaseB, acquired, err := 
lockInstance.TryAcquire(context.Background(), "same-instance-aba", time.Second) + require.NoError(t, err) + require.True(t, acquired) + require.NotEqual(t, leaseA.Token(), leaseB.Token()) + + require.Error(t, leaseA.Unlock(context.Background())) + require.Error(t, leaseA.Renew(context.Background(), time.Second)) + + locked, err := lockInstance.IsLocked(context.Background(), "same-instance-aba") + require.NoError(t, err) + require.True(t, locked) + require.NoError(t, leaseB.Renew(context.Background(), time.Second)) + require.NoError(t, leaseB.Unlock(context.Background())) +} + +func TestGormWithLockCancelsOnLeaseLoss(t *testing.T) { + db := setupTestDB(t) + lockInstance := gormlock.NewGormLockFromDB(db) + + err := corelock.WithLock(context.Background(), lockInstance, "lost-lease", 30*time.Millisecond, func(leaseCtx context.Context) error { + require.NoError(t, db.Where("lock_key = ?", "lost-lease").Delete(&gormlock.LockRecord{}).Error) + <-leaseCtx.Done() + return nil + }) + require.Error(t, err) + require.True(t, errors.Is(err, corelock.ErrLockLeaseLost)) } diff --git a/pkg/lock/local/factory.go b/pkg/lock/local/factory.go new file mode 100644 index 000000000..71fc57097 --- /dev/null +++ b/pkg/lock/local/factory.go @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package local + +import ( + storecfg "github.com/apache/dubbo-admin/pkg/config/store" + corelock "github.com/apache/dubbo-admin/pkg/core/lock" + "github.com/apache/dubbo-admin/pkg/core/runtime" +) + +func init() { + corelock.RegisterLockFactory(&factory{}) +} + +type factory struct{} + +func (f *factory) Support(ctx runtime.BuilderContext) bool { + return ctx.Config().Store != nil && ctx.Config().Store.Type == storecfg.Memory +} + +func (f *factory) NewLock(runtime.BuilderContext) (corelock.Lock, error) { + return NewLocalLock(), nil +} diff --git a/pkg/lock/local/lock.go b/pkg/lock/local/lock.go new file mode 100644 index 000000000..8bd24008c --- /dev/null +++ b/pkg/lock/local/lock.go @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package local + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + "github.com/apache/dubbo-admin/pkg/common/constants" + corelock "github.com/apache/dubbo-admin/pkg/core/lock" +) + +var ( + defaultBackend = &backend{locks: map[string]record{}} +) + +type record struct { + token string + expireAt time.Time +} + +type backend struct { + mu sync.Mutex + locks map[string]record +} + +// LocalLock implements the shared lock contract for process-local memory +// stores. It is intentionally owner-aware so tests exercise the same unlock and +// renew invariants as distributed backends. +type LocalLock struct { + backend *backend +} + +var _ corelock.Lock = (*LocalLock)(nil) + +// NewLocalLock returns a process-local lock backend. It is only shared inside +// one admin process, so clustered deployments must use a distributed backend. +func NewLocalLock() corelock.Lock { + return &LocalLock{ + backend: defaultBackend, + } +} + +type lease struct { + *corelock.LeaseState + lock *LocalLock +} + +func (l *LocalLock) Acquire(ctx context.Context, key string, ttl time.Duration) (corelock.Lease, error) { + ticker := time.NewTicker(constants.DefaultLockRetryInterval) + defer ticker.Stop() + + for { + lease, acquired, err := l.TryAcquire(ctx, key, ttl) + if err != nil { + return nil, err + } + if acquired { + return lease, nil + } + + select { + case <-ctx.Done(): + return nil, fmt.Errorf("failed to acquire local lock %s: %w", key, ctx.Err()) + case <-ticker.C: + } + } +} + +func (l *LocalLock) TryAcquire(ctx context.Context, key string, ttl time.Duration) (corelock.Lease, bool, error) { + if err := ctx.Err(); err != nil { + return nil, false, err + } + token, err := corelock.NewLeaseToken() + if err != nil { + return nil, false, err + } + now := time.Now() + l.backend.mu.Lock() + defer l.backend.mu.Unlock() + + if current, exists := l.backend.locks[key]; exists && current.expireAt.After(now) { + return nil, false, nil + 
} + l.backend.locks[key] = record{token: token, expireAt: now.Add(ttl)} + return &lease{ + LeaseState: corelock.NewLeaseState(key, token), + lock: l, + }, true, nil +} + +func (l *lease) Unlock(ctx context.Context) error { + if err := ctx.Err(); err != nil { + return err + } + l.lock.backend.mu.Lock() + defer l.lock.backend.mu.Unlock() + + current, exists := l.lock.backend.locks[l.Key()] + if !exists || current.token != l.Token() { + return bizerror.New(bizerror.LockNotHeld, "lock not held by this owner") + } + delete(l.lock.backend.locks, l.Key()) + return nil +} + +func (l *lease) Renew(ctx context.Context, ttl time.Duration) error { + if err := ctx.Err(); err != nil { + return err + } + l.lock.backend.mu.Lock() + defer l.lock.backend.mu.Unlock() + + now := time.Now() + current, exists := l.lock.backend.locks[l.Key()] + if !exists || current.token != l.Token() { + return bizerror.New(bizerror.LockNotHeld, "lock not held by this owner") + } + if !current.expireAt.After(now) { + delete(l.lock.backend.locks, l.Key()) + return bizerror.New(bizerror.LockNotHeld, "lock lease expired") + } + current.expireAt = now.Add(ttl) + l.lock.backend.locks[l.Key()] = current + return nil +} + +func (l *LocalLock) IsLocked(ctx context.Context, key string) (bool, error) { + if err := ctx.Err(); err != nil { + return false, err + } + now := time.Now() + l.backend.mu.Lock() + defer l.backend.mu.Unlock() + + current, exists := l.backend.locks[key] + if !exists { + return false, nil + } + if !current.expireAt.After(now) { + delete(l.backend.locks, key) + return false, nil + } + return true, nil +} + +func (l *LocalLock) CleanupExpiredLocks(ctx context.Context) error { + if err := ctx.Err(); err != nil { + return err + } + now := time.Now() + l.backend.mu.Lock() + defer l.backend.mu.Unlock() + + for key, current := range l.backend.locks { + if !current.expireAt.After(now) { + delete(l.backend.locks, key) + } + } + return nil +} diff --git a/pkg/store/dbcommon/gorm_store.go 
b/pkg/store/dbcommon/gorm_store.go index cedc20144..618e71a76 100644 --- a/pkg/store/dbcommon/gorm_store.go +++ b/pkg/store/dbcommon/gorm_store.go @@ -22,6 +22,7 @@ import ( "fmt" "reflect" "sort" + "strings" "sync" "gorm.io/gorm" @@ -48,6 +49,7 @@ type GormStore struct { } var _ store.ManagedResourceStore = &GormStore{} +var _ store.ConditionalResourceStore = &GormStore{} // NewGormStore creates a new GORM store for the specified resource kind func NewGormStore(kind model.ResourceKind, address string, pool *ConnectionPool) *GormStore { @@ -216,6 +218,72 @@ func (gs *GormStore) Update(obj interface{}) error { }) } +// UpdateIfUnchanged replaces a resource only when the stored serialized +// resource still matches expected. The conditional UPDATE and index rewrite run +// in one transaction; RowsAffected=0 is a CAS miss and leaves index rows intact. +func (gs *GormStore) UpdateIfUnchanged(expected model.Resource, updated model.Resource) (bool, error) { + if expected == nil || updated == nil { + return false, fmt.Errorf("expected and updated resources are required") + } + if expected.ResourceKind() != gs.kind || updated.ResourceKind() != gs.kind { + return false, fmt.Errorf("resource kind mismatch: expected store kind %s, got expected=%s updated=%s", gs.kind, expected.ResourceKind(), updated.ResourceKind()) + } + if expected.ResourceKey() != updated.ResourceKey() { + return false, fmt.Errorf("conditional update resource key mismatch: expected %s, updated %s", expected.ResourceKey(), updated.ResourceKey()) + } + + expectedModel, err := FromResource(expected) + if err != nil { + return false, err + } + updatedModel, err := FromResource(updated) + if err != nil { + return false, err + } + + db := gs.pool.GetDB() + var changed bool + err = db.Transaction(func(tx *gorm.DB) error { + result := tx.Scopes(TableScope(gs.kind.ToString())).Model(&ResourceModel{}). + Where("resource_key = ? AND name = ? AND mesh = ? 
AND data = ?", expected.ResourceKey(), expectedModel.Name, expectedModel.Mesh, expectedModel.Data). + Updates(map[string]interface{}{ + "name": updatedModel.Name, + "mesh": updatedModel.Mesh, + "data": updatedModel.Data, + }) + if result.Error != nil { + if isSQLiteLockedError(result.Error) { + return fmt.Errorf("%w: %v", store.ErrResourceStoreTransient, result.Error) + } + return result.Error + } + if result.RowsAffected == 0 { + changed = false + return nil + } + if err := gs.persistIndexEntriesTx(tx, updated, expected); err != nil { + return fmt.Errorf("failed to persist index entries for %s: %w", updated.ResourceKey(), err) + } + changed = true + return nil + }) + if err != nil { + if isSQLiteLockedError(err) { + return false, fmt.Errorf("%w: %v", store.ErrResourceStoreTransient, err) + } + return false, err + } + return changed, nil +} + +func isSQLiteLockedError(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "database table is locked") || strings.Contains(msg, "database is locked") +} + // Delete removes a resource from the database func (gs *GormStore) Delete(obj interface{}) error { resource, ok := obj.(model.Resource) @@ -594,7 +662,7 @@ func (gs *GormStore) findByIndex(indexName, indexedValue string) ([]interface{}, func (gs *GormStore) getKeysByIndexes(indexes []index.IndexCondition) ([]string, error) { if len(indexes) == 0 { - return gs.ListKeys(), nil + return []string{}, nil } var keySet map[string]struct{} diff --git a/pkg/store/dbcommon/gorm_store_test.go b/pkg/store/dbcommon/gorm_store_test.go index b1dd1b8c6..32b71c579 100644 --- a/pkg/store/dbcommon/gorm_store_test.go +++ b/pkg/store/dbcommon/gorm_store_test.go @@ -18,14 +18,18 @@ package dbcommon import ( + "context" "encoding/json" + "errors" "fmt" "os" + "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gorm.io/driver/sqlite" + "gorm.io/gorm" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -33,6 +37,7 @@ import ( storecfg "github.com/apache/dubbo-admin/pkg/config/store" "github.com/apache/dubbo-admin/pkg/core/resource/model" + corestore "github.com/apache/dubbo-admin/pkg/core/store" "github.com/apache/dubbo-admin/pkg/core/store/index" ) @@ -118,12 +123,26 @@ func (m mockResourceList) SetItems(items []model.Resource) { // setupTestStore creates a new GormStore with an in-memory SQLite database for testing func setupTestStore(t *testing.T) (*GormStore, func()) { // Create temporary SQLite database file for better isolation and reliability - tmpFile, err := os.CreateTemp("", fmt.Sprintf("test-db-%s-*.db", t.Name())) + dbPath := tempSQLitePath(t) + dialector := sqlite.Open(dbPath) + return setupTestStoreWithDialector(t, dialector) +} + +func tempSQLitePath(t *testing.T) string { + t.Helper() + safeName := strings.NewReplacer("/", "_", "\\", "_").Replace(t.Name()) + tmpFile, err := os.CreateTemp("", fmt.Sprintf("test-db-%s-*.db", safeName)) require.NoError(t, err) dbPath := tmpFile.Name() - tmpFile.Close() + require.NoError(t, tmpFile.Close()) + t.Cleanup(func() { + _ = os.Remove(dbPath) + }) + return dbPath +} - dialector := sqlite.Open(dbPath) +func setupTestStoreWithDialector(t *testing.T, dialector gorm.Dialector) (*GormStore, func()) { + t.Helper() pool, err := NewConnectionPool(dialector, storecfg.MySQL, t.Name(), DefaultConnectionPoolConfig()) require.NoError(t, err) @@ -146,8 +165,7 @@ func setupTestStore(t *testing.T) (*GormStore, func()) { // Cleanup function cleanup := func() { - pool.Close() - os.Remove(dbPath) + _ = pool.Close() } return store, cleanup @@ -347,6 +365,97 @@ func TestGormStore_UpdateNonExistent(t *testing.T) { assert.Contains(t, err.Error(), "not found") } +func TestGormStore_UpdateIfUnchangedDistinguishesCASMissDBLockedAndSQLError(t *testing.T) { + t.Run("cas miss", func(t *testing.T) { + store, cleanup := 
setupTestStore(t) + defer cleanup() + require.NoError(t, store.Init(nil)) + + current := &mockResource{ + Kind: "TestResource", + Key: "cas-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "current"}, + } + require.NoError(t, store.Add(current)) + stale := &mockResource{ + Kind: "TestResource", + Key: "cas-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "stale"}, + } + updated := &mockResource{ + Kind: "TestResource", + Key: "cas-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "updated"}, + } + + changed, err := store.UpdateIfUnchanged(stale, updated) + require.NoError(t, err) + assert.False(t, changed) + }) + + t.Run("db locked", func(t *testing.T) { + dbPath := tempSQLitePath(t) + store, cleanup := setupTestStoreWithDialector(t, sqlite.Open(dbPath+"?_busy_timeout=1")) + defer cleanup() + require.NoError(t, store.Init(nil)) + + current := &mockResource{ + Kind: "TestResource", + Key: "locked-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "current"}, + } + require.NoError(t, store.Add(current)) + updated := &mockResource{ + Kind: "TestResource", + Key: "locked-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "updated"}, + } + + sqlDB, err := store.pool.GetDB().DB() + require.NoError(t, err) + sqlDB.SetMaxOpenConns(2) + conn, err := sqlDB.Conn(context.Background()) + require.NoError(t, err) + defer conn.Close() + _, err = conn.ExecContext(context.Background(), "BEGIN EXCLUSIVE") + require.NoError(t, err) + defer conn.ExecContext(context.Background(), "ROLLBACK") + + changed, err := store.UpdateIfUnchanged(current, updated) + require.ErrorIs(t, err, corestore.ErrResourceStoreTransient) + assert.False(t, changed) + }) + + t.Run("ordinary sql error", func(t *testing.T) { + store, cleanup := setupTestStore(t) + require.NoError(t, store.Init(nil)) + current := &mockResource{ + Kind: "TestResource", + Key: "sql-error-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "current"}, + } + require.NoError(t, store.Add(current)) 
+ cleanup() + updated := &mockResource{ + Kind: "TestResource", + Key: "sql-error-key", + Mesh: "default", + Meta: metav1.ObjectMeta{Name: "updated"}, + } + + changed, err := store.UpdateIfUnchanged(current, updated) + require.Error(t, err) + assert.False(t, changed) + assert.False(t, errors.Is(err, corestore.ErrResourceStoreTransient)) + }) +} + func TestGormStore_Delete(t *testing.T) { store, cleanup := setupTestStore(t) defer cleanup() @@ -802,10 +911,46 @@ func TestGormStore_ListByIndexesEmpty(t *testing.T) { err = store.Add(mockRes) require.NoError(t, err) - // List with empty indexes should return all resources + // Empty index conditions preserve memory-store semantics: no indexed query means no results. resources, err := store.ListByIndexes([]index.IndexCondition{}) assert.NoError(t, err) - assert.Len(t, resources, 1) + assert.Empty(t, resources) +} + +func TestGormStore_ListResourcesSorted(t *testing.T) { + store, cleanup := setupTestStore(t) + defer cleanup() + + err := store.Init(nil) + require.NoError(t, err) + + mockRes1 := &mockResource{ + Kind: "TestResource", + Key: "mesh/test-key-2", + Mesh: "mesh", + Meta: metav1.ObjectMeta{Name: "test-resource-2"}, + } + mockRes2 := &mockResource{ + Kind: "TestResource", + Key: "mesh/test-key-1", + Mesh: "mesh", + Meta: metav1.ObjectMeta{Name: "test-resource-1"}, + } + + err = store.Add(mockRes1) + require.NoError(t, err) + err = store.Add(mockRes2) + require.NoError(t, err) + + resources := store.List() + require.Len(t, resources, 2) + // List() returns resources in arbitrary order, so check both are present + keys := []string{ + resources[0].(model.Resource).ResourceKey(), + resources[1].(model.Resource).ResourceKey(), + } + assert.Contains(t, keys, "mesh/test-key-1") + assert.Contains(t, keys, "mesh/test-key-2") } func TestGormStore_PageListByIndexes(t *testing.T) { diff --git a/pkg/store/memory/store.go b/pkg/store/memory/store.go index 0b392bd58..c56a64fdb 100644 --- a/pkg/store/memory/store.go +++ 
b/pkg/store/memory/store.go @@ -40,10 +40,12 @@ type resourceStore struct { rk coremodel.ResourceKind storeProxy cache.Indexer prefixTrees map[string]*radix.Tree + mu sync.Mutex treesMu sync.RWMutex } var _ store.ManagedResourceStore = &resourceStore{} +var _ store.ConditionalResourceStore = &resourceStore{} func NewMemoryResourceStore(rk coremodel.ResourceKind) store.ManagedResourceStore { return &resourceStore{rk: rk} @@ -74,6 +76,15 @@ func (rs *resourceStore) Start(_ runtime.Runtime, _ <-chan struct{}) error { } func (rs *resourceStore) Add(obj interface{}) error { + rs.mu.Lock() + defer rs.mu.Unlock() + if r, ok := obj.(coremodel.Resource); ok { + if _, exists, err := rs.storeProxy.GetByKey(r.ResourceKey()); err != nil { + return err + } else if exists { + return store.ErrorResourceAlreadyExists(r.ResourceKind().ToString(), r.ResourceMeta().Name, r.ResourceMesh()) + } + } if err := rs.storeProxy.Add(obj); err != nil { return err } @@ -85,6 +96,8 @@ func (rs *resourceStore) Add(obj interface{}) error { } func (rs *resourceStore) Update(obj interface{}) error { + rs.mu.Lock() + defer rs.mu.Unlock() r, ok := obj.(coremodel.Resource) var oldRes coremodel.Resource if ok { @@ -107,7 +120,45 @@ func (rs *resourceStore) Update(obj interface{}) error { return nil } +func (rs *resourceStore) UpdateIfUnchanged(expected coremodel.Resource, updated coremodel.Resource) (bool, error) { + if expected == nil || updated == nil { + return false, fmt.Errorf("expected and updated resources are required") + } + if expected.ResourceKind() != rs.rk || updated.ResourceKind() != rs.rk { + return false, fmt.Errorf("resource kind mismatch: expected store kind %s, got expected=%s updated=%s", rs.rk, expected.ResourceKind(), updated.ResourceKind()) + } + if expected.ResourceKey() != updated.ResourceKey() { + return false, fmt.Errorf("conditional update resource key mismatch: expected %s, updated %s", expected.ResourceKey(), updated.ResourceKey()) + } + + rs.mu.Lock() + defer rs.mu.Unlock() 
+ + currentObj, exists, err := rs.storeProxy.GetByKey(expected.ResourceKey()) + if err != nil { + return false, err + } + if !exists { + return false, nil + } + current, ok := currentObj.(coremodel.Resource) + if !ok { + return false, bizerror.NewAssertionError("Resource", reflect.TypeOf(currentObj).Name()) + } + if !reflect.DeepEqual(current, expected) { + return false, nil + } + if err := rs.storeProxy.Update(updated); err != nil { + return false, err + } + rs.removeFromTrees(current) + rs.addToTrees(updated) + return true, nil +} + func (rs *resourceStore) Delete(obj interface{}) error { + rs.mu.Lock() + defer rs.mu.Unlock() if err := rs.storeProxy.Delete(obj); err != nil { return err } @@ -134,6 +185,8 @@ func (rs *resourceStore) GetByKey(key string) (item interface{}, exists bool, er } func (rs *resourceStore) Replace(i []interface{}, s string) error { + rs.mu.Lock() + defer rs.mu.Unlock() // Clear all trees before replace rs.treesMu.Lock() for indexName := range rs.prefixTrees { diff --git a/pkg/store/memory/store_test.go b/pkg/store/memory/store_test.go index 8ad401995..9bb51af66 100644 --- a/pkg/store/memory/store_test.go +++ b/pkg/store/memory/store_test.go @@ -227,6 +227,44 @@ func TestResourceStore_List(t *testing.T) { assert.Contains(t, list, mockRes2) } +func TestResourceStore_ListResourcesSortedAndEmptyIndexes(t *testing.T) { + store := NewMemoryResourceStore("TestResource") + err := store.Init(nil) + assert.NoError(t, err) + + mockRes1 := &mockResource{ + kind: "TestResource", + key: "mesh/test-key-2", + mesh: "mesh", + meta: metav1.ObjectMeta{Name: "test-resource-2"}, + } + mockRes2 := &mockResource{ + kind: "TestResource", + key: "mesh/test-key-1", + mesh: "mesh", + meta: metav1.ObjectMeta{Name: "test-resource-1"}, + } + + err = store.Add(mockRes1) + assert.NoError(t, err) + err = store.Add(mockRes2) + assert.NoError(t, err) + + resources := store.List() + assert.Len(t, resources, 2) + // List() returns resources in arbitrary order, so check both 
are present + keys := []string{ + resources[0].(model.Resource).ResourceKey(), + resources[1].(model.Resource).ResourceKey(), + } + assert.Contains(t, keys, "mesh/test-key-1") + assert.Contains(t, keys, "mesh/test-key-2") + + indexed, err := store.ListByIndexes([]index.IndexCondition{}) + assert.NoError(t, err) + assert.Empty(t, indexed) +} + func TestResourceStore_ListKeys(t *testing.T) { store := NewMemoryResourceStore("TestResource") err := store.Init(nil) diff --git a/release/kubernetes/dubbo-system/dubbo-admin.yaml b/release/kubernetes/dubbo-system/dubbo-admin.yaml index 54216c4f2..9d8ea0217 100644 --- a/release/kubernetes/dubbo-system/dubbo-admin.yaml +++ b/release/kubernetes/dubbo-system/dubbo-admin.yaml @@ -96,12 +96,14 @@ data: observability: grafana: http://grafana.monitoringg.svc:3000 prometheus: http://prometheus-k8s.monitoring.svc:9090/ - console: - auth: - user: admin - password: dubbo@2025 - expirationTime: 3600 - discovery: + console: + auth: + user: admin + password: dubbo@2025 + expirationTime: 3600 + ruleVersioning: + maxVersionsPerRule: 50 + discovery: - type: nacos2 name: nacos2.5-standalone id: nacos2.5 diff --git a/ui-vue3/src/api/service/traffic.ts b/ui-vue3/src/api/service/traffic.ts index 9d4ff0a29..61ad0d029 100644 --- a/ui-vue3/src/api/service/traffic.ts +++ b/ui-vue3/src/api/service/traffic.ts @@ -17,6 +17,151 @@ import request from '@/base/http/request' +export type TrafficRuleKind = 'condition-rule' | 'tag-rule' | 'configurator' + +// Version and intent IDs are int64 values serialized as decimal strings by the +// API. Keep them as strings in the UI to avoid JavaScript number precision loss. 
+export interface RuleVersion { + id: string + ruleKind: string + mesh: string + resourceKey: string + ruleName: string + versionNo: number + contentHash: string + specJson: string + source: 'ADMIN' | 'UPSTREAM' | 'BOOTSTRAP' | 'ROLLBACK' | string + operation: 'CREATE' | 'UPDATE' | 'DELETE' | string + author: string + reason?: string + intentId?: string + rolledBackFromId?: string + createdAt: string + committedAt?: string + isCurrent: boolean +} + +export interface RuleVersionList { + items: RuleVersion[] + total: number + currentVersionId?: string + currentVersionNo?: number + deleted?: boolean +} + +export interface RuleVersionDiffSide { + id: string + versionNo: number + specJson: string +} + +export interface RuleVersionDiff { + left: RuleVersionDiffSide + right: RuleVersionDiffSide +} + +export interface RuleMutationOptions { + expectedVersionId?: string +} + +export interface RollbackRuleVersionResult { + rolledBackFromId: string + versionId: string + versionNo: number + source: 'ROLLBACK' | string + committed: boolean +} + +export interface VersionConflictError { + code: 'VERSION_CONFLICT' + message: string + currentVersionId?: string | null +} + +export interface VersionLedgerPendingError { + code: 'VERSION_LEDGER_PENDING' + message: string + intentId?: string +} + +const ruleNameForPath = (kind: TrafficRuleKind, ruleName: string): string => { + return kind === 'configurator' ? encodeURIComponent(ruleName) : ruleName +} + +const withExpectedVersion = (options?: RuleMutationOptions) => { + return options?.expectedVersionId !== undefined + ? 
{ expectedVersionId: options.expectedVersionId } + : undefined +} + +export const listRuleVersionsAPI = ( + kind: TrafficRuleKind, + ruleName: string +): Promise<{ code: string; data: RuleVersionList }> => { + return request({ + url: `/${kind}/${ruleNameForPath(kind, ruleName)}/versions`, + method: 'get' + }) +} + +export const getRuleVersionAPI = ( + kind: TrafficRuleKind, + ruleName: string, + versionId: string +): Promise<{ code: string; data: RuleVersion }> => { + return request({ + url: `/${kind}/${ruleNameForPath(kind, ruleName)}/versions/${versionId}`, + method: 'get' + }) +} + +export const diffRuleVersionAPI = ( + kind: TrafficRuleKind, + ruleName: string, + versionId: string, + against = 'current' +): Promise<{ code: string; data: RuleVersionDiff }> => { + return request({ + url: `/${kind}/${ruleNameForPath(kind, ruleName)}/versions/${versionId}/diff`, + method: 'get', + params: { against } + }) +} + +export const rollbackRuleVersionAPI = ( + kind: TrafficRuleKind, + ruleName: string, + versionId: string, + reason: string, + expectedVersionId?: string +): Promise<{ code: string; data: RollbackRuleVersionResult }> => { + return request({ + url: `/${kind}/${ruleNameForPath(kind, ruleName)}/versions/${versionId}/rollback`, + method: 'post', + data: { reason, expectedVersionId } + }) +} + +export const repairRuleVersionIntentAPI = ( + intentId: string +): Promise<{ code: string; data: RuleVersion }> => { + return request({ + url: `/rule-version-intents/${intentId}/repair`, + method: 'post' + }) +} + +export const abandonRuleVersionIntentAPI = ( + intentId: string, + reason: string +): Promise<{ code: string; data: string }> => { + return request({ + url: `/rule-version-intents/${intentId}/abandon`, + method: 'post', + data: { reason } + }) +} + export const searchRoutingRule = (params: any): Promise => { return request({ url: '/condition-rule/search', @@ -34,28 +179,42 @@ export const getConditionRuleDetailAPI = (ruleName: string): Promise => { } // Delete 
condition routing. -export const deleteConditionRuleAPI = (ruleName: string): Promise => { +export const deleteConditionRuleAPI = ( + ruleName: string, + options?: RuleMutationOptions +): Promise => { return request({ url: `/condition-rule/${ruleName}`, - method: 'delete' + method: 'delete', + params: withExpectedVersion(options) }) } // update condition routing. -export const updateConditionRuleAPI = (ruleName: string, data: any): Promise => { +export const updateConditionRuleAPI = ( + ruleName: string, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/condition-rule/${ruleName}`, method: 'put', - data + data, + params: withExpectedVersion(options) }) } // add condition routing. -export const addConditionRuleAPI = (ruleName: string, data: any): Promise => { +export const addConditionRuleAPI = ( + ruleName: string, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/condition-rule/${ruleName}`, method: 'post', - data + data, + params: withExpectedVersion(options) }) } @@ -68,10 +227,11 @@ export const searchTagRule = (params: any): Promise => { } // Delete tag routing. 
-export const deleteTagRuleAPI = (ruleName: string): Promise => { +export const deleteTagRuleAPI = (ruleName: string, options?: RuleMutationOptions): Promise => { return request({ url: `/tag-rule/${ruleName}`, - method: 'delete' + method: 'delete', + params: withExpectedVersion(options) }) } @@ -83,19 +243,29 @@ export const getTagRuleDetailAPI = (ruleName: string): Promise => { }) } -export const updateTagRuleAPI = (ruleName: string, data: any): Promise => { +export const updateTagRuleAPI = ( + ruleName: string, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/tag-rule/${ruleName}`, method: 'put', - data + data, + params: withExpectedVersion(options) }) } -export const addTagRuleAPI = (ruleName: string, data: any): Promise => { +export const addTagRuleAPI = ( + ruleName: string, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/tag-rule/${ruleName}`, method: 'post', - data + data, + params: withExpectedVersion(options) }) } @@ -129,25 +299,34 @@ export const getConfiguratorDetail = (params: any): Promise => { method: 'get' }) } -export const saveConfiguratorDetail = (params: any, data: any): Promise => { +export const saveConfiguratorDetail = ( + params: any, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/configurator/${encodeURIComponent(params.name)}`, method: 'put', - data + data, + params: withExpectedVersion(options) }) } -export const addConfiguratorDetail = (params: any, data: any): Promise => { +export const addConfiguratorDetail = ( + params: any, + data: any, + options?: RuleMutationOptions +): Promise => { return request({ url: `/configurator/${encodeURIComponent(params.name)}`, method: 'post', - data + data, + params: withExpectedVersion(options) }) } -export const delConfiguratorDetail = (params: any): Promise => { +export const delConfiguratorDetail = (params: any, options?: RuleMutationOptions): Promise => { return request({ url: 
`/configurator/${encodeURIComponent(params.name)}`, - method: 'delete' + method: 'delete', + params: withExpectedVersion(options) }) } - -// TODO Perform front-end and back-end joint debugging diff --git a/ui-vue3/src/base/http/request.ts b/ui-vue3/src/base/http/request.ts index 094a76cdd..eb39c4cf4 100644 --- a/ui-vue3/src/base/http/request.ts +++ b/ui-vue3/src/base/http/request.ts @@ -39,6 +39,10 @@ const isSilentErrorUrl = (url?: string): boolean => { return SILENT_ERROR_URLS.some((silentUrl) => url.includes(silentUrl)) } +const shouldShowErrorMessage = (url?: string, code?: string): boolean => { + return !isSilentErrorUrl(url) && code !== 'VERSION_CONFLICT' && code !== 'VERSION_LEDGER_PENDING' +} + const service: AxiosInstance = axios.create({ baseURL: '/api/v1', timeout: 30 * 1000 @@ -82,7 +86,7 @@ response.use( // Show error toast message const errorMsg = `${response.data.code}:${response.data.message}` - if (!isSilentErrorUrl(response.config.url)) { + if (shouldShowErrorMessage(response.config.url, response.data.code)) { message.error(errorMsg) } console.error(errorMsg) @@ -120,7 +124,7 @@ response.use( } if (response?.data) { const errorMsg = `${response.data?.code}:${response.data?.message}` - if (!isSilentErrorUrl(error.config?.url)) { + if (shouldShowErrorMessage(error.config?.url, response.data?.code)) { message.error(errorMsg) } console.error(errorMsg) diff --git a/ui-vue3/src/base/i18n/en.ts b/ui-vue3/src/base/i18n/en.ts index 1ae6ec15a..a176555da 100644 --- a/ui-vue3/src/base/i18n/en.ts +++ b/ui-vue3/src/base/i18n/en.ts @@ -110,7 +110,7 @@ const words: I18nType = { flowControlDomain: { actuatingRange: 'Actuating range', notSet: 'Not set', - versionRecords: 'Version records', + versionRecords: 'Version history', YAMLView: 'YAML View', addConfiguration: 'Add configuration', addConfigurationItem: 'Add configurationItem', @@ -554,6 +554,64 @@ const words: I18nType = { copy: 'You have successfully copied a piece of information' } }, + ruleVersionDomain: { 
+ versionRecords: 'Version history', + versionJson: 'Version JSON', + versionDiff: 'Version Diff', + current: 'current', + currentVersion: 'Current version', + currentDeleted: 'Current deleted', + targetVersion: 'Target version', + versionConflict: 'Version conflict', + versionConflictDescription: + 'The rule was changed by another operation. Reload before submitting again.', + ledgerPending: 'Version ledger pending', + ledgerPendingDescription: + 'This rule has an unfinished version intent. Reload before submitting again.', + ledgerPendingWithIntent: + 'This rule has unfinished version intent #{intentId}. Repair or abandon it before submitting.', + empty: 'No version records', + view: 'View', + diffCurrent: 'Diff current', + rollback: 'Rollback', + rollbackConfirmTitle: 'Confirm rollback', + rollbackDeletedWarning: 'The current rule is deleted. Rollback will recreate it.', + rollbackCasHint: + 'If the current version changed, the backend will reject this rollback. Reload and try again.', + rollbackAppendHint: + 'Rollback creates a new version from the historical snapshot. Existing history is not modified.', + rollbackReason: 'Rollback reason', + rollbackReasonPlaceholder: 'Enter a rollback reason', + rollbackReasonRequired: 'Rollback reason is required', + rollbackSuccess: 'Rollback succeeded and created a new version', + rollbackSuccessWithVersion: 'Rollback succeeded and created v{versionNo}', + rollbackConflict: 'Version conflict: the rule was changed. Reload and try again.', + rollbackPending: 'A version intent is pending. 
Repair or abandon it first.', + rollbackFailed: 'Rollback failed', + diffFailed: 'Failed to load version diff', + rollbackCurrentDisabled: 'Current version does not need rollback', + rollbackDeleteDisabled: 'Delete markers cannot be rolled back', + source: 'Source', + author: 'Author', + createdAt: 'Created at', + modifiedAt: 'Modified at', + sourceAdmin: 'Console', + sourceUpstream: 'External sync', + sourceBootstrap: 'Bootstrap', + sourceRollback: 'Rollback', + changeReason: 'Change reason', + reason: 'Reason', + none: 'None', + reload: 'Reload', + repair: 'Repair', + repairFailed: 'Failed to repair intent', + abandon: 'Abandon', + cancel: 'Cancel', + abandonIntentTitle: 'Abandon version intent', + abandonReasonPlaceholder: 'Enter the abandon reason', + abandonReasonRequired: 'Abandon reason is required', + abandonFailed: 'Failed to abandon intent' + }, backHome: 'Back Home', noPageTip: 'Sorry, the page you visited does not exist.', globalSearchTip: 'Search ip, application, instance, service', diff --git a/ui-vue3/src/base/i18n/zh.ts b/ui-vue3/src/base/i18n/zh.ts index c355a42c9..19e18b177 100644 --- a/ui-vue3/src/base/i18n/zh.ts +++ b/ui-vue3/src/base/i18n/zh.ts @@ -530,6 +530,60 @@ const words: I18nType = { copy: '您已经成功复制一条信息' } }, + ruleVersionDomain: { + versionRecords: '版本记录', + versionJson: '版本 JSON', + versionDiff: '版本差异', + current: '当前', + currentVersion: '当前版本', + currentDeleted: '当前已删除', + targetVersion: '目标版本', + versionConflict: '版本冲突', + versionConflictDescription: '规则已被其他操作更新,请重新加载当前版本后再提交。', + ledgerPending: '版本账本待恢复', + ledgerPendingDescription: '当前规则存在未完成的版本 intent,请重新加载后再提交。', + ledgerPendingWithIntent: + '当前规则存在未完成的版本 intent #{intentId},请先修复或放弃后再提交。', + empty: '暂无版本记录', + view: '查看', + diffCurrent: '对比当前', + rollback: '回滚', + rollbackConfirmTitle: '确认回滚', + rollbackDeletedWarning: '当前规则已删除,回滚会重新创建该规则', + rollbackCasHint: '如果当前版本已变化,后端会拒绝本次回滚,请刷新后重试。', + rollbackAppendHint: '回滚会基于该历史版本创建一条新的版本记录,已有历史记录不会被修改。', + rollbackReason: '回滚原因', + 
rollbackReasonPlaceholder: '请输入回滚原因(必填)', + rollbackReasonRequired: '请输入回滚原因', + rollbackSuccess: '回滚成功,已创建新版本', + rollbackSuccessWithVersion: '回滚成功,已创建 v{versionNo}', + rollbackConflict: '版本冲突:当前规则已被其他人修改,请刷新后重试', + rollbackPending: '当前规则存在未完成的版本 intent,请先修复或放弃后再重试', + rollbackFailed: '回滚失败', + diffFailed: '加载版本差异失败', + rollbackCurrentDisabled: '当前版本无需回滚', + rollbackDeleteDisabled: '删除标记不能回滚', + source: '来源', + author: '作者', + createdAt: '创建时间', + modifiedAt: '修改时间', + sourceAdmin: '控制台', + sourceUpstream: '外部同步', + sourceBootstrap: '启动同步', + sourceRollback: '回滚', + changeReason: '变更原因', + reason: '原因', + none: '暂无', + reload: '重新加载', + repair: '修复', + repairFailed: '修复 intent 失败', + abandon: '放弃', + cancel: '取消', + abandonIntentTitle: '放弃版本 intent', + abandonReasonPlaceholder: '请输入放弃原因(必填)', + abandonReasonRequired: '请输入放弃原因', + abandonFailed: '放弃 intent 失败' + }, backHome: '回到首页', noPageTip: '抱歉,你访问的页面不存在', globalSearchTip: '搜索ip,应用,实例,服务', diff --git a/ui-vue3/src/mocks/handlers.ts b/ui-vue3/src/mocks/handlers.ts index f46ff3bda..8691eb460 100644 --- a/ui-vue3/src/mocks/handlers.ts +++ b/ui-vue3/src/mocks/handlers.ts @@ -27,6 +27,7 @@ import { versionHandlers } from './handlers/version' import { dynamicConfigHandlers } from './handlers/dynamicConfig' import { routingRuleHandlers } from './handlers/routingRule' import { tagRuleHandlers } from './handlers/tagRule' +import { ruleVersionHandlers } from './handlers/ruleVersion' import { destinationRuleHandlers, virtualServiceHandlers } from './handlers/istio' import { promQLHandlers } from './handlers/promQL' import { serverHandlers } from './handlers/server' @@ -46,6 +47,7 @@ export const handlers: HttpHandler[] = [ ...dynamicConfigHandlers, ...routingRuleHandlers, ...tagRuleHandlers, + ...ruleVersionHandlers, ...destinationRuleHandlers, ...virtualServiceHandlers, ...promQLHandlers, diff --git a/ui-vue3/src/mocks/handlers/dynamicConfig.ts b/ui-vue3/src/mocks/handlers/dynamicConfig.ts index 3e5b355af..c5864ce20 
100644 --- a/ui-vue3/src/mocks/handlers/dynamicConfig.ts +++ b/ui-vue3/src/mocks/handlers/dynamicConfig.ts @@ -17,6 +17,7 @@ import { http, type HttpHandler } from 'msw' import { success, base } from '../utils' +import { ruleVersionMock } from './ruleVersion' import type { ConfiguratorRule, ConfiguratorDetail, PaginatedData } from '@/types/api' function randomInt(min: number, max: number): number { @@ -28,6 +29,24 @@ function randomString(min: number, max: number): string { return Array.from({ length: len }, () => String.fromCharCode(97 + randomInt(0, 25))).join('') } +const decodeRuleName = (raw: string) => { + try { + return decodeURIComponent(raw) + } catch { + return raw + } +} + +const writeOrConflict = (rawName: string, operation: 'CREATE' | 'UPDATE' | 'DELETE') => { + const ruleName = decodeRuleName(rawName) + if (ruleVersionMock.shouldConflict(ruleName)) + return ruleVersionMock.conflictResponse('configurator', ruleName) + if (ruleVersionMock.shouldPend(ruleName)) + return ruleVersionMock.pendingResponse('configurator', ruleName) + void operation + return success(null) +} + export const dynamicConfigHandlers: HttpHandler[] = [ http.get(`${base}/configurator/search`, () => { const total = randomInt(8, 1000) @@ -45,15 +64,21 @@ export const dynamicConfigHandlers: HttpHandler[] = [ http.get(`${base}/configurator/:ruleName`, ({ params }) => { const detail: ConfiguratorDetail = { - name: params.ruleName as string, + name: decodeRuleName(params.ruleName as string), configs: [{ side: 'provider', timeout: 3000, retries: 2, loadbalance: 'roundrobin' }] } return success(detail) }), - http.delete(`${base}/configurator/:ruleName`, () => success(null)), + http.delete(`${base}/configurator/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'DELETE') + ), - http.put(`${base}/configurator/:ruleName`, () => success(null)), + http.put(`${base}/configurator/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'UPDATE') + ), - 
http.post(`${base}/configurator/:ruleName`, () => success(null)) + http.post(`${base}/configurator/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'CREATE') + ) ] diff --git a/ui-vue3/src/mocks/handlers/routingRule.ts b/ui-vue3/src/mocks/handlers/routingRule.ts index 7dad6b644..22538439a 100644 --- a/ui-vue3/src/mocks/handlers/routingRule.ts +++ b/ui-vue3/src/mocks/handlers/routingRule.ts @@ -17,6 +17,7 @@ import { http, type HttpHandler } from 'msw' import { success, base } from '../utils' +import { ruleVersionMock } from './ruleVersion' import type { RoutingRule, RoutingRuleDetail, PaginatedData } from '@/types/api' function randomInt(min: number, max: number): number { @@ -28,6 +29,15 @@ function randomString(min: number, max: number): string { return Array.from({ length: len }, () => String.fromCharCode(97 + randomInt(0, 25))).join('') } +const writeOrConflict = (ruleName: string, operation: 'CREATE' | 'UPDATE' | 'DELETE') => { + if (ruleVersionMock.shouldConflict(ruleName)) + return ruleVersionMock.conflictResponse('condition-rule', ruleName) + if (ruleVersionMock.shouldPend(ruleName)) + return ruleVersionMock.pendingResponse('condition-rule', ruleName) + void operation + return success(null) +} + export const routingRuleHandlers: HttpHandler[] = [ http.get(`${base}/condition-rule/search`, () => { const total = randomInt(8, 1000) @@ -59,9 +69,15 @@ export const routingRuleHandlers: HttpHandler[] = [ return success(detail) }), - http.delete(`${base}/condition-rule/:ruleName`, () => success(null)), + http.delete(`${base}/condition-rule/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'DELETE') + ), - http.put(`${base}/condition-rule/:ruleName`, () => success(null)), + http.put(`${base}/condition-rule/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'UPDATE') + ), - http.post(`${base}/condition-rule/:ruleName`, () => success(null)) + http.post(`${base}/condition-rule/:ruleName`, ({ params }) 
=> + writeOrConflict(params.ruleName as string, 'CREATE') + ) ] diff --git a/ui-vue3/src/mocks/handlers/ruleVersion.ts b/ui-vue3/src/mocks/handlers/ruleVersion.ts new file mode 100644 index 000000000..8502a5e8b --- /dev/null +++ b/ui-vue3/src/mocks/handlers/ruleVersion.ts @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { http, HttpResponse, type HttpHandler } from 'msw' +import { success, base } from '../utils' +import type { + RuleVersion, + RuleVersionDiff, + RuleVersionList, + TrafficRuleKind +} from '@/api/service/traffic' + +const KINDS: TrafficRuleKind[] = ['condition-rule', 'tag-rule', 'configurator'] +const SCENARIOS = [ + 'normal', + 'deleted', + 'empty', + 'conflict', + 'pending', + 'repair-success', + 'repair-failure', + 'abandon-success', + 'backend-error', + 'diff' +] as const + +type Scenario = (typeof SCENARIOS)[number] + +const pendingIntentByRule = new Map() +let nextIntentID = 9001 + +const ledgerKey = (kind: TrafficRuleKind, ruleName: string) => `${kind}:${ruleName}` +const scenarioOf = (ruleName: string): Scenario => + SCENARIOS.find((scenario) => ruleName.includes(`-${scenario}`)) ?? 
'normal' + +const decodeName = (raw: string) => { + try { + return decodeURIComponent(raw) + } catch { + return raw + } +} + +const spec = (ruleName: string, marker: string) => + JSON.stringify({ configVersion: 'v3.0', key: ruleName, marker }) + +const version = ( + kind: TrafficRuleKind, + ruleName: string, + id: string, + versionNo: number, + operation: RuleVersion['operation'], + source: RuleVersion['source'], + isCurrent: boolean, + marker = `v${versionNo}` +): RuleVersion => ({ + id, + ruleKind: kind, + mesh: 'default', + resourceKey: `/${ruleName}`, + ruleName, + versionNo, + contentHash: `sha256:${ruleName}:${marker}`, + specJson: operation === 'DELETE' ? '{}' : spec(ruleName, marker), + source, + operation, + author: source === 'UPSTREAM' ? 'system:upstream' : 'user name', + createdAt: `2026-05-${(20 + versionNo).toString().padStart(2, '0')}T08:00:00Z`, + committedAt: `2026-05-${(20 + versionNo).toString().padStart(2, '0')}T08:01:00Z`, + isCurrent +}) + +const fixtureVersions = (kind: TrafficRuleKind, ruleName: string): RuleVersion[] => { + switch (scenarioOf(ruleName)) { + case 'empty': + return [] + case 'deleted': + return [ + version(kind, ruleName, '2003', 3, 'DELETE', 'ADMIN', false, 'deleted'), + version(kind, ruleName, '2002', 2, 'UPDATE', 'ADMIN', false), + version(kind, ruleName, '2001', 1, 'CREATE', 'BOOTSTRAP', false) + ] + case 'pending': + case 'repair-success': + case 'repair-failure': + case 'abandon-success': + return [ + version(kind, ruleName, '3002', 2, 'UPDATE', 'ADMIN', true), + version(kind, ruleName, '3001', 1, 'CREATE', 'BOOTSTRAP', false) + ] + case 'diff': + return [ + version(kind, ruleName, '4002', 2, 'UPDATE', 'ADMIN', true, 'right'), + version(kind, ruleName, '4001', 1, 'CREATE', 'BOOTSTRAP', false, 'left') + ] + default: + return [ + version(kind, ruleName, '1005', 5, 'UPDATE', 'ADMIN', true), + version(kind, ruleName, '1004', 4, 'UPDATE', 'ADMIN', false), + version(kind, ruleName, '1003', 3, 'UPDATE', 'UPSTREAM', false), + 
version(kind, ruleName, '1002', 2, 'UPDATE', 'ADMIN', false), + version(kind, ruleName, '1001', 1, 'CREATE', 'BOOTSTRAP', false) + ] + } +} + +const currentVersionOf = (versions: RuleVersion[]) => versions.find((version) => version.isCurrent) + +const versionList = (versions: RuleVersion[]): RuleVersionList => { + const current = currentVersionOf(versions) + const head = versions[0] + return { + items: versions, + total: versions.length, + currentVersionId: current?.id, + currentVersionNo: current?.versionNo, + deleted: Boolean(!current && head?.operation === 'DELETE') + } +} + +const conflictResp = (currentVersionId?: string | null) => + HttpResponse.json( + { + code: 'VERSION_CONFLICT', + message: 'rule version conflict', + currentVersionId: currentVersionId ?? null + }, + { status: 409 } + ) + +const pendingResp = (intentId: string) => + HttpResponse.json( + { + code: 'VERSION_LEDGER_PENDING', + message: 'rule version intent is pending', + intentId + }, + { status: 409 } + ) + +const bizError = (code: string, message: string, status = 200) => + HttpResponse.json({ code, message, data: null }, { status }) + +const notFoundResp = (message: string) => bizError('NotFoundError', message, 404) + +const readJsonBody = async (request: Request): Promise> => { + try { + const body = (await request.json()) as Record | null + return body ?? 
{} + } catch { + return {} + } +} + +const validateReason = (reason: string) => { + const trimmed = reason.trim() + if (!trimmed) return bizError('InvalidArgument', 'reason must not be empty', 400) + if (trimmed.length > 1024) + return bizError('InvalidArgument', 'reason must be at most 1024 characters', 400) + return null +} + +const ensurePendingIntent = (kind: TrafficRuleKind, ruleName: string) => { + const key = ledgerKey(kind, ruleName) + let intentID = pendingIntentByRule.get(key) + if (!intentID) { + intentID = `${nextIntentID++}` + pendingIntentByRule.set(key, intentID) + } + return intentID +} + +const buildVersionHandlersForKind = (kind: TrafficRuleKind): HttpHandler[] => [ + http.get(`${base}/${kind}/:ruleName/versions`, ({ params }) => { + const ruleName = decodeName(params.ruleName as string) + if (scenarioOf(ruleName) === 'backend-error') + return bizError('InternalError', 'backend error', 500) + const versions = fixtureVersions(kind, ruleName) + return success(versionList(versions)) + }), + + http.get(`${base}/${kind}/:ruleName/versions/:versionId`, ({ params }) => { + const ruleName = decodeName(params.ruleName as string) + const versionId = String(params.versionId || '').trim() + if (!versionId) return bizError('InvalidArgument', 'versionId must be an integer', 400) + const found = fixtureVersions(kind, ruleName).find((item) => item.id === versionId) + return found ? 
success(found) : notFoundResp('rule version not found') + }), + + http.get(`${base}/${kind}/:ruleName/versions/:versionId/diff`, ({ params, request }) => { + const ruleName = decodeName(params.ruleName as string) + const versionId = String(params.versionId || '').trim() + if (!versionId) return bizError('InvalidArgument', 'versionId must be an integer', 400) + const versions = fixtureVersions(kind, ruleName) + const left = versions.find((item) => item.id === versionId) + if (!left) return notFoundResp('rule version not found') + const against = new URL(request.url).searchParams.get('against') || 'current' + if (against !== 'current' && against !== 'previous' && !/^\d+$/.test(against)) { + return bizError( + 'InvalidArgument', + "against must be 'current', 'previous', or a version ID", + 400 + ) + } + const leftIndex = versions.findIndex((item) => item.id === versionId) + const right = + against === 'current' + ? currentVersionOf(versions) + : against === 'previous' + ? versions[leftIndex + 1] + : versions.find((item) => item.id === against) + if (!right) return notFoundResp('rule version not found') + return success({ + left: { id: left.id, versionNo: left.versionNo, specJson: left.specJson }, + right: { id: right.id, versionNo: right.versionNo, specJson: right.specJson } + }) + }), + + http.post( + `${base}/${kind}/:ruleName/versions/:versionId/rollback`, + async ({ params, request }) => { + const ruleName = decodeName(params.ruleName as string) + const body = await readJsonBody(request) + const reasonErr = validateReason(typeof body.reason === 'string' ? 
body.reason : '') + if (reasonErr) return reasonErr + + const versions = fixtureVersions(kind, ruleName) + const target = versions.find((item) => item.id === String(params.versionId || '').trim()) + if (!target) return notFoundResp('rule version not found') + if (target.operation === 'DELETE') + return bizError('InvalidArgument', 'cannot roll back to a deleted rule version', 400) + if (scenarioOf(ruleName) === 'pending') + return pendingResp(ensurePendingIntent(kind, ruleName)) + + const current = currentVersionOf(versions) + const expected = + typeof body.expectedVersionId === 'string' ? body.expectedVersionId.trim() : undefined + if (expected !== undefined) { + if (!current && expected !== '0') return conflictResp(null) + if (current && expected !== current.id) return conflictResp(current.id) + } + if (scenarioOf(ruleName) === 'conflict') return conflictResp(current?.id ?? null) + + return success({ + rolledBackFromId: target.id, + versionId: '9901', + versionNo: (current?.versionNo ?? 
0) + 1, + source: 'ROLLBACK', + committed: true + }) + } + ) +] + +const intentHandlers: HttpHandler[] = [ + http.post(`${base}/rule-version-intents/:intentId/repair`, ({ params }) => { + const intentId = String(params.intentId || '').trim() + if (!intentId) return bizError('InvalidArgument', 'intentId must be an integer', 400) + const shouldFail = Array.from(pendingIntentByRule).some( + ([key, value]) => value === intentId && key.includes('-repair-failure') + ) + if (shouldFail) return bizError('InternalError', 'repair failed', 500) + const matched = Array.from(pendingIntentByRule).find(([, value]) => value === intentId) + if (!matched) return notFoundResp('rule version intent not found') + pendingIntentByRule.delete(matched[0]) + const [kind, ruleName] = matched[0].split(':') as [TrafficRuleKind, string] + return success(version(kind, ruleName, '9101', 3, 'UPDATE', 'ADMIN', true, 'repair')) + }), + + http.post(`${base}/rule-version-intents/:intentId/abandon`, async ({ params, request }) => { + const intentId = String(params.intentId || '').trim() + if (!intentId) return bizError('InvalidArgument', 'intentId must be an integer', 400) + const body = await readJsonBody(request) + const reasonErr = validateReason(typeof body.reason === 'string' ? 
body.reason : '') + if (reasonErr) return reasonErr + const matched = Array.from(pendingIntentByRule).find(([, value]) => value === intentId) + if (!matched) return notFoundResp('rule version intent not found') + pendingIntentByRule.delete(matched[0]) + return success('') + }) +] + +export const ruleVersionHandlers: HttpHandler[] = [ + ...KINDS.flatMap(buildVersionHandlersForKind), + ...intentHandlers +] + +export const ruleVersionMock = { + scenarios: SCENARIOS, + scenarioOf, + reset() { + pendingIntentByRule.clear() + nextIntentID = 9001 + }, + shouldConflict(ruleName: string) { + return scenarioOf(ruleName) === 'conflict' + }, + shouldPend(ruleName: string) { + const scenario = scenarioOf(ruleName) + return ( + scenario === 'pending' || + scenario === 'repair-success' || + scenario === 'repair-failure' || + scenario === 'abandon-success' + ) + }, + conflictResponse(kind: TrafficRuleKind, ruleName: string) { + return conflictResp(currentVersionOf(fixtureVersions(kind, ruleName))?.id ?? 
null) + }, + pendingResponse(kind: TrafficRuleKind, ruleName: string) { + return pendingResp(ensurePendingIntent(kind, ruleName)) + } +} diff --git a/ui-vue3/src/mocks/handlers/tagRule.ts b/ui-vue3/src/mocks/handlers/tagRule.ts index 7c656d430..2635aa6bb 100644 --- a/ui-vue3/src/mocks/handlers/tagRule.ts +++ b/ui-vue3/src/mocks/handlers/tagRule.ts @@ -17,6 +17,7 @@ import { http, type HttpHandler } from 'msw' import { success, base } from '../utils' +import { ruleVersionMock } from './ruleVersion' import type { TagRule, TagRuleDetail, PaginatedData } from '@/types/api' function randomInt(min: number, max: number): number { @@ -28,6 +29,15 @@ function randomString(min: number, max: number): string { return Array.from({ length: len }, () => String.fromCharCode(97 + randomInt(0, 25))).join('') } +const writeOrConflict = (ruleName: string, operation: 'CREATE' | 'UPDATE' | 'DELETE') => { + if (ruleVersionMock.shouldConflict(ruleName)) + return ruleVersionMock.conflictResponse('tag-rule', ruleName) + if (ruleVersionMock.shouldPend(ruleName)) + return ruleVersionMock.pendingResponse('tag-rule', ruleName) + void operation + return success(null) +} + export const tagRuleHandlers: HttpHandler[] = [ http.get(`${base}/tag-rule/search`, () => { const total = randomInt(8, 1000) @@ -55,9 +65,15 @@ export const tagRuleHandlers: HttpHandler[] = [ return success(detail) }), - http.delete(`${base}/tag-rule/:ruleName`, () => success(null)), + http.delete(`${base}/tag-rule/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'DELETE') + ), - http.put(`${base}/tag-rule/:ruleName`, () => success(null)), + http.put(`${base}/tag-rule/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'UPDATE') + ), - http.post(`${base}/tag-rule/:ruleName`, () => success(null)) + http.post(`${base}/tag-rule/:ruleName`, ({ params }) => + writeOrConflict(params.ruleName as string, 'CREATE') + ) ] diff --git a/ui-vue3/src/views/traffic/_shared/RuleDiffEditor.vue 
b/ui-vue3/src/views/traffic/_shared/RuleDiffEditor.vue new file mode 100644 index 000000000..e57b45050 --- /dev/null +++ b/ui-vue3/src/views/traffic/_shared/RuleDiffEditor.vue @@ -0,0 +1,116 @@ + + + + + + + diff --git a/ui-vue3/src/views/traffic/_shared/RuleHistoryDrawer.vue b/ui-vue3/src/views/traffic/_shared/RuleHistoryDrawer.vue new file mode 100644 index 000000000..979921ad6 --- /dev/null +++ b/ui-vue3/src/views/traffic/_shared/RuleHistoryDrawer.vue @@ -0,0 +1,163 @@ + + + + + + + diff --git a/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.spec.ts b/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.spec.ts new file mode 100644 index 000000000..26aca8ab4 --- /dev/null +++ b/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.spec.ts @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { flushPromises, mount } from '@vue/test-utils' +import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' +import { defineComponent, h, nextTick } from 'vue' +import { HTTP_STATUS } from '@/base/http/constants' +import type { RuleVersion } from '@/api/service/traffic' +import type RuleHistoryPanelType from './RuleHistoryPanel.vue' + +const mocks = vi.hoisted(() => { + Object.defineProperty(globalThis, 'localStorage', { + value: { + getItem: () => null, + setItem: () => undefined, + removeItem: () => undefined + }, + configurable: true + }) + + return { + listRuleVersionsAPI: vi.fn(), + rollbackRuleVersionAPI: vi.fn(), + diffRuleVersionAPI: vi.fn() + } +}) + +vi.mock('@/api/service/traffic', async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + listRuleVersionsAPI: mocks.listRuleVersionsAPI, + rollbackRuleVersionAPI: mocks.rollbackRuleVersionAPI, + diffRuleVersionAPI: mocks.diffRuleVersionAPI + } +}) + +vi.mock('ant-design-vue', async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + message: { + success: vi.fn(), + warning: vi.fn(), + error: vi.fn() + } + } +}) + +vi.mock('@/components/editor/MonacoEditor.vue', () => ({ + default: { + name: 'MonacoEditor', + template: '
' + } +})) + +vi.mock('./RuleDiffEditor.vue', () => ({ + default: { + name: 'RuleDiffEditor', + template: '
' + } +})) + +const version = (id: string, versionNo: number, isCurrent: boolean): RuleVersion => ({ + id, + ruleKind: 'ConditionRoute', + mesh: '', + resourceKey: '/demo-rule', + ruleName: 'demo-rule', + versionNo, + contentHash: `hash-${id}`, + specJson: '{"key":"demo-rule"}', + source: 'ADMIN', + operation: 'UPDATE', + author: 'admin', + createdAt: '2026-06-19T00:00:00Z', + isCurrent +}) + +const drawerStub = defineComponent({ + props: ['items'], + emits: ['rollback'], + setup(props, { emit }) { + return () => + h( + 'div', + { 'data-test': 'history-drawer' }, + (props.items as RuleVersion[]).map((item) => + h( + 'button', + { + type: 'button', + 'data-test': `rollback-${item.id}`, + onClick: () => emit('rollback', item) + }, + `rollback-${item.id}` + ) + ) + ) + } +}) + +const modalStub = defineComponent({ + props: ['open'], + emits: ['ok'], + setup(props, { emit, slots }) { + return () => + props.open + ? h('div', { 'data-test': 'modal' }, [ + slots.default?.(), + h( + 'button', + { type: 'button', 'data-test': 'modal-ok', onClick: () => emit('ok') }, + 'ok' + ) + ]) + : null + } +}) + +const textAreaStub = defineComponent({ + emits: ['update:value'], + setup(_props, { emit }) { + return () => + h('textarea', { + 'data-test': 'rollback-reason', + onInput: (event: Event) => { + emit('update:value', (event.target as HTMLTextAreaElement).value) + } + }) + } +}) + +const mountPanel = (props: Partial['$props']> = {}) => + mount(RuleHistoryPanel, { + props: { + open: true, + kind: 'condition-rule', + ruleName: 'demo-rule', + title: 'History', + ...props + }, + global: { + plugins: [i18n], + stubs: { + RuleHistoryDrawer: drawerStub, + MonacoEditor: true, + RuleDiffEditor: true, + AModal: modalStub, + 'a-modal': modalStub, + AAlert: { template: '
' }, + 'a-alert': { template: '
' }, + ATypographyText: { template: '' }, + 'a-typography-text': { template: '' }, + AForm: { template: '
' }, + 'a-form': { template: '
' }, + AFormItem: { template: '' }, + 'a-form-item': { template: '' }, + ATextarea: textAreaStub, + 'a-textarea': textAreaStub + } + } + }) + +let i18n: typeof import('@/base/i18n').i18n +let RuleHistoryPanel: typeof RuleHistoryPanelType + +beforeAll(async () => { + i18n = (await import('@/base/i18n')).i18n + RuleHistoryPanel = (await import('./RuleHistoryPanel.vue')).default +}) + +beforeEach(() => { + mocks.listRuleVersionsAPI.mockReset() + mocks.rollbackRuleVersionAPI.mockReset() + mocks.diffRuleVersionAPI.mockReset() +}) + +describe('RuleHistoryPanel', () => { + it('ignores stale history responses after ruleName changes', async () => { + let resolveFirst: (value: unknown) => void = () => undefined + mocks.listRuleVersionsAPI + .mockReturnValueOnce(new Promise((resolve) => (resolveFirst = resolve))) + .mockResolvedValueOnce({ + code: HTTP_STATUS.SUCCESS, + data: { + items: [version('new-current', 7, true)], + total: 1, + currentVersionId: 'new-current', + currentVersionNo: 7, + deleted: false + } + }) + + const wrapper = mountPanel({ ruleName: 'old-rule' }) + await wrapper.setProps({ ruleName: 'new-rule' }) + await flushPromises() + + resolveFirst({ + code: HTTP_STATUS.SUCCESS, + data: { + items: [version('old-current', 3, true)], + total: 1, + currentVersionId: 'old-current', + currentVersionNo: 3, + deleted: false + } + }) + await flushPromises() + + expect(wrapper.emitted('current-version-change')?.at(-1)).toEqual(['new-current']) + expect(wrapper.emitted('current-version-no-change')?.at(-1)).toEqual([7]) + expect(wrapper.text()).toContain('rollback-new-current') + expect(wrapper.text()).not.toContain('rollback-old-current') + }) + + it('ignores stale rollback success after ruleName changes', async () => { + mocks.listRuleVersionsAPI + .mockResolvedValueOnce({ + code: HTTP_STATUS.SUCCESS, + data: { + items: [version('old-target', 1, false)], + total: 1, + currentVersionId: 'old-current', + currentVersionNo: 2, + deleted: false + } + }) + 
.mockResolvedValueOnce({ + code: HTTP_STATUS.SUCCESS, + data: { + items: [version('new-target', 3, false)], + total: 1, + currentVersionId: 'new-current', + currentVersionNo: 4, + deleted: false + } + }) + let resolveRollback: (value: unknown) => void = () => undefined + mocks.rollbackRuleVersionAPI.mockReturnValueOnce( + new Promise((resolve) => (resolveRollback = resolve)) + ) + + const wrapper = mountPanel({ ruleName: 'old-rule' }) + await flushPromises() + await wrapper.get('[data-test="rollback-old-target"]').trigger('click') + await nextTick() + await wrapper.get('[data-test="rollback-reason"]').setValue('restore old') + await wrapper.get('[data-test="modal-ok"]').trigger('click') + + await wrapper.setProps({ ruleName: 'new-rule' }) + await flushPromises() + await wrapper.get('[data-test="rollback-new-target"]').trigger('click') + await nextTick() + + resolveRollback({ + code: HTTP_STATUS.SUCCESS, + data: { + rolledBackFromId: 'old-target', + versionId: 'old-rollback', + versionNo: 5, + source: 'ROLLBACK', + committed: true + } + }) + await flushPromises() + + expect(wrapper.text()).toContain('rollback-new-target') + expect(wrapper.find('[data-test="modal"]').exists()).toBe(true) + expect(mocks.listRuleVersionsAPI).toHaveBeenCalledTimes(2) + }) +}) diff --git a/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.vue b/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.vue new file mode 100644 index 000000000..e3894381f --- /dev/null +++ b/ui-vue3/src/views/traffic/_shared/RuleHistoryPanel.vue @@ -0,0 +1,398 @@ + + + + + + + diff --git a/ui-vue3/src/views/traffic/_shared/ruleVersion.ts b/ui-vue3/src/views/traffic/_shared/ruleVersion.ts new file mode 100644 index 000000000..6b57ab145 --- /dev/null +++ b/ui-vue3/src/views/traffic/_shared/ruleVersion.ts @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { h } from 'vue'
import { Button, Input, Modal, notification, Space } from 'ant-design-vue'
import {
  abandonRuleVersionIntentAPI,
  listRuleVersionsAPI,
  repairRuleVersionIntentAPI,
  type RuleVersion,
  type RuleVersionList,
  type TrafficRuleKind,
  type VersionConflictError,
  type VersionLedgerPendingError
} from '@/api/service/traffic'
import { HTTP_STATUS } from '@/base/http/constants'
import { i18n } from '@/base/i18n'

/** Which version of a rule is current, as derived from a version listing. */
export interface CurrentVersionState {
  /** ID of the current version; absent when no version is current. */
  id?: string
  /** Sequence number of the current version; absent when no version is current. */
  versionNo?: number
  /** Whether the listing reports the rule as deleted. */
  deleted: boolean
}

/**
 * Callbacks shared by the notification helpers in this module.
 *
 * - `reload` is invoked after a successful repair/abandon, or when the user
 *   clicks the "reload" action of a notification.
 * - `isCurrent`, when supplied, is consulted before touching the UI; a `false`
 *   result suppresses follow-up notifications and reloads.
 */
export interface RuleVersionNotifyOptions {
  reload?: () => void | Promise<unknown>
  isCurrent?: () => boolean
}

/**
 * Derives the current-version state from a list of versions.
 *
 * The item flagged `isCurrent` supplies `id` and `versionNo`. When no item is
 * current and the first item is a `DELETE` operation, the rule is reported as
 * deleted.
 */
export const currentVersionStateFromItems = (items: RuleVersion[]): CurrentVersionState => {
  const current = items.find((item) => item.isCurrent)
  const head = items[0]
  return {
    id: current?.id,
    versionNo: current?.versionNo,
    deleted: !current && head?.operation === 'DELETE'
  }
}

/**
 * Derives the current-version state from a list response.
 *
 * Uses the list-level `currentVersionId` / `currentVersionNo` / `deleted`
 * fields when either `currentVersionId` or `deleted` is present, and falls
 * back to scanning `items` otherwise. A missing list yields `{ deleted: false }`.
 */
export const currentVersionStateFromList = (list?: RuleVersionList): CurrentVersionState => {
  if (!list) {
    return { deleted: false }
  }
  if (list.currentVersionId !== undefined || list.deleted !== undefined) {
    return {
      id: list.currentVersionId,
      versionNo: list.currentVersionNo,
      deleted: Boolean(list.deleted)
    }
  }
  return currentVersionStateFromItems(list.items || [])
}

/**
 * Returns the expected-version precondition to send with a rollback:
 * the current version ID, `'0'` for a deleted rule, or `undefined` when
 * neither is known.
 */
export const rollbackExpectedVersionId = (state: CurrentVersionState): string | undefined => {
  if (state.id !== undefined) {
    return state.id
  }
  // A deleted ledger head has no current version ID, but rollback must still
  // assert that the rule is absent. "0" is the API's deleted-state precondition.
  return state.deleted ? '0' : undefined
}

/** Builds a label such as `"current v3"`; returns the bare prefix when no number is given. */
export const versionDiffLabel = (prefix: string, versionNo?: number): string =>
  typeof versionNo === 'number' ? `${prefix} v${versionNo}` : prefix

/** Trims surrounding whitespace from a user-entered reason. */
export const normalizeIntentReason = (reason: string): string => reason.trim()

/**
 * Tells whether the response to request `requestSeq` should still be applied:
 * the owner must not be disposed and the request must be the latest issued.
 */
export const isCurrentHistoryRequest = (
  requestSeq: number,
  latestSeq: number,
  disposed: boolean
): boolean => {
  return !disposed && requestSeq === latestSeq
}

/**
 * Fetches the version list of a rule and reduces it to its current-version state.
 *
 * NOTE(review): a non-success response code is reported as `{ deleted: false }`
 * with no `id`, so callers cannot distinguish "lookup failed" from "no current
 * version" — confirm that this is intended.
 */
export const fetchCurrentVersionState = async (
  kind: TrafficRuleKind,
  ruleName: string
): Promise<CurrentVersionState> => {
  const res = await listRuleVersionsAPI(kind, ruleName)
  if (res.code === HTTP_STATUS.SUCCESS) {
    return currentVersionStateFromList(res.data)
  }
  return { deleted: false }
}

/** Reads the `code` property of an arbitrary thrown value, if it has one. */
const errorCode = (e: unknown): unknown =>
  e !== null && typeof e === 'object' ? (e as { code?: unknown }).code : undefined

/** Type guard: the error carries the `VERSION_CONFLICT` code. */
export const isVersionConflict = (e: unknown): e is VersionConflictError => {
  return errorCode(e) === 'VERSION_CONFLICT'
}

/** Type guard: the error carries the `VERSION_LEDGER_PENDING` code. */
export const isVersionLedgerPending = (e: unknown): e is VersionLedgerPendingError => {
  return errorCode(e) === 'VERSION_LEDGER_PENDING'
}

/** Shorthand for the global i18n translate function. */
const t = (key: string, params?: Record<string, unknown>) => i18n.global.t(key, params)

/**
 * Produces a human-readable message for an arbitrary thrown value.
 *
 * Prefers a non-empty string `message` property. Objects without one are
 * serialised as JSON instead of collapsing to `"[object Object]"`; anything
 * else goes through `String()`.
 */
export const ruleVersionErrorMessage = (e: unknown): string => {
  if (typeof e === 'string') {
    return e
  }
  if (e !== null && typeof e === 'object') {
    const message = (e as { message?: unknown }).message
    if (typeof message === 'string' && message) {
      return message
    }
    try {
      const json = JSON.stringify(e)
      // Error instances and empty objects serialise to "{}", which is no more
      // useful than String(e); skip it.
      if (json && json !== '{}') {
        return json
      }
    } catch {
      // Circular structures cannot be serialised; fall through to String().
    }
  }
  return String(e)
}

/** Intent IDs with a repair request in flight, used to ignore duplicate clicks. */
const repairingIntentIds = new Set<string>()

/** True when the caller supplied `isCurrent` and it now reports `false`. */
const isStaleContext = (options?: RuleVersionNotifyOptions): boolean =>
  Boolean(options?.isCurrent && !options.isCurrent())

/**
 * Renders the "reload" link button used inside notifications. Clicking it
 * closes the notification identified by `notificationKey` and, unless the
 * context went stale, invokes `options.reload`.
 */
const renderReloadButton = (notificationKey: string, options?: RuleVersionNotifyOptions) =>
  h(
    Button,
    {
      type: 'link',
      size: 'small',
      onClick: () => {
        notification.close(notificationKey)
        if (!isStaleContext(options)) {
          void options?.reload?.()
        }
      }
    },
    { default: () => t('ruleVersionDomain.reload') }
  )

/**
 * Opens a confirm dialog that collects a mandatory reason and abandons the
 * given intent. The dialog stays open (its `onOk` rejects) when the reason is
 * blank, when a submission is already running, when the request fails, or
 * when the context is stale once the request settles.
 *
 * NOTE(review): `reload` runs inside the same `try` as the abandon call, so a
 * reload failure is reported with the "abandon failed" message — confirm that
 * this is acceptable.
 */
const openAbandonReasonModal = (intentId: string, options?: RuleVersionNotifyOptions): void => {
  let reason = ''
  let submitting = false

  // Re-enables the OK button and rejects so that the dialog remains open.
  const keepOpen = (): Promise<never> => {
    submitting = false
    modal.update({ okButtonProps: { danger: true, loading: false } })
    return Promise.reject()
  }

  const modal = Modal.confirm({
    title: t('ruleVersionDomain.abandonIntentTitle'),
    content: () =>
      h(Input.TextArea, {
        rows: 3,
        maxlength: 1024,
        placeholder: t('ruleVersionDomain.abandonReasonPlaceholder'),
        onChange: (event: Event) => {
          reason = (event.target as HTMLTextAreaElement).value
        }
      }),
    okText: t('ruleVersionDomain.abandon'),
    cancelText: t('ruleVersionDomain.cancel'),
    okButtonProps: { danger: true },
    async onOk() {
      if (submitting) {
        return Promise.reject()
      }
      const trimmed = normalizeIntentReason(reason)
      if (!trimmed) {
        notification.warning({
          key: 'rule-version-abandon-reason-required',
          message: t('ruleVersionDomain.abandonReasonRequired')
        })
        return Promise.reject()
      }
      submitting = true
      modal.update({ okButtonProps: { danger: true, loading: true } })
      try {
        await abandonRuleVersionIntentAPI(intentId, trimmed)
        if (isStaleContext(options)) {
          return keepOpen()
        }
        notification.close('rule-version-ledger-pending')
        notification.close('rule-version-abandon-reason-required')
        await options?.reload?.()
      } catch (e: unknown) {
        // A stale context stays silent; otherwise surface the failure.
        if (!isStaleContext(options)) {
          notification.error({
            key: 'rule-version-abandon-error',
            message: t('ruleVersionDomain.abandonFailed'),
            description: ruleVersionErrorMessage(e)
          })
        }
        return keepOpen()
      }
    }
  })
}

/**
 * Shows a sticky warning for a version-conflict error, with a "reload" action
 * when `options.reload` is supplied.
 *
 * @returns `true` when `e` was a version conflict and has been reported.
 */
export const notifyVersionConflict = (e: unknown, options?: RuleVersionNotifyOptions): boolean => {
  if (!isVersionConflict(e)) {
    return false
  }
  notification.warning({
    key: 'rule-version-conflict',
    duration: 0,
    message: t('ruleVersionDomain.versionConflict'),
    description: t('ruleVersionDomain.versionConflictDescription'),
    btn: options?.reload ? () => renderReloadButton('rule-version-conflict', options) : undefined
  })
  return true
}

/**
 * Repairs the given intent, then closes the pending notification and reloads.
 * Calls made while a repair of the same intent is in flight are ignored.
 *
 * NOTE(review): `reload` runs inside the same `try` as the repair call, so a
 * reload failure is reported with the "repair failed" message — confirm that
 * this is acceptable.
 */
const repairIntent = async (
  intentId: string,
  options?: RuleVersionNotifyOptions
): Promise<void> => {
  if (repairingIntentIds.has(intentId)) {
    return
  }
  // Repair mutates the durable intent. Suppress duplicate clicks for the same
  // intent while preserving normal error reporting for unrelated failures.
  repairingIntentIds.add(intentId)
  try {
    await repairRuleVersionIntentAPI(intentId)
    if (isStaleContext(options)) {
      return
    }
    notification.close('rule-version-ledger-pending')
    await options?.reload?.()
  } catch (e: unknown) {
    if (isStaleContext(options)) {
      return
    }
    notification.error({
      key: 'rule-version-repair-error',
      message: t('ruleVersionDomain.repairFailed'),
      description: ruleVersionErrorMessage(e)
    })
  } finally {
    repairingIntentIds.delete(intentId)
  }
}

/** Renders the "repair" and "abandon" actions offered for a pending intent. */
const renderIntentActions = (intentId: string, options?: RuleVersionNotifyOptions) =>
  h(
    Space,
    {},
    {
      default: () => [
        h(
          Button,
          {
            type: 'link',
            size: 'small',
            onClick: () => repairIntent(intentId, options)
          },
          { default: () => t('ruleVersionDomain.repair') }
        ),
        h(
          Button,
          {
            type: 'link',
            size: 'small',
            danger: true,
            onClick: () => {
              openAbandonReasonModal(intentId, options)
            }
          },
          { default: () => t('ruleVersionDomain.abandon') }
        )
      ]
    }
  )

/**
 * Shows a sticky warning for a ledger-pending error. When the error names an
 * intent, "repair" and "abandon" actions are offered; otherwise a "reload"
 * action is offered if `options.reload` is supplied.
 *
 * @returns `true` when `e` was a ledger-pending error and has been reported.
 */
export const notifyVersionLedgerPending = (
  e: unknown,
  options?: RuleVersionNotifyOptions
): boolean => {
  if (!isVersionLedgerPending(e)) {
    return false
  }
  const intentId = e.intentId
  notification.warning({
    key: 'rule-version-ledger-pending',
    duration: 0,
    message: t('ruleVersionDomain.ledgerPending'),
    description: intentId
      ? t('ruleVersionDomain.ledgerPendingWithIntent', { intentId })
      : t('ruleVersionDomain.ledgerPendingDescription'),
    btn: intentId
      ? () => renderIntentActions(intentId, options)
      : options?.reload
        ? () => renderReloadButton('rule-version-ledger-pending', options)
        : undefined
  })
  return true
}

/**
 * Reports `e` if it is a known rule-version error, checking ledger-pending
 * first and version-conflict second.
 *
 * @returns `true` when a notification was shown; `false` leaves the error to the caller.
 */
export const notifyRuleVersionError = (e: unknown, options?: RuleVersionNotifyOptions): boolean => {
  if (notifyVersionLedgerPending(e, options)) {
    return true
  }
  return notifyVersionConflict(e, options)
}

/**
 * Pretty-prints a JSON spec with two-space indentation. Returns `''` for a
 * missing spec and the input unchanged when it is not valid JSON.
 */
export const formatRuleSpec = (specJson?: string): string => {
  if (!specJson) {
    return ''
  }
  try {
    return JSON.stringify(JSON.parse(specJson), null, 2)
  } catch {
    return specJson
  }
}
diff --git a/ui-vue3/src/views/traffic/dynamicConfig/index.vue b/ui-vue3/src/views/traffic/dynamicConfig/index.vue index baad34d02..817c90ae0 100644 --- a/ui-vue3/src/views/traffic/dynamicConfig/index.vue +++ b/ui-vue3/src/views/traffic/dynamicConfig/index.vue @@ -73,6 +73,12 @@ import { PROVIDE_INJECT_KEY } from '@/base/enums/ProvideInject' import { useRouter } from 'vue-router' import { PRIMARY_COLOR }
from '@/base/constants' import { Icon } from '@iconify/vue' +import { message } from 'ant-design-vue' +import { + fetchCurrentVersionState, + notifyRuleVersionError, + ruleVersionErrorMessage +} from '../_shared/ruleVersion' const router = useRouter() @@ -143,8 +149,16 @@ onMounted(async () => { }) const delDynamicConfig = async (record: any) => { - await delConfiguratorDetail({ name: record.ruleName }) - await searchDomain.onSearch() + try { + const expectedVersionId = (await fetchCurrentVersionState('configurator', record.ruleName)).id + await delConfiguratorDetail({ name: record.ruleName }, { expectedVersionId }) + await searchDomain.onSearch() + } catch (e: any) { + const handled = notifyRuleVersionError(e, { reload: () => searchDomain.onSearch() }) + if (!handled) { + message.error(ruleVersionErrorMessage(e)) + } + } } provide(PROVIDE_INJECT_KEY.SEARCH_DOMAIN, searchDomain) diff --git a/ui-vue3/src/views/traffic/dynamicConfig/tabs/YAMLView.vue b/ui-vue3/src/views/traffic/dynamicConfig/tabs/YAMLView.vue index df8ea5257..8a52b38d3 100644 --- a/ui-vue3/src/views/traffic/dynamicConfig/tabs/YAMLView.vue +++ b/ui-vue3/src/views/traffic/dynamicConfig/tabs/YAMLView.vue @@ -21,19 +21,16 @@ - - - - - - - - - - - - - + + + + + current v{{ currentVersionNo }} + + + {{ $t('flowControlDomain.versionRecords') }} + + @@ -71,11 +68,20 @@ 保存 重置 + + diff --git a/ui-vue3/src/views/traffic/routingRule/tabs/formView.vue b/ui-vue3/src/views/traffic/routingRule/tabs/formView.vue index fb42ce908..d5dbcd21c 100644 --- a/ui-vue3/src/views/traffic/routingRule/tabs/formView.vue +++ b/ui-vue3/src/views/traffic/routingRule/tabs/formView.vue @@ -18,15 +18,18 @@ @@ -208,23 +205,21 @@ import { import { CopyOutlined } from '@ant-design/icons-vue' import useClipboard from 'vue-clipboard3' import { message } from 'ant-design-vue' -import { PRIMARY_COLOR } from '@/base/constants' import { getConditionRuleDetailAPI } from '@/api/service/traffic' import { useRoute } from 'vue-router' import { 
HTTP_STATUS } from '@/base/http/constants' +import RuleHistoryPanel from '../../_shared/RuleHistoryPanel.vue' const { appContext: { config: { globalProperties } } -} = getCurrentInstance() +} = getCurrentInstance() as ComponentInternalInstance const route = useRoute() +const ruleName = computed(() => String(route.params?.ruleName || '')) -const isDrawerOpened = ref(false) - -const sliderSpan = ref(8) - -let __ = PRIMARY_COLOR +const isHistoryOpen = ref(false) +const currentVersionNo = ref(undefined) const toClipboard = useClipboard().toClipboard @@ -239,8 +234,6 @@ const conditionRuleDetail = reactive({}) const actionObj = computed(() => { const key = conditionRuleDetail.key || '' const arr = typeof key === 'string' ? key.split(':') : [] - conditionRuleDetail.version = arr[1] || '' - conditionRuleDetail.group = arr[2] || '' return arr[0] || '' }) @@ -252,11 +245,13 @@ const addressSubsetMatch = ref([]) // Get condition routing details async function getRoutingRuleDetail() { - let res = await getConditionRuleDetailAPI(route.params?.ruleName) + let res = await getConditionRuleDetailAPI(ruleName.value) if (res?.code === HTTP_STATUS.SUCCESS) { Object.assign(conditionRuleDetail, res?.data || {}) - conditionRuleDetail.conditions.forEach((item: any, index: number) => { + requestParameterMatch.value = [] + addressSubsetMatch.value = [] + conditionRuleDetail.conditions.forEach((item: any) => { const arr = item.split(' => ') const addressArr = arr[1]?.split(' & ') const requestMatchArr = arr[0]?.split(' & ') @@ -267,11 +262,11 @@ async function getRoutingRuleDetail() { } const getVersionAndGroup = () => { - const conditionName = route.params?.ruleName + const conditionName = ruleName.value if (conditionName && conditionRuleDetail.scope === 'service') { const arr = conditionName?.split(':') - conditionRuleDetail.version = arr[1] - conditionRuleDetail.group = arr[2].split('.')[0] + conditionRuleDetail.version = arr[1] || '' + conditionRuleDetail.group = arr[2]?.split('.')[0] 
|| '' } } diff --git a/ui-vue3/src/views/traffic/routingRule/tabs/updateByFormView.vue b/ui-vue3/src/views/traffic/routingRule/tabs/updateByFormView.vue index 42795adef..754199919 100644 --- a/ui-vue3/src/views/traffic/routingRule/tabs/updateByFormView.vue +++ b/ui-vue3/src/views/traffic/routingRule/tabs/updateByFormView.vue @@ -144,19 +144,9 @@