-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Add comprehensive tests for feature-flagged tool handling #1747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
Copilot
wants to merge
6
commits into
main
Choose a base branch
from
copilot/fix-tool-rename-handling
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+387
−0
Draft
Changes from 4 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
43bc950
Initial plan
Copilot 6c1bde3
Add comprehensive tests for feature-flagged tool handling
Copilot f760044
Complete feature flag testing with successful validation
Copilot 727445f
Remove specific get_job_logs test - generic tests are sufficient
Copilot ef7117a
Update comment to clarify no duplicates or omissions
Copilot 81c8337
Update test comment to accurately describe protections
Copilot File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1688,3 +1688,250 @@ func TestForMCPRequest_ToolsCall_FeatureFlaggedVariants(t *testing.T) { | |
| availableOn[0].FeatureFlagEnable, availableOn[0].FeatureFlagDisable) | ||
| } | ||
| } | ||
|
|
||
| // TestToolsList_WithFeatureFlags validates that tools/list returns exactly the tools | ||
| // available for the current feature flag state, with no duplicates and no omissions. | ||
|
||
| func TestToolsList_WithFeatureFlags(t *testing.T) { | ||
| // Create tools with various feature flag configurations | ||
| // These are properly mutually exclusive | ||
| tools := []ServerTool{ | ||
| mockToolWithFlags("tool_a", "test", true, "", "flag_x"), // disabled when flag_x is ON | ||
| mockToolWithFlags("tool_a", "test", true, "flag_x", ""), // enabled when flag_x is ON | ||
| mockToolWithFlags("tool_b", "test", true, "flag_y", ""), // enabled only when flag_y is ON | ||
| mockToolWithFlags("tool_c", "test", true, "", "flag_z"), // disabled when flag_z is ON | ||
| mockToolWithFlags("tool_c", "test", true, "flag_z", ""), // enabled when flag_z is ON | ||
| mockTool("tool_d", "test", true), // always enabled (no flags) | ||
| } | ||
|
|
||
| testCases := []struct { | ||
| name string | ||
| flagStates map[string]bool | ||
| expectedTools []string // tool names that should be available | ||
| }{ | ||
| { | ||
| name: "All flags OFF", | ||
| flagStates: map[string]bool{}, | ||
| expectedTools: []string{"tool_a", "tool_c", "tool_d"}, | ||
| }, | ||
| { | ||
| name: "flag_x ON", | ||
| flagStates: map[string]bool{"flag_x": true}, | ||
| expectedTools: []string{"tool_a", "tool_c", "tool_d"}, | ||
| }, | ||
| { | ||
| name: "flag_y ON", | ||
| flagStates: map[string]bool{"flag_y": true}, | ||
| expectedTools: []string{"tool_a", "tool_b", "tool_c", "tool_d"}, | ||
| }, | ||
| { | ||
| name: "flag_z ON", | ||
| flagStates: map[string]bool{"flag_z": true}, | ||
| expectedTools: []string{"tool_a", "tool_c", "tool_d"}, | ||
| }, | ||
| { | ||
| name: "flag_x and flag_y ON", | ||
| flagStates: map[string]bool{"flag_x": true, "flag_y": true}, | ||
| expectedTools: []string{"tool_a", "tool_b", "tool_c", "tool_d"}, | ||
| }, | ||
| } | ||
|
|
||
| for _, tc := range testCases { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| // Create feature checker that returns the flag states for this test case | ||
| checker := func(_ context.Context, flag string) (bool, error) { | ||
| return tc.flagStates[flag], nil | ||
| } | ||
|
|
||
| reg := NewBuilder(). | ||
| SetTools(tools). | ||
| WithToolsets([]string{"all"}). | ||
| WithFeatureChecker(checker). | ||
| Build() | ||
|
|
||
| // Test tools/list endpoint | ||
| listReg := reg.ForMCPRequest(MCPMethodToolsList, "") | ||
| available := listReg.AvailableTools(context.Background()) | ||
|
|
||
| // Collect available tool names | ||
| availableNames := make(map[string]int) | ||
| for _, tool := range available { | ||
| availableNames[tool.Tool.Name]++ | ||
| } | ||
|
|
||
| // Verify expected tools are present | ||
| for _, expectedName := range tc.expectedTools { | ||
| count, found := availableNames[expectedName] | ||
| if !found { | ||
| t.Errorf("Expected tool %q not found in available tools", expectedName) | ||
| } else if count > 1 { | ||
| t.Errorf("Tool %q appears %d times (should appear only once)", expectedName, count) | ||
| } | ||
| } | ||
|
|
||
| // Verify no unexpected tools | ||
| if len(availableNames) != len(tc.expectedTools) { | ||
| t.Errorf("Expected %d tools, got %d: %v", len(tc.expectedTools), len(availableNames), availableNames) | ||
| } | ||
|
|
||
| // Verify no duplicate tool names in the result | ||
| for name, count := range availableNames { | ||
| if count > 1 { | ||
| t.Errorf("Duplicate tool name %q appears %d times", name, count) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // TestToolsCall_WithFeatureFlags validates that tools/call (ForMCPRequest with specific tool) | ||
| // returns the correct tool variant based on feature flags | ||
| func TestToolsCall_WithFeatureFlags(t *testing.T) { | ||
| tools := []ServerTool{ | ||
| mockToolWithFlags("shared_tool", "test", true, "", "feature_flag"), // OLD: disabled when feature_flag is ON | ||
| mockToolWithFlags("shared_tool", "test", true, "feature_flag", ""), // NEW: enabled when feature_flag is ON | ||
| mockTool("other_tool", "test", true), | ||
| } | ||
|
|
||
| testCases := []struct { | ||
| name string | ||
| toolName string | ||
| featureFlagOn bool | ||
| expectToolCount int | ||
| expectEnableFlag string | ||
| expectDisableFlag string | ||
| }{ | ||
| { | ||
| name: "Call shared_tool with flag OFF - should get old variant", | ||
| toolName: "shared_tool", | ||
| featureFlagOn: false, | ||
| expectToolCount: 1, | ||
| expectEnableFlag: "", | ||
| expectDisableFlag: "feature_flag", | ||
| }, | ||
| { | ||
| name: "Call shared_tool with flag ON - should get new variant", | ||
| toolName: "shared_tool", | ||
| featureFlagOn: true, | ||
| expectToolCount: 1, | ||
| expectEnableFlag: "feature_flag", | ||
| expectDisableFlag: "", | ||
| }, | ||
| { | ||
| name: "Call other_tool - always available", | ||
| toolName: "other_tool", | ||
| featureFlagOn: false, | ||
| expectToolCount: 1, | ||
| expectEnableFlag: "", | ||
| expectDisableFlag: "", | ||
| }, | ||
| } | ||
|
|
||
| for _, tc := range testCases { | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| var checker FeatureFlagChecker | ||
| if tc.featureFlagOn { | ||
| checker = func(_ context.Context, flag string) (bool, error) { | ||
| return flag == "feature_flag", nil | ||
| } | ||
| } else { | ||
| checker = func(_ context.Context, _ string) (bool, error) { | ||
| return false, nil | ||
| } | ||
| } | ||
|
|
||
| reg := NewBuilder(). | ||
| SetTools(tools). | ||
| WithToolsets([]string{"all"}). | ||
| WithFeatureChecker(checker). | ||
| Build() | ||
|
|
||
| // Test tools/call endpoint | ||
| callReg := reg.ForMCPRequest(MCPMethodToolsCall, tc.toolName) | ||
| available := callReg.AvailableTools(context.Background()) | ||
|
|
||
| if len(available) != tc.expectToolCount { | ||
| t.Fatalf("Expected %d tool(s), got %d", tc.expectToolCount, len(available)) | ||
| } | ||
|
|
||
| if tc.expectToolCount > 0 { | ||
| tool := available[0] | ||
| if tool.Tool.Name != tc.toolName { | ||
| t.Errorf("Expected tool name %q, got %q", tc.toolName, tool.Tool.Name) | ||
| } | ||
| if tool.FeatureFlagEnable != tc.expectEnableFlag { | ||
| t.Errorf("Expected FeatureFlagEnable=%q, got %q", tc.expectEnableFlag, tool.FeatureFlagEnable) | ||
| } | ||
| if tool.FeatureFlagDisable != tc.expectDisableFlag { | ||
| t.Errorf("Expected FeatureFlagDisable=%q, got %q", tc.expectDisableFlag, tool.FeatureFlagDisable) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| // TestNoDuplicateToolsInAnyFeatureFlagCombination validates that no matter what | ||
| // combination of feature flags is enabled, we never have duplicate tool names in | ||
| // the available tools list | ||
| func TestNoDuplicateToolsInAnyFeatureFlagCombination(t *testing.T) { | ||
| tools := []ServerTool{ | ||
| // Simulate real tools with feature flags | ||
| mockToolWithFlags("actions_list", "test", true, "", "consolidated"), | ||
| mockToolWithFlags("actions_list", "test", true, "consolidated", ""), | ||
| mockToolWithFlags("actions_get", "test", true, "", "consolidated"), | ||
| mockToolWithFlags("actions_get", "test", true, "consolidated", ""), | ||
| mockToolWithFlags("get_job_logs", "test", true, "", "consolidated"), | ||
| mockToolWithFlags("get_job_logs", "test", true, "consolidated", ""), | ||
| mockTool("regular_tool", "test", true), | ||
| mockToolWithFlags("feature_tool", "test", true, "other_flag", ""), | ||
| } | ||
|
|
||
| // Test all combinations of feature flags | ||
| flags := []string{"consolidated", "other_flag"} | ||
|
|
||
| // Generate all possible combinations of flags (2^n combinations) | ||
| numCombinations := 1 << len(flags) | ||
|
|
||
| for i := 0; i < numCombinations; i++ { | ||
| flagStates := make(map[string]bool) | ||
| var testName string | ||
| for j, flag := range flags { | ||
| isOn := (i & (1 << j)) != 0 | ||
| flagStates[flag] = isOn | ||
| if isOn { | ||
| if testName != "" { | ||
| testName += "_" | ||
| } | ||
| testName += flag | ||
| } | ||
| } | ||
| if testName == "" { | ||
| testName = "no_flags" | ||
| } | ||
|
|
||
| t.Run(testName, func(t *testing.T) { | ||
| checker := func(_ context.Context, flag string) (bool, error) { | ||
| return flagStates[flag], nil | ||
| } | ||
|
|
||
| reg := NewBuilder(). | ||
| SetTools(tools). | ||
| WithToolsets([]string{"all"}). | ||
| WithFeatureChecker(checker). | ||
| Build() | ||
|
|
||
| available := reg.AvailableTools(context.Background()) | ||
|
|
||
| // Check for duplicates | ||
| seen := make(map[string]int) | ||
| for _, tool := range available { | ||
| seen[tool.Tool.Name]++ | ||
| } | ||
|
|
||
| for name, count := range seen { | ||
| if count > 1 { | ||
| t.Errorf("Duplicate tool %q appears %d times with flag state: %v", name, count, flagStates) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot this is not quite correct. The unknown-tool errors came from two tools having the same name: only one of them was returned from the filter, and it was then filtered out by the feature flag. Actually, duplicate tools would result in only one being registered (as servers wouldn't allow duplicate-name tools to be registered and returned), possibly the wrong one, which is also a problem. The main reason for the request to test this generically is to ensure that, as new tools are added and consolidation work occurs, we don't miss something like this again.
We are protecting against both:
And specifically we are looking at feature flags, because we already have duplicate protection otherwise.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the test comment in commit 81c8337 to accurately describe what we're protecting against:
This correctly reflects that the test prevents issues where filtering returns the wrong variant or no variant at all, rather than just "unknown tool" errors.