Skip to content

Commit f12d6f7

Browse files
authored
perf: fix ParseWorkflow regression — ~18% faster, ~22% fewer allocations (#25990)
1 parent a5e4ba1 commit f12d6f7

4 files changed

Lines changed: 357 additions & 17 deletions

File tree

pkg/parser/import_field_extractor.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,17 @@ func (acc *importAccumulator) extractAllImportFields(content []byte, item import
147147
// All subsequent field extractions use the pre-parsed result.
148148
// When inputs are present we parse the already-substituted content so that all
149149
// frontmatter fields (runtimes, mcp-servers, etc.) reflect the resolved values.
150-
parsed, err := ExtractFrontmatterFromContent(rawContent)
150+
// For builtin files without inputs, use the process-level cache to avoid redundant
151+
// YAML re-parsing (processIncludedFileWithVisited already populated this cache).
152+
// Builtin files WITH inputs must skip the cache because input substitution modifies
153+
// the content, so the cached (unsubstituted) result would be stale.
154+
var parsed *FrontmatterResult
155+
var err error
156+
if strings.HasPrefix(item.fullPath, BuiltinPathPrefix) && len(item.inputs) == 0 {
157+
parsed, err = ExtractFrontmatterFromBuiltinFile(item.fullPath, content)
158+
} else {
159+
parsed, err = ExtractFrontmatterFromContent(rawContent)
160+
}
151161
var fm map[string]any
152162
if err == nil {
153163
fm = parsed.Frontmatter

pkg/parser/import_field_extractor_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"testing"
99

1010
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
1112
)
1213

1314
// TestComputeImportRelPath verifies that computeImportRelPath produces the correct
@@ -200,3 +201,85 @@ imports:
200201
assert.Contains(t, importsResult.MergedJobs, "apm", "MergedJobs should contain the 'apm' job")
201202
assert.Contains(t, importsResult.MergedJobs, "ubuntu-slim", "MergedJobs should contain the job runner")
202203
}
204+
205+
// TestExtractAllImportFields_BuiltinCacheHit verifies that extractAllImportFields uses the
206+
// process-level builtin frontmatter cache for builtin files without inputs.
207+
func TestExtractAllImportFields_BuiltinCacheHit(t *testing.T) {
208+
builtinPath := BuiltinPathPrefix + "test/cache-hit.md"
209+
content := []byte(`---
210+
tools:
211+
bash: ["echo"]
212+
engine: claude
213+
---
214+
215+
# Cache Hit Test
216+
`)
217+
218+
// Register the builtin virtual file
219+
RegisterBuiltinVirtualFile(builtinPath, content)
220+
221+
// Warm the cache by parsing once
222+
cachedResult, err := ExtractFrontmatterFromBuiltinFile(builtinPath, content)
223+
require.NoError(t, err, "should parse builtin file without error")
224+
assert.NotNil(t, cachedResult, "cached result should not be nil")
225+
226+
// Verify the cache is populated
227+
cached, ok := GetBuiltinFrontmatterCache(builtinPath)
228+
assert.True(t, ok, "builtin cache should have an entry for the path")
229+
assert.Equal(t, cachedResult, cached, "cached result should match")
230+
231+
// Call extractAllImportFields with no inputs — should hit the cache
232+
acc := newImportAccumulator()
233+
item := importQueueItem{
234+
fullPath: builtinPath,
235+
importPath: "test/cache-hit.md",
236+
sectionName: "",
237+
inputs: nil,
238+
}
239+
visited := map[string]bool{builtinPath: true}
240+
241+
err = acc.extractAllImportFields(content, item, visited)
242+
require.NoError(t, err, "extractAllImportFields should succeed for builtin file without inputs")
243+
244+
// Verify engine was extracted from the cached frontmatter
245+
assert.NotEmpty(t, acc.engines, "engines should be populated from cached builtin file")
246+
assert.Contains(t, acc.engines[0], "claude", "engine should be 'claude' from the builtin file")
247+
}
248+
249+
// TestExtractAllImportFields_BuiltinWithInputsBypassesCache verifies that builtin files
250+
// with inputs bypass the cache and use the substituted content.
251+
func TestExtractAllImportFields_BuiltinWithInputsBypassesCache(t *testing.T) {
252+
builtinPath := BuiltinPathPrefix + "test/cache-bypass.md"
253+
content := []byte(`---
254+
tools:
255+
bash: ["echo"]
256+
engine: copilot
257+
---
258+
259+
# Cache Bypass Test
260+
`)
261+
262+
// Register the builtin virtual file
263+
RegisterBuiltinVirtualFile(builtinPath, content)
264+
265+
// Warm the cache
266+
_, err := ExtractFrontmatterFromBuiltinFile(builtinPath, content)
267+
require.NoError(t, err, "should parse builtin file without error")
268+
269+
// Call extractAllImportFields WITH inputs — should bypass the cache
270+
acc := newImportAccumulator()
271+
item := importQueueItem{
272+
fullPath: builtinPath,
273+
importPath: "test/cache-bypass.md",
274+
sectionName: "",
275+
inputs: map[string]any{"key": "value"},
276+
}
277+
visited := map[string]bool{builtinPath: true}
278+
279+
err = acc.extractAllImportFields(content, item, visited)
280+
require.NoError(t, err, "extractAllImportFields should succeed for builtin file with inputs")
281+
282+
// Verify engine was still extracted (from direct parse, not cache)
283+
assert.NotEmpty(t, acc.engines, "engines should be populated even when bypassing cache")
284+
assert.Contains(t, acc.engines[0], "copilot", "engine should be 'copilot' from the builtin file")
285+
}

pkg/parser/schema_compiler.go

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"os"
99
"path/filepath"
10+
"reflect"
1011
"regexp"
1112
"sort"
1213
"strings"
@@ -189,6 +190,73 @@ func GetSafeOutputTypeKeys() ([]string, error) {
189190
return keys, nil
190191
}
191192

193+
// normalizeForJSONSchema recursively returns a normalized copy of v with YAML-native
194+
// Go types converted to JSON-compatible types for JSON schema validation. It does
195+
// not mutate the caller's maps or slices. goccy/go-yaml produces uint64 for
196+
// positive integers and int64 for negative integers, but JSON schema validators
197+
// expect float64 for all numbers (matching encoding/json's unmarshaling behavior).
198+
// goccy/go-yaml may also produce typed slices (e.g. []string) instead of []any;
199+
// the reflection fallback converts these so the schema validator sees []any.
200+
// This avoids the overhead of a json.Marshal + json.Unmarshal roundtrip.
201+
func normalizeForJSONSchema(v any) any {
202+
switch val := v.(type) {
203+
case map[string]any:
204+
normalized := make(map[string]any, len(val))
205+
for k, elem := range val {
206+
normalized[k] = normalizeForJSONSchema(elem)
207+
}
208+
return normalized
209+
case []any:
210+
normalized := make([]any, len(val))
211+
for i, elem := range val {
212+
normalized[i] = normalizeForJSONSchema(elem)
213+
}
214+
return normalized
215+
case int:
216+
return float64(val)
217+
case int8:
218+
return float64(val)
219+
case int16:
220+
return float64(val)
221+
case int32:
222+
return float64(val)
223+
case int64:
224+
return float64(val)
225+
case uint:
226+
return float64(val)
227+
case uint8:
228+
return float64(val)
229+
case uint16:
230+
return float64(val)
231+
case uint32:
232+
return float64(val)
233+
case uint64:
234+
return float64(val)
235+
case float32:
236+
return float64(val)
237+
default:
238+
// Use reflection to handle typed slices (e.g. []string) and typed maps
239+
// that goccy/go-yaml may produce instead of []any / map[string]any.
240+
rv := reflect.ValueOf(v)
241+
switch rv.Kind() {
242+
case reflect.Slice:
243+
normalized := make([]any, rv.Len())
244+
for i := range rv.Len() {
245+
normalized[i] = normalizeForJSONSchema(rv.Index(i).Interface())
246+
}
247+
return normalized
248+
case reflect.Map:
249+
normalized := make(map[string]any, rv.Len())
250+
for _, key := range rv.MapKeys() {
251+
normalized[key.String()] = normalizeForJSONSchema(rv.MapIndex(key).Interface())
252+
}
253+
return normalized
254+
}
255+
// string, bool, float64, nil pass through unchanged
256+
return v
257+
}
258+
}
259+
192260
func validateWithSchema(frontmatter map[string]any, schemaJSON, context string) error {
193261
schemaCompilerLog.Printf("Validating frontmatter against schema for context: %s (%d fields)", context, len(frontmatter))
194262

@@ -217,27 +285,19 @@ func validateWithSchema(frontmatter map[string]any, schemaJSON, context string)
217285
return fmt.Errorf("schema validation error for %s: %w", context, err)
218286
}
219287

220-
// Convert frontmatter to JSON and back to normalize types for validation
221-
// Handle nil frontmatter as empty object to satisfy schema validation
222-
var frontmatterToValidate map[string]any
288+
// Normalize YAML-native Go types to JSON-compatible types for schema validation.
289+
// goccy/go-yaml produces uint64/int64 for integers, but JSON schema validators
290+
// expect float64 for all numbers (matching encoding/json's behavior).
291+
// This avoids the overhead of a json.Marshal/Unmarshal roundtrip.
292+
var normalized any
223293
if frontmatter == nil {
224-
frontmatterToValidate = make(map[string]any)
294+
normalized = make(map[string]any)
225295
} else {
226-
frontmatterToValidate = frontmatter
227-
}
228-
229-
frontmatterJSON, err := json.Marshal(frontmatterToValidate)
230-
if err != nil {
231-
return fmt.Errorf("schema validation error for %s: failed to marshal frontmatter: %w", context, err)
232-
}
233-
234-
var normalizedFrontmatter any
235-
if err := json.Unmarshal(frontmatterJSON, &normalizedFrontmatter); err != nil {
236-
return fmt.Errorf("schema validation error for %s: failed to unmarshal frontmatter: %w", context, err)
296+
normalized = normalizeForJSONSchema(frontmatter)
237297
}
238298

239299
// Validate the normalized frontmatter
240-
if err := schema.Validate(normalizedFrontmatter); err != nil {
300+
if err := schema.Validate(normalized); err != nil {
241301
schemaCompilerLog.Printf("Schema validation failed for %s: %v", context, err)
242302
return err
243303
}

0 commit comments

Comments
 (0)