@@ -97,17 +97,18 @@ func (j *Judge) Validate(ctx context.Context) error {
9797// RelevanceResult contains the result of a single relevance check: the criterion, whether it passed, and the accompanying reason.
9898type RelevanceResult struct {
9999 Criterion string `json:"criterion"` // criterion text this result refers to
100+ Passed bool `json:"passed"` // whether the check for this criterion passed
100101 Reason string `json:"reason"` // reason reported alongside the pass/fail verdict
101102}
102103
103104// CheckRelevance runs all relevance checks concurrently with the configured concurrency.
104- // It returns the number of passed checks, a slice of failed results with reasons, and an error
105- // if any check encountered an error (e.g. judge model misconfiguration). Errors cause a hard
106- // failure so that configuration issues are surfaced immediately rather than silently producing
107- // zero-relevance results.
108- func (j * Judge ) CheckRelevance (ctx context.Context , response string , criteria []string ) (passed int , failed []RelevanceResult , err error ) {
105+ // It returns a result for every criterion, in the same order as criteria (both passed and
106+ // failed, each with a reason from the judge model), and an error joining every check that
107+ // encountered an error (e.g. judge model misconfiguration). When the error is non-nil, the entries for the checks that errored are left zero-valued. Errors cause a hard failure so that configuration issues are surfaced
108+ // immediately rather than silently producing zero-relevance results.
109+ func (j * Judge ) CheckRelevance (ctx context.Context , response string , criteria []string ) (results []RelevanceResult , err error ) {
109110 if len (criteria ) == 0 {
110- return 0 , nil , nil
111+ return nil , nil
111112 }
112113
113114 // Create work channel
@@ -122,23 +123,23 @@ func (j *Judge) CheckRelevance(ctx context.Context, response string, criteria []
122123 close (work )
123124
124125 // Results slice preserves order
125- type result struct {
126+ type rawResult struct {
126127 passed bool
127128 reason string
128129 err error
129130 }
130- results := make ([]result , len (criteria ))
131+ rawResults := make ([]rawResult , len (criteria ))
131132
132133 var wg sync.WaitGroup
133134 for range j .concurrency {
134135 wg .Go (func () {
135136 for item := range work {
136137 if ctx .Err () != nil {
137- results [item .index ] = result {err : fmt .Errorf ("context cancelled: %w" , ctx .Err ())}
138+ rawResults [item .index ] = rawResult {err : fmt .Errorf ("context cancelled: %w" , ctx .Err ())}
138139 continue
139140 }
140141 pass , reason , checkErr := j .checkSingle (ctx , response , item .criterion )
141- results [item .index ] = result {passed : pass , reason : reason , err : checkErr }
142+ rawResults [item .index ] = rawResult {passed : pass , reason : reason , err : checkErr }
142143 }
143144 })
144145 }
@@ -147,26 +148,24 @@ func (j *Judge) CheckRelevance(ctx context.Context, response string, criteria []
147148 // Aggregate results. Any error is fatal: every failed check is collected and the joined
148149 // error is returned after the loop so the caller can fail fast on judge misconfiguration.
149150 var errs []error
150- for i , r := range results {
151+ results = make ([]RelevanceResult , len (criteria ))
152+ for i , r := range rawResults {
151153 if r .err != nil {
152154 errs = append (errs , fmt .Errorf ("checking %q: %w" , criteria [i ], r .err ))
153155 continue
154156 }
155- if r .passed {
156- passed ++
157- } else {
158- failed = append (failed , RelevanceResult {
159- Criterion : criteria [i ],
160- Reason : r .reason ,
161- })
157+ results [i ] = RelevanceResult {
158+ Criterion : criteria [i ],
159+ Passed : r .passed ,
160+ Reason : r .reason ,
162161 }
163162 }
164163
165164 if len (errs ) > 0 {
166- return passed , failed , errors .Join (errs ... )
165+ return results , errors .Join (errs ... )
167166 }
168167
169- return passed , failed , nil
168+ return results , nil
170169}
171170
172171// checkSingle checks a single relevance criterion against the response.
0 commit comments