diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 2e69b463..8a878eee 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -288,7 +288,15 @@ async function parseEvaluatorList(
   }
   const placeholderIndex = result.indexOf(PLACEHOLDER);
   if (strings.length > 0 && placeholderIndex !== -1) {
-    result[placeholderIndex] = { type: 'rubrics', criteria: strings };
+    // Weight the single rubrics entry by its criteria count so that every string assertion
+    // the user wrote counts as much as one explicit grader in the overall score.
+    // e.g. [contains, "crit1", "crit2", "crit3"] → contains(w=1) + rubrics(w=3)
+    // → 4 visible assertions, each worth 1/4 (assumes the explicit grader is weighted 1).
+    result[placeholderIndex] = {
+      type: 'rubrics',
+      criteria: strings,
+      weight: strings.length,
+    };
   } else if (placeholderIndex !== -1) {
     // All strings were empty — remove the placeholder
     result.splice(placeholderIndex, 1);
diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
index bf2998c5..e5cd4e57 100644
--- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
@@ -1989,6 +1989,32 @@ describe('parseEvaluators - string shorthand in assertions', () => {
 
     expect(evaluators).toBeUndefined();
   });
+
+  it('sets rubrics grader weight = criteria count when mixed with other graders', async () => {
+    // Three string assertions plus one explicit grader: the user sees 4 assertions.
+    // Expect rubrics(w=3) + contains(w=1, assumed default) → each visible assertion = 1/4.
+    const evaluators = await parseEvaluators(
+      {
+        assertions: [
+          'Identifies the undefined access',
+          'Suggests a null-safe fix',
+          'Explains why the original code is dangerous',
+          { type: 'contains', value: 'null' },
+        ],
+      },
+      undefined,
+      ['/tmp'],
+      'test-id',
+    );
+
+    expect(evaluators).toHaveLength(2);
+    const rubrics = evaluators?.[0] as LlmGraderEvaluatorConfig;
+    expect(rubrics.type).toBe('llm-grader');
+    expect(rubrics.rubrics).toHaveLength(3);
+    expect(rubrics.weight).toBe(3);
+    expect(evaluators?.[1].type).toBe('contains');
+    expect(evaluators?.[1].weight).toBeUndefined(); // the parser must not add a weight to explicit graders
+  });
 });
 
 describe('parseEvaluators - file:// prefix prompt resolution', () => {