infinite-agents-public/infinite_variants/infinite_variant_2/utils/quality_metrics.json

140 lines
4.7 KiB
JSON

{
"version": "1.0",
"description": "Quality metric definitions for infinite loop generation validation",
"metrics": {
"completeness": {
"name": "Completeness",
"description": "Measures whether all required components are present",
"weight": 0.25,
"scoring": {
"method": "percentage",
"calculation": "present_components / required_components * 100"
},
"thresholds": {
"excellent": 100,
"good": 90,
"acceptable": 75,
"poor": 60,
"failing": 0
}
},
"technical_correctness": {
"name": "Technical Correctness",
"description": "Measures syntax validity and technical errors",
"weight": 0.25,
"scoring": {
"method": "error_deduction",
"calculation": "max(0, 100 - (critical_errors * 20 + minor_errors * 5))"
},
"thresholds": {
"excellent": 95,
"good": 85,
"acceptable": 70,
"poor": 50,
"failing": 0
}
},
"spec_compliance": {
"name": "Specification Compliance",
"description": "Measures adherence to specification requirements",
"weight": 0.25,
"scoring": {
"method": "requirement_matching",
"calculation": "met_requirements / total_requirements * 100"
},
"thresholds": {
"excellent": 95,
"good": 85,
"acceptable": 75,
"poor": 60,
"failing": 0
}
},
"uniqueness": {
"name": "Uniqueness",
"description": "Measures variation from other iterations",
"weight": 0.15,
"scoring": {
"method": "similarity_inversion",
"calculation": "100 - (max_similarity_percentage)"
},
"thresholds": {
"excellent": 85,
"good": 70,
"acceptable": 60,
"poor": 40,
"failing": 0
}
},
"innovation": {
"name": "Innovation/Creativity",
"description": "Measures creative approach and novel implementation",
"weight": 0.10,
"scoring": {
"method": "qualitative_assessment",
"calculation": "subjective_score (0-100) based on creativity indicators"
},
"thresholds": {
"excellent": 90,
"good": 75,
"acceptable": 60,
"poor": 40,
"failing": 0
},
"indicators": [
"Novel visualization technique",
"Unique interaction pattern",
"Creative data presentation",
"Innovative design approach",
"Unexpected but effective solution"
]
}
},
"composite_score": {
"name": "Overall Quality Score",
"calculation": "weighted_average of all metric scores",
"formula": "sum(metric_score * metric_weight) for all metrics (weights sum to 1.0)",
"interpretation": {
"90-100": "Excellent - Exceeds expectations, production-ready",
"80-89": "Good - Meets all requirements, minor improvements possible",
"70-79": "Acceptable - Meets minimum standards, some improvements needed",
"60-69": "Below Standard - Significant improvements required",
"0-59": "Failing - Does not meet minimum requirements"
}
},
"usage_notes": {
"automatic_metrics": [
"completeness",
"technical_correctness",
"spec_compliance",
"uniqueness"
],
"manual_metrics": [
"innovation"
],
"test_output_integration": "The /test-output command uses these metrics to calculate quality scores",
"report_integration": "The /report command aggregates these metrics across all iterations",
"analyze_integration": "The /analyze command uses these metrics to identify quality patterns"
},
"chain_of_thought_application": {
"reasoning": "These metrics make quality assessment transparent and reproducible",
"benefits": [
"Clear criteria - No ambiguity about what makes quality high or low",
"Weighted priorities - Important aspects (completeness, correctness, spec compliance) weighted higher",
"Explicit thresholds - Specific boundaries between quality levels",
"Actionable feedback - Scores point to specific improvement areas",
"Consistent evaluation - Same standards applied to all iterations"
],
"example_reasoning_chain": [
"Step 1: Check completeness - Are all required sections present?",
"Step 2: Validate syntax - Are there technical errors?",
"Step 3: Verify spec compliance - Do outputs match requirements?",
"Step 4: Assess uniqueness - How different is it from other iterations?",
"Step 5: Evaluate innovation - Is the approach creative and novel?",
"Step 6: Calculate composite score - Weighted average of all metrics",
"Step 7: Interpret score - Map to quality level (excellent/good/etc.)",
"Step 8: Generate feedback - Identify specific strengths and improvements"
]
}
}