{
  "version": "1.0",
  "description": "Quality metric definitions for infinite loop generation validation",
  "metrics": {
    "completeness": {
      "name": "Completeness",
      "description": "Measures whether all required components are present",
      "weight": 0.25,
      "scoring": {
        "method": "percentage",
        "calculation": "present_components / required_components * 100"
      },
      "thresholds": {
        "excellent": 100,
        "good": 90,
        "acceptable": 75,
        "poor": 60,
        "failing": 0
      }
    },
    "technical_correctness": {
      "name": "Technical Correctness",
      "description": "Measures syntax validity and technical errors",
      "weight": 0.25,
      "scoring": {
        "method": "error_deduction",
        "calculation": "100 - (critical_errors * 20 + minor_errors * 5)"
      },
      "thresholds": {
        "excellent": 95,
        "good": 85,
        "acceptable": 70,
        "poor": 50,
        "failing": 0
      }
    },
    "spec_compliance": {
      "name": "Specification Compliance",
      "description": "Measures adherence to specification requirements",
      "weight": 0.25,
      "scoring": {
        "method": "requirement_matching",
        "calculation": "met_requirements / total_requirements * 100"
      },
      "thresholds": {
        "excellent": 95,
        "good": 85,
        "acceptable": 75,
        "poor": 60,
        "failing": 0
      }
    },
    "uniqueness": {
      "name": "Uniqueness",
      "description": "Measures variation from other iterations",
      "weight": 0.15,
      "scoring": {
        "method": "similarity_inversion",
        "calculation": "100 - (max_similarity_percentage)"
      },
      "thresholds": {
        "excellent": 85,
        "good": 70,
        "acceptable": 60,
        "poor": 40,
        "failing": 0
      }
    },
    "innovation": {
      "name": "Innovation/Creativity",
      "description": "Measures creative approach and novel implementation",
      "weight": 0.10,
      "scoring": {
        "method": "qualitative_assessment",
        "calculation": "subjective_score based on creativity indicators"
      },
      "thresholds": {
        "excellent": 90,
        "good": 75,
        "acceptable": 60,
        "poor": 40,
        "failing": 0
      },
      "indicators": [
        "Novel visualization technique",
        "Unique interaction pattern",
        "Creative data presentation",
        "Innovative design approach",
        "Unexpected but effective solution"
      ]
    }
  },
  "composite_score": {
    "name": "Overall Quality Score",
    "calculation": "weighted_average of all metric scores",
    "formula": "sum(metric_score * metric_weight) for all metrics",
    "interpretation": {
      "90-100": "Excellent - Exceeds expectations, production-ready",
      "80-89": "Good - Meets all requirements, minor improvements possible",
      "70-79": "Acceptable - Meets minimum standards, some improvements needed",
      "60-69": "Below Standard - Significant improvements required",
      "0-59": "Failing - Does not meet minimum requirements"
    }
  },
  "usage_notes": {
    "automatic_metrics": [
      "completeness",
      "technical_correctness",
      "spec_compliance",
      "uniqueness"
    ],
    "manual_metrics": [
      "innovation"
    ],
    "test_output_integration": "The /test-output command uses these metrics to calculate quality scores",
    "report_integration": "The /report command aggregates these metrics across all iterations",
    "analyze_integration": "The /analyze command uses these metrics to identify quality patterns"
  },
  "chain_of_thought_application": {
    "reasoning": "These metrics make quality assessment transparent and reproducible",
    "benefits": [
      "Clear criteria - No ambiguity about what makes quality high or low",
      "Weighted priorities - Important aspects (completeness, correctness) weighted higher",
      "Explicit thresholds - Specific boundaries between quality levels",
      "Actionable feedback - Scores point to specific improvement areas",
      "Consistent evaluation - Same standards applied to all iterations"
    ],
    "example_reasoning_chain": [
      "Step 1: Check completeness - Are all required sections present?",
      "Step 2: Validate syntax - Are there technical errors?",
      "Step 3: Verify spec compliance - Do outputs match requirements?",
      "Step 4: Assess uniqueness - How different from other iterations?",
      "Step 5: Evaluate innovation - Is approach creative and novel?",
      "Step 6: Calculate composite score - Weighted average of all metrics",
      "Step 7: Interpret score - Map to quality level (excellent/good/etc.)",
      "Step 8: Generate feedback - Identify specific strengths and improvements"
    ]
  }
}