{ "version": "1.0", "description": "Quality metric definitions for infinite loop generation validation", "metrics": { "completeness": { "name": "Completeness", "description": "Measures whether all required components are present", "weight": 0.25, "scoring": { "method": "percentage", "calculation": "present_components / required_components * 100" }, "thresholds": { "excellent": 100, "good": 90, "acceptable": 75, "poor": 60, "failing": 0 } }, "technical_correctness": { "name": "Technical Correctness", "description": "Measures syntax validity and technical errors", "weight": 0.25, "scoring": { "method": "error_deduction", "calculation": "100 - (critical_errors * 20 + minor_errors * 5)" }, "thresholds": { "excellent": 95, "good": 85, "acceptable": 70, "poor": 50, "failing": 0 } }, "spec_compliance": { "name": "Specification Compliance", "description": "Measures adherence to specification requirements", "weight": 0.25, "scoring": { "method": "requirement_matching", "calculation": "met_requirements / total_requirements * 100" }, "thresholds": { "excellent": 95, "good": 85, "acceptable": 75, "poor": 60, "failing": 0 } }, "uniqueness": { "name": "Uniqueness", "description": "Measures variation from other iterations", "weight": 0.15, "scoring": { "method": "similarity_inversion", "calculation": "100 - (max_similarity_percentage)" }, "thresholds": { "excellent": 85, "good": 70, "acceptable": 60, "poor": 40, "failing": 0 } }, "innovation": { "name": "Innovation/Creativity", "description": "Measures creative approach and novel implementation", "weight": 0.10, "scoring": { "method": "qualitative_assessment", "calculation": "subjective_score based on creativity indicators" }, "thresholds": { "excellent": 90, "good": 75, "acceptable": 60, "poor": 40, "failing": 0 }, "indicators": [ "Novel visualization technique", "Unique interaction pattern", "Creative data presentation", "Innovative design approach", "Unexpected but effective solution" ] } }, "composite_score": { "name": "Overall Quality Score", "calculation": "weighted_average of all metric scores", "formula": "sum(metric_score * metric_weight) for all metrics", "interpretation": { "90-100": "Excellent - Exceeds expectations, production-ready", "80-89": "Good - Meets all requirements, minor improvements possible", "70-79": "Acceptable - Meets minimum standards, some improvements needed", "60-69": "Below Standard - Significant improvements required", "0-59": "Failing - Does not meet minimum requirements" } }, "usage_notes": { "automatic_metrics": [ "completeness", "technical_correctness", "spec_compliance", "uniqueness" ], "manual_metrics": [ "innovation" ], "test_output_integration": "The /test-output command uses these metrics to calculate quality scores", "report_integration": "The /report command aggregates these metrics across all iterations", "analyze_integration": "The /analyze command uses these metrics to identify quality patterns" }, "chain_of_thought_application": { "reasoning": "These metrics make quality assessment transparent and reproducible", "benefits": [ "Clear criteria - No ambiguity about what makes quality high or low", "Weighted priorities - Important aspects (completeness, correctness) weighted higher", "Explicit thresholds - Specific boundaries between quality levels", "Actionable feedback - Scores point to specific improvement areas", "Consistent evaluation - Same standards applied to all iterations" ], "example_reasoning_chain": [ "Step 1: Check completeness - Are all required sections present?", "Step 2: Validate syntax - Are 
there technical errors?", "Step 3: Verify spec compliance - Do outputs match requirements?", "Step 4: Assess uniqueness - How different from other iterations?", "Step 5: Evaluate innovation - Is approach creative and novel?", "Step 6: Calculate composite score - Weighted average of all metrics", "Step 7: Interpret score - Map to quality level (excellent/good/etc.)", "Step 8: Generate feedback - Identify specific strengths and improvements" ] } }
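
For reference, a minimal sketch of how these definitions might be consumed, assuming the JSON above is saved as quality_metrics.json (a placeholder name) and that per-metric scores on a 0-100 scale are supplied by the caller. It applies the composite_score formula (sum of metric_score * metric_weight) and maps the result onto an interpretation band.

```python
import json

def load_config(path="quality_metrics.json"):
    # "quality_metrics.json" is a placeholder path, not defined by the spec above.
    with open(path) as f:
        return json.load(f)

def composite_score(config, scores):
    # sum(metric_score * metric_weight) for all metrics
    return sum(scores[name] * spec["weight"] for name, spec in config["metrics"].items())

def interpret(config, score):
    # Map a composite score onto an interpretation band such as "80-89".
    for band, meaning in config["composite_score"]["interpretation"].items():
        low, high = (int(x) for x in band.split("-"))
        if low <= score <= high:
            return meaning
    return "out of range"

if __name__ == "__main__":
    config = load_config()
    # Example per-metric scores (0-100), standing in for the automatic and
    # manual assessments described in usage_notes.
    scores = {
        "completeness": 100,
        "technical_correctness": 85,
        "spec_compliance": 90,
        "uniqueness": 80,
        "innovation": 70,
    }
    total = composite_score(config, scores)       # 25 + 21.25 + 22.5 + 12 + 7 = 87.75
    print(total, "->", interpret(config, total))  # 87.75 -> "Good - ..."
```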