{
  "version": "1.0",
  "description": "Quality metric definitions for infinite loop generation validation",
  "metrics": {
    "completeness": {
      "name": "Completeness",
      "description": "Measures whether all required components are present",
      "weight": 0.25,
      "scoring": {
        "method": "percentage",
        "calculation": "present_components / required_components * 100"
      },
      "thresholds": {
        "excellent": 100,
        "good": 90,
        "acceptable": 75,
        "poor": 60,
        "failing": 0
      }
    },
    "technical_correctness": {
      "name": "Technical Correctness",
      "description": "Measures syntax validity and technical errors",
      "weight": 0.25,
      "scoring": {
        "method": "error_deduction",
        "calculation": "100 - (critical_errors * 20 + minor_errors * 5)"
      },
      "thresholds": {
        "excellent": 95,
        "good": 85,
        "acceptable": 70,
        "poor": 50,
        "failing": 0
      }
    },
    "spec_compliance": {
      "name": "Specification Compliance",
      "description": "Measures adherence to specification requirements",
      "weight": 0.25,
      "scoring": {
        "method": "requirement_matching",
        "calculation": "met_requirements / total_requirements * 100"
      },
      "thresholds": {
        "excellent": 95,
        "good": 85,
        "acceptable": 75,
        "poor": 60,
        "failing": 0
      }
    },
    "uniqueness": {
      "name": "Uniqueness",
      "description": "Measures variation from other iterations",
      "weight": 0.15,
      "scoring": {
        "method": "similarity_inversion",
        "calculation": "100 - (max_similarity_percentage)"
      },
      "thresholds": {
        "excellent": 85,
        "good": 70,
        "acceptable": 60,
        "poor": 40,
        "failing": 0
      }
    },
    "innovation": {
      "name": "Innovation/Creativity",
      "description": "Measures creative approach and novel implementation",
      "weight": 0.10,
      "scoring": {
        "method": "qualitative_assessment",
        "calculation": "subjective_score based on creativity indicators"
      },
      "thresholds": {
        "excellent": 90,
        "good": 75,
        "acceptable": 60,
        "poor": 40,
        "failing": 0
      },
      "indicators": [
        "Novel visualization technique",
        "Unique interaction pattern",
        "Creative data presentation",
        "Innovative design approach",
        "Unexpected but effective solution"
      ]
    }
  },
  "composite_score": {
    "name": "Overall Quality Score",
    "calculation": "weighted_average of all metric scores",
    "formula": "sum(metric_score * metric_weight) for all metrics",
    "interpretation": {
      "90-100": "Excellent - Exceeds expectations, production-ready",
      "80-89": "Good - Meets all requirements, minor improvements possible",
      "70-79": "Acceptable - Meets minimum standards, some improvements needed",
      "60-69": "Below Standard - Significant improvements required",
      "0-59": "Failing - Does not meet minimum requirements"
    }
  },
  "usage_notes": {
    "automatic_metrics": [
      "completeness",
      "technical_correctness",
      "spec_compliance",
      "uniqueness"
    ],
    "manual_metrics": [
      "innovation"
    ],
    "test_output_integration": "The /test-output command uses these metrics to calculate quality scores",
    "report_integration": "The /report command aggregates these metrics across all iterations",
    "analyze_integration": "The /analyze command uses these metrics to identify quality patterns"
  },
  "chain_of_thought_application": {
    "reasoning": "These metrics make quality assessment transparent and reproducible",
    "benefits": [
      "Clear criteria - No ambiguity about what makes quality high or low",
      "Weighted priorities - Important aspects (completeness, correctness) weighted higher",
      "Explicit thresholds - Specific boundaries between quality levels",
      "Actionable feedback - Scores point to specific improvement areas",
      "Consistent evaluation - Same standards applied to all iterations"
    ],
    "example_reasoning_chain": [
      "Step 1: Check completeness - Are all required sections present?",
      "Step 2: Validate syntax - Are there technical errors?",
      "Step 3: Verify spec compliance - Do outputs match requirements?",
      "Step 4: Assess uniqueness - How different from other iterations?",
      "Step 5: Evaluate innovation - Is approach creative and novel?",
      "Step 6: Calculate composite score - Weighted average of all metrics",
      "Step 7: Interpret score - Map to quality level (excellent/good/etc.)",
      "Step 8: Generate feedback - Identify specific strengths and improvements"
    ]
  }
}