Rohan03's picture
Sprint 10B: optimizer.py — agentic decision policy (improving/plateau/degrading/rollback)
8f09a93 verified
"""
optimizer.py — Agentic optimization decision policy.
Monitors agent performance and decides what to optimize:
improving → continue (don't fix what works)
plateau → optimize prompts/skills/router first (epigenetic)
epigenetic_plateau → suggest LoRA/distillation only if ROI positive
degrading → rollback immediately
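This module only recommends actions; it never distills by itself. Act on a
LoRA/distillation suggestion only when eval data AND a positive ROI check back it.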
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
class OptimizationState(str, Enum):
    """Performance states the optimizer can report.

    Members are also plain strings (``str`` mixin), so they compare equal to
    their lowercase values, e.g. ``OptimizationState.PLATEAU == "plateau"``.
    """

    # Scores are trending upward.
    IMPROVING = "improving"
    # Scores are roughly flat.
    PLATEAU = "plateau"
    # Scores are still flat after repeated prompt/skill optimization attempts.
    EPIGENETIC_PLATEAU = "epigenetic_plateau"
    # Scores are trending downward.
    DEGRADING = "degrading"
@dataclass
class PerformanceWindow:
    """Rolling window of performance scores.

    Keeps a bounded history of scores and exposes the two slices whose means
    are compared to estimate the performance trend.
    """

    # Stored scores, oldest first.
    scores: list[float] = field(default_factory=list)
    # Number of scores that make up one comparison window.
    window_size: int = 10

    def add(self, score: float) -> None:
        """Append ``score``; trim history once it exceeds three windows.

        After trimming, only the newest two windows are kept, which is all
        that ``recent`` and ``previous`` ever read.
        """
        self.scores.append(score)
        if len(self.scores) > self.window_size * 3:
            self.scores = self.scores[-self.window_size * 2:]

    @property
    def recent(self) -> list[float]:
        """The newest ``window_size`` scores (all of them if fewer exist)."""
        return self.scores[-self.window_size:]

    @property
    def previous(self) -> list[float]:
        """The window before ``recent``.

        With fewer than two full windows of data this is the older half of
        everything stored (rounded down).
        """
        if len(self.scores) < self.window_size * 2:
            return self.scores[:len(self.scores) // 2]
        return self.scores[-self.window_size * 2:-self.window_size]

    @property
    def trend(self) -> float:
        """Positive = improving, negative = degrading, ~0 = plateau.

        Returns the mean of the newer scores minus the mean of the older
        scores, or ``0.0`` when either side is empty.
        """
        older = self.previous
        newer = self.recent
        if len(self.scores) < self.window_size * 2:
            # With fewer than two full windows, ``recent`` would include the
            # scores already counted in ``previous`` and pull the difference
            # toward zero. Compare the two disjoint halves instead.
            newer = self.scores[len(older):]
        if not newer or not older:
            return 0.0
        return (sum(newer) / len(newer)) - (sum(older) / len(older))
@dataclass
class OptimizationRecommendation:
    """A single recommendation produced by the optimizer."""

    # Performance state attached to this recommendation.
    state: OptimizationState
    # Action to take; one of "continue", "optimize_prompts",
    # "optimize_skills", "suggest_lora", "rollback".
    action: str
    # Human-readable explanation of the recommendation.
    reason: str
    # Optional extra context; every instance gets its own fresh dict.
    details: dict[str, Any] = field(default_factory=dict)
class AgenticOptimizer:
    """Monitor performance scores and recommend optimization actions.

    The recommendation is driven by the trend of a rolling score window:

    * improving -> ``"continue"``
    * degrading -> ``"rollback"``
    * plateau   -> ``"optimize_prompts"`` on the first plateau recommendation,
      ``"optimize_skills"`` on later ones, and ``"suggest_lora"`` once the
      plateau counter reaches ``epigenetic_attempts_before_lora``

    Usage:
        optimizer = AgenticOptimizer()
        # After each task:
        optimizer.record_score(phi_score)
        # Periodically check:
        rec = optimizer.recommend()
        if rec.action == "optimize_prompts":
            ...  # Rebuild prompt pack with new skills
        elif rec.action == "rollback":
            ...  # Revert to previous configuration
    """

    def __init__(
        self,
        plateau_threshold: float = 0.05,
        degradation_threshold: float = -0.1,
        min_samples: int = 5,
        epigenetic_attempts_before_lora: int = 3,
        window_size: int = 10,
    ):
        """Configure the decision thresholds.

        Args:
            plateau_threshold: A trend strictly above this is "improving".
            degradation_threshold: A trend strictly below this is "degrading".
            min_samples: Scores required before the trend is evaluated.
            epigenetic_attempts_before_lora: Plateau recommendations issued
                before the action escalates to ``"suggest_lora"``.
            window_size: Length of the rolling comparison window. Defaults to
                10, the value that was previously fixed.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be at least 1")
        self.plateau_threshold = plateau_threshold
        self.degradation_threshold = degradation_threshold
        self.min_samples = min_samples
        self.epigenetic_attempts = epigenetic_attempts_before_lora
        self.window = PerformanceWindow(window_size=window_size)
        # Number of plateau recommendations issued since the last reset.
        self._epigenetic_count = 0
        self._recommendations: list[OptimizationRecommendation] = []

    def record_score(self, score: float) -> None:
        """Record a performance score (typically final Φ)."""
        self.window.add(score)

    def recommend(self) -> OptimizationRecommendation:
        """Get optimization recommendation based on current performance trend.

        Every returned recommendation is also appended to ``history``. Note
        that each call made while on a plateau advances the plateau counter.
        """
        sample_count = len(self.window.scores)
        if sample_count < self.min_samples:
            # Too little data to judge a trend: keep going.
            return self._remember(
                OptimizationRecommendation(
                    state=OptimizationState.IMPROVING,
                    action="continue",
                    reason=f"Insufficient data ({sample_count}/{self.min_samples} samples)",
                )
            )

        trend = self.window.trend
        state = self._classify_state(trend)

        if state == OptimizationState.IMPROVING:
            rec = OptimizationRecommendation(
                state=state,
                action="continue",
                reason=f"Performance improving (trend={trend:+.3f})",
            )
        elif state == OptimizationState.DEGRADING:
            rec = OptimizationRecommendation(
                state=state,
                action="rollback",
                reason=f"Performance degrading (trend={trend:+.3f}). Rollback to previous config.",
            )
        elif state == OptimizationState.PLATEAU:
            self._epigenetic_count += 1
            if self._epigenetic_count >= self.epigenetic_attempts:
                # Prompt/skill optimization has been recommended often enough;
                # escalate to a LoRA/distillation suggestion.
                rec = OptimizationRecommendation(
                    state=OptimizationState.EPIGENETIC_PLATEAU,
                    action="suggest_lora",
                    reason=f"Plateau persists after {self._epigenetic_count} epigenetic attempts. Consider LoRA/distillation.",
                    details={"epigenetic_attempts": self._epigenetic_count},
                )
            else:
                # First plateau recommendation targets prompts, later ones skills.
                rec = OptimizationRecommendation(
                    state=state,
                    action="optimize_prompts" if self._epigenetic_count <= 1 else "optimize_skills",
                    reason=f"Performance plateau (trend={trend:+.3f}). Trying epigenetic optimization #{self._epigenetic_count}.",
                )
        else:
            # Only reachable if _classify_state is overridden to return
            # another state; kept as a safe default.
            rec = OptimizationRecommendation(state=state, action="continue", reason="Unknown state")

        return self._remember(rec)

    def _remember(self, rec: OptimizationRecommendation) -> OptimizationRecommendation:
        """Append ``rec`` to the recommendation history and return it."""
        self._recommendations.append(rec)
        return rec

    def _classify_state(self, trend: float) -> OptimizationState:
        """Map a trend value to IMPROVING, DEGRADING or PLATEAU."""
        if trend > self.plateau_threshold:
            return OptimizationState.IMPROVING
        elif trend < self.degradation_threshold:
            return OptimizationState.DEGRADING
        else:
            return OptimizationState.PLATEAU

    def reset_epigenetic_counter(self) -> None:
        """Call after successful epigenetic optimization breaks plateau."""
        self._epigenetic_count = 0

    @property
    def current_state(self) -> OptimizationState:
        """State implied by the current trend, without recording anything.

        Reports IMPROVING while fewer than ``min_samples`` scores exist. It
        does not consult the plateau counter, so EPIGENETIC_PLATEAU is only
        ever produced by ``recommend()``.
        """
        if len(self.window.scores) < self.min_samples:
            return OptimizationState.IMPROVING
        return self._classify_state(self.window.trend)

    @property
    def history(self) -> list[OptimizationRecommendation]:
        """All recommendations returned by ``recommend()``, oldest first."""
        return self._recommendations