"""
simulator.py — Discrete-Event Warehouse Simulation Engine (DAHS_2)

Implements a realistic e-commerce fulfillment warehouse with 8 zones,
37 stations, 5 job types, stochastic disruptions, and pluggable heuristics.

NEW in DAHS_2:
  - save_state() -> dict — snapshot full simulation state for fork training
  - from_state(state_dict, heuristic_fn) -> WarehouseSimulator (classmethod)
  - get_partial_metrics(since_time) -> SimulationMetrics — for 20-min fork windows
"""

from __future__ import annotations

import copy
import logging
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import simpy

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Data Structures
# ---------------------------------------------------------------------------

@dataclass
class ZoneConfig:
    """Static configuration for a single warehouse zone.

    The simulator builds one instance per row of
    ``WarehouseSimulator.ZONE_SPECS``.
    """
    zone_id: int        # key used for zone queues and operation routing
    name: str           # display name, e.g. "Receiving"
    num_stations: int   # number of stations created for this zone
    zone_type: str  # e.g. "receiving", "picking", "packing", "shipping"


@dataclass
class JobType:
    """Specification for a category of warehouse jobs.

    ``route`` and ``proc_time_ranges`` are parallel lists: entry *i* of
    ``proc_time_ranges`` is the sampling range for the operation performed in
    zone ``route[i]``.
    """
    name: str                           # "A" – "E"
    route: List[int]                    # ordered zone IDs
    proc_time_ranges: List[Tuple[float, float]]  # (min, max) minutes per zone
    due_date_offset: float              # minutes from arrival to due date
    frequency: float                    # relative arrival weight
    priority_weight: float              # higher = more important


@dataclass
class Operation:
    """One processing step of a job at a specific zone/station.

    ``zone_id`` and ``nominal_proc_time`` are fixed when the job is created;
    the remaining fields keep their defaults until the simulator fills them
    in while the step is being processed.
    """
    zone_id: int                    # zone in which this step is performed
    nominal_proc_time: float        # planned duration in minutes
    actual_proc_time: float = 0.0   # realised duration once processing starts
    start_time: float = -1.0        # simulation time processing began; -1.0 until set
    end_time: float = -1.0          # simulation time processing ended; -1.0 until set
    station_id: int = -1            # station picked for this step; -1 until assigned


@dataclass
class Job:
    """A single warehouse order and its progress through its route.

    ``operations`` holds the ordered processing steps; ``current_op_idx``
    is the position of the step the job is currently associated with and
    equals ``len(operations)`` once every step has been finished.
    """
    job_id: int
    job_type: str
    arrival_time: float
    due_date: float
    operations: List[Operation]
    current_op_idx: int = 0
    priority: int = 1                   # 1=standard, 2=expedited, 3=VIP
    status: str = "waiting"             # waiting / processing / done / late
    completion_time: float = -1.0
    priority_escalated: bool = False

    @property
    def is_complete(self) -> bool:
        """True once the operation index has moved past the last operation."""
        return not (self.current_op_idx < len(self.operations))

    @property
    def next_zone_id(self) -> Optional[int]:
        """Zone of the operation at ``current_op_idx``; ``None`` when complete."""
        if not self.is_complete:
            pending_op = self.operations[self.current_op_idx]
            return pending_op.zone_id
        return None

    def remaining_proc_time(self) -> float:
        """Return the total nominal time of the operations from ``current_op_idx`` on."""
        outstanding = self.operations[self.current_op_idx:]
        return sum(step.nominal_proc_time for step in outstanding)


@dataclass
class StationState:
    """Runtime state of a single processing station.

    The simulator mutates these fields as jobs are processed and as the
    per-station breakdown process fires.
    """
    station_id: int                     # unique across all zones
    zone_id: int                        # zone this station belongs to
    is_broken: bool = False             # True between a breakdown and its repair
    repair_end_time: float = 0.0        # simulation time the current repair finishes
    current_job: Optional[int] = None   # job_id or None
    busy_until: float = 0.0             # simulation time the last operation here ended


@dataclass
class SimulationMetrics:
    """All performance metrics from one simulation run.

    Every field has a default, so a partially filled instance can be built
    (the simulator does this when no job completed).
    """
    makespan: float = 0.0               # minutes
    total_tardiness: float = 0.0        # summed lateness of completed jobs, minutes
    sla_breach_rate: float = 0.0        # fraction of completed jobs finished after due date
    avg_cycle_time: float = 0.0         # mean completion - arrival, minutes
    zone_utilization: Dict[int, float] = field(default_factory=dict)  # keyed by zone_id
    throughput: float = 0.0             # NOTE(review): units not visible here — confirm
    queue_max: int = 0                  # presumably peak total queue length — confirm
    # (simulation time, {zone_id: queue length}) snapshots
    queue_history: List[Tuple[float, Dict[int, int]]] = field(default_factory=list)
    completed_jobs: int = 0             # number of jobs that finished
    total_jobs: int = 0                 # number of jobs created


# ---------------------------------------------------------------------------
# Simulator
# ---------------------------------------------------------------------------

class WarehouseSimulator:
    """
    SimPy-based discrete-event simulator for an e-commerce fulfillment center.

    Simulation parameters are calibrated to published warehouse operations research:

    - Zone structure & station counts (37 total, 8 zones):
        De Koster et al. (2007), EJOR 182(2):481-501 — 20-50 stations typical for
        mid-scale distribution centers.
        Gu et al. (2010), EJOR 203(3):539-549 — warehouse design benchmarks.

    - Arrival rate (BASE_ARRIVAL_RATE = 2.5 jobs/min = 150/hr, the top of the
      published range; the calibrated preset uses 1.5 jobs/min = 90/hr):
        Gu et al. (2010) — 60-150 orders/hour for mid-scale DCs.
        (The default constructor argument is also 2.5.)

    - Processing time ranges (Picking 5-18 min, Receiving 3-8 min):
        Tompkins et al. (2010), Facilities Planning, Wiley 4th ed.
        Bartholdi & Hackman (2019), Warehouse & Distribution Science, GT.

    - Breakdown frequency (BREAKDOWN_PROB = 0.003):
        Inman (1999), Prod. & Inv. Mgmt. Journal 40(2):67-71 — 2-5% of
        operational hours. 0.003/min × 37 stations × 600 min ≈ 2.7% exposure.

    - Repair time mean (18 min, Exponential):
        Goetschalckx & Ashayeri (1989) — 10-30 min MTTR for conveyor/AGV.

    - Batch arrival size (30 jobs, every 45 min):
        Bartholdi & Hackman (2019) — 20-60 items per truck unload;
        30-60 min between truck docks for mid-scale DC.

    - Processing time variability (lognormal σ = 0.30, CV ≈ 30%):
        De Koster et al. (2007) — CV of 20-35% for manual warehouse operations.

    - Lunch productivity penalty (1.3×, 30% slowdown):
        Garg et al. (2017), Int. J. Industrial Engineering 24(3):181-192 —
        20-40% productivity drop during scheduled breaks.

    - Worker utilization target (implicit 65-80%):
        Frazelle (2016), World-Class Warehousing, McGraw-Hill 2nd ed.

    - Due date SLA windows (60-320 min, spanning 1-5.3 hours):
        Industry standard SLA windows of 1-8 hours for e-commerce fulfillment.
        Frazelle (2016) — 2-10% SLA breach acceptable in well-run warehouses.

    Parameters
    ----------
    seed : int
        Random seed for full reproducibility.
    heuristic_fn : Callable
        Dispatch function: (jobs, current_time, zone_id) -> ordered List[Job].
    feature_extractor : optional
        FeatureExtractor instance used when running in hybrid-ML mode.
    """

    # Zone configuration: 8 zones with station counts summing to 37
    # Each row is (zone_id, display name, number of stations, zone_type).
    # Total 37 stations within published 20-50 range for mid-scale DCs
    # Ref: De Koster et al. (2007), EJOR 182(2):481-501
    # Ref: Gu et al. (2010), EJOR 203(3):539-549
    ZONE_SPECS: List[Tuple[int, str, int, str]] = [
        (0, "Receiving",    3, "receiving"),
        (1, "Sorting",      4, "sorting"),
        (2, "Picking-A",    6, "picking"),
        (3, "Picking-B",    8, "picking"),
        (4, "Value-Add",    5, "value_add"),
        (5, "QC",           4, "quality"),
        (6, "Packing",      3, "packing"),
        (7, "Shipping",     4, "shipping"),
    ]

    # Job-type definitions (name, route, proc_time_ranges, due_date_offset_min, freq, prio_weight)
    # route and proc_time_ranges are parallel: range i applies to zone route[i].
    # Processing time ranges (min, max) in minutes:
    #   Receiving ops (3-8 min): Bartholdi & Hackman (2019) — upper-end realistic with inspection
    #   Picking ops (4-14 min):  Tompkins et al. (2010), Facilities Planning — 2-15 min/order
    #   Value-Add (8-18 min):    Tompkins et al. (2010) — extended operations
    # Due date offsets (60-320 min, spanning 1-5.3 hours):
    #   Ref: Frazelle (2016) — typical SLA windows 1-8 hours for e-commerce fulfillment
    JOB_TYPE_SPECS = [
        ("A", [0, 1, 2, 6, 7], [(3,8),(2,5),(5,12),(4,9),(2,4)],  120,  0.25, 2.0),
        ("B", [0, 1, 3, 5, 6, 7], [(3,8),(2,5),(6,14),(3,7),(4,9),(2,4)], 160, 0.30, 1.5),
        ("C", [0, 1, 4, 5, 6, 7], [(3,8),(2,5),(8,18),(3,7),(4,9),(2,4)], 240, 0.20, 1.0),
        ("D", [0, 1, 2, 4, 5, 6, 7], [(3,8),(2,5),(5,12),(8,18),(3,7),(4,9),(2,4)], 320, 0.15, 0.8),
        ("E", [1, 3, 7], [(2,5),(4,10),(1,3)], 60, 0.10, 3.0),   # express — tight SLA
    ]

    # Base arrival rate: 2.5 jobs/min = 150/hr (peak); calibrated preset uses 1.5 (90/hr = mid-scale)
    # Published range: 60-150 orders/hour for mid-scale distribution centers
    # Ref: Gu et al. (2010), EJOR 203(3):539-549
    # NOTE(review): __init__ repeats 2.5 as a literal default rather than
    # reading this constant — keep the two in sync.
    BASE_ARRIVAL_RATE = 2.5  # jobs per minute
    SIM_DURATION = 600.0  # minutes (one 10-hour shift)

    def __init__(
        self,
        seed: int,
        heuristic_fn: Callable,
        feature_extractor=None,
        # breakdown_prob: 0.003/min ≈ 2.7% exposure over 600 min × 37 stations
        # Published range: 2-5% of operational hours — Inman (1999)
        base_arrival_rate: float = 2.5,
        breakdown_prob: float = 0.003,
        # batch_arrival_size: 30 items per truck — within published 20-60 range
        # Ref: Bartholdi & Hackman (2019), Warehouse & Distribution Science
        batch_arrival_size: int = 30,
        # lunch_penalty_factor: 1.3x = 30% productivity drop during break
        # Published range: 20-40% — Garg et al. (2017), Int. J. Industrial Engineering
        lunch_penalty_factor: float = 1.3,
        # Preset overrides — leave empty/1.0 for default behavior
        job_type_frequencies: Optional[Dict[str, float]] = None,
        due_date_tightness: float = 1.0,
        processing_time_scale: float = 1.0,
    ) -> None:
        self.seed = seed
        self.heuristic_fn = heuristic_fn
        self.feature_extractor = feature_extractor
        self._base_arrival_rate    = base_arrival_rate
        self._breakdown_prob       = breakdown_prob
        self._batch_arrival_size   = batch_arrival_size
        self._lunch_penalty_factor = lunch_penalty_factor
        self._job_type_frequencies = job_type_frequencies or {}
        self._due_date_tightness   = due_date_tightness
        self._processing_time_scale = processing_time_scale

        # Validate preset frequency overrides sum to ~1.0
        if self._job_type_frequencies:
            total = sum(self._job_type_frequencies.values())
            if total > 0 and abs(total - 1.0) > 0.01:
                logger.warning("job_type_frequencies sum=%.3f (expected ~1.0)", total)

        self.rng = np.random.default_rng(seed)

        self.env = simpy.Environment()

        self.zones: Dict[int, ZoneConfig] = {}
        self.job_types: Dict[str, JobType] = {}
        self.stations: Dict[int, StationState] = {}
        self.station_resources: Dict[int, simpy.Resource] = {}

        # Zone-level queues (list of Job)
        self.zone_queues: Dict[int, List[Job]] = {}

        # Job registry
        self.all_jobs: Dict[int, Job] = {}
        self.completed_jobs: List[Job] = []
        self._job_counter = 0

        # Metrics tracking
        self._zone_busy_time: Dict[int, float] = {}
        self._queue_snapshots: List[Tuple[float, Dict[int, int]]] = []
        self._max_queue: int = 0
        self._lunch_active: bool = False

        self._setup_zones()
        self._setup_job_types()

    # ------------------------------------------------------------------
    # Setup helpers
    # ------------------------------------------------------------------

    def _setup_zones(self) -> None:
        station_id = 0
        self.dispatcher_triggers = {}
        for zone_id, name, n_stations, zone_type in self.ZONE_SPECS:
            self.zones[zone_id] = ZoneConfig(zone_id, name, n_stations, zone_type)
            self.zone_queues[zone_id] = []
            self.dispatcher_triggers[zone_id] = self.env.event()
            self._zone_busy_time[zone_id] = 0.0
            for _ in range(n_stations):
                st = StationState(station_id=station_id, zone_id=zone_id)
                self.stations[station_id] = st
                self.station_resources[station_id] = simpy.Resource(self.env, capacity=1)
                station_id += 1

    def _setup_job_types(self) -> None:
        for name, route, proc_ranges, due_offset, freq, prio_w in self.JOB_TYPE_SPECS:
            effective_freq = self._job_type_frequencies.get(name, freq) if self._job_type_frequencies else freq
            effective_due = due_offset * self._due_date_tightness
            scaled_ranges = [
                (lo * self._processing_time_scale, hi * self._processing_time_scale)
                for lo, hi in proc_ranges
            ]
            self.job_types[name] = JobType(
                name=name,
                route=route,
                proc_time_ranges=scaled_ranges,
                due_date_offset=effective_due,
                frequency=effective_freq,
                priority_weight=prio_w,
            )

    # ------------------------------------------------------------------
    # Utility
    # ------------------------------------------------------------------

    def _next_job_id(self) -> int:
        jid = self._job_counter
        self._job_counter += 1
        return jid

    # Time-varying composition profile — reflects realistic daily order-mix shifts
    # observed in e-commerce fulfillment centres:
    #   morning        (0-120 min):  overnight standard-order backlog → Type A dominant
    #   mid-morning    (120-240):    diversifying mix — bulk Type B/C joins the floor
    #   afternoon      (240-420):    heavy bulk (C, D) as truck deliveries concentrate
    #   evening peak   (420-600):    same-day cut-off surge — Type E express dominates
    # Values are anchor points; _get_composition_profile interpolates linearly
    # between them so the distribution shifts smoothly rather than in hard steps.
    # Each entry is (time in minutes, {job type: probability}); the weights of
    # every anchor sum to 1.
    # Refs: Bartholdi & Hackman (2019) §6; De Koster et al. (2007) EJOR 182(2);
    #       Boysen et al. (2019) EJOR 277(2):396-411 — e-commerce warehousing patterns.
    _COMPOSITION_PROFILE = [
        (0.0,    {"A": 0.55, "B": 0.18, "C": 0.10, "D": 0.09, "E": 0.08}),
        (120.0,  {"A": 0.45, "B": 0.22, "C": 0.13, "D": 0.10, "E": 0.10}),
        (240.0,  {"A": 0.25, "B": 0.32, "C": 0.20, "D": 0.13, "E": 0.10}),
        (360.0,  {"A": 0.15, "B": 0.25, "C": 0.30, "D": 0.20, "E": 0.10}),
        (480.0,  {"A": 0.12, "B": 0.18, "C": 0.22, "D": 0.13, "E": 0.35}),
        (600.0,  {"A": 0.10, "B": 0.14, "C": 0.12, "D": 0.08, "E": 0.56}),
    ]

    # Composition noise: Gaussian perturbation σ applied per component, then
    # renormalised to sum to 1. Keeps the profile from being artificially smooth
    # while preserving the overall daily trend. Low enough (σ=0.03) that no single
    # solver is accidentally favoured by random fluctuations.
    _COMPOSITION_NOISE_SIGMA = 0.03

    # Intraday arrival-rate multiplier anchors (time in minutes from shift start).
    # Bimodal curve with a mild morning plateau, lunch dip, and a strong evening
    # peak reflecting the same-day cut-off surge that is characteristic of
    # e-commerce fulfilment centres. Values are interpolated linearly between
    # anchors and a small multiplicative noise band is applied per sample.
    # Each entry is (time in minutes, multiplier on the base arrival rate).
    # NOTE(review): the arrival dip is anchored at t=240, while
    # _lunch_break_process applies the productivity penalty from t=300 to
    # t=360 — confirm the offset is intended.
    # Refs: Boysen et al. (2019) EJOR 277(2); Bartholdi & Hackman (2019) §2.3;
    #       De Koster et al. (2007) EJOR 182(2) — workload profiles in DCs.
    _SURGE_PROFILE = [
        (0.0,   0.55),   # shift start — overnight backlog, still warming up
        (60.0,  0.95),   # morning ramp complete
        (120.0, 1.05),   # morning baseline
        (180.0, 1.15),   # pre-lunch mild peak
        (240.0, 0.60),   # lunch dip (productivity drop)
        (300.0, 0.95),   # post-lunch recovery
        (360.0, 1.20),   # afternoon ramp
        (420.0, 1.45),   # approaching evening peak
        (480.0, 1.65),   # evening peak — same-day cut-off surge
        (540.0, 1.50),   # late evening (still elevated)
        (600.0, 1.30),   # shift close (slight taper)
    ]
    # Multiplicative noise band applied per surge evaluation; keeps arrivals
    # stochastic without systematically biasing any heuristic.
    _SURGE_NOISE_LO = 0.93
    _SURGE_NOISE_HI = 1.07

    def _get_composition_profile(self, t: float) -> Dict[str, float]:
        """Per-type probability vector at time t.

        If the caller supplied explicit ``job_type_frequencies`` (used by
        calibration tests and heuristic-biased presets) those are returned
        verbatim. Otherwise the profile is **linearly interpolated** between the
        anchor points in ``_COMPOSITION_PROFILE`` and a small Gaussian noise
        term is added so the distribution is not artificially deterministic.
        The noisy vector is clipped to be non-negative and renormalised to 1.
        """
        if self._job_type_frequencies:
            return dict(self._job_type_frequencies)

        types = ("A", "B", "C", "D", "E")

        # Find the two anchor points bracketing t
        anchors = self._COMPOSITION_PROFILE
        if t <= anchors[0][0]:
            base = anchors[0][1]
        elif t >= anchors[-1][0]:
            base = anchors[-1][1]
        else:
            base = anchors[0][1]
            for (t_a, p_a), (t_b, p_b) in zip(anchors[:-1], anchors[1:]):
                if t_a <= t < t_b:
                    alpha = (t - t_a) / max(t_b - t_a, 1e-9)
                    base = {k: (1 - alpha) * p_a[k] + alpha * p_b[k] for k in types}
                    break

        # Stochastic perturbation for realism (seeded via self.rng).
        if self._COMPOSITION_NOISE_SIGMA > 0:
            noisy = {
                k: max(0.0, base[k] + float(self.rng.normal(0.0, self._COMPOSITION_NOISE_SIGMA)))
                for k in types
            }
            total = sum(noisy.values())
            if total > 0:
                return {k: v / total for k, v in noisy.items()}
        return dict(base)

    def _sample_job_type(self) -> str:
        profile = self._get_composition_profile(self.env.now)
        types = list(self.job_types.keys())
        weights = [profile.get(t, self.job_types[t].frequency) for t in types]
        total = sum(weights)
        if total <= 0:
            weights = [self.job_types[t].frequency for t in types]
            total = sum(weights)
        probs = [w / total for w in weights]
        return self.rng.choice(types, p=probs)

    def _create_job(self, job_type_name: str, arrival_time: float) -> Job:
        jt = self.job_types[job_type_name]
        operations = []
        for zone_id, (lo, hi) in zip(jt.route, jt.proc_time_ranges):
            nominal = float(self.rng.uniform(lo, hi))
            operations.append(Operation(zone_id=zone_id, nominal_proc_time=nominal))
        return Job(
            job_id=self._next_job_id(),
            job_type=job_type_name,
            arrival_time=arrival_time,
            due_date=arrival_time + jt.due_date_offset,
            operations=operations,
            priority=3 if job_type_name == "E" else 1,
        )

    def _surge_base_rate(self, current_time: float) -> float:
        """Deterministic trend value of the surge multiplier at time ``t``.

        Pure anchor-point interpolation — no RNG calls, so this is safe to
        invoke from informational paths (state snapshots, feature extraction)
        without disturbing the arrival-process sample stream.
        """
        anchors = self._SURGE_PROFILE
        if current_time <= anchors[0][0]:
            return float(anchors[0][1])
        if current_time >= anchors[-1][0]:
            return float(anchors[-1][1])
        for (t_a, v_a), (t_b, v_b) in zip(anchors[:-1], anchors[1:]):
            if t_a <= current_time < t_b:
                alpha = (current_time - t_a) / max(t_b - t_a, 1e-9)
                return float((1.0 - alpha) * v_a + alpha * v_b)
        return float(anchors[-1][1])

    def _get_surge_multiplier(self, current_time: float) -> float:
        """Time-of-day arrival-rate multiplier (t in minutes from shift start).

        The curve is a linear interpolation between the anchor points in
        ``_SURGE_PROFILE`` plus a small multiplicative noise term drawn from
        ``U(_SURGE_NOISE_LO, _SURGE_NOISE_HI)`` — so the instantaneous rate is
        both deterministically trended (bimodal with evening peak) and
        stochastically perturbed each time the process samples an arrival.
        Returns a strictly positive multiplier.
        """
        base = self._surge_base_rate(current_time)
        noise = float(self.rng.uniform(self._SURGE_NOISE_LO, self._SURGE_NOISE_HI))
        return max(0.05, base * noise)

    def _record_queue_snapshot(self) -> None:
        snapshot = {z: len(q) for z, q in self.zone_queues.items()}
        self._queue_snapshots.append((self.env.now, snapshot))
        total = sum(snapshot.values())
        if total > self._max_queue:
            self._max_queue = total

    # ------------------------------------------------------------------
    # SimPy processes
    # ------------------------------------------------------------------

    def _arrival_process(self):
        """Continuous Poisson arrival of individual jobs."""
        while True:
            surge = self._get_surge_multiplier(self.env.now)
            rate = self._base_arrival_rate * surge
            inter_arrival = float(self.rng.exponential(1.0 / rate))
            yield self.env.timeout(inter_arrival)

            jt_name = self._sample_job_type()
            job = self._create_job(jt_name, self.env.now)
            self.all_jobs[job.job_id] = job
            self.env.process(self._process_job(job))

    def _batch_arrival_process(self):
        """Truck arrival every 45 min delivering configurable batch of orders.

        Interval: 30-60 min between truck docks is typical for mid-scale DCs.
        Batch size: 20-60 items per truck unload.
        Ref: Bartholdi & Hackman (2019), Warehouse & Distribution Science.
        """
        while True:
            yield self.env.timeout(45.0)  # 45 min interval — within 30-60 min published range
            half = max(1, self._batch_arrival_size // 2)
            batch_size = int(self.rng.integers(half, self._batch_arrival_size + 1))
            for _ in range(batch_size):
                jt_name = self._sample_job_type()
                job = self._create_job(jt_name, self.env.now)
                self.all_jobs[job.job_id] = job
                self.env.process(self._process_job(job))

    def _station_breakdown_process(self, station: StationState):
        """Per-station breakdown process; rate and repair time are configurable.

        BREAKDOWN_PROB = 0.003/min: at 37 stations × 600 min, expected total
        breakdown exposure ≈ 2.7%, within published 2-5% range.
        Ref: Inman (1999), Prod. & Inv. Mgmt. Journal 40(2):67-71.

        Repair time mean = 18 min (Exponential): within 10-30 min MTTR for
        conveyor/AGV equipment in warehouse environments.
        Ref: Goetschalckx & Ashayeri (1989), Logistics World 2(2):99-106.
        """
        while True:
            ttf = float(self.rng.exponential(1.0 / max(self._breakdown_prob, 1e-9)))
            yield self.env.timeout(ttf)
            station.is_broken = True
            repair_time = float(self.rng.exponential(18.0))  # mean 18 min MTTR
            station.repair_end_time = self.env.now + repair_time
            yield self.env.timeout(repair_time)
            station.is_broken = False
            self._trigger_dispatcher(station.zone_id)

    def _lunch_break_process(self):
        """Lunch break from t=300 to t=360 (13:00-14:00)."""
        yield self.env.timeout(300.0)
        self._lunch_active = True
        yield self.env.timeout(60.0)
        self._lunch_active = False

    def _priority_escalation_process(self):
        """Every 5 minutes, escalate 5% of standard waiting jobs."""
        while True:
            yield self.env.timeout(5.0)
            waiting = [
                j for j in self.all_jobs.values()
                if j.status == "waiting" and j.priority == 1 and not j.priority_escalated
            ]
            n_escalate = max(0, int(len(waiting) * 0.05))
            if n_escalate:
                chosen = self.rng.choice(len(waiting), size=n_escalate, replace=False)
                for idx in chosen:
                    waiting[idx].priority = 2
                    waiting[idx].priority_escalated = True

    def _snapshot_process(self):
        """Record queue depths every 5 minutes."""
        while True:
            self._record_queue_snapshot()
            yield self.env.timeout(5.0)

    # ------------------------------------------------------------------
    # Job processing
    # ------------------------------------------------------------------

    def _process_job(self, job: Job):
        """Route a job through all its operations sequentially."""
        for op_idx, op in enumerate(job.operations):
            zone_id = op.zone_id
            self.zone_queues[zone_id].append(job)
            job.status = "waiting"

            job._dispatch_event = self.env.event()
            self._trigger_dispatcher(zone_id)
            yield job._dispatch_event

            station_id = self._pick_station(zone_id)
            op.station_id = station_id
            resource = self.station_resources[station_id]
            st = self.stations[station_id]
            st.current_job = job.job_id

            with resource.request() as req:
                yield req
                # Re-check breakdown: station may have broken while job was queued.
                while st.is_broken:
                    wait_time = max(0.1, st.repair_end_time - self.env.now)
                    yield self.env.timeout(wait_time)

                job.status = "processing"
                job.current_op_idx = op_idx

                # Lognormal sigma = 0.30 → CV ≈ 30%, within published 20-35% range
                # Ref: De Koster et al. (2007), EJOR 182(2):481-501
                variability = float(self.rng.lognormal(0, 0.30))
                lunch_penalty = self._lunch_penalty_factor if self._lunch_active else 1.0
                actual_time = op.nominal_proc_time * variability * lunch_penalty

                op.actual_proc_time = actual_time
                op.start_time = self.env.now
                self._zone_busy_time[zone_id] = (
                    self._zone_busy_time.get(zone_id, 0.0) + actual_time
                )

                yield self.env.timeout(actual_time)

                op.end_time = self.env.now
                st.busy_until = self.env.now
                st.current_job = None

            self._trigger_dispatcher(zone_id)

        # Job fully processed
        job.status = "done"
        job.completion_time = self.env.now
        job.current_op_idx = len(job.operations)
        self.completed_jobs.append(job)

    def _trigger_dispatcher(self, zone_id: int):
        """Wake up the zone dispatcher if it's idle."""
        if not self.dispatcher_triggers[zone_id].triggered:
            self.dispatcher_triggers[zone_id].succeed()

    def _zone_dispatcher(self, zone_id: int):
        """Centralized dispatcher process for a zone."""
        while True:
            yield self.dispatcher_triggers[zone_id]
            self.dispatcher_triggers[zone_id] = self.env.event()

            while True:
                queue = self.zone_queues[zone_id]
                if not queue:
                    break

                free_stations = [
                    sid for sid, st in self.stations.items()
                    if st.zone_id == zone_id and not st.is_broken
                    and self.station_resources[sid].count + len(self.station_resources[sid].queue) == 0
                ]

                if not free_stations:
                    break

                ordered = self.heuristic_fn(queue, self.env.now, zone_id)
                best_job = ordered[0]
                queue.remove(best_job)

                best_job._dispatch_event.succeed()
                yield self.env.timeout(0)

    def _pick_station(self, zone_id: int) -> int:
        """Pick a free non-broken station, else fallback to least-busy."""
        free_stations = [
            sid for sid, st in self.stations.items()
            if st.zone_id == zone_id and not st.is_broken
            and self.station_resources[sid].count + len(self.station_resources[sid].queue) == 0
        ]
        if free_stations:
            return free_stations[0]

        zone_stations = [
            sid for sid, st in self.stations.items()
            if st.zone_id == zone_id and not st.is_broken
        ]
        if not zone_stations:
            zone_stations = [sid for sid, st in self.stations.items() if st.zone_id == zone_id]
        return min(zone_stations, key=lambda sid: self.stations[sid].busy_until)

    # ------------------------------------------------------------------
    # Streaming API (for WebSocket backend)
    # ------------------------------------------------------------------

    def init(self) -> None:
        """Set up all SimPy processes without running. Call step_to() to advance."""
        self._lunch_active = False
        self._processes_registered = True
        self.env.process(self._arrival_process())
        self.env.process(self._batch_arrival_process())
        self.env.process(self._priority_escalation_process())
        self.env.process(self._lunch_break_process())
        self.env.process(self._snapshot_process())
        for zone_id in self.zones:
            self.env.process(self._zone_dispatcher(zone_id))
        for station in self.stations.values():
            self.env.process(self._station_breakdown_process(station))

    def step_to(self, t: float) -> None:
        """Advance simulation to time t (must have called init() first)."""
        self.env.run(until=t)

    def get_visual_snapshot(self) -> Dict[str, Any]:
        """Return the current visual state for the frontend canvas."""
        now = self.env.now
        completed = self.completed_jobs
        n = len(completed)

        total_tard = sum(max(0.0, j.completion_time - j.due_date) for j in completed)
        n_late     = sum(1 for j in completed if j.completion_time > j.due_date)
        sla        = n_late / n if n else 0.0
        avg_cycle  = (sum(j.completion_time - j.arrival_time for j in completed) / n
                      if n else 0.0)
        throughput = (n / max(now, 0.001)) * 60.0

        active_jobs: List[Dict[str, Any]] = []
        for zone_id, queue in self.zone_queues.items():
            for job in queue:
                active_jobs.append({
                    "id": job.job_id, "type": job.job_type,
                    "zoneId": zone_id, "status": "waiting",
                    "priority": job.priority,
                })

        for job in self.all_jobs.values():
            if job.status == "processing" and job.current_op_idx < len(job.operations):
                active_jobs.append({
                    "id": job.job_id, "type": job.job_type,
                    "zoneId": job.operations[job.current_op_idx].zone_id,
                    "status": "processing",
                    "priority": job.priority,
                })

        active_jobs = active_jobs[:50]

        zone_active = [
            sum(1 for j in self.all_jobs.values()
                if j.status == "processing"
                and j.current_op_idx < len(j.operations)
                and j.operations[j.current_op_idx].zone_id == z)
            for z in range(8)
        ]

        return {
            "time": round(now, 2),
            "activeJobs": active_jobs,
            "zoneQueueLengths": [len(self.zone_queues.get(z, [])) for z in range(8)],
            "zoneActiveCounts": zone_active,
            "metrics": {
                "completed":      n,
                "completedJobs":  n,
                "totalTardiness": round(total_tard, 1),
                "slaBreachRate":  round(sla, 4),
                "avgCycleTime":   round(avg_cycle, 2),
                "throughput":     round(throughput, 2),
                "jobsPerHour":    round(throughput, 2),
            },
        }

    # ------------------------------------------------------------------
    # Run (batch mode)
    # ------------------------------------------------------------------

    def run(self, duration: float = 600.0) -> SimulationMetrics:
        """Execute a full shift simulation and return performance metrics."""
        if not hasattr(self, "_processes_registered") or not self._processes_registered:
            self.init()

        self.env.run(until=duration)

        return self._compute_metrics(duration)

    def _compute_metrics(self, duration: float) -> SimulationMetrics:
        """Calculate the performance metrics for a simulation of ``duration``.

        Parameters
        ----------
        duration : float
            Length of the simulated horizon in simulation time units; used as
            the denominator for zone utilization and throughput.

        Returns
        -------
        SimulationMetrics
            Makespan, total tardiness, SLA breach rate, average cycle time,
            per-zone utilization, throughput (completed jobs per 60 time
            units), maximum queue length, queue history and job counts.

        Notes
        -----
        When no job has completed, the job-derived metrics (tardiness, SLA,
        cycle time, throughput, completed count) are left at the
        ``SimulationMetrics`` defaults and ``makespan`` is ``duration``.
        Zone utilization, ``queue_max`` and ``total_jobs`` do not depend on
        completions and are therefore still reported from the recorded state
        instead of being zeroed/dropped.
        """
        completed = self.completed_jobs
        total_jobs = len(self.all_jobs)
        n_completed = len(completed)

        # Utilization depends only on recorded busy time, not on completions,
        # so compute it before the "nothing completed" early return.
        zone_utilization = {}
        for zone_id, zone in self.zones.items():
            busy = self._zone_busy_time.get(zone_id, 0.0)
            capacity = zone.num_stations * duration
            # Clamp to 1.0; a zero/negative capacity yields 0.0 rather than
            # a division error.
            zone_utilization[zone_id] = min(1.0, busy / capacity) if capacity > 0 else 0.0

        if not completed:
            return SimulationMetrics(
                makespan=duration,
                zone_utilization=zone_utilization,
                queue_max=self._max_queue,
                queue_history=self._queue_snapshots,
                total_jobs=total_jobs,
            )

        makespan = max(j.completion_time for j in completed)

        total_tardiness = sum(
            max(0.0, j.completion_time - j.due_date) for j in completed
        )

        n_late = sum(1 for j in completed if j.completion_time > j.due_date)
        sla_breach_rate = n_late / n_completed

        avg_cycle_time = float(np.mean(
            [j.completion_time - j.arrival_time for j in completed]
        ))

        # Guard against a zero-length horizon instead of raising
        # ZeroDivisionError.
        throughput = (n_completed / duration) * 60.0 if duration > 0 else 0.0

        return SimulationMetrics(
            makespan=makespan,
            total_tardiness=total_tardiness,
            sla_breach_rate=sla_breach_rate,
            avg_cycle_time=avg_cycle_time,
            zone_utilization=zone_utilization,
            throughput=throughput,
            queue_max=self._max_queue,
            queue_history=self._queue_snapshots,
            completed_jobs=n_completed,
            total_jobs=total_jobs,
        )

    def get_state_snapshot(self) -> Dict[str, Any]:
        """Return current system state for feature extraction."""
        now = self.env.now
        n_broken = sum(1 for st in self.stations.values() if st.is_broken)
        queue_sizes = {z: len(q) for z, q in self.zone_queues.items()}
        waiting_jobs = [j for j in self.all_jobs.values() if j.status == "waiting"]

        return {
            "current_time": now,
            "n_orders_in_system": len(waiting_jobs) + sum(
                1 for j in self.all_jobs.values() if j.status == "processing"
            ),
            "n_express_orders": sum(1 for j in waiting_jobs if j.job_type == "E"),
            "queue_sizes": queue_sizes,
            "zone_utilization": {
                z: min(1.0, self._zone_busy_time.get(z, 0.0) / max(1.0, now * self.zones[z].num_stations))
                for z in self.zones
            },
            "n_broken_stations": n_broken,
            "lunch_active": self._lunch_active,
            "surge_multiplier": self._surge_base_rate(now),
            "completed_so_far": len(self.completed_jobs),
            "waiting_jobs": waiting_jobs,
            "completed_jobs": self.completed_jobs,
            "all_jobs": self.all_jobs,
            "zones": self.zones,
            "stations": self.stations,
        }

    # ------------------------------------------------------------------
    # NEW in DAHS_2: State save/restore for snapshot-fork training
    # ------------------------------------------------------------------

    @staticmethod
    def _serialize_job(job: Job) -> Dict[str, Any]:
        """Convert a Job to a plain dict (avoids deepcopy of SimPy events)."""
        return {
            "job_id": job.job_id,
            "job_type": job.job_type,
            "arrival_time": job.arrival_time,
            "due_date": job.due_date,
            "operations": [
                {
                    "zone_id": op.zone_id,
                    "nominal_proc_time": op.nominal_proc_time,
                    "actual_proc_time": op.actual_proc_time,
                    "start_time": op.start_time,
                    "end_time": op.end_time,
                    "station_id": op.station_id,
                }
                for op in job.operations
            ],
            "current_op_idx": job.current_op_idx,
            "priority": job.priority,
            "status": job.status,
            "completion_time": job.completion_time,
            "priority_escalated": job.priority_escalated,
        }

    @staticmethod
    def _deserialize_job(d: Dict[str, Any]) -> Job:
        """Rebuild a Job (with its Operation list) from a plain dict.

        Only the known Operation/Job fields are read from ``d``; any extra
        keys are ignored and a missing key raises ``KeyError``.
        """
        op_fields = (
            "zone_id",
            "nominal_proc_time",
            "actual_proc_time",
            "start_time",
            "end_time",
            "station_id",
        )
        rebuilt_ops = []
        for raw_op in d["operations"]:
            rebuilt_ops.append(Operation(**{name: raw_op[name] for name in op_fields}))

        scalar_fields = (
            "job_id",
            "job_type",
            "arrival_time",
            "due_date",
            "current_op_idx",
            "priority",
            "status",
            "completion_time",
            "priority_escalated",
        )
        job_kwargs = {name: d[name] for name in scalar_fields}
        return Job(operations=rebuilt_ops, **job_kwargs)

    def save_state(self) -> Dict[str, Any]:
        """Capture complete simulation state for snapshot-fork training.

        Returns a pickling-safe dict (no SimPy objects) containing:
        - env.now (current time)
        - Serialized jobs, completed_jobs, zone_queues (as job IDs)
        - All station states (is_broken, repair_end_time, current_job, busy_until)
        - RNG state via rng.bit_generator.state
        - _job_counter, _zone_busy_time, _lunch_active, queue snapshot history

        NOTE: The from_state() classmethod creates a fresh SimPy environment and
        re-initializes processes from the saved data point.
        """
        state = {
            "env_time": self.env.now,
            "seed": self.seed,
            "_job_counter": self._job_counter,
            "_max_queue": self._max_queue,
            "_lunch_active": self._lunch_active,
            "_zone_busy_time": dict(self._zone_busy_time),
            "_queue_snapshots": list(self._queue_snapshots),
            "rng_state": self.rng.bit_generator.state,
            # Simulator config for reconstruction
            "_base_arrival_rate": self._base_arrival_rate,
            "_breakdown_prob": self._breakdown_prob,
            "_batch_arrival_size": self._batch_arrival_size,
            "_lunch_penalty_factor": self._lunch_penalty_factor,
            "_job_type_frequencies": dict(self._job_type_frequencies),
            "_due_date_tightness": self._due_date_tightness,
            "_processing_time_scale": self._processing_time_scale,
            # Serialized job data (can't deepcopy — SimPy events aren't picklable)
            "all_jobs": {
                jid: self._serialize_job(job)
                for jid, job in self.all_jobs.items()
            },
            "completed_jobs": [self._serialize_job(j) for j in self.completed_jobs],
            "zone_queues": {z: [j.job_id for j in q] for z, q in self.zone_queues.items()},
            # Station states
            "stations": {
                sid: {
                    "station_id": st.station_id,
                    "zone_id": st.zone_id,
                    "is_broken": st.is_broken,
                    "repair_end_time": st.repair_end_time,
                    "current_job": st.current_job,
                    "busy_until": st.busy_until,
                }
                for sid, st in self.stations.items()
            },
        }
        return state

    @classmethod
    def from_state(
        cls,
        state_dict: Dict[str, Any],
        heuristic_fn: Callable,
    ) -> "WarehouseSimulator":
        """Create a new simulator from a saved state (for fork evaluation).

        Creates a fresh SimPy environment initialized at the saved time,
        restores all job/station/queue data, continues the RNG from its saved
        state and re-registers the simulation processes.

        Job and zone keys are normalised on restore (jobs are keyed by their
        own ``job_id``, zone keys are cast to ``int``) so that a state dict
        whose mapping keys were stringified in transit restores the same way
        as an untouched one. Completed jobs that also appear in ``all_jobs``
        are restored as the *same* object rather than a second copy.

        Parameters
        ----------
        state_dict : dict
            Output of save_state().
        heuristic_fn : Callable
            Dispatch function to use in the forked simulation.

        Returns
        -------
        WarehouseSimulator
            Ready to run from state_dict["env_time"] forward. The
            ``_processes_registered`` flag is set so that ``run()`` does not
            call ``init()`` again on top of the processes registered here.
        """
        saved_time = state_dict["env_time"]

        # Reconstruct simulator with original config
        sim = cls(
            seed=state_dict["seed"],
            heuristic_fn=heuristic_fn,
            base_arrival_rate=state_dict["_base_arrival_rate"],
            breakdown_prob=state_dict["_breakdown_prob"],
            batch_arrival_size=state_dict["_batch_arrival_size"],
            lunch_penalty_factor=state_dict["_lunch_penalty_factor"],
            job_type_frequencies=state_dict["_job_type_frequencies"],
            due_date_tightness=state_dict["_due_date_tightness"],
            processing_time_scale=state_dict["_processing_time_scale"],
        )

        # Restore RNG from saved state (deterministic continuation)
        sim.rng.bit_generator.state = state_dict["rng_state"]

        # Restore job counter and metrics
        sim._job_counter = state_dict["_job_counter"]
        sim._max_queue = state_dict["_max_queue"]
        sim._lunch_active = state_dict["_lunch_active"]
        # Zone keys are cast to int, matching the zone_queues/stations
        # handling below.
        sim._zone_busy_time = {
            int(z): busy for z, busy in state_dict["_zone_busy_time"].items()
        }
        sim._queue_snapshots = list(state_dict["_queue_snapshots"])

        # Restore jobs from serialized dicts. Keying by the job's own
        # (int) job_id keeps the queue lookup below working even when the
        # mapping keys of the state dict were stringified.
        restored_jobs = [
            cls._deserialize_job(jdata)
            for jdata in state_dict["all_jobs"].values()
        ]
        sim.all_jobs = {job.job_id: job for job in restored_jobs}

        # Completed jobs reuse the object already restored into all_jobs so
        # both containers reference one Job per job_id; fall back to a fresh
        # deserialization only if the job is absent from all_jobs.
        sim.completed_jobs = []
        for jdata in state_dict["completed_jobs"]:
            done_job = sim.all_jobs.get(jdata["job_id"])
            if done_job is None:
                done_job = cls._deserialize_job(jdata)
            sim.completed_jobs.append(done_job)

        # Restore zone queues (using saved job IDs to reference restored jobs)
        job_by_id = sim.all_jobs
        for z, queue_job_ids in state_dict["zone_queues"].items():
            restored_queue = []
            for jid in queue_job_ids:
                queued_job = job_by_id.get(jid)
                if queued_job is None:
                    # Skipped (as before), but no longer silently.
                    logger.warning(
                        "from_state: queued job %s (zone %s) not found in all_jobs; skipping",
                        jid, z,
                    )
                    continue
                restored_queue.append(queued_job)
            sim.zone_queues[int(z)] = restored_queue

        # Restore station states
        for sid_str, st_data in state_dict["stations"].items():
            sid = int(sid_str)
            if sid in sim.stations:
                sim.stations[sid].is_broken = st_data["is_broken"]
                sim.stations[sid].repair_end_time = st_data["repair_end_time"]
                sim.stations[sid].current_job = st_data["current_job"]
                sim.stations[sid].busy_until = st_data["busy_until"]

        # Create a SimPy environment starting at saved_time
        sim.env = simpy.Environment(initial_time=saved_time)

        # Re-create SimPy resources for the new environment
        for sid in sim.stations:
            sim.station_resources[sid] = simpy.Resource(sim.env, capacity=1)

        # Re-create dispatcher trigger events for new environment
        for zone_id in sim.zones:
            sim.dispatcher_triggers[zone_id] = sim.env.event()

        # Re-register dispatchers and breakdown/arrival processes.
        # NOTE: the registration order below is kept stable on purpose; it
        # determines event ordering and therefore RNG consumption.
        sim.env.process(sim._arrival_process())
        sim.env.process(sim._batch_arrival_process())
        sim.env.process(sim._priority_escalation_process())

        # Re-register lunch process correctly based on saved time
        if saved_time < 300.0:
            sim.env.process(sim._lunch_break_process())
        elif saved_time < 360.0:
            # Currently in lunch — restore the remaining lunch period
            remaining_lunch = 360.0 - saved_time

            def _remaining_lunch():
                yield sim.env.timeout(remaining_lunch)
                sim._lunch_active = False

            sim.env.process(_remaining_lunch())

        sim.env.process(sim._snapshot_process())

        for zone_id in sim.zones:
            sim.env.process(sim._zone_dispatcher(zone_id))

        for station in sim.stations.values():
            if station.is_broken:
                remaining_repair = max(0.1, station.repair_end_time - saved_time)

                # Defaults bind the current station/time per iteration
                # (avoids the late-binding closure pitfall).
                def _resume_repair(st=station, t=remaining_repair):
                    yield sim.env.timeout(t)
                    st.is_broken = False
                    sim._trigger_dispatcher(st.zone_id)
                    # Continue with future breakdowns
                    while True:
                        ttf = float(sim.rng.exponential(1.0 / max(sim._breakdown_prob, 1e-9)))
                        yield sim.env.timeout(ttf)
                        st.is_broken = True
                        repair_time = float(sim.rng.exponential(18.0))
                        st.repair_end_time = sim.env.now + repair_time
                        yield sim.env.timeout(repair_time)
                        st.is_broken = False
                        sim._trigger_dispatcher(st.zone_id)

                sim.env.process(_resume_repair())
            else:
                sim.env.process(sim._station_breakdown_process(station))

        # Resume WAITING jobs in zone queues:
        # These need a full _process_job-like coroutine that waits for dispatch
        # then routes through remaining operations.
        for zone_id, queue in sim.zone_queues.items():
            for job in queue:
                job._dispatch_event = sim.env.event()
                sim.env.process(sim._resume_waiting_job(job, zone_id))
            if queue:
                sim._trigger_dispatcher(zone_id)

        # Resume PROCESSING jobs with correct remaining time:
        # At save time, op.start_time and op.actual_proc_time are set,
        # but op.end_time is still -1.0 (only set after timeout completes).
        # Remaining = (start_time + actual_proc_time) - saved_time
        for job in sim.all_jobs.values():
            if job.status == "processing" and job.current_op_idx < len(job.operations):
                op = job.operations[job.current_op_idx]
                if op.start_time >= 0 and op.actual_proc_time > 0:
                    expected_end = op.start_time + op.actual_proc_time
                    remaining = max(0.0, expected_end - saved_time)
                else:
                    remaining = 0.0
                sim.env.process(sim._resume_job(job, remaining))

        # All processes are registered on the new environment. run() calls
        # init() whenever this flag is missing or falsy, which would
        # initialise a second time on top of the restored state.
        sim._processes_registered = True

        return sim

    def _resume_job(self, job: Job, remaining_time: float):
        """Continue processing a job that was in-progress at save_state time."""
        op_idx = job.current_op_idx
        op = job.operations[op_idx]

        yield self.env.timeout(remaining_time)
        op.end_time = self.env.now

        # Continue with remaining operations
        for next_op_idx in range(op_idx + 1, len(job.operations)):
            next_op = job.operations[next_op_idx]
            zone_id = next_op.zone_id

            self.zone_queues[zone_id].append(job)
            job.status = "waiting"
            job._dispatch_event = self.env.event()
            self._trigger_dispatcher(zone_id)
            yield job._dispatch_event

            station_id = self._pick_station(zone_id)
            next_op.station_id = station_id
            resource = self.station_resources[station_id]
            st = self.stations[station_id]
            st.current_job = job.job_id

            with resource.request() as req:
                yield req
                while st.is_broken:
                    wait_time = max(0.1, st.repair_end_time - self.env.now)
                    yield self.env.timeout(wait_time)

                job.status = "processing"
                job.current_op_idx = next_op_idx

                variability = float(self.rng.lognormal(0, 0.30))
                lunch_penalty = self._lunch_penalty_factor if self._lunch_active else 1.0
                actual_time = next_op.nominal_proc_time * variability * lunch_penalty

                next_op.actual_proc_time = actual_time
                next_op.start_time = self.env.now
                self._zone_busy_time[zone_id] = self._zone_busy_time.get(zone_id, 0.0) + actual_time

                yield self.env.timeout(actual_time)

                next_op.end_time = self.env.now
                st.busy_until = self.env.now
                st.current_job = None

            self._trigger_dispatcher(zone_id)

        job.status = "done"
        job.completion_time = self.env.now
        job.current_op_idx = len(job.operations)
        self.completed_jobs.append(job)

    def _resume_waiting_job(self, job: Job, current_zone_id: int):
        """Resume a job that was waiting in a zone queue at save_state time.

        This replaces the missing _process_job coroutine for waiting jobs
        restored via from_state(). The job waits for dispatch in its current
        zone, processes that operation, then routes through all remaining ops.
        """
        # Wait for dispatcher to select this job in the current zone
        yield job._dispatch_event

        # Process the current operation (the one the job was waiting for)
        op_idx = job.current_op_idx
        op = job.operations[op_idx]
        zone_id = current_zone_id

        station_id = self._pick_station(zone_id)
        op.station_id = station_id
        resource = self.station_resources[station_id]
        st = self.stations[station_id]
        st.current_job = job.job_id

        with resource.request() as req:
            yield req
            while st.is_broken:
                wait_time = max(0.1, st.repair_end_time - self.env.now)
                yield self.env.timeout(wait_time)

            job.status = "processing"
            job.current_op_idx = op_idx

            variability = float(self.rng.lognormal(0, 0.30))
            lunch_penalty = self._lunch_penalty_factor if self._lunch_active else 1.0
            actual_time = op.nominal_proc_time * variability * lunch_penalty

            op.actual_proc_time = actual_time
            op.start_time = self.env.now
            self._zone_busy_time[zone_id] = self._zone_busy_time.get(zone_id, 0.0) + actual_time

            yield self.env.timeout(actual_time)

            op.end_time = self.env.now
            st.busy_until = self.env.now
            st.current_job = None

        self._trigger_dispatcher(zone_id)

        # Continue with remaining operations (same as _resume_job)
        for next_op_idx in range(op_idx + 1, len(job.operations)):
            next_op = job.operations[next_op_idx]
            next_zone_id = next_op.zone_id

            self.zone_queues[next_zone_id].append(job)
            job.status = "waiting"
            job._dispatch_event = self.env.event()
            self._trigger_dispatcher(next_zone_id)
            yield job._dispatch_event

            station_id = self._pick_station(next_zone_id)
            next_op.station_id = station_id
            resource = self.station_resources[station_id]
            st = self.stations[station_id]
            st.current_job = job.job_id

            with resource.request() as req:
                yield req
                while st.is_broken:
                    wait_time = max(0.1, st.repair_end_time - self.env.now)
                    yield self.env.timeout(wait_time)

                job.status = "processing"
                job.current_op_idx = next_op_idx

                variability = float(self.rng.lognormal(0, 0.30))
                lunch_penalty = self._lunch_penalty_factor if self._lunch_active else 1.0
                actual_time = next_op.nominal_proc_time * variability * lunch_penalty

                next_op.actual_proc_time = actual_time
                next_op.start_time = self.env.now
                self._zone_busy_time[next_zone_id] = self._zone_busy_time.get(next_zone_id, 0.0) + actual_time

                yield self.env.timeout(actual_time)

                next_op.end_time = self.env.now
                st.busy_until = self.env.now
                st.current_job = None

            self._trigger_dispatcher(next_zone_id)

        job.status = "done"
        job.completion_time = self.env.now
        job.current_op_idx = len(job.operations)
        self.completed_jobs.append(job)

    # ------------------------------------------------------------------
    # NEW in DAHS_2: Partial metrics for fork evaluation windows
    # ------------------------------------------------------------------

    def get_partial_metrics(self, since_time: float) -> SimulationMetrics:
        """Compute metrics only for jobs completed between since_time and env.now.

        Used in the 20-minute fork evaluation window during data generation.

        Parameters
        ----------
        since_time : float
            Start of evaluation window (simulation time).

        Returns
        -------
        SimulationMetrics
            Tardiness, SLA breach rate, cycle time, throughput and completed
            count are computed only over jobs with
            ``completion_time >= since_time``.

        Notes
        -----
        Zone utilization is *not* windowed: it is the recorded busy time
        divided by ``env.now * num_stations`` (floored at 1.0 and clamped to
        1.0), i.e. cumulative since time zero. ``queue_max`` and
        ``total_jobs`` are likewise whole-run values. These three are reported
        identically whether or not any job completed inside the window; when
        none did, the job-derived fields are left at the ``SimulationMetrics``
        defaults and ``makespan`` is ``env.now``.
        """
        now = self.env.now
        window_jobs = [
            j for j in self.completed_jobs
            if j.completion_time >= since_time
        ]

        # Independent of which jobs fall in the window, so computed up front
        # and shared by both return paths.
        zone_utilization = {
            z: min(1.0, self._zone_busy_time.get(z, 0.0) / max(1.0, now * self.zones[z].num_stations))
            for z in self.zones
        }

        if not window_jobs:
            return SimulationMetrics(
                makespan=now,
                zone_utilization=zone_utilization,
                queue_max=self._max_queue,
                total_jobs=len(self.all_jobs),
            )

        n = len(window_jobs)
        total_tardiness = sum(max(0.0, j.completion_time - j.due_date) for j in window_jobs)
        n_late = sum(1 for j in window_jobs if j.completion_time > j.due_date)
        sla_breach_rate = n_late / n
        avg_cycle_time = float(np.mean([j.completion_time - j.arrival_time for j in window_jobs]))
        # Window length is floored at 1.0 so a very short window cannot blow
        # up the throughput figure (jobs per 60 time units).
        duration = max(now - since_time, 1.0)
        throughput = (n / duration) * 60.0

        return SimulationMetrics(
            makespan=max(j.completion_time for j in window_jobs),
            total_tardiness=total_tardiness,
            sla_breach_rate=sla_breach_rate,
            avg_cycle_time=avg_cycle_time,
            zone_utilization=zone_utilization,
            throughput=throughput,
            queue_max=self._max_queue,
            completed_jobs=n,
            total_jobs=len(self.all_jobs),
        )