# Source code for evoproc_procedures.pipelines

from __future__ import annotations

import inspect
import json
from typing import Any, Dict, Callable, Optional, Tuple

from evoproc_procedures.models import Procedure
from evoproc_procedures.prompts import create_procedure_prompt
from evoproc_procedures.repairs import repair_procedure_structured

# A decoded JSON object: string keys mapped to arbitrary values.
JSON = Dict[str, Any]

# LLM backend hook, called as `(prompt, model, fmt, seed) -> str`.
QueryFn = Callable[
    [str, str, Optional[JSON], Optional[int]],
    str,
]

# Procedure executor hook. The parameter list is left open (`...`) because
# callers may supply either a positional-style or a keyword-style executor;
# in both cases the result is a JSON-like state mapping.
RunStepsFn = Callable[..., JSON]


# [docs]
def _parse_procedure_response(idx: int, raw: Any) -> Any:
    """Decode the model's reply into a Python object.

    Textual replies (`str`, `bytes`, `bytearray`) are parsed with `json.loads`;
    anything else is assumed to be already decoded and is returned unchanged.

    Raises
    ------
    ValueError
        If a textual reply is not valid JSON. The message is prefixed with `idx`.
    """
    if not isinstance(raw, (str, bytes, bytearray)):
        return raw
    try:
        return json.loads(raw)
    except (ValueError, RecursionError) as e:
        # JSONDecodeError / UnicodeDecodeError are ValueError subclasses;
        # RecursionError covers pathologically nested payloads.
        raise ValueError(f"[{idx}] Non-JSON response from model: {e}") from e


def _signature_accepts(
    signature: inspect.Signature, args: Tuple[Any, ...], kwargs: Dict[str, Any]
) -> bool:
    """Return True if `signature` can bind the given call arguments."""
    try:
        signature.bind(*args, **kwargs)
    except TypeError:
        return False
    return True


def _execute_procedure(
    run_steps_fn: Callable[..., Any],
    proc: Any,
    question: str,
    answer_schema: Dict[str, Any],
    model: str,
    print_diagnostics: bool,
) -> Any:
    """Invoke `run_steps_fn` using whichever of the two supported call styles fits.

    The call style is chosen by binding the arguments against the executor's
    signature *before* calling it, so a `TypeError` raised inside the executor
    is not mistaken for a signature mismatch (which would hide the real error
    and run the executor a second time).
    """
    positional_args = (proc, question, answer_schema, model, print_diagnostics)
    keyword_args = {
        "inputs": {"problem_text": question},
        "answer_schema": answer_schema,
        "model": model,
    }

    try:
        signature = inspect.signature(run_steps_fn)
    except (TypeError, ValueError):
        # Some callables (e.g. certain builtins) expose no signature.
        signature = None

    if signature is not None:
        fits_positional = _signature_accepts(signature, positional_args, {})
        fits_keyword = _signature_accepts(signature, (proc,), keyword_args)
        if fits_positional and not fits_keyword:
            return run_steps_fn(*positional_args)
        if fits_keyword and not fits_positional:
            return run_steps_fn(proc, **keyword_args)

    # Signature unavailable or ambiguous (e.g. a bare `*args, **kwargs`
    # wrapper): keep the legacy behaviour of trying positional first and
    # falling back to the keyword style on TypeError.
    try:
        return run_steps_fn(*positional_args)
    except TypeError:
        return run_steps_fn(proc, **keyword_args)


def run_full_procedure_structured(
    idx: int,
    question: str,
    *,
    model: str,
    query_fn: QueryFn,
    run_steps_fn: RunStepsFn,
    answer_schema: Dict[str, Any],
    seed: Optional[int] = 1234,
    print_diagnostics: bool = False,
    max_repair_tries: int = 10,
) -> Tuple[JSON, Dict[str, Any]]:
    """Create → repair → execute a structured procedure for one task.

    Steps
    -----
    1) Build a creation prompt for `question` with `create_procedure_prompt`.
    2) Ask the LLM (via `query_fn`) for a **Procedure** JSON, passing
       `Procedure.model_json_schema()` as the requested format.
    3) Hand the result to `repair_procedure_structured(...)` for repair.
    4) Execute the repaired procedure with `run_steps_fn`.

    Args
    ----
    idx:
        An index used to tag error messages (e.g., dataset row).
    question:
        Natural-language task text (fed as `problem_text` in the keyword style).
    model:
        Backend model name (passed through to `query_fn`, the repair step and
        the executor).
    query_fn:
        Callable `(prompt, model, fmt, seed) -> str`. Provide your backend.
        `bytes`/`bytearray` replies are parsed as JSON too; any other non-text
        reply is treated as an already-decoded object.
    run_steps_fn:
        Your executor. Two supported signatures:
          (a) `run_steps_fn(proc, question, answer_schema, model, print_bool=False) -> state`
          (b) `run_steps_fn(proc, *, inputs: dict, answer_schema: dict, model: str) -> state`
        The style is selected by inspecting the callable's signature. When the
        signature is unavailable or fits both styles, (a) is tried first and
        (b) is used if that raises `TypeError`.
    answer_schema:
        JSON Schema for the final answer object (e.g., GSM/ARC).
    seed:
        Optional seed forwarded to `query_fn`.
    print_diagnostics:
        Forwarded to the repair step, and to the executor in style (a).
    max_repair_tries:
        Forwarded to `repair_procedure_structured` as `max_tries`.

    Returns
    -------
    (procedure_json, state_dict)
        The repaired procedure and whatever `run_steps_fn` returned.

    Raises
    ------
    ValueError
        If the model response cannot be parsed as JSON.
    RuntimeError
        Not raised here directly; presumably propagated from
        `repair_procedure_structured` when the procedure cannot be repaired
        (confirm against that function).
    """
    # 1) Create prompt + schema
    prompt = create_procedure_prompt(question)
    schema = Procedure.model_json_schema()

    # 2) Structured generation
    raw = query_fn(prompt, model, schema, seed)
    proc: JSON = _parse_procedure_response(idx, raw)

    # 3) Repair until valid
    proc = repair_procedure_structured(
        proc,
        model=model,
        query_fn=query_fn,
        max_tries=max_repair_tries,
        print_diagnostics=print_diagnostics,
    )

    # 4) Execute (support either executor signature)
    state = _execute_procedure(
        run_steps_fn, proc, question, answer_schema, model, print_diagnostics
    )

    return proc, state