# Source code for vibeblocks.utils.schema

"""
Utilities for generating JSON Schemas from Flow manifests and data models.
Designed to work with OpenAI Function Calling or similar LLM structured outputs.
"""

import dataclasses
from typing import Any, Dict, List, Type, get_args, get_origin, Union


# [docs]
def generate_function_schema(flow_manifest: Dict[str, Any], context_model: Type[Any]) -> Dict[str, Any]:
    """
    Generates a JSON Schema for the Flow execution request.

    The function name is built as ``run_<manifest name>``, lower-cased. Every
    character that is not an ASCII letter, ASCII digit, ``_`` or ``-`` is
    replaced by ``_`` and the result is truncated to 64 characters, because
    OpenAI Function Calling rejects names outside that alphabet/length.
    Names that were already valid (letters, digits and spaces only) map to
    exactly the same identifier as before.

    Args:
        flow_manifest: The dictionary returned by Flow.get_manifest(). The keys
            ``"name"`` and ``"description"`` are read from it.
        context_model: The class (Dataclass or Pydantic Model) used for ExecutionContext.data.

    Returns:
        A function definition dict with the keys ``"name"``, ``"description"``
        and ``"parameters"``, compatible with OpenAI Function Calling.

    Raises:
        KeyError: If ``flow_manifest`` has no ``"name"`` or ``"description"`` key.
    """

    # 1. Generate Schema for the Context Data
    data_schema = _get_model_schema(context_model)

    # 2. Build the full parameters schema
    # We want the LLM to provide the data to initialize the context.
    parameters_schema = {
        "type": "object",
        "properties": {
            "initial_data": data_schema
        },
        "required": ["initial_data"]
    }

    # 3. Build a function name that only uses characters the API accepts.
    # Spaces still become "_" (as before); other punctuation and non-ASCII
    # characters are now replaced as well instead of leaking into the name.
    raw_name = f"run_{flow_manifest['name'].lower()}"
    safe_name = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "_-" else "_"
        for ch in raw_name
    )[:64]  # 64 is the maximum function-name length accepted by OpenAI

    # 4. Construct the function definition
    function_def = {
        "name": safe_name,
        "description": flow_manifest['description'],
        "parameters": parameters_schema
    }

    return function_def


def _get_model_schema(model: Type[Any]) -> Dict[str, Any]:
    """
    Extracts JSON Schema from a Pydantic model or Dataclass.
    """
    # 1. Pydantic v2
    if hasattr(model, "model_json_schema"):
        return model.model_json_schema()

    # 2. Pydantic v1
    if hasattr(model, "schema"):
        return model.schema()

    # 3. Dataclasses
    if dataclasses.is_dataclass(model):
        return _dataclass_to_schema(model)

    # 4. Fallback for simple types or dicts (limited support)
    return {"type": "object", "additionalProperties": True}

def _dataclass_to_schema(dc: Type[Any]) -> Dict[str, Any]:
    """
    Manually converts a dataclass to a simple JSON Schema.
    """
    properties = {}
    required = []

    for field in dataclasses.fields(dc):
        # field.type might be a string if "from __future__ import annotations" is used,
        # or a type object. This simple extractor assumes type object or simple forward ref.
        field_type = field.type
        prop_schema = _type_to_schema(field_type)
        properties[field.name] = prop_schema

        # Assume fields without default values are required
        if field.default == dataclasses.MISSING and field.default_factory == dataclasses.MISSING:
            required.append(field.name)

    return {
        "title": dc.__name__,
        "type": "object",
        "properties": properties,
        "required": required
    }

def _type_to_schema(py_type: Any) -> Dict[str, Any]:
    """
    Maps Python types to JSON Schema types.
    """
    if py_type == str:
        return {"type": "string"}
    if py_type == int:
        return {"type": "integer"}
    if py_type == float:
        return {"type": "number"}
    if py_type == bool:
        return {"type": "boolean"}

    # Handle Optional[T] -> Union[T, None]
    origin = get_origin(py_type)
    args = get_args(py_type)

    if origin is Union:
        # Simplification: take the first non-None type
        non_none_args = [arg for arg in args if arg is not type(None)]
        if non_none_args:
            return _type_to_schema(non_none_args[0])

    if origin is list or origin is List:
        item_schema = _type_to_schema(args[0]) if args else {}
        return {"type": "array", "items": item_schema}

    if origin is dict or origin is Dict:
        return {"type": "object"}

    # Handle nested dataclasses
    if dataclasses.is_dataclass(py_type):
        return _dataclass_to_schema(py_type)

    return {"type": "string"} # Default fallback