Source code for vibeblocks.utils.schema

"""
Utilities for generating JSON Schemas from Flow manifests and data models.
Designed to work with OpenAI Function Calling or similar LLM structured outputs.
"""

import dataclasses
import types
from typing import Any, Dict, List, Type, get_args, get_origin, Union

def generate_function_schema(flow_manifest: Dict[str, Any], context_model: Type[Any]) -> Dict[str, Any]:
    """
    Generates a JSON Schema for the Flow execution request.

    The function name is built as ``run_<name>`` from the manifest name,
    lower-cased, with every character outside ``[a-z0-9_-]`` replaced by an
    underscore so the result is usable as a function-calling identifier.

    Args:
        flow_manifest: The dictionary returned by Flow.get_manifest(). The keys
            "name" and "description" are read from it.
        context_model: The class (Dataclass or Pydantic Model) used for
            ExecutionContext.data.

    Returns:
        A function definition dict with the keys "name", "description" and
        "parameters", compatible with OpenAI Function Calling.

    Raises:
        KeyError: If "name" or "description" is missing from flow_manifest.
    """
    # 1. Generate Schema for the Context Data
    data_schema = _get_model_schema(context_model)

    # 2. Build the full parameters schema.
    # We want the LLM to provide the data to initialize the context.
    parameters_schema = {
        "type": "object",
        "properties": {"initial_data": data_schema},
        "required": ["initial_data"],
    }

    # 3. Construct the function definition.
    # Replace characters one-for-one (no collapsing of runs) so that names
    # which only contained letters, digits, spaces, "_" or "-" produce exactly
    # the same identifier as before.
    lowered_name = flow_manifest["name"].lower()
    safe_name = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "_-" else "_"
        for ch in lowered_name
    )

    return {
        "name": f"run_{safe_name}",
        "description": flow_manifest["description"],
        "parameters": parameters_schema,
    }
def _get_model_schema(model: Type[Any]) -> Dict[str, Any]:
    """
    Extracts JSON Schema from a Pydantic model or Dataclass.

    Args:
        model: A Pydantic model class (v1 or v2), a dataclass, or any other type.

    Returns:
        The schema produced by the model's own schema hook when it has one,
        a hand-built schema for dataclasses, or a permissive object schema
        for anything else.
    """
    # 1. Pydantic v2 (`model_json_schema`), then 2. Pydantic v1 (`schema`).
    # The attribute must be callable: a plain dataclass with a defaulted field
    # called `schema` also exposes a class attribute of that name, and calling
    # it would raise TypeError.
    for hook_name in ("model_json_schema", "schema"):
        hook = getattr(model, hook_name, None)
        if callable(hook):
            return hook()

    # 3. Dataclasses
    if dataclasses.is_dataclass(model):
        return _dataclass_to_schema(model)

    # 4. Fallback for simple types or dicts (limited support)
    return {"type": "object", "additionalProperties": True}


def _dataclass_to_schema(dc: Type[Any]) -> Dict[str, Any]:
    """
    Manually converts a dataclass to a simple JSON Schema.

    Args:
        dc: The dataclass to describe.

    Returns:
        An object schema with "title", "type", "properties" and "required".
        Fields that have neither a default nor a default factory are listed
        as required.
    """
    properties: Dict[str, Any] = {}
    required: List[str] = []

    for field in dataclasses.fields(dc):
        # field.type might be a string if "from __future__ import annotations"
        # is used; such annotations are not resolved here and end up in the
        # generic fallback of _type_to_schema.
        properties[field.name] = _type_to_schema(field.type)

        # MISSING is a sentinel, so compare by identity. An equality check
        # would be fooled by a default whose __eq__ answers True to anything.
        has_no_default = (
            field.default is dataclasses.MISSING
            and field.default_factory is dataclasses.MISSING
        )
        if has_no_default:
            required.append(field.name)

    return {
        "title": dc.__name__,
        "type": "object",
        "properties": properties,
        "required": required,
    }


def _type_to_schema(py_type: Any) -> Dict[str, Any]:
    """
    Maps Python types to JSON Schema types.

    Args:
        py_type: A type object or typing construct taken from an annotation.

    Returns:
        A JSON Schema fragment. Unions (including Optional[T] and PEP 604
        ``T | None``) are reduced to their first non-None member. Anything
        unrecognised falls back to ``{"type": "string"}``.
    """
    # Primitive scalars.
    for primitive, json_type in (
        (str, "string"),
        (int, "integer"),
        (float, "number"),
        (bool, "boolean"),
    ):
        if py_type is primitive:
            return {"type": json_type}

    # Bare builtin containers have no typing origin, so they would otherwise
    # fall through to the string fallback.
    if py_type is list:
        return {"type": "array", "items": {}}
    if py_type is dict:
        return {"type": "object"}

    origin = get_origin(py_type)
    args = get_args(py_type)

    # Handle Optional[T] -> Union[T, None], plus `T | None` whose origin is
    # types.UnionType (only present on Python 3.10+).
    pep604_union = getattr(types, "UnionType", None)
    if origin is Union or (pep604_union is not None and origin is pep604_union):
        # Simplification: take the first non-None type
        non_none_args = [arg for arg in args if arg is not type(None)]
        if non_none_args:
            return _type_to_schema(non_none_args[0])

    # get_origin() returns the builtin for both list[...] and typing.List[...].
    if origin is list:
        item_schema = _type_to_schema(args[0]) if args else {}
        return {"type": "array", "items": item_schema}

    if origin is dict:
        return {"type": "object"}

    # Handle nested dataclasses
    if dataclasses.is_dataclass(py_type):
        return _dataclass_to_schema(py_type)

    return {"type": "string"}  # Default fallback