Creating a Struct From an Unknown JSON Schema
Question
Hi. Is it possible to dynamically create a struct from a JSON schema that is not known in advance? That is, the user tells me how they want the struct to look by supplying a JSON schema at runtime, and I create a Struct from it.
I'm not aware of any functionality that lets you do this directly from a JSON schema, but I'm also not sure that's what you're asking.
If all you're looking for is the ability to dynamically generate structs from a given spec, you can use defstruct.
Thanks. Didn't see that in the docs. I made my own converter and used defstruct to make it work:
import msgspec
from typing import Any, Dict, Optional, Tuple, Type, List, Union, Callable, Literal, cast
import re
def get_struct_from_schema(
    json_schema: Dict[str, Any],
    bases: Optional[Tuple[Type[msgspec.Struct], ...]] = None,
    name: Optional[str] = None,
    module: Optional[str] = None,
    namespace: Optional[Dict[str, Any]] = None,
    tag_field: Optional[str] = None,
    tag: Union[None, bool, str, int, Callable[[str], Union[str, int]]] = None,
    rename: Union[None, Literal["lower", "upper", "camel", "pascal", "kebab"], Callable[[str], Optional[str]], Dict[str, str]] = None,
    omit_defaults: bool = False,
    forbid_unknown_fields: bool = False,
    frozen: bool = False,
    eq: bool = True,
    order: bool = False,
    kw_only: bool = False,
    repr_omit_defaults: bool = False,
    array_like: bool = False,
    gc: bool = True,
    weakref: bool = False,
    dict_: bool = False,
    cache_hash: bool = False
) -> Type[msgspec.Struct]:
    """
    Create a msgspec.Struct type from a JSON schema at runtime.

    Field optionality follows the schema:

    * A property with a ``default`` becomes a field with that default.
    * A property listed in ``required`` (and without a default) becomes a
      required field.
    * Any other property becomes ``Optional[...]`` with a default of ``None``,
      so that documents omitting it still decode.

    Unless ``kw_only`` is true, required fields are placed before optional
    ones (relative order within each group is preserved), because msgspec
    rejects a required field that follows an optional one.

    Args:
        json_schema (Dict[str, Any]): The JSON schema defining the structure.
        bases (Optional[Tuple[Type[msgspec.Struct], ...]]): Base classes for the new Struct.
        name (Optional[str]): Name for the new Struct. If not provided, it's derived from the schema title.
        module (Optional[str]): Module name for the new Struct.
        namespace (Optional[Dict[str, Any]]): Additional namespace for the new Struct.
        tag_field (Optional[str]): Name of the field to use for tagging.
        tag (Union[None, bool, str, int, Callable]): Tag value or function to generate tag.
        rename (Union[None, str, Callable, Dict[str, str]]): Field renaming strategy.
        omit_defaults (bool): Whether to omit fields with default values during serialization.
        forbid_unknown_fields (bool): Whether to raise an error for unknown fields during deserialization.
        frozen (bool): Whether the resulting struct should be immutable.
        eq (bool): Whether to add __eq__ method to the struct.
        order (bool): Whether to add ordering methods to the struct.
        kw_only (bool): Whether all fields should be keyword-only in the __init__ method.
        repr_omit_defaults (bool): Whether to omit fields with default values in __repr__.
        array_like (bool): Whether to make the struct behave like an array.
        gc (bool): Whether the struct should be tracked by the garbage collector.
        weakref (bool): Whether to add support for weak references to the struct.
        dict_ (bool): Whether to add a __dict__ to the struct.
        cache_hash (bool): Whether to cache the hash value of the struct.

    Returns:
        Type[msgspec.Struct]: A new msgspec.Struct type based on the provided JSON schema.

    Raises:
        ValueError: If the JSON schema is invalid or missing required information.
    """
    # Local import: only needed to copy non-empty mutable defaults.
    import copy

    if not hasattr(json_schema, 'get'):
        raise ValueError("JSON schema must be a dictionary-like object")
    if json_schema.get("type") != "object":
        raise ValueError("JSON schema must define an object type")
    if "properties" not in json_schema:
        raise ValueError("JSON schema must define properties")

    properties = json_schema["properties"]
    if not hasattr(properties, "items"):
        raise ValueError("JSON schema properties must be a dictionary-like object")

    required_names = json_schema.get("required", [])
    if not isinstance(required_names, (list, tuple)) or not all(
        isinstance(item, str) for item in required_names
    ):
        raise ValueError("JSON schema 'required' must be a list of property names")
    required = set(required_names)

    if name is None:
        # An empty or missing title falls back to a generic name.
        name = json_schema.get("title") or "DynamicStruct"
    if not isinstance(name, str):
        raise ValueError("Struct name must be a string")
    name = re.sub(r'\W|^(?=\d)', '_', name)  # Ensure valid Python identifier

    # Entries are (name, type) for required fields and (name, type, default)
    # for optional ones, the two forms msgspec.defstruct accepts.
    fields: List[Tuple[Any, ...]] = []
    for prop_name, prop_schema in properties.items():
        field_type: Any = _get_field_type(prop_schema)
        default = prop_schema.get("default", msgspec.NODEFAULT)

        if default is msgspec.NODEFAULT:
            if prop_name in required:
                fields.append((prop_name, field_type))
                continue
            # Not required and no default: the property may be absent.
            field_type = Optional[field_type]
            default = None
        elif isinstance(default, (list, dict)) and default:
            # msgspec rejects non-empty mutable defaults; hand every instance
            # its own copy through a factory instead. Empty containers are
            # left as-is so msgspec's native handling still applies.
            default = msgspec.field(
                default_factory=lambda value=default: copy.deepcopy(value)
            )

        fields.append((prop_name, field_type, default))

    if not kw_only:
        # Stable sort: required (2-tuple) entries first, optional (3-tuple)
        # entries after, each group keeping its schema order.
        fields.sort(key=lambda spec: len(spec) == 3)

    return cast(Type[msgspec.Struct], msgspec.defstruct(
        name,
        fields,
        bases=bases,
        module=module,
        namespace=namespace,
        tag_field=tag_field,
        tag=tag,
        rename=rename,
        omit_defaults=omit_defaults,
        forbid_unknown_fields=forbid_unknown_fields,
        frozen=frozen,
        eq=eq,
        order=order,
        kw_only=kw_only,
        repr_omit_defaults=repr_omit_defaults,
        array_like=array_like,
        gc=gc,
        weakref=weakref,
        dict=dict_,
        cache_hash=cache_hash
    ))
def _get_field_type(prop_schema: Dict[str, Any]) -> Type[Any]:
    """
    Determine the appropriate Python type based on the JSON schema property definition.

    Handles a single ``type`` name, a list of type names (mapped to a
    ``Union`` of the corresponding Python types), and arrays whose ``items``
    is itself a schema object (mapped to ``List[<item type>]``, recursively).
    A property without a ``type`` keyword maps to ``Any``.

    Args:
        prop_schema (Dict[str, Any]): The schema for a single property.

    Returns:
        Type[Any]: The corresponding Python type (possibly a typing construct
        such as ``Optional[str]`` or ``List[int]``).

    Raises:
        ValueError: If the property type is not supported or is invalid.
    """
    scalar_mapping: Dict[str, Any] = {
        "string": str,
        "integer": int,
        "number": float,
        "boolean": bool,
        "null": type(None),
        "object": Dict[str, Any],
    }

    if "type" not in prop_schema:
        # No constraint on the value: any JSON value is acceptable.
        return cast(Type[Any], Any)

    def resolve(type_name: Any) -> Any:
        """Map one JSON schema type name to a Python type."""
        if type_name == "array":
            items = prop_schema.get("items")
            # Only a schema object describes the element type; boolean or
            # list-valued ``items`` fall back to an untyped list.
            if isinstance(items, dict):
                return List[_get_field_type(items)]
            return List[Any]
        # The isinstance guard keeps unhashable values (e.g. a dict) from
        # raising TypeError on the mapping lookup.
        if not isinstance(type_name, str) or type_name not in scalar_mapping:
            raise ValueError(f"Unsupported type: {type_name}")
        return scalar_mapping[type_name]

    prop_type = prop_schema["type"]
    if isinstance(prop_type, list):
        if not prop_type:
            raise ValueError("Unsupported type: empty type list")
        # Union collapses duplicates and single-member unions on its own.
        return cast(Type[Any], Union[tuple(resolve(member) for member in prop_type)])
    return cast(Type[Any], resolve(prop_type))
Example usage:
# Schema describing a user record; "email" may be null and "tags" is a list
# of strings.
json_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": ["string", "null"]},
        "tags": {"type": "array", "items": {"type": "string"}},
    },
    "required": ["name", "age"],
}

# Build an immutable struct type that rejects unknown fields when decoding.
UserStruct = get_struct_from_schema(
    json_schema,
    name="User",
    frozen=True,
    forbid_unknown_fields=True,
)

# The generated type is instantiated like any hand-written msgspec.Struct.
user = UserStruct(name="Alice", age=30, email="alice@example.com", tags=["staff", "admin"])
You might want to check out datamodel-code-generator, which can generate msgspec.Structs from JSON Schema.
If you know the schema in advance, you can specify the schema to the datamodel-code-generator CLI to generate a Struct (or multiple Structs) for you (as generated code).
Alternatively, you can use datamodel-code-generator as a module. This suits cases where a user of whatever you're writing supplies a JSON Schema dynamically as input, so the Struct(s) have to be generated programmatically.