[Multi-part upload] TypeError: Object of type BytesIO is not JSON serializable
I have generated client code for a FastAPI multi-file upload endpoint, which I'm using like this:
# Read every source file into an in-memory buffer, wrap each buffer in a
# generated File model, and post them all in one multipart request.
file_names = ['fd1.py', 'fd2.py']
files = []
for f in file_names:
    with open(f, 'rb') as fh:
        buf = BytesIO(fh.read())
    files.append(buf)
multipart_data = BodyUploadFeatureDefinitionFeatureDefinitionPost(
    # Pair each buffer with its file name directly instead of indexing both lists.
    files=[
        File(payload=payload, file_name=name, mime_type='text/x-python')
        for name, payload in zip(file_names, files)
    ],
    tags=[],  # not relevant
)
res = upload_feature_definition_feature_definition_post.sync(
    client=self.client,
    multipart_data=multipart_data,
)
I get an error:
Traceback (most recent call last):
File "/Users/anov/IdeaProjects/svoe/client/featurizer_client/featurizer_client.py", line 77, in <module>
client.register_feature_definition(
File "/Users/anov/IdeaProjects/svoe/client/featurizer_client/featurizer_client.py", line 63, in register_feature_definition
res = upload_feature_definition_feature_definition_post.sync(
File "/Users/anov/IdeaProjects/svoe/client/fast_api_client/api/default/upload_feature_definition_feature_definition_post.py", line 146, in sync
return sync_detailed(
File "/Users/anov/IdeaProjects/svoe/client/fast_api_client/api/default/upload_feature_definition_feature_definition_post.py", line 103, in sync_detailed
kwargs = _get_kwargs(
File "/Users/anov/IdeaProjects/svoe/client/fast_api_client/api/default/upload_feature_definition_feature_definition_post.py", line 40, in _get_kwargs
multipart_multipart_data = multipart_data.to_multipart()
File "/Users/anov/IdeaProjects/svoe/client/fast_api_client/models/body_upload_feature_definition_feature_definition_post.py", line 67, in to_multipart
files = (None, json.dumps(_temp_files).encode(), "application/json")
File "/Users/anov/miniconda3/envs/env_py_3-10/lib/python3.10/json/__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "/Users/anov/miniconda3/envs/env_py_3-10/lib/python3.10/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/Users/anov/miniconda3/envs/env_py_3-10/lib/python3.10/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/Users/anov/miniconda3/envs/env_py_3-10/lib/python3.10/site-packages/frozendict/__init__.py", line 32, in default
return BaseJsonEncoder.default(self, obj)
File "/Users/anov/miniconda3/envs/env_py_3-10/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type BytesIO is not JSON serializable
Generated File class:
@attr.s(auto_attribs=True)
class File:
    """Describes one file to upload: its payload, file name and MIME type."""

    # Binary stream holding the file content.
    payload: BinaryIO
    # Optional name sent along with the payload.
    file_name: Optional[str] = None
    # Optional content type sent along with the payload.
    mime_type: Optional[str] = None

    def to_tuple(self) -> FileJsonType:
        """Return ``(file_name, payload, mime_type)``, the form httpx accepts for multipart/form-data."""
        name, content, content_type = self.file_name, self.payload, self.mime_type
        return name, content, content_type
It looks like the generated code tries to call json.dumps() on the tuple representation of File, which contains a BinaryIO field; this causes the exception.
What am I doing wrong?
I had a similar problem, but with an endpoint for downloads. Have a look at the OpenAPI file from which you generated the client. In my case it looks like this:
"/api/Data/.../DownloadData": {
"get": {
"responses": {
"200": {
"description": "Success",
"headers": { "..." },
"content": {
"application/octet-stream": {
"schema": {
"type": "string",
"format": "binary"
}
}
}
}
}
The problem occurs if you define the MIME type as something other than application/octet-stream — in your case probably application/json or something ending in +json. The "string"/"binary" schema creates the File object, and the generated code then tries to squeeze it into the JSON encoder, hence your error message.
Even if you upload or download JSON data, the MIME type of the endpoint should be application/octet-stream, because the content could be anything. The type of the data you download should be in the header of the response ... if everything works fine.
Cheers
Edit: I had a look at the upload section; it looks similar, but a bit different:
"/api/Data/ ... /UploadData": {
"post": {
"requestBody": {
"description": "File to upload",
"content": {
"multipart/form-data": {
"schema": {
"required": [
"SomeData"
],
"type": "object",
"properties": {
"SomeData": {
"type": "string",
"format": "binary"
}
}
}
}
}
}
I am having the same issue for an API body that receives an array of files (binary objects). The relevant schema looks like this:
requestBody:
content:
multipart/form-data:
schema:
type: object
properties:
product[main_image]:
type: string
format: binary
product[files][]:
type: array
items:
type: string
format: binary
The generated code handles main_image correctly, but assumes the list of files should somehow be JSON-encoded:
def to_multipart(self) -> Dict[str, Any]:
    """Build the multipart field mapping for this model (generator output, shown as-is).

    Returns a dict keyed by form-field name. The single main image is passed
    through as a file tuple, while the list of files is JSON-encoded into one
    ``application/json`` part.
    """
    productmain_image: Union[Unset, FileJsonType] = UNSET
    if not isinstance(self.productmain_image, Unset):
        productmain_image = self.productmain_image.to_tuple()
    productappend_files: Union[Unset, Tuple[None, bytes, str]] = UNSET
    if not isinstance(self.productappend_files, Unset):
        _temp_productappend_files = []
        for productappend_files_item_data in self.productappend_files:
            productappend_files_item = productappend_files_item_data.to_tuple()
            _temp_productappend_files.append(productappend_files_item)
        # NOTE(review): each item is a file tuple that still carries the binary
        # payload object, so json.dumps() raises the reported
        # "Object of type BytesIO is not JSON serializable" TypeError here.
        productappend_files = (None, json.dumps(_temp_productappend_files).encode(), "application/json")
    field_dict: Dict[str, Any] = {}
    # Additional properties are stringified and sent as text/plain parts.
    field_dict.update(
        {key: (None, str(value).encode(), "text/plain") for key, value in self.additional_properties.items()}
    )
    # Updating with an empty dict changes nothing.
    field_dict.update({})
    if productmain_image is not UNSET:
        field_dict["product[main_image]"] = productmain_image
    if productappend_files is not UNSET:
        field_dict["product[append_files][]"] = productappend_files
    return field_dict
I use the generated client code to build the input:
# Collect the optional main image and the remaining attachments as File models.
input_dict = {}
if "main_image" in attachments_paths:
    # pop() removes the main image so it is not uploaded again in the files list.
    input_dict["product[main_image]"] = self._build_file(
        attachments_paths.pop("main_image"))
input_dict["product[files][]"] = [
    self._build_file(path) for path in attachments_paths.values()
]
# Pass the main image only when one was supplied: reading
# input_dict["product[main_image]"] unconditionally raised KeyError when the
# "main_image" entry was absent.
# NOTE(review): assumes productmain_image is optional on the model — confirm.
media_kwargs = {"productfiles": input_dict["product[files][]"]}
if "product[main_image]" in input_dict:
    media_kwargs["productmain_image"] = input_dict["product[main_image]"]
media_creation_data = ProductMediaCreationInput(**media_kwargs)
...
def _build_file(self, file_path: str) -> File:
    """Wrap the file at ``file_path`` in a ``File`` model ready for upload.

    The content is read fully into an in-memory buffer, the file name is the
    last ``/``-separated component of the path, and the MIME type is detected
    from the file with ``magic``.
    """
    # Read inside a context manager so the handle is closed right away
    # instead of being left open until garbage collection.
    with open(file_path, 'rb') as fh:
        payload = BytesIO(fh.read())
    return File(
        payload=payload,
        file_name=file_path.split('/')[-1],
        mime_type=magic.Magic(mime=True).from_file(file_path),
    )
But when I call the API client:
# Send the multipart body through the generated client for the given product.
post_api_v1_products_product_id_media.sync_detailed(
    client=self.client, product_id=product_id, multipart_data=media_creation_data)
it fails with "TypeError: Object of type BytesIO is not JSON serializable"
I was able to patch my generated code to look like this, and now it works fine:
def to_multipart(self) -> list[Tuple[str, FileJsonType]]:
    """Build the multipart body as a list of ``(field_name, part)`` pairs.

    A list of pairs, unlike a dict, lets the same field name appear once per
    uploaded file. Order is: additional properties (as text/plain parts),
    then the main image if set, then one entry per appended file if set.
    """
    # Convert the file models to tuples first.
    main_image_part: Union[Unset, FileJsonType] = UNSET
    if not isinstance(self.productmain_image, Unset):
        main_image_part = self.productmain_image.to_tuple()

    file_parts: Union[Unset, list[FileJsonType]] = UNSET
    if not isinstance(self.productappend_files, Unset):
        file_parts = [item.to_tuple() for item in self.productappend_files]

    # Additional properties are stringified and sent as text/plain parts.
    parts: list[Tuple[str, FileJsonType]] = [
        (name, (None, str(raw).encode(), "text/plain"))
        for name, raw in self.additional_properties.items()
    ]
    if main_image_part is not UNSET:
        parts.append(("product[main_image]", main_image_part))
    if file_parts is not UNSET:
        # One entry per file, all under the same field name.
        parts.extend(("product[append_files][]", part) for part in file_parts)
    return parts
It seems reasonable that a multipart request with multiple files under the same key (an array of files) should not be JSON-encoded and instead needs to be handled as a list of tuples (as documented by httpx).
It would be great if the code generation logic used this (or something similar) to fix the issue.