openml-python
openml-python copied to clipboard
Problem with get_dataset when dataset does not have qualities
Description
I tried using openml.datasets.get_dataset(202, download_data=False), but got the following error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1241, in _get_dataset_qualities_file(did_cache_dir, dataset_id)
1240 try:
-> 1241 with io.open(qualities_file, encoding="utf8") as fh:
1242 qualities_xml = fh.read()
FileNotFoundError: [Errno 2] No such file or directory: '/home/.cache/openml/org/openml/www/datasets/202/qualities.xml'
During handling of the above exception, another exception occurred:
OpenMLServerException Traceback (most recent call last)
Cell In[12], line 2
1 for did in did_list:
----> 2 ds = openml.datasets.get_dataset(int(did), download_data=False)
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:514, in get_dataset(dataset_id, download_data, version, error_if_multiple, cache_format, download_qualities, download_features_meta_data, download_all_files, force_refresh_cache)
512 raise OpenMLPrivateDatasetError(e.message) from None
513 else:
--> 514 raise e
515 finally:
516 if remove_dataset_cache:
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:493, in get_dataset(dataset_id, download_data, version, error_if_multiple, cache_format, download_qualities, download_features_meta_data, download_all_files, force_refresh_cache)
491 features_file = _get_dataset_features_file(did_cache_dir, dataset_id)
492 if download_qualities:
--> 493 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id)
495 arff_file = _get_dataset_arff(description) if download_data else None
496 if "oml:minio_url" in description and download_data:
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1245, in _get_dataset_qualities_file(did_cache_dir, dataset_id)
1243 except (OSError, IOError):
1244 try:
-> 1245 qualities_xml = _get_qualities_xml(dataset_id)
1246 with io.open(qualities_file, "w", encoding="utf8") as fh:
1247 fh.write(qualities_xml)
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1205, in _get_qualities_xml(dataset_id)
1203 def _get_qualities_xml(dataset_id):
1204 url_extension = f"data/qualities/{dataset_id}"
-> 1205 return openml._api_calls._perform_api_call(url_extension, "get")
File ~/lib/python3.9/site-packages/openml/_api_calls.py:99, in _perform_api_call(call, request_method, data, file_elements)
97 response = _read_url_files(url, data=data, file_elements=file_elements)
98 else:
---> 99 response = __read_url(url, request_method, data)
101 __check_response(response, url, file_elements)
103 logging.info(
104 "%.7fs taken for [%s] request for the URL %s",
105 time.time() - start,
106 request_method,
107 url,
108 )
File ~/lib/python3.9/site-packages/openml/_api_calls.py:308, in __read_url(url, request_method, data, md5_checksum)
306 if config.apikey:
307 data["api_key"] = config.apikey
--> 308 return _send_request(
309 request_method=request_method, url=url, data=data, md5_checksum=md5_checksum
310 )
File ~/lib/python3.9/site-packages/openml/_api_calls.py:344, in _send_request(request_method, url, data, files, md5_checksum)
342 else:
343 raise NotImplementedError()
--> 344 __check_response(response=response, url=url, file_elements=files)
345 if request_method == "get" and not __is_checksum_equal(
346 response.text.encode("utf-8"), md5_checksum
347 ):
348 # -- Check if encoding is not UTF-8 perhaps
349 if __is_checksum_equal(response.content, md5_checksum):
File ~/lib/python3.9/site-packages/openml/_api_calls.py:409, in __check_response(response, url, file_elements)
405 def __check_response(
406 response: requests.Response, url: str, file_elements: Optional[FILE_ELEMENTS_TYPE]
407 ) -> None:
408 if response.status_code != 200:
--> 409 raise __parse_server_exception(response, url, file_elements=file_elements)
410 elif (
411 "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip"
412 ):
413 logging.warning("Received uncompressed content from OpenML for {}.".format(url))
OpenMLServerException: https://www.openml.org/api/v1/xml/data/qualities/202 returned code 362: No qualities found - None
Versions
Linux-5.10.0-26-amd64-x86_64-with-glibc2.31 Python 3.9.2 (default, Feb 28 2021, 17:03:44) [GCC 10.2.1 20210110] NumPy 1.26.2 SciPy 1.11.4 Scikit-Learn 1.3.2 OpenML 0.14.1
We'll look into that; at the very least, the error message should be nicer. As a workaround, for now you can opt out of downloading qualities altogether (if you do not need them):
import openml
dataset = openml.datasets.get_dataset(202, download_data=False, download_qualities=False)