openfl
openfl copied to clipboard
[Windows] Unsuccessful TensorSliceReader constructor
Describe the bug: While running a Director-based workflow on Windows, an error is raised while unpickling the Keras model. This is a known issue: https://github.com/keras-team/tf-keras/issues/87
Steps To Reproduce
I ran the Director-based workflow from the windows-ci branch, using the following files:
tests/github/interactive_api_director/experiments/tensorflow_mnist/run.py
from pathlib import Path
from tests.github.interactive_api_director import utils
if __name__ == '__main__':
    # The director and envoy are each started from a sub-directory that sits
    # next to this script.
    here = Path(__file__).parent
    director = utils.start_director(here / 'director')
    envoy = utils.start_envoy(here / 'envoy')

    # The experiment module is imported only after both services are started.
    # NOTE(review): this script lives under experiments/tensorflow_mnist but
    # imports the pytorch_kvasir_unet experiment -- confirm this is intended.
    from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet import experiment
    experiment.run()
tests/github/interactive_api_director/utils.py
import subprocess
import time
import sys
def start_director(cwd):
    """Start ``fx director start`` through the shell and return its process.

    The command runs with TLS disabled and reads ``director_config.yaml``
    relative to *cwd*.

    Args:
        cwd: Working directory in which the director command is launched.

    Returns:
        The ``subprocess.Popen`` handle of the director process that is still
        running after the start-up wait.

    Raises:
        SystemExit: With status 1 if the process has already exited when it
            is polled after the wait.
    """
    command = 'fx director start --disable-tls -c director_config.yaml'
    process = subprocess.Popen(command, cwd=cwd, shell=True)

    # Fixed pause before polling; a process that has exited by then is
    # reported as a failed start.
    time.sleep(3)
    if process.poll() is None:
        return process

    print('Error: failed to create director')
    sys.exit(1)
def start_envoy(cwd):
    """Install the envoy requirements, start ``fx envoy start``, return it.

    ``sd_requirements.txt`` (resolved relative to *cwd*) is first installed
    with pip using the current interpreter. The envoy is then launched
    through the shell under the name ``env_one`` with TLS disabled, reading
    ``envoy_config.yaml`` and pointing at a director on ``localhost:50051``.

    Args:
        cwd: Working directory for both the pip install and the envoy command.

    Returns:
        The ``subprocess.Popen`` handle of the envoy process that is still
        running after the start-up wait.

    Raises:
        subprocess.CalledProcessError: If the pip install exits with a
            non-zero status.
        SystemExit: With status 1 if the envoy process has already exited
            when it is polled after the wait.
    """
    pip_install = [sys.executable, '-m', 'pip', 'install', '-r', 'sd_requirements.txt']
    subprocess.check_call(pip_install, cwd=cwd)

    command = (
        'fx envoy start -n env_one --disable-tls '
        '--envoy-config-path envoy_config.yaml -dh localhost -dp 50051'
    )
    process = subprocess.Popen(command, cwd=cwd, shell=True)

    # Fixed pause before polling; a process that has exited by then is
    # reported as a failed start.
    time.sleep(10)
    if process.poll() is None:
        return process

    print('Error: failed to create envoy')
    sys.exit(1)
import tensorflow as tf
import cloudpickle
# Minimal reproduction: build and compile a tiny Keras model, pickle it to
# disk with cloudpickle, then load it back.
# The Sequential class lives under tf.keras; there is no top-level
# tf.Sequential, so the original spelling raises AttributeError before the
# pickling steps are ever reached.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
model.compile(optimizer='sgd', loss='mse')

with open('file.pkl', 'wb') as f:
    cloudpickle.dump(model, f)

with open('file.pkl', 'rb') as f:
    # Per the report, unpickling is the step that fails on Windows.
    cloudpickle.load(f)
Desktop:
- OS: Windows 11
- Python 3.10, Python 3.8
- TensorFlow 2.8
The error itself:
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "D:\a\openfl\openfl\tests\github\interactive_api_director\experiments\tensorflow_mnist\run.py", line 49, in <module>
main()
File "D:\a\openfl\openfl\tests\github\interactive_api_director\experiments\tensorflow_mnist\run.py", line 41, in main
experiment.run()
File "D:\a\openfl\openfl\tests\github\interactive_api_director\experiments\tensorflow_mnist\experiment.py", line 125, in run
best_model = fl_experiment.get_best_model()
File "D:\a\openfl\openfl\openfl\interface\interactive_api\experiment.py", line 107, in get_best_model
return self._rebuild_model(tensor_dict, upcoming_model_status=ModelStatus.BEST)
File "D:\a\openfl\openfl\openfl\interface\interactive_api\experiment.py", line 137, in _rebuild_model
return deepcopy(self.task_runner_stub.model)
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\copy.py", line 153, in deepcopy
y = copier(memo)
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\site-packages\keras\engine\training.py", line 328, in __deepcopy__
new = pickle_utils.deserialize_model_from_bytecode(
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\site-packages\keras\saving\pickle_utils.py", line 48, in deserialize_model_from_bytecode
model = save_module.load_model(temp_dir)
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\hostedtoolcache\windows\Python\3.8.10\x64\lib\site-packages\tensorflow\python\saved_model\load.py", line 977, in load_internal
raise FileNotFoundError(
FileNotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for ram://d6aec716-7bc3-4fe2-ad0d-6183a87fbe75/variables/variables
You may be trying to load on a different device from the computational device. Consider setting the `experimental_io_device` option in `tf.saved_model.LoadOptions` to the io_device such as '/job:localhost'.
dill also does not work?
dill also does not work?
@igor-davidyuk No, it does not. Raises the same error.