GPU and multiprocessing error with flask
Hi,
I'm trying to use a spaCy model with GPU + Flask/Gunicorn, but I run into trouble whenever any kind of multithreading or multiprocessing is involved.
Is there a way to run spaCy sequentially inside a multithreaded environment? I want calls to the spaCy model to run one at a time, while the other Flask functions keep running concurrently.
Sample app.py
import argparse
from flask import Flask, request, jsonify, make_response
import torch
from multiprocessing import Lock
import spacy
import torch
import json
import os
# Flask application object; the route below is registered on it.
app = Flask(__name__)
# Ask spaCy for GPU execution before the pipeline is loaded.
# NOTE(review): this runs once at import time, in the importing thread/process only —
# confirm whether the setting also applies to request-handler threads and forked workers.
spacy.require_gpu()
# Load the pipeline once at import time so every request reuses the same object.
nlp = spacy.load('en_core_web_trf')
print("Ready to serve")
@app.route('/test/extractEntities', methods=['POST'])
def extractEntities():
    """Run entity extraction over two fixed sample sentences.

    Returns:
        A 200 response with mimetype ``application/json`` whose body is a
        JSON list holding one entry per sample text; each entry is a list
        of ``{'text': ..., 'label': ...}`` dicts, one per item in ``doc.ents``.
    """
    sample_texts = ['Paris is the capital of France', 'My best friends name is Moboko']
    # Lazily yields one Doc per input text, using the module-level pipeline.
    parsed = nlp.pipe(sample_texts, batch_size=10, n_process=1)
    found = [
        [{'text': ent.text, 'label': ent.label_} for ent in doc.ents]
        for doc in parsed
    ]
    payload = json.dumps(found, indent=2)
    reply = make_response(payload, 200)
    reply.mimetype = 'application/json'
    return reply
if __name__ == '__main__':
    # Script entry point: read the CLI switches, then start Flask's built-in server.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Both switches are plain on/off flags that default to off.
    for flag, description in (
        ('--debug_mode', 'bool for whether to run app in debug mode'),
        ('--threaded', 'bool for whether to run app with threading'),
    ):
        cli.add_argument(flag, action='store_true', default=False, help=description)
    # parse_known_args tolerates unrecognised options; the leftovers are discarded.
    options, _ = cli.parse_known_args()
    app.run(
        host='0.0.0.0',
        port=9190,
        debug=options.debug_mode,
        threaded=options.threaded,
    )
Flask
With threaded = False, calling extractEntities works normally. With threaded = True, the same call fails with the following error:
[2024-08-07 15:16:50,536] ERROR in app: Exception on /test/extractEntities [POST]
Traceback (most recent call last):
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 1473, in wsgi_app
response = self.full_dispatch_request()
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 882, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
File "/home/devops/MWX_HOME/products/deepnotes/meaningware.DeepNotes.learning/pin/src/test_app.py", line 29, in extractEntities
for d in docs:
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/language.py", line 1618, in pipe
for doc in docs:
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 245, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 55, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 55, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 245, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 73, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/pipeline/transformer.py", line 210, in pipe
preds = self.predict(batch)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/pipeline/transformer.py", line 242, in predict
return self.model.predict(docs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 334, in predict
return self._func(self, X, is_train=False)[0]
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/models/architectures.py", line 651, in transformer_model_forward
Y, backprop_layer = model.layers[0](docs, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/models/with_non_ws_tokens.py", line 72, in with_non_ws_tokens_forward
Y_no_ws, backprop_no_ws = inner(tokens, is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/models/with_strided_spans.py", line 108, in with_strided_spans_forward
output, bp = transformer(cast(TorchTransformerInT, batch), is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/layers/pytorchwrapper.py", line 225, in forward
Ytorch, torch_backprop = model.shims[0](Xtorch, is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/shims/pytorch.py", line 97, in __call__
return self.predict(inputs), lambda a: ...
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/shims/pytorch.py", line 115, in predict
outputs = self._model(*inputs.args, **inputs.kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/curated_transformers/models/curated_transformer.py", line 37, in forward
return self.curated_encoder.forward(input_ids, attention_mask, token_type_ids)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/curated_transformers/models/roberta/encoder.py", line 46, in forward
embeddings = self.embeddings(input_ids, token_type_ids, None)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/curated_transformers/models/roberta/embeddings.py", line 42, in forward
return self.inner(input_ids, token_type_ids, position_ids)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/curated_transformers/models/bert/embeddings.py", line 61, in forward
input_embeddings = self.word_embeddings(input_ids)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 163, in forward
return F.embedding(
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/torch/nn/functional.py", line 2237, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
Gunicorn
With Gunicorn, I'm using only 1 worker, but I get an error similar to the one here, which makes me suspect that it has something to do with multiprocessing.
gunicorn --bind 0.0.0.0:9190 --preload -w 1 wsgi:app
Error when calling extractEntities:
[2024-08-07 15:11:07,197] ERROR in app: Exception on /test/extractEntities [POST]
Traceback (most recent call last):
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 1473, in wsgi_app
response = self.full_dispatch_request()
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 882, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
File "/home/devops/MWX_HOME/products/deepnotes/meaningware.DeepNotes.learning/pin/src/test_app.py", line 27, in extractEntities
for d in docs:
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/language.py", line 1618, in pipe
for doc in docs:
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 245, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 55, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 55, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 245, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 73, in pipe
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1650, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy/util.py", line 1703, in _pipe
yield from proc.pipe(docs, **kwargs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/pipeline/transformer.py", line 210, in pipe
preds = self.predict(batch)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/pipeline/transformer.py", line 242, in predict
return self.model.predict(docs)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 334, in predict
return self._func(self, X, is_train=False)[0]
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/models/architectures.py", line 651, in transformer_model_forward
Y, backprop_layer = model.layers[0](docs, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/models/with_non_ws_tokens.py", line 72, in with_non_ws_tokens_forward
Y_no_ws, backprop_no_ws = inner(tokens, is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/model.py", line 310, in __call__
return self._func(self, X, is_train=is_train)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/spacy_curated_transformers/tokenization/bbpe_encoder.py", line 92, in byte_bpe_encoder_forward
model.ops.asarray1i(doc_pieces),
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/backends/ops.py", line 710, in asarray1i
return cast(Ints1d, self.asarray(data, dtype=dtype))
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/thinc/backends/cupy_ops.py", line 97, in asarray
array = self.xp.array(data, dtype=dtype)
File "/home/devops/.venv/pytorch/lib/python3.10/site-packages/cupy/_creation/from_data.py", line 46, in array
return _core.array(obj, dtype, copy, order, subok, ndmin)
File "cupy/_core/core.pyx", line 2376, in cupy._core.core.array
File "cupy/_core/core.pyx", line 2400, in cupy._core.core.array
File "cupy/_core/core.pyx", line 2531, in cupy._core.core._array_default
File "cupy/_core/core.pyx", line 132, in cupy._core.core.ndarray.__new__
File "cupy/_core/core.pyx", line 220, in cupy._core.core._ndarray_base._init
File "cupy/cuda/memory.pyx", line 740, in cupy.cuda.memory.alloc
File "cupy/cuda/memory.pyx", line 1426, in cupy.cuda.memory.MemoryPool.malloc
File "cupy/cuda/memory.pyx", line 1446, in cupy.cuda.memory.MemoryPool.malloc
File "cupy/cuda/device.pyx", line 47, in cupy.cuda.device.get_device_id
File "cupy_backends/cuda/api/runtime.pyx", line 179, in cupy_backends.cuda.api.runtime.getDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInitializationError: initialization error
Is there any workaround for this?
What version of spaCy do you have? Have you ever tried it with FastAPI? I'll try to reproduce it.
Maybe you should set the start method before starting any multiprocessing: `import multiprocessing as mp; mp.set_start_method('spawn')`