pystatsd
pystatsd copied to clipboard
Reconnect to socket on exception
Hi there! We're using a fork of your library at Lyft, and some time ago we ran into an issue with the TCP socket, which is described below:
It turns out that the TCP socket connection can be closed after some time, and the current setup does not handle that well.
Traceback (most recent call last):
File "/code/etaonlinelearner/models/main/unified/eta_model_trainer_unified.py", line 305, in train
model = self._train(start_time)
File "/code/etaonlinelearner/models/main/unified/eta_model_trainer_unified.py", line 419, in _train
region_allow_list = self.metrics_helper.calculate_test_accuracy_metrics(
File "/code/etaonlinelearner/models/helpers/metrics_helper.py", line 189, in calculate_test_accuracy_metrics
regional_accuracy_metrics = self._calculate_regional_accuracy_metrics(
File "/code/etaonlinelearner/models/helpers/metrics_helper.py", line 313, in _calculate_regional_accuracy_metrics
return combined.groupby(self.region).apply(
File "/code/venvs/venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1567, in apply
result = self._python_apply_general(f, self._selected_obj)
File "/code/venvs/venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py", line 1629, in _python_apply_general
values, mutated = self.grouper.apply(f, data, self.axis)
File "/code/venvs/venv/lib/python3.8/site-packages/pandas/core/groupby/ops.py", line 839, in apply
res = f(group)
File "/code/etaonlinelearner/models/helpers/metrics_helper.py", line 326, in <lambda>
self.mae_improvement: self._mae_improvement(
File "/code/etaonlinelearner/models/helpers/metrics_helper.py", line 572, in _mae_improvement
self._log_accuracy_metrics(f"learner_{stats_prefix}", mae_learner, region)
File "/code/etaonlinelearner/models/helpers/metrics_helper.py", line 453, in _log_accuracy_metrics
self.statsd.incr(f"{stats_prefix}_counter", metric, tags=tags)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/stats.py", line 74, in incr
self._client.incr(self._p(stat), count, rate, tags=self._process_tags(tags, per_host))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 135, in incr
self._send_stat(stat, '%s|c' % count, rate, tags)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 159, in _send_stat
self._after(self._prepare(stat, value, rate, tags))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 177, in _after
self._send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 246, in _send
self._do_send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 251, in _do_send
self._sock.sendall(data.encode('ascii') + b'\n')
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/code/etaonlinelearner/models/main/unified/eta_model_trainer_unified.py", line 311, in train
statsd.incr("training_failed", tags=self.tags)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/stats.py", line 74, in incr
self._client.incr(self._p(stat), count, rate, tags=self._process_tags(tags, per_host))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 135, in incr
self._send_stat(stat, '%s|c' % count, rate, tags)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 159, in _send_stat
self._after(self._prepare(stat, value, rate, tags))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 177, in _after
self._send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 246, in _send
self._do_send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 251, in _do_send
self._sock.sendall(data.encode('ascii') + b'\n')
BrokenPipeError: [Errno 32] Broken pipe
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/code/venvs/venv/lib/python3.8/site-packages/lyftlearnclient/build_tools/model_image_tools.py", line 392, in train_with_model_hyperparameters
trained_model = model.train()
File "/code/etaonlinelearner/models/main/unified/eta_model_trainer_unified.py", line 315, in train
statsd.timing(
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/stats.py", line 70, in timing
self._client.timing(self._p(stat), delta, rate, tags=self._process_tags(tags, per_host))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 131, in timing
self._send_stat(stat, '%0.6f|ms' % delta, rate, tags)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 159, in _send_stat
self._after(self._prepare(stat, value, rate, tags))
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 177, in _after
self._send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 246, in _send
self._do_send(data)
File "/code/venvs/venv/lib/python3.8/site-packages/lyft_stats/client.py", line 251, in _do_send
self._sock.sendall(data.encode('ascii') + b'\n')
BrokenPipeError: [Errno 32] Broken pipe
The proposed solution is to reconnect to the socket when an exception occurs.
Hi @jsocol, I don't see an option to tag you as a reviewer, so I'm mentioning you here. Please take a look at the above issue and the proposed solution. We at Lyft have been using it for a while now and it fixed the issue for us, so I wanted to contribute it to your library so that the fix is available to everyone. Thanks in advance!