Segfault with pulsar 2.9.1 and node 16.13.2
A segfault occurs when attempting to connect to a local standalone Pulsar container using token-based auth. Versions: pulsar-client-node 1.5.0, local Pulsar 2.9.1, node 16.13.2 / npm 8.3.0. Backtrace:
#0 0x00007fffcf6ebf10 in std::__atomic_base<long>::operator++ (this=0xb8) at /usr/include/c++/6/bits/atomic_base.h:296
#1 boost::asio::detail::task_io_service::work_started (this=0x0) at /usr/local/include/boost/asio/detail/task_io_service.hpp:81
#2 boost::asio::detail::resolver_service_base::start_resolve_op (op=<optimized out>, this=0x7fffc0002458) at /usr/local/include/boost/asio/detail/impl/resolver_service_base.ipp:112
#3 boost::asio::detail::resolver_service<boost::asio::ip::tcp>::async_resolve<std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)> >(std::shared_ptr<void>&, boost::asio::ip::basic_resolver_query<boost::asio::ip::tcp> const&, std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)>&) (this=0x7fffc0002458,
impl=..., query=..., handler=...) at /usr/local/include/boost/asio/detail/resolver_service.hpp:86
#4 0x00007fffcf6cff9b in boost::asio::ip::resolver_service<boost::asio::ip::tcp>::async_resolve<std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)> >(std::shared_ptr<void>&, boost::asio::ip::basic_resolver_query<boost::asio::ip::tcp> const&, std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)>&&) (
handler=..., query=..., impl=..., this=<optimized out>) at /usr/local/include/boost/asio/ip/resolver_service.hpp:127
#5 boost::asio::ip::basic_resolver<boost::asio::ip::tcp, boost::asio::ip::resolver_service<boost::asio::ip::tcp> >::async_resolve<std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)> >(boost::asio::ip::basic_resolver_query<boost::asio::ip::tcp> const&, std::_Bind<std::_Mem_fn<void (pulsar::ClientConnection::*)(boost::system::error_code const&, boost::asio::ip::basic_resolver_iterator<boost::asio::ip::tcp>)> (std::shared_ptr<pulsar::ClientConnection>, std::_Placeholder<1>, std::_Placeholder<2>)>&&) (handler=..., q=..., this=<optimized out>) at /usr/local/include/boost/asio/ip/basic_resolver.hpp:167
#6 pulsar::ClientConnection::tcpConnectAsync (this=this@entry=0x7fffc00014d0) at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/ClientConnection.cc:524
#7 0x00007fffcf7298f7 in pulsar::ConnectionPool::getConnectionAsync (this=<optimized out>, logicalAddress="pulsar://127.0.0.1:16651", physicalAddress="pulsar://127.0.0.1:16651") at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/ConnectionPool.cc:103
#8 0x00007fffcf6c01eb in pulsar::BinaryProtoLookupService::getPartitionMetadataAsync (this=0x55555b996b70, topicName=std::shared_ptr<pulsar::TopicName> (use count 2, weak count 0) = {...}) at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/BinaryProtoLookupService.cc:76
#9 0x00007fffcf707672 in pulsar::ClientImpl::createProducerAsync(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, pulsar::ProducerConfiguration, std::function<void (pulsar::Result, pulsar::Producer)>) (this=<optimized out>, topic="public/default/test-topic", conf=..., callback=...)
at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/ClientImpl.cc:163
#10 0x00007fffcf6c8985 in pulsar::Client::createProducerAsync(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, pulsar::ProducerConfiguration, std::function<void (pulsar::Result, pulsar::Producer)>) (this=this@entry=0x55555bb626c0, topic="public/default/test-topic", conf=..., callback=...)
at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/Client.cc:65
#11 0x00007fffcf6c95af in pulsar::Client::createProducer (this=0x55555bb626c0, topic="public/default/test-topic", conf=..., producer=...) at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/Client.cc:53
#12 0x00007fffcf7e36bf in pulsar_client_create_producer (client=0x55555bb73640, topic=0x7fffc0000b20 "public/default/test-topic", conf=0x55555b107b30, c_producer=0x55555b1ca888) at /pulsar/pulsar-client-cpp/pkg/deb/BUILD/apache-pulsar-2.9.1-src/pulsar-client-cpp/lib/c/c_Client.cc:37
#13 0x00007fffd442cbec in ProducerNewInstanceWorker::Execute() () from /home/mattesch/repos/realtime/node_modules/pulsar-client/build/Release/Pulsar.node
#14 0x00007fffd44235ad in Napi::AsyncWorker::OnAsyncWorkExecute(napi_env__*, void*) () from /home/mattesch/repos/realtime/node_modules/pulsar-client/build/Release/Pulsar.node
#15 0x00005555562c3f5b in (anonymous namespace)::uvimpl::Work::DoThreadPoolWork (this=0x55555b0a26c0) at ../src/node_api.cc:1074
#16 0x00005555562c78d3 in node::ThreadPoolWork::ScheduleWork()::{lambda(uv_work_s*)#1}::operator()(uv_work_s*) const (__closure=0x0, req=0x55555b0a26f8) at ../src/threadpoolwork-inl.h:39
#17 0x00005555562c7907 in node::ThreadPoolWork::ScheduleWork()::{lambda(uv_work_s*)#1}::_FUN(uv_work_s*) () at ../src/threadpoolwork-inl.h:40
#18 0x00005555573c4b81 in uv__queue_work (w=0x55555b0a2750) at ../deps/uv/src/threadpool.c:319
#19 0x00005555573c42d0 in worker (arg=0x0) at ../deps/uv/src/threadpool.c:122
#20 0x00007ffff707a6db in start_thread (arg=0x7fffd7fff700) at pthread_create.c:463
#21 0x00007ffff6da3a3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
This is caused by a race condition. Currently nothing prevents you from closing the client while async operations are still in flight. Closing the client ends up killing all of the boost::asio executors. This means async workers like ProducerNewInstanceWorker can call into pulsar_client_create_producer on a client that has either already been closed or is about to be closed. By the time the call reaches an async function (in the trace above, ClientConnection::tcpConnectAsync invoking async_resolve, where the io_service pointer is null, this=0x0), the state of the executor is invalid because it has been closed prematurely. We have to avoid invoking methods that create resources from a closed client.
@Matt-Esch Could you provide the code to reproduce this issue? And how often does this issue occur?
It's quite trivial to reproduce if you queue up some operations on a producer and close the client immediately afterwards.
@Matt-Esch It would be helpful if you could share the sample code for reproduction...