Comment 0 for bug 2029417

Revision history for this message
Joao Andre Simioni (jasimioni) wrote : After applying fix on LP#2027735 RPC Communication is failing

[Problem Description]

After applying the fixes proposed in LP#2027735 to MAAS 3.2.8 (taken from ppa:r00ta/maas-2027735), MAAS started to behave well, with the expected improved performance. However, after roughly 24 hours, provisioning of nodes started to fail, and the following traces were seen in the logs below:

rackd.log:
----------
2023-07-31 23:16:36 provisioningserver.rpc.clusterservice: [critical] Failed to contact region. (While requesting RPC info at http://10.217.0.11:5240/MAAS/, http://10.217.0.66:5240/MAAS/).
 Traceback (most recent call last):
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 460, in callback
     self._startRunCallbacks(result)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
     self._runCallbacks()
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
     current.result = callback(current.result, *args, **kw)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
     _inlineCallbacks(r, g, status)
 --- <exception caught here> ---
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1292, in _doUpdate
     eventloops, maas_url = yield self._get_rpc_info(urls)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
     raise config_exc
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
     eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
     current.result = callback(current.result, *args, **kw)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
     errors[0].raiseException()
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
     raise last_exc
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
     response = yield self._fetch_rpc_info(url, orig_url)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
     result = result.throwExceptionIntoGenerator(g)
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
     return g.throw(self.type, self.value, self.tb)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
     raise config_exc
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
     eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
     current.result = callback(current.result, *args, **kw)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
     errors[0].raiseException()
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
     result = result.throwExceptionIntoGenerator(g)
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
     return g.throw(self.type, self.value, self.tb)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
     raise last_exc
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
     response = yield self._fetch_rpc_info(url, orig_url)
 twisted.internet.error.ConnectingCancelledError: HostnameAddress(hostname=b'10.217.0.11', port=5240)

2023-07-31 23:16:36 provisioningserver.rpc.common: [debug] [RPC -> sent] AmpBox({b'_command': b'Ping'})

regiond.log:
------------
2023-07-31 23:17:23 maasserver.dhcp: [critical] Error configuring DHCPv6 on rack controller 'pdx01-m01-c34-cpu-01 (xfhrbn)': unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
 Traceback (most recent call last):
   File "/usr/lib/python3/dist-packages/provisioningserver/prometheus/utils.py", line 127, in wrapper
     result = func(*args, **kwargs)
   File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 127, in wrapper
     return func(*args, **kwargs)
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 176, in __call__
     return deferWithTimeout(
   File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 325, in deferWithTimeout
     d = maybeDeferred(func, *args, **kwargs)
 --- <exception caught here> ---
   File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 898, in configure_dhcp
     yield client(
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 151, in maybeDeferred
     result = f(*args, **kw)
   File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 971, in callRemote
     return co._doCommand(self)
   File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2000, in _doCommand
     d = proto._sendBoxCommand(self.commandName,
   File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 261, in _sendBoxCommand
     return super()._sendBoxCommand(
   File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 902, in _sendBoxCommand
     box._sendTo(self.boxSender)
   File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 723, in _sendTo
     proto.sendBox(self)
   File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2386, in sendBox
     self.transport.write(box.serialize())
   File "/usr/lib/python3/dist-packages/twisted/internet/_newtls.py", line 191, in write
     FileDescriptor.write(self, bytes)
   File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 356, in write
     self.startWriting()
   File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 443, in startWriting
     self.reactor.addWriter(self)
   File "/usr/lib/python3/dist-packages/twisted/internet/asyncioreactor.py", line 173, in addWriter
     self._asyncioEventloop.add_writer(fd, callWithLogger, writer,
   File "uvloop/loop.pyx", line 2399, in uvloop.loop.Loop.add_writer

   File "uvloop/loop.pyx", line 808, in uvloop.loop.Loop._add_writer

   File "uvloop/handles/poll.pyx", line 122, in uvloop.loop.UVPoll.start_writing

   File "uvloop/handles/poll.pyx", line 39, in uvloop.loop.UVPoll._poll_start

   File "uvloop/handles/handle.pyx", line 159, in uvloop.loop.UVHandle._ensure_alive

 builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed

2023-07-31 23:17:23 maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:12'.
 Traceback (most recent call last):
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
     _inlineCallbacks(r, g, status)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1464, in _inlineCallbacks
     status.deferred.errback()
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 501, in errback
     self._startRunCallbacks(fail)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
     self._runCallbacks()
 --- <exception caught here> ---
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
     current.result = callback(current.result, *args, **kw)
   File "/usr/lib/python3/dist-packages/maasserver/rack_controller.py", line 281, in <lambda>
     d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 439, in trap
     self.raiseException()
   File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
     raise self.value.with_traceback(self.tb)
   File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
     result = g.send(result)
   File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 951, in configure_dhcp
     raise ipv4_exc
   File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 869, in configure_dhcp
     yield client(
 builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed

Ubuntu version: 20.04
MAAS: 3.2.99 (Interim version from PPA)
Format: Debian
PostgreSQL 12