nvidia-docker
nvidia-docker copied to clipboard
Cannot kill container running with failed GPU
1. Issue or feature description
I have a container running with nvidia-docker.
The GPU linked to the container entered some error state that caused it to drop off the bus.
The container is still alive, and docker kill simply hangs forever and fails to kill the container.
The same is true of calling into the docker API directly.
2. Steps to reproduce the issue
- Run a container linked to a GPU
- Make the GPU drop off the bus
3. Information to attach (optional if deemed irrelevant)
- [x] Some nvidia-container information:
nvidia-container-cli -k -d /dev/tty info
I1005 15:16:32.476111 2560705 nvc.c:372] initializing library context (version=1.4.0, build=704a698b7a0ceec07a48e56c37365c741718c2df)
I1005 15:16:32.476155 2560705 nvc.c:346] using root /
I1005 15:16:32.476163 2560705 nvc.c:347] using ldcache /etc/ld.so.cache
I1005 15:16:32.476168 2560705 nvc.c:348] using unprivileged user 1000:1000
I1005 15:16:32.476191 2560705 nvc.c:389] attempting to load dxcore to see if we are running under Windows Subsystem for Linux (WSL)
I1005 15:16:32.476302 2560705 nvc.c:391] dxcore initialization failed, continuing assuming a non-WSL environment
W1005 15:16:32.478854 2560706 nvc.c:269] failed to set inheritable capabilities
W1005 15:16:32.478914 2560706 nvc.c:270] skipping kernel modules load due to failure
I1005 15:16:32.479156 2560707 driver.c:101] starting driver service
I1005 15:16:32.481821 2560705 nvc_info.c:676] requesting driver information with ''
I1005 15:16:32.482808 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvoptix.so.465.19.01
I1005 15:16:32.482854 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.465.19.01
I1005 15:16:32.482884 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so.465.19.01
I1005 15:16:32.482915 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.465.19.01
I1005 15:16:32.482955 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.465.19.01
I1005 15:16:32.482992 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.465.19.01
I1005 15:16:32.483025 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-ngx.so.465.19.01
I1005 15:16:32.483054 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.465.19.01
I1005 15:16:32.483091 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-ifr.so.465.19.01
I1005 15:16:32.483134 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.465.19.01
I1005 15:16:32.483161 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-glsi.so.465.19.01
I1005 15:16:32.483188 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.465.19.01
I1005 15:16:32.483215 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-fbc.so.465.19.01
I1005 15:16:32.483250 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-encode.so.465.19.01
I1005 15:16:32.483285 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so.465.19.01
I1005 15:16:32.483312 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-compiler.so.465.19.01
I1005 15:16:32.483342 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.465.19.01
I1005 15:16:32.483382 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-cbl.so.465.19.01
I1005 15:16:32.483412 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.465.19.01
I1005 15:16:32.483451 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libnvcuvid.so.465.19.01
I1005 15:16:32.483664 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libcuda.so.465.19.01
I1005 15:16:32.483791 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libGLX_nvidia.so.465.19.01
I1005 15:16:32.483818 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libGLESv2_nvidia.so.465.19.01
I1005 15:16:32.483843 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libGLESv1_CM_nvidia.so.465.19.01
I1005 15:16:32.483870 2560705 nvc_info.c:169] selecting /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.465.19.01
I1005 15:16:32.483910 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-ptxjitcompiler.so.390.116
I1005 15:16:32.483937 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-opencl.so.390.116
I1005 15:16:32.483963 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-ml.so.390.116
I1005 15:16:32.483988 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-fbc.so.390.116
I1005 15:16:32.484012 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-fatbinaryloader.so.390.116
I1005 15:16:32.484035 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-encode.so.390.116
I1005 15:16:32.484059 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvidia-compiler.so.390.116
I1005 15:16:32.484084 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libnvcuvid.so.390.116
I1005 15:16:32.484119 2560705 nvc_info.c:171] skipping /usr/lib/i386-linux-gnu/libcuda.so.390.116
W1005 15:16:32.484136 2560705 nvc_info.c:350] missing library libnvidia-nscq.so
W1005 15:16:32.484141 2560705 nvc_info.c:350] missing library libnvidia-fatbinaryloader.so
W1005 15:16:32.484148 2560705 nvc_info.c:350] missing library libvdpau_nvidia.so
W1005 15:16:32.484155 2560705 nvc_info.c:354] missing compat32 library libnvidia-ml.so
W1005 15:16:32.484161 2560705 nvc_info.c:354] missing compat32 library libnvidia-cfg.so
W1005 15:16:32.484168 2560705 nvc_info.c:354] missing compat32 library libnvidia-nscq.so
W1005 15:16:32.484175 2560705 nvc_info.c:354] missing compat32 library libcuda.so
W1005 15:16:32.484181 2560705 nvc_info.c:354] missing compat32 library libnvidia-opencl.so
W1005 15:16:32.484186 2560705 nvc_info.c:354] missing compat32 library libnvidia-ptxjitcompiler.so
W1005 15:16:32.484191 2560705 nvc_info.c:354] missing compat32 library libnvidia-fatbinaryloader.so
W1005 15:16:32.484196 2560705 nvc_info.c:354] missing compat32 library libnvidia-allocator.so
W1005 15:16:32.484201 2560705 nvc_info.c:354] missing compat32 library libnvidia-compiler.so
W1005 15:16:32.484205 2560705 nvc_info.c:354] missing compat32 library libnvidia-ngx.so
W1005 15:16:32.484213 2560705 nvc_info.c:354] missing compat32 library libvdpau_nvidia.so
W1005 15:16:32.484220 2560705 nvc_info.c:354] missing compat32 library libnvidia-encode.so
W1005 15:16:32.484225 2560705 nvc_info.c:354] missing compat32 library libnvidia-opticalflow.so
W1005 15:16:32.484231 2560705 nvc_info.c:354] missing compat32 library libnvcuvid.so
W1005 15:16:32.484236 2560705 nvc_info.c:354] missing compat32 library libnvidia-eglcore.so
W1005 15:16:32.484245 2560705 nvc_info.c:354] missing compat32 library libnvidia-glcore.so
W1005 15:16:32.484249 2560705 nvc_info.c:354] missing compat32 library libnvidia-tls.so
W1005 15:16:32.484256 2560705 nvc_info.c:354] missing compat32 library libnvidia-glsi.so
W1005 15:16:32.484261 2560705 nvc_info.c:354] missing compat32 library libnvidia-fbc.so
W1005 15:16:32.484268 2560705 nvc_info.c:354] missing compat32 library libnvidia-ifr.so
W1005 15:16:32.484273 2560705 nvc_info.c:354] missing compat32 library libnvidia-rtcore.so
W1005 15:16:32.484279 2560705 nvc_info.c:354] missing compat32 library libnvoptix.so
W1005 15:16:32.484287 2560705 nvc_info.c:354] missing compat32 library libGLX_nvidia.so
W1005 15:16:32.484291 2560705 nvc_info.c:354] missing compat32 library libEGL_nvidia.so
W1005 15:16:32.484299 2560705 nvc_info.c:354] missing compat32 library libGLESv2_nvidia.so
W1005 15:16:32.484304 2560705 nvc_info.c:354] missing compat32 library libGLESv1_CM_nvidia.so
W1005 15:16:32.484311 2560705 nvc_info.c:354] missing compat32 library libnvidia-glvkspirv.so
W1005 15:16:32.484315 2560705 nvc_info.c:354] missing compat32 library libnvidia-cbl.so
I1005 15:16:32.484709 2560705 nvc_info.c:276] selecting /usr/bin/nvidia-smi
I1005 15:16:32.484725 2560705 nvc_info.c:276] selecting /usr/bin/nvidia-debugdump
I1005 15:16:32.484740 2560705 nvc_info.c:276] selecting /usr/bin/nvidia-persistenced
I1005 15:16:32.484763 2560705 nvc_info.c:276] selecting /usr/bin/nvidia-cuda-mps-control
I1005 15:16:32.484775 2560705 nvc_info.c:276] selecting /usr/bin/nvidia-cuda-mps-server
W1005 15:16:32.484835 2560705 nvc_info.c:376] missing binary nv-fabricmanager
I1005 15:16:32.484853 2560705 nvc_info.c:438] listing device /dev/nvidiactl
I1005 15:16:32.484857 2560705 nvc_info.c:438] listing device /dev/nvidia-uvm
I1005 15:16:32.484864 2560705 nvc_info.c:438] listing device /dev/nvidia-uvm-tools
I1005 15:16:32.484869 2560705 nvc_info.c:438] listing device /dev/nvidia-modeset
I1005 15:16:32.484893 2560705 nvc_info.c:317] listing ipc /run/nvidia-persistenced/socket
W1005 15:16:32.484911 2560705 nvc_info.c:321] missing ipc /var/run/nvidia-fabricmanager/socket
W1005 15:16:32.484922 2560705 nvc_info.c:321] missing ipc /tmp/nvidia-mps
I1005 15:16:32.484926 2560705 nvc_info.c:733] requesting device information with ''
nvidia-container-cli: detection error: nvml error: unknown error
I1005 15:16:32.485234 2560705 nvc.c:423] shutting down library context
I1005 15:16:32.486002 2560707 driver.c:163] terminating driver service
I1005 15:16:32.486418 2560705 driver.c:203] driver service terminated successfully
- [x] Kernel version from
uname -a
Linux Nexus 5.4.0-86-generic #97-Ubuntu SMP Fri Sep 17 19:19:40 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
- [x] Any relevant kernel output lines from
dmesg
[1084456.074554] NVRM: GPU at PCI:0000:09:00: GPU-26fba6b2-9e52-006e-68db-4ce18382a5ea
[1084456.074559] NVRM: GPU Board Serial Number:
[1084456.074562] NVRM: Xid (PCI:0000:09:00): 79, pid=0, GPU has fallen off the bus.
[1084456.074566] NVRM: GPU 0000:09:00.0: GPU has fallen off the bus.
[1084456.074567] NVRM: GPU 0000:09:00.0: GPU is on Board .
[1084456.074591] NVRM: A GPU crash dump has been created. If possible, please run
NVRM: nvidia-bug-report.sh as root to collect this data before
NVRM: the NVIDIA kernel module is unloaded.
[1084456.746765] nvidia-gpu 0000:09:00.3: Refused to change power state, currently in D3
[1084456.807851] xhci_hcd 0000:09:00.2: Refused to change power state, currently in D3
[1084456.886514] xhci_hcd 0000:09:00.2: Refused to change power state, currently in D3
[1084456.886523] xhci_hcd 0000:09:00.2: Controller not ready at resume -19
[1084456.886524] xhci_hcd 0000:09:00.2: PCI post-resume error -19!
[1084456.886527] xhci_hcd 0000:09:00.2: HC died; cleaning up
[1084457.868993] nvidia-gpu 0000:09:00.3: i2c timeout error ffffffff
[1084457.868999] ucsi_ccg 1-0008: i2c_transfer failed -110
[1086487.613065] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1086487.613071] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID)
[1086487.613074] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00000040/00006000
[1086487.613075] pcieport 0000:00:01.1: AER: [ 6] BadTLP
[1086886.297985] br-c0ec5dd4a4d1: port 2(veth372490a) entered disabled state
[1086886.298302] veth62eab79: renamed from eth0
[1086886.378764] br-c0ec5dd4a4d1: port 2(veth372490a) entered disabled state
[1086886.424116] device veth372490a left promiscuous mode
[1086886.424122] br-c0ec5dd4a4d1: port 2(veth372490a) entered disabled state
[1088629.432059] br-c0ec5dd4a4d1: port 4(vetha0beed6) entered disabled state
[1088629.432189] veth6805632: renamed from eth0
[1088629.488366] br-c0ec5dd4a4d1: port 4(vetha0beed6) entered disabled state
[1088629.511279] device vetha0beed6 left promiscuous mode
[1088629.511282] br-c0ec5dd4a4d1: port 4(vetha0beed6) entered disabled state
[1089057.661775] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1089057.661783] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID)
[1089057.661788] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00000040/00006000
[1089057.661791] pcieport 0000:00:01.1: AER: [ 6] BadTLP
[1091659.501703] br-c0ec5dd4a4d1: port 6(veth9e3fed1) entered disabled state
[1091659.501884] vethfb23d27: renamed from eth0
[1091659.565201] br-c0ec5dd4a4d1: port 6(veth9e3fed1) entered disabled state
[1091659.585728] device veth9e3fed1 left promiscuous mode
[1091659.585736] br-c0ec5dd4a4d1: port 6(veth9e3fed1) entered disabled state
[1091865.568618] br-c0ec5dd4a4d1: port 8(vethd911b0c) entered disabled state
[1091865.568908] veth0dc21df: renamed from eth0
[1091865.618992] br-c0ec5dd4a4d1: port 8(vethd911b0c) entered disabled state
[1091865.637512] device vethd911b0c left promiscuous mode
[1091865.637516] br-c0ec5dd4a4d1: port 8(vethd911b0c) entered disabled state
[1094086.994722] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1094086.994727] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID)
[1094086.994732] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00000040/00006000
[1094086.994734] pcieport 0000:00:01.1: AER: [ 6] BadTLP
[1096065.301987] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1096065.301994] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
[1096065.301999] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00001000/00006000
[1096065.302002] pcieport 0000:00:01.1: AER: [12] Timeout
[1100712.135957] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1100712.135963] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
[1100712.135968] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00001000/00006000
[1100712.135971] pcieport 0000:00:01.1: AER: [12] Timeout
[1100747.816310] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1100747.816316] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
[1100747.816321] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00001000/00006000
[1100747.816324] pcieport 0000:00:01.1: AER: [12] Timeout
[1104707.662123] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1104707.662129] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
[1104707.662133] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00001000/00006000
[1104707.662136] pcieport 0000:00:01.1: AER: [12] Timeout
[1106160.329977] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1106160.329983] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID)
[1106160.329987] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00000080/00006000
[1106160.329990] pcieport 0000:00:01.1: AER: [ 7] BadDLLP
[1109655.621167] pcieport 0000:00:01.1: AER: Corrected error received: 0000:00:00.0
[1109655.621173] pcieport 0000:00:01.1: AER: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Transmitter ID)
[1109655.621178] pcieport 0000:00:01.1: AER: device [1022:1453] error status/mask=00001000/00006000
[1109655.621181] pcieport 0000:00:01.1: AER: [12] Timeout
- [x] Driver information from
nvidia-smi -a
nvidia-smi -a -i 0
Unable to determine the device handle for GPU 0000:09:00.0: Unknown Error
nvidia-smi -a -i 1
==============NVSMI LOG==============
Timestamp : Tue Oct 5 08:18:21 2021
Driver Version : 465.19.01
CUDA Version : 11.3
Attached GPUs : 3
GPU 00000000:0A:00.0
Product Name : NVIDIA GeForce RTX 2080 Ti
Product Brand : GeForce
Display Mode : Disabled
Display Active : Disabled
Persistence Mode : Enabled
MIG Mode
Current : N/A
Pending : N/A
Accounting Mode : Disabled
Accounting Mode Buffer Size : 4000
Driver Model
Current : N/A
Pending : N/A
Serial Number : N/A
GPU UUID : GPU-65a4f66e-21ab-9843-1dfd-36cde5b6417f
Minor Number : 1
VBIOS Version : 90.02.17.00.8E
MultiGPU Board : No
Board ID : 0xa00
GPU Part Number : N/A
Inforom Version
Image Version : G001.0000.02.04
OEM Object : 1.1
ECC Object : N/A
Power Management Object : N/A
GPU Operation Mode
Current : N/A
Pending : N/A
GPU Virtualization Mode
Virtualization Mode : None
Host VGPU Mode : N/A
IBMNPU
Relaxed Ordering Mode : N/A
PCI
Bus : 0x0A
Device : 0x00
Domain : 0x0000
Device Id : 0x1E0410DE
Bus Id : 00000000:0A:00.0
Sub System Id : 0x22813842
GPU Link Info
PCIe Generation
Max : 3
Current : 1
Link Width
Max : 16x
Current : 16x
Bridge Chip
Type : N/A
Firmware : N/A
Replays Since Reset : 0
Replay Number Rollovers : 0
Tx Throughput : 0 KB/s
Rx Throughput : 0 KB/s
Fan Speed : 0 %
Performance State : P8
Clocks Throttle Reasons
Idle : Active
Applications Clocks Setting : Not Active
SW Power Cap : Not Active
HW Slowdown : Not Active
HW Thermal Slowdown : Not Active
HW Power Brake Slowdown : Not Active
Sync Boost : Not Active
SW Thermal Slowdown : Not Active
Display Clock Setting : Not Active
FB Memory Usage
Total : 11019 MiB
Used : 10 MiB
Free : 11009 MiB
BAR1 Memory Usage
Total : 256 MiB
Used : 4 MiB
Free : 252 MiB
Compute Mode : Default
Utilization
Gpu : 0 %
Memory : 0 %
Encoder : 0 %
Decoder : 0 %
Encoder Stats
Active Sessions : 0
Average FPS : 0
Average Latency : 0
FBC Stats
Active Sessions : 0
Average FPS : 0
Average Latency : 0
Ecc Mode
Current : N/A
Pending : N/A
ECC Errors
Volatile
SRAM Correctable : N/A
SRAM Uncorrectable : N/A
DRAM Correctable : N/A
DRAM Uncorrectable : N/A
Aggregate
SRAM Correctable : N/A
SRAM Uncorrectable : N/A
DRAM Correctable : N/A
DRAM Uncorrectable : N/A
Retired Pages
Single Bit ECC : N/A
Double Bit ECC : N/A
Pending Page Blacklist : N/A
Remapped Rows : N/A
Temperature
GPU Current Temp : 35 C
GPU Shutdown Temp : 94 C
GPU Slowdown Temp : 91 C
GPU Max Operating Temp : 89 C
GPU Target Temperature : 84 C
Memory Current Temp : N/A
Memory Max Operating Temp : N/A
Power Readings
Power Management : Supported
Power Draw : 2.35 W
Power Limit : 250.00 W
Default Power Limit : 250.00 W
Enforced Power Limit : 250.00 W
Min Power Limit : 100.00 W
Max Power Limit : 280.00 W
Clocks
Graphics : 300 MHz
SM : 300 MHz
Memory : 405 MHz
Video : 540 MHz
Applications Clocks
Graphics : N/A
Memory : N/A
Default Applications Clocks
Graphics : N/A
Memory : N/A
Max Clocks
Graphics : 2100 MHz
SM : 2100 MHz
Memory : 7000 MHz
Video : 1950 MHz
Max Customer Boost Clocks
Graphics : N/A
Clock Policy
Auto Boost : N/A
Auto Boost Default : N/A
Processes
GPU instance ID : N/A
Compute instance ID : N/A
Process ID : 2330
Type : G
Name : /usr/lib/xorg/Xorg
Used GPU Memory : 4 MiB
GPU instance ID : N/A
Compute instance ID : N/A
Process ID : 931071
Type : G
Name : /usr/lib/xorg/Xorg
Used GPU Memory : 4 MiB
- [x] Docker version from
docker version
Client: Docker Engine - Community
Version: 20.10.6
API version: 1.41
Go version: go1.13.15
Git commit: 370c289
Built: Fri Apr 9 22:46:01 2021
OS/Arch: linux/amd64
Context: default
Experimental: true
Server: Docker Engine - Community
Engine:
Version: 20.10.6
API version: 1.41 (minimum version 1.12)
Go version: go1.13.15
Git commit: 8728dd2
Built: Fri Apr 9 22:44:13 2021
OS/Arch: linux/amd64
Experimental: false
containerd:
Version: 1.4.4
GitCommit: 05f951a3781f4f2c1911b05e61c160e9c30eaa8e
runc:
Version: 1.0.0-rc93
GitCommit: 12644e614e25b05da6fd08a38ffa0cfe1903fdec
docker-init:
Version: 0.19.0
GitCommit: de40ad0
- [x] NVIDIA packages version from
dpkg -l '*nvidia*' or rpm -qa '*nvidia*'
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend
|/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad)
||/ Name Version Architecture Description
+++-================================-===========================-============-=========================================================
un libgldispatch0-nvidia <none> <none> (no description available)
ii libnvidia-cfg1-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA binary OpenGL/GLX configuration library
un libnvidia-cfg1-any <none> <none> (no description available)
un libnvidia-common <none> <none> (no description available)
ii libnvidia-common-465 465.19.01-0ubuntu1 all Shared files used by the NVIDIA libraries
rc libnvidia-compute-390:i386 390.116-0ubuntu0.18.04.1 i386 NVIDIA libcompute package
rc libnvidia-compute-418:amd64 430.64-0ubuntu0~gpu18.04.1 amd64 Transitional package for libnvidia-compute-430
un libnvidia-compute-430 <none> <none> (no description available)
rc libnvidia-compute-450:amd64 450.102.04-0ubuntu0.18.04.1 amd64 NVIDIA libcompute package
ii libnvidia-compute-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA libcompute package
ii libnvidia-container-tools 1.4.0-1 amd64 NVIDIA container runtime library (command-line tools)
ii libnvidia-container1:amd64 1.4.0-1 amd64 NVIDIA container runtime library
un libnvidia-decode <none> <none> (no description available)
ii libnvidia-decode-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA Video Decoding runtime libraries
un libnvidia-encode <none> <none> (no description available)
ii libnvidia-encode-465:amd64 465.19.01-0ubuntu1 amd64 NVENC Video Encoding runtime library
un libnvidia-extra <none> <none> (no description available)
ii libnvidia-extra-465:amd64 465.19.01-0ubuntu1 amd64 Extra libraries for the NVIDIA driver
un libnvidia-fbc1 <none> <none> (no description available)
ii libnvidia-fbc1-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA OpenGL-based Framebuffer Capture runtime library
un libnvidia-gl <none> <none> (no description available)
ii libnvidia-gl-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA OpenGL/GLX/EGL/GLES GLVND libraries and Vulkan ICD
un libnvidia-ifr1 <none> <none> (no description available)
ii libnvidia-ifr1-465:amd64 465.19.01-0ubuntu1 amd64 NVIDIA OpenGL-based Inband Frame Readback runtime library
un libnvidia-ml1 <none> <none> (no description available)
un nvidia-384 <none> <none> (no description available)
un nvidia-390 <none> <none> (no description available)
un nvidia-common <none> <none> (no description available)
ii nvidia-compute-utils-465 465.19.01-0ubuntu1 amd64 NVIDIA compute utilities
ii nvidia-container-runtime 3.5.0-1 amd64 NVIDIA container runtime
un nvidia-container-runtime-hook <none> <none> (no description available)
ii nvidia-container-toolkit 1.5.0-1 amd64 NVIDIA container runtime hook
ii nvidia-dkms-465 465.19.01-0ubuntu1 amd64 NVIDIA DKMS package
un nvidia-dkms-kernel <none> <none> (no description available)
un nvidia-docker <none> <none> (no description available)
ii nvidia-docker2 2.6.0-1 all nvidia-docker CLI wrapper
ii nvidia-driver-465 465.19.01-0ubuntu1 amd64 NVIDIA driver metapackage
un nvidia-driver-binary <none> <none> (no description available)
un nvidia-kernel-common <none> <none> (no description available)
ii nvidia-kernel-common-465 465.19.01-0ubuntu1 amd64 Shared files used with the kernel module
un nvidia-kernel-source <none> <none> (no description available)
ii nvidia-kernel-source-465 465.19.01-0ubuntu1 amd64 NVIDIA kernel source package
un nvidia-legacy-304xx-vdpau-driver <none> <none> (no description available)
un nvidia-legacy-340xx-vdpau-driver <none> <none> (no description available)
un nvidia-libopencl1-dev <none> <none> (no description available)
ii nvidia-modprobe 465.19.01-0ubuntu1 amd64 Load the NVIDIA kernel driver and create device files
un nvidia-opencl-icd <none> <none> (no description available)
un nvidia-persistenced <none> <none> (no description available)
ii nvidia-prime 0.8.16~0.20.04.1 all Tools to enable NVIDIA's Prime
ii nvidia-settings 470.57.01-0ubuntu0.20.04.1 amd64 Tool for configuring the NVIDIA graphics driver
un nvidia-settings-binary <none> <none> (no description available)
un nvidia-smi <none> <none> (no description available)
un nvidia-utils <none> <none> (no description available)
ii nvidia-utils-465 465.19.01-0ubuntu1 amd64 NVIDIA driver support binaries
un nvidia-vdpau-driver <none> <none> (no description available)
ii xserver-xorg-video-nvidia-465 465.19.01-0ubuntu1 amd64 NVIDIA binary Xorg driver
- [x] NVIDIA container library version from
nvidia-container-cli -V
version: 1.4.0
build date: 2021-04-24T14:25+00:00
build revision: 704a698b7a0ceec07a48e56c37365c741718c2df
build compiler: x86_64-linux-gnu-gcc-7 7.5.0
build platform: x86_64
build flags: -D_GNU_SOURCE -D_FORTIFY_SOURCE=2 -DNDEBUG -std=gnu11 -O2 -g -fdata-sections -ffunction-sections -fstack-protector -fno-strict-aliasing -fvisibility=hidden -Wall -Wextra -Wcast-align -Wpointer-arith -Wmissing-prototypes -Wnonnull -Wwrite-strings -Wlogical-op -Wformat=2 -Wmissing-format-attribute -Winit-self -Wshadow -Wstrict-prototypes -Wunreachable-code -Wconversion -Wsign-conversion -Wno-unknown-warning-option -Wno-format-extra-args -Wno-gnu-alignof-expression -Wl,-zrelro -Wl,-znow -Wl,-zdefs -Wl,--gc-sections
- [ ] NVIDIA container library logs (see troubleshooting)
- [x] Docker command, image and tag used
Output of docker inspect for the container. The image is based on nvcr.io/nvidia/pytorch:21.03-py3.
[
{
"Id": "957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de",
"Created": "2021-10-05T07:49:53.797531246Z",
"Path": "/usr/local/bin/nvidia_entrypoint.sh",
"Args": [
"python",
"fine_tune_step.py",
"--local=False"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 2230946,
"ExitCode": 0,
"Error": "",
"StartedAt": "2021-10-05T07:49:54.568132913Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:f181827ebc7ebedf053ff890ed7369e2d1e41d4bd6dba80008b51065e666bda7",
"ResolvConfPath": "/var/lib/docker/containers/957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de/hostname",
"HostsPath": "/var/lib/docker/containers/957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de/hosts",
"LogPath": "/var/lib/docker/containers/957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de/957d21a0cba38ebee1020b5e5b451456d80617816ee1e7d4f5d578793ecfc7de-json.log",
"Name": "/xprun.7375562f0b84409f86024c62a1520eb2.main-0",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "docker-default",
"ExecIDs": null,
"HostConfig": {
"Binds": [
"<REDACTED>",
],
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {}
},
"NetworkMode": "xprun.shared_network",
"PortBindings": {},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"CapAdd": null,
"CapDrop": null,
"CgroupnsMode": "host",
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "private",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": null,
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "runc",
"ConsoleSize": [
0,
0
],
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": [],
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [],
"DeviceCgroupRules": null,
"DeviceRequests": [
{
"Driver": "",
"Count": 0,
"DeviceIDs": [
"0"
],
"Capabilities": [
[
"gpu"
]
],
"Options": {}
}
],
"KernelMemory": 0,
"KernelMemoryTCP": 0,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": false,
"PidsLimit": null,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/docker/overlay2/6e7d6311a0d62485d5acf6e1917c765af7893d784a8f9771453f79a268bdb4f9-init/diff:/var/lib/docker/overlay2/5819376ffcbd175aa05b420b5b600a3aae0d41393a8ccda8b4a2fcf47c976f8b/diff:/var/lib/docker/overlay2/4b2f2c106888b3a2f87fa67fcd721f3eabe29610e48d83b948f58e0b8bf204ac/diff:/var/lib/docker/overlay2/13182d744181d3c22978107837991916fb3c01668db0dfe0a37fbbb4dca73cbf/diff:/var/lib/docker/overlay2/76f7ef2cbe3b183c85a850464837ba31aa248ad0ef811a5fe7bf909bb559723d/diff:/var/lib/docker/overlay2/605bd8d23c19c27771b9f93dabcaba4fccd27fed9a2a2b02a9dba69107b811d1/diff:/var/lib/docker/overlay2/c478a7669dd04bb346b2b6e500abbe23a9ef7719295269c45cb318e7f1e0db13/diff:/var/lib/docker/overlay2/77606d4a9115bb5bf8ac4c6c29f5ad46783363ce801da62d84666d8228b8503b/diff:/var/lib/docker/overlay2/a54cf4f6d1069d8fbabdb64ffa0712adc8b015422b2738b16b3ed9d6867ee14b/diff:/var/lib/docker/overlay2/1876bd7038d458a8933e6025015cf94fdb1e8cfe8e259145790ea7af5687f0f8/diff:/var/lib/docker/overlay2/b13eed5fea5f246facc38476c50bd620b72f56fb411f6668b35ae66da5367ec7/diff:/var/lib/docker/overlay2/89c2a01137487e5af4eabd263bab484aeb50433ad3a640f79a2c1d22f587c653/diff:/var/lib/docker/overlay2/c39394ccfcfb4898d339f560edc03634dbb5988f43526cffbf71a730f838b66f/diff:/var/lib/docker/overlay2/d497703527314f4ad9910998648f9cb7396600ec85159ad1288dbdd721de9edb/diff:/var/lib/docker/overlay2/140022f42fd75ca49556c9716dbac47e1a8344325025fb40eb88ea1bcc2bfb1a/diff:/var/lib/docker/overlay2/c939584fe61ca86fea89d9026e24f15b8497f47ae7772ac6e10dd0ce2c7da5d7/diff:/var/lib/docker/overlay2/a3711f1a6aa63075a598afccdead1e02da579e9a5feb04ab8de5b6ca6074b384/diff:/var/lib/docker/overlay2/3b3f0c59c38410c51a3fadc14f6206f6fa9d5523ccbbbdebfcba9eaf115efd1d/diff:/var/lib/docker/overlay2/32cb0f88346a99b047262fb4e5090afb6741322f028de3dcf1adb02597f5df04/diff:/var/lib/docker/overlay2/317b5e40d9b38a396231c682afdd4eec22930a5e8c9af06d1178216bbf0e2280/diff:/var/lib/docker/overlay2/829d67a87d73fc92b65de0e2c2b36d29b31d9b9849dee845fad94a130
8651970/diff:/var/lib/docker/overlay2/ac9b9381c4f7928826467198044b309dee6a5ca8bcc01eaf93734247b5b77190/diff:/var/lib/docker/overlay2/1848e60786659b383c164f2605c5d7166ffd7bd30f9b94f0d2a72188cd565bed/diff:/var/lib/docker/overlay2/29ac62636cf989d3693481b8d290cefd0176448ef6c66a90c2f3ff34d9768c9b/diff:/var/lib/docker/overlay2/2a0ce43d5282c882e79605da3ac219d1e5ed38b767804d463bcd72d2333decd1/diff:/var/lib/docker/overlay2/e1386c6f98e6270aecbe83d4f97bd56e0431c965ffa173d7f7d8a7c8e500e616/diff:/var/lib/docker/overlay2/e82b78226eb4af73b3bdc2d0f14bbfcbca964bed49ba502d5220f623a903cdba/diff:/var/lib/docker/overlay2/b55f797db7c9d94667e26db3a7b9168834907f8fc12e557a49727754952a4b74/diff:/var/lib/docker/overlay2/3c2d39aceaa118c786d8a255d9bc9bf8034efdd5b2c829c0515e0847fc81b470/diff:/var/lib/docker/overlay2/101780446db783454057727757fb8adb2dd4eacee36dad0debd51f671af43ef9/diff:/var/lib/docker/overlay2/249c7988e07ee63e832fe8a180473a430d1a6746e9c027f742fbb5683453073e/diff:/var/lib/docker/overlay2/df2d105cb8a620ed1928e603cc133d72f5b575b10b11ba8d042d8cab7fbb5b32/diff:/var/lib/docker/overlay2/7053f02fb03c07042c774f44e2d533e59a97bcfe051a0ad17a7264b836839db4/diff:/var/lib/docker/overlay2/8def9c2e62240b377727379bd0f372835cb7d7c34c4bcc87e2f462f3143d6c6b/diff:/var/lib/docker/overlay2/e1bd444ad3650fc209603f09f19555bfb290693867c0b5d065eb5db37462e842/diff:/var/lib/docker/overlay2/8b71a94b0888c4addb9dfd66ed052a87c664472094d690d90f2f2572cd5ab1f3/diff:/var/lib/docker/overlay2/069ca13fc4edaff07bd548dddee38c370b2f990c61e3b3be24ad3473b3c812c8/diff:/var/lib/docker/overlay2/b7f9451ba5d1725186885ae8ef2f52b69d298faa29b845c4e4d8a116eb22f5fb/diff:/var/lib/docker/overlay2/fd5f974595bb9d0029d24fbf8c9f07e3733bce26e26eac5475a0c4ab6e854084/diff:/var/lib/docker/overlay2/532482a57839062c2047083b039b85f6319d2ad9d5c2af7f07af14c96777308e/diff:/var/lib/docker/overlay2/279f4dde7b9855bbba3a8548ef5b8b58c2d1d4444dfacc51dcc34a715901da9c/diff:/var/lib/docker/overlay2/a8edb47beb68cbcdca077d5f3642e887ba790b45aed4079f6d2163a88fa200
4b/diff:/var/lib/docker/overlay2/f18c830ee40c843d7e354708b690654f1619c9b0be3f88ee588d1d28b895cbbc/diff:/var/lib/docker/overlay2/9e4dc0378f9a363c64343abdab944cf25c54d561ecdf392330f5084c25672293/diff:/var/lib/docker/overlay2/0acb192bc3e43e77ce4496553f8351257c259f2c5c8eae029f76ba82d6368fa2/diff:/var/lib/docker/overlay2/69608d804f3812b3c97297a93b5458ee7012346fd5b46c3c05c791f2d86be9d4/diff:/var/lib/docker/overlay2/1fe774e60c5014443325fdf9cf7597078ce09314a95ad395fdfc1317605e5d22/diff:/var/lib/docker/overlay2/6ee1c1194155f4a0a4b0f19ca8f0190594065ccaf9425753bb35ec902ae2268f/diff:/var/lib/docker/overlay2/998236703c0c89a69ee1c3083523af23d255c9fca355b413319be83518ff913e/diff:/var/lib/docker/overlay2/b9a9a8f7e058cf1c77727a02e15ebaa98630e75e2516f7639807a1059ee81b42/diff:/var/lib/docker/overlay2/becf6a7d5305b2ca446259ee2845b363b6f8d3fb801e8beaaaefea1ffd390f81/diff:/var/lib/docker/overlay2/2093f1a53f77f34522bf9c499c05a910cfcce07f4f5cc8f94082d8578e482593/diff:/var/lib/docker/overlay2/4ed1cbe01e7b2e6f8879f47e3918eab4d4f153bea46b55e164540014f2785841/diff:/var/lib/docker/overlay2/8b16232379ec4b65956e0b8e4bca41eb15509153545c2b75c01b86aa2b8e8439/diff:/var/lib/docker/overlay2/15db1d5353cf265ecfe9953894cceea2776215b42764ca380aad0980fe662b37/diff:/var/lib/docker/overlay2/17be0c5c6461c23354f06c8f119fdcae76639ac851e10e1594b4ced1d730e480/diff:/var/lib/docker/overlay2/fc06476921eeb57b02ff5ab236784a9b03ef61b4de80d4af94e77d7a7b26375c/diff:/var/lib/docker/overlay2/f04cf47c0a53c51d2c1ae5d235ceaf8d7b0caab5022f0824c71bbed720d861ff/diff:/var/lib/docker/overlay2/2beca2c21c44c7e0f7e94a622346d0886b1cc32307692f9bc8c8f2752f223c72/diff:/var/lib/docker/overlay2/69085137b5d5f0fd1942e49ea923ef64a917cffa6eef60b0f84f92eff3e9ba86/diff:/var/lib/docker/overlay2/9ecbfe92aca7a64c0cc852ea85575ea9640d6636ce3d07646ff793bfa3cba629/diff:/var/lib/docker/overlay2/8f767afbb5d24b2d55fd2f9f0271ddbc8423406ff66a7642a0e948eec71c3fc4/diff",
"MergedDir": "/var/lib/docker/overlay2/6e7d6311a0d62485d5acf6e1917c765af7893d784a8f9771453f79a268bdb4f9/merged",
"UpperDir": "/var/lib/docker/overlay2/6e7d6311a0d62485d5acf6e1917c765af7893d784a8f9771453f79a268bdb4f9/diff",
"WorkDir": "/var/lib/docker/overlay2/6e7d6311a0d62485d5acf6e1917c765af7893d784a8f9771453f79a268bdb4f9/work"
},
"Name": "overlay2"
},
"Mounts": [
"<REDACTED">,
],
"Config": {
"Hostname": "957d21a0cba3",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"ExposedPorts": {
"6006/tcp": {},
"8888/tcp": {}
},
"Tty": false,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"PATH=/opt/conda/bin:/opt/cmake-3.14.6-Linux-x86_64/bin/:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin",
"CUDA_VERSION=11.2.1.007",
"CUDA_DRIVER_VERSION=460.32.03",
"CUDA_CACHE_DISABLE=1",
"_CUDA_COMPAT_PATH=/usr/local/cuda/compat",
"ENV=/etc/shinit_v2",
"BASH_ENV=/etc/bash.bashrc",
"NVIDIA_REQUIRE_CUDA=cuda>=9.0",
"NCCL_VERSION=2.8.4",
"CUBLAS_VERSION=11.4.1.1026",
"CUFFT_VERSION=10.4.0.135",
"CURAND_VERSION=10.2.3.135",
"CUSPARSE_VERSION=11.4.0.135",
"CUSOLVER_VERSION=11.1.0.135",
"NPP_VERSION=11.3.2.139",
"NVJPEG_VERSION=11.4.0.135",
"CUDNN_VERSION=8.1.1.33",
"TRT_VERSION=7.2.2.3+cuda11.1.0.024",
"TRTOSS_VERSION=21.03",
"NSIGHT_SYSTEMS_VERSION=2020.4.3.7",
"NSIGHT_COMPUTE_VERSION=2020.3.1.3",
"DALI_VERSION=0.31.0",
"DALI_BUILD=2054952",
"DLPROF_VERSION=21.03",
"LD_LIBRARY_PATH=/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility,video",
"MOFED_VERSION=5.1-2.3.7",
"OPENUCX_VERSION=1.9.0",
"OPENMPI_VERSION=4.0.5",
"LIBRARY_PATH=/usr/local/cuda/lib64/stubs:",
"PYTORCH_BUILD_VERSION=1.9.0a0+df837d0",
"PYTORCH_VERSION=1.9.0a0+df837d0",
"PYTORCH_BUILD_NUMBER=0",
"NVIDIA_PYTORCH_VERSION=21.03",
"NVM_DIR=/usr/local/nvm",
"JUPYTER_PORT=8888",
"TENSORBOARD_PORT=6006",
"TORCH_CUDA_ARCH_LIST=5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX",
"COCOAPI_VERSION=2.0+nv0.4.0",
"PYTHONIOENCODING=utf-8",
"LC_ALL=C.UTF-8",
"NVIDIA_BUILD_ID=21060478"
],
"Cmd": [
"python",
"fine_tune_step.py",
"--local=False"
],
"Image": "xprun.main:631293ab8ab9943075a2a66c140eedf774583bc97accfadcfd16dae0eb1b3236",
"Volumes": null,
"WorkingDir": "/root/NNgamma",
"Entrypoint": [
"/usr/local/bin/nvidia_entrypoint.sh"
],
"OnBuild": null,
"Labels": {
"com.nvidia.build.id": "21060478",
"com.nvidia.build.ref": "b8c8e4e4a0105e697e84364fac28ae7d8024e4e5",
"com.nvidia.cublas.version": "11.4.1.1026",
"com.nvidia.cuda.version": "9.0",
"com.nvidia.cudnn.version": "8.1.1.33",
"com.nvidia.cufft.version": "10.4.0.135",
"com.nvidia.curand.version": "10.2.3.135",
"com.nvidia.cusolver.version": "11.1.0.135",
"com.nvidia.cusparse.version": "11.4.0.135",
"com.nvidia.nccl.version": "2.8.4",
"com.nvidia.npp.version": "11.3.2.139",
"com.nvidia.nsightcompute.version": "2020.3.1.3",
"com.nvidia.nsightsystems.version": "2020.4.3.7",
"com.nvidia.nvjpeg.version": "11.4.0.135",
"com.nvidia.pytorch.version": "1.9.0a0+df837d0",
"com.nvidia.tensorrt.version": "7.2.2.3+cuda11.1.0.024",
"com.nvidia.tensorrtoss.version": "21.03",
"com.nvidia.volumes.needed": "nvidia_driver"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "9b680677e35263117b3195ff6992c4e179c46e16d540db280756ce721f5884cb",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {
"6006/tcp": null,
"8888/tcp": null
},
"SandboxKey": "/var/run/docker/netns/9b680677e352",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"xprun.shared_network": {
"IPAMConfig": null,
"Links": null,
"Aliases": [
"957d21a0cba3"
],
"NetworkID": "c0ec5dd4a4d1918dcb2392b3ada1b5236a07198fa950d1a99e583c9fd9b4d2a6",
"EndpointID": "73076eb431e1484b1bff6d3e775b61b8e3da75f3ec25d8724de0157ef426d742",
"Gateway": "172.18.0.1",
"IPAddress": "172.18.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:12:00:02",
"DriverOpts": null
}
}
}
}
]