Hello author, the following error occurs when I run `bash tools/dist_test.sh projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py work_dirs/panoptic-flashocc-r50-depth/latest.pth 1 --eval ray-iou`:
Hello author, the following error occurs when I run `bash tools/dist_test.sh projects/configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py work_dirs/panoptic-flashocc-r50-depth/latest.pth 1 --eval ray-iou`:
Starting Evaluation...
Traceback (most recent call last):
File "tools/test.py", line 290, in
main()
File "tools/test.py", line 286, in main
print(dataset.evaluate(outputs, **eval_kwargs))
File "/home/leapting/work/2024-07-17-FlashOcc/FlashOCC/projects/mmdet3d_plugin/datasets/nuscenes_dataset_occ.py", line 107, in evaluate
print(occ_results[data_id]['pred_occ'].shape)
IndexError: too many indices for tensor of dimension 3
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 21129) of binary: /home/leapting/anaconda3/envs/flashocc/bin/python
Traceback (most recent call last):
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/launch.py", line 193, in
main()
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/launch.py", line 189, in main
launch(args)
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/launch.py", line 174, in launch
run(args)
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/run.py", line 710, in run
elastic_launch(
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 131, in call
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/leapting/anaconda3/envs/flashocc/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 259, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
tools/test.py FAILED
Failures: <NO_OTHER_FAILURES>
Root Cause (first observed failure): [0]: time : 2024-07-19_14:14:09 host : leapting-Precision-3571 rank : 0 (local_rank: 0) exitcode : 1 (pid: 21129) error_file: <N/A> traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
Thanks for your attention. We've fixed the problem, please update the code and weights.
Best wishes.
Thanks for your attention. We've fixed the problem, please update the code and weights.
Best wishes.
Thank you for your reply. My question has been resolved.
Hello author, I was also able to run the code in the test phase by modifying the `evaluate` method in nuscenes_dataset_occ.py. The modified function is as follows:
def evaluate(self, occ_results, runner=None, show_dir=None, **eval_kwargs):
    """Evaluate occupancy predictions against the ground-truth labels on disk.

    Two evaluation paths exist, selected by ``eval_kwargs['metric'][0]``:

    * ``'ray-iou'``: collects predictions, ground-truth semantics and lidar
      origins per sample and passes them to ``calc_rayiou``. If any sample
      carries a ``'pano_inst'`` entry, ``main_raypq`` is run as well and its
      output is merged into the result.
    * anything else: accumulates every sample into a ``Metric_mIoU`` instance
      and returns ``count_miou()``.

    Args:
        occ_results: Per-sample predictions. Each entry is either a
            ``torch.Tensor`` or a dict holding ``'pred_occ'`` (and, on the
            ray-iou path, optionally ``'pano_inst'``). On the ray-iou path
            the container may be a list indexed like ``self.data_infos`` or
            a mapping keyed by sample token.
        runner: Not used by this method.
        show_dir: Only used on the mIoU path; when not ``None``, each
            prediction is written to
            ``<show_dir>/<scene>/<token>/pred.npz``.
        **eval_kwargs: Must contain ``'metric'``, a sequence whose first
            element selects the evaluation path.

    Returns:
        The value produced by ``calc_rayiou`` (updated with ``main_raypq``
        when instance predictions exist), or by ``Metric_mIoU.count_miou``.

    Raises:
        ValueError: On the ray-iou path when a token yielded by the data
            loader is not present in ``self.data_infos``; on the mIoU path
            when a prediction is not 3-dimensional.
    """
    metric = eval_kwargs['metric'][0]

    if metric == 'ray-iou':
        occ_gts = []
        occ_preds = []
        lidar_origins = []
        inst_gts = []
        inst_preds = []
        skipped = 0

        print('\nStarting Evaluation...')

        data_loader = DataLoader(
            EgoPoseDataset(self.data_infos),
            batch_size=1,
            shuffle=False,
            num_workers=8
        )

        # Map each token to its first position in data_infos once, instead
        # of doing a linear list.index() scan for every batch.
        token_to_index = {}
        for idx, info in enumerate(self.data_infos):
            token_to_index.setdefault(info['token'], idx)

        for batch in data_loader:
            token = batch[0][0]
            output_origin = batch[1]

            if token not in token_to_index:
                # Same exception type list.index() raised before.
                raise ValueError(f"{token!r} is not in list")
            data_id = token_to_index[token]
            info = self.data_infos[data_id]

            occ_gt = np.load(
                os.path.join(
                    info['occ_path'].replace('data/nuscenes/gts/', 'data/nuscenes/occ3d_panoptic/'),
                    'labels.npz'))
            gt_semantics = occ_gt['semantics']  # (Dx, Dy, Dz)

            # Fetch the prediction belonging to the current sample.
            occ_result = occ_results[data_id] if isinstance(occ_results, list) else occ_results[token]

            # Best-effort: a sample that cannot be processed is reported and
            # skipped. Everything that can fail is computed BEFORE any list
            # is touched, so a failure never leaves a half-registered sample
            # behind (which would misalign the lists given to the metrics).
            try:
                if isinstance(occ_result, torch.Tensor):
                    # Prediction returned as a bare tensor.
                    occ_pred = occ_result.cpu().numpy()
                elif isinstance(occ_result, dict):
                    # Prediction wrapped in a dict: use its 'pred_occ' entry.
                    occ_pred = occ_result['pred_occ'].cpu().numpy()
                else:
                    raise ValueError(f"Unexpected structure for occ_result: {type(occ_result)}")

                if len(occ_pred.shape) != 3:
                    raise ValueError(
                        f"Expected occ_pred to have 3 dimensions, but got {len(occ_pred.shape)} dimensions.")

                has_instances = isinstance(occ_result, dict) and 'pano_inst' in occ_result
                if has_instances:
                    pano_inst = occ_result['pano_inst'].cpu().squeeze(0).numpy()
                    gt_instances = occ_gt['instances']
            except Exception as e:
                print(f"Error processing occ_result: {e}")
                skipped += 1
                continue

            lidar_origins.append(output_origin)
            occ_gts.append(gt_semantics)
            occ_preds.append(occ_pred)
            if has_instances:
                inst_gts.append(gt_instances)
                inst_preds.append(pano_inst)

        if skipped:
            # Make it obvious that the metrics below cover only a subset.
            print(f"Warning: skipped {skipped} sample(s) due to errors; "
                  f"metrics are computed over the remaining {len(occ_preds)}.")

        # NOTE(review): if only some samples carry 'pano_inst', inst_preds is
        # shorter than occ_preds -- confirm main_raypq tolerates that.
        eval_results = calc_rayiou(occ_preds, occ_gts, lidar_origins)
        if len(inst_preds) > 0:
            eval_results.update(main_raypq(occ_preds, occ_gts, inst_preds, inst_gts, lidar_origins))
    else:
        self.occ_eval_metrics = Metric_mIoU(
            num_classes=18,
            use_lidar_mask=False,
            use_image_mask=True)

        print('\nStarting Evaluation...')
        for index, occ_result in enumerate(tqdm(occ_results)):
            info = self.data_infos[index]

            occ_gt = np.load(os.path.join(info['occ_path'], 'labels.npz'))
            gt_semantics = occ_gt['semantics']  # (Dx, Dy, Dz)
            mask_lidar = occ_gt['mask_lidar'].astype(bool)  # (Dx, Dy, Dz)
            mask_camera = occ_gt['mask_camera'].astype(bool)  # (Dx, Dy, Dz)

            # Extract the prediction (bare tensor, or the dict's 'pred_occ').
            occ_pred = occ_result if isinstance(occ_result, torch.Tensor) else occ_result['pred_occ']

            if len(occ_pred.shape) != 3:
                raise ValueError(
                    f"Expected occ_pred to have 3 dimensions, but got {len(occ_pred.shape)} dimensions.")

            self.occ_eval_metrics.add_batch(
                occ_pred,
                gt_semantics,  # (Dx, Dy, Dz)
                mask_lidar,  # (Dx, Dy, Dz)
                mask_camera  # (Dx, Dy, Dz)
            )

            if show_dir is not None:
                mmcv.mkdir_or_exist(show_dir)
                # The scene name is the path component containing 'scene-'.
                scene_name = [tem for tem in info['occ_path'].split('/') if 'scene-' in tem][0]
                sample_token = info['token']
                mmcv.mkdir_or_exist(os.path.join(show_dir, scene_name, sample_token))
                save_path = os.path.join(show_dir, scene_name, sample_token, 'pred.npz')
                # NOTE(review): `gt=occ_gt` stores the whole loaded npz object
                # rather than a single array -- confirm this is intended.
                np.savez_compressed(save_path, pred=occ_pred, gt=occ_gt, sample_token=sample_token)

        eval_results = self.occ_eval_metrics.count_miou()

    return eval_results
In vis_occ.py I encountered the same problem, so I modified the code at line 233 as follows:
# Run inference sample by sample and write a semantic occupancy image (plus a
# panoptic one when the model provides instance predictions) to args.viz_dir.
# Wrapping the loader itself (not the enumerate object) lets tqdm see its
# length and show real progress.
for i, data in enumerate(tqdm(val_loader)):
    with torch.no_grad():
        occ_pred = model(return_loss=False, rescale=True, **data)[0]

    # The model output is either the semantic grid itself or a dict wrapping
    # it. In the dict case the grid must be extracted; passing the whole dict
    # on as `semantics` would hand occ2img the wrong object.
    # NOTE(review): 'pred_occ' is the key the evaluation code reads -- confirm
    # the model uses the same key at visualisation time.
    sem_pred = occ_pred['pred_occ'] if isinstance(occ_pred, dict) else occ_pred
    if isinstance(sem_pred, torch.Tensor):
        sem_pred = sem_pred.cpu().numpy()

    cv2.imwrite(os.path.join(args.viz_dir, '%04d-sem.jpg' % i), occ2img(semantics=sem_pred)[..., ::-1])

    if isinstance(occ_pred, dict) and 'pano_inst' in occ_pred:
        # NOTE(review): 'pano_inst' is passed through unconverted; confirm
        # occ2img accepts whatever type the model returns here.
        inst_pred = occ_pred['pano_inst']
        cv2.imwrite(os.path.join(args.viz_dir, '%04d-inst.jpg' % i),
                    occ2img(semantics=sem_pred, is_pano=True, panoptics=inst_pred)[..., ::-1])
Do you think my modifications will affect how the rest of the code works?