You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
尊敬的作者您好:我在使用基础的模型时(--config-file .\configs\sparse_inst_r50_base.yaml)遇到了以下问题:
Traceback (most recent call last):
File "d:\detectron2\detectron2\engine\train_loop.py", line 149, in train
self.run_step()
File "d:\detectron2\detectron2\engine\defaults.py", line 494, in run_step
self._trainer.run_step()
File "d:\detectron2\detectron2\engine\train_loop.py", line 405, in run_step
self._write_metrics(loss_dict, data_time)
File "d:\detectron2\detectron2\engine\train_loop.py", line 302, in _write_metrics
SimpleTrainer.write_metrics(loss_dict, data_time, prefix)
File "d:\detectron2\detectron2\engine\train_loop.py", line 338, in write_metrics
raise FloatingPointError(
FloatingPointError: Loss became infinite or NaN at iteration=49981!
loss_dict = {'loss_ce': 2.1776671409606934, 'loss_objectness': 0.5378763675689697, 'loss_dice': 1.9592366218566895, 'loss_mask': nan}
[12/06 14:12:42 d2.engine.hooks]: Overall training speed: 49979 iterations in 2:50:33 (0.2048 s / it)
[12/06 14:12:42 d2.engine.hooks]: Total training time: 3:20:53 (0:30:20 on hooks)
[12/06 14:12:42 d2.utils.events]: eta: 9:50:03 iter: 49981 total_loss: 466.6 loss_ce: 1.907 loss_objectness: 0.4606 loss_dice: 1.872 loss_mask: 456.6 time: 0.2047 data_time: 0.0187 lr: 5e-05 max_mem: 3398M
Traceback (most recent call last):
File "D:\pythonProject\SparseInst_2\tools\train_net.py", line 194, in <module>
launch(
File "d:\detectron2\detectron2\engine\launch.py", line 82, in launch
main_func(*args)
File "D:\pythonProject\SparseInst_2\tools\train_net.py", line 188, in main
return trainer.train()
File "d:\detectron2\detectron2\engine\defaults.py", line 484, in train
super().train(self.start_iter, self.max_iter)
File "d:\detectron2\detectron2\engine\train_loop.py", line 149, in train
self.run_step()
File "d:\detectron2\detectron2\engine\defaults.py", line 494, in run_step
self._trainer.run_step()
File "d:\detectron2\detectron2\engine\train_loop.py", line 405, in run_step
self._write_metrics(loss_dict, data_time)
File "d:\detectron2\detectron2\engine\train_loop.py", line 302, in _write_metrics
SimpleTrainer.write_metrics(loss_dict, data_time, prefix)
File "d:\detectron2\detectron2\engine\train_loop.py", line 338, in write_metrics
raise FloatingPointError(
FloatingPointError: Loss became infinite or NaN at iteration=49981!
loss_dict = {'loss_ce': 2.1776671409606934, 'loss_objectness': 0.5378763675689697, 'loss_dice': 1.9592366218566895, 'loss_mask': nan}
问题就是 'loss_mask': nan。我在训练到 iter 约为 40000 时,loss 开始逐渐上涨,主要是 loss_mask 的值,大概会涨到 300 左右,随后就会出现 nan。但是在我自己修改的模型中(仅仅替换了 mask 分支的一个卷积操作)却能正常训练,其他代码都没有改动。请问这是什么问题呢?
The text was updated successfully, but these errors were encountered:
尊敬的作者您好:我在使用基础的模型时(--config-file .\configs\sparse_inst_r50_base.yaml)遇到了以下问题:
Traceback (most recent call last):
File "d:\detectron2\detectron2\engine\train_loop.py", line 149, in train
self.run_step()
File "d:\detectron2\detectron2\engine\defaults.py", line 494, in run_step
self._trainer.run_step()
File "d:\detectron2\detectron2\engine\train_loop.py", line 405, in run_step
self._write_metrics(loss_dict, data_time)
File "d:\detectron2\detectron2\engine\train_loop.py", line 302, in _write_metrics
SimpleTrainer.write_metrics(loss_dict, data_time, prefix)
File "d:\detectron2\detectron2\engine\train_loop.py", line 338, in write_metrics
raise FloatingPointError(
FloatingPointError: Loss became infinite or NaN at iteration=49981!
loss_dict = {'loss_ce': 2.1776671409606934, 'loss_objectness': 0.5378763675689697, 'loss_dice': 1.9592366218566895, 'loss_mask': nan}
[12/06 14:12:42 d2.engine.hooks]: Overall training speed: 49979 iterations in 2:50:33 (0.2048 s / it)
[12/06 14:12:42 d2.engine.hooks]: Total training time: 3:20:53 (0:30:20 on hooks)
[12/06 14:12:42 d2.utils.events]: eta: 9:50:03 iter: 49981 total_loss: 466.6 loss_ce: 1.907 loss_objectness: 0.4606 loss_dice: 1.872 loss_mask: 456.6 time: 0.2047 data_time: 0.0187 lr: 5e-05 max_mem: 3398M
Traceback (most recent call last):
File "D:\pythonProject\SparseInst_2\tools\train_net.py", line 194, in <module>
launch(
File "d:\detectron2\detectron2\engine\launch.py", line 82, in launch
main_func(*args)
File "D:\pythonProject\SparseInst_2\tools\train_net.py", line 188, in main
return trainer.train()
File "d:\detectron2\detectron2\engine\defaults.py", line 484, in train
super().train(self.start_iter, self.max_iter)
File "d:\detectron2\detectron2\engine\train_loop.py", line 149, in train
self.run_step()
File "d:\detectron2\detectron2\engine\defaults.py", line 494, in run_step
self._trainer.run_step()
File "d:\detectron2\detectron2\engine\train_loop.py", line 405, in run_step
self._write_metrics(loss_dict, data_time)
File "d:\detectron2\detectron2\engine\train_loop.py", line 302, in _write_metrics
SimpleTrainer.write_metrics(loss_dict, data_time, prefix)
File "d:\detectron2\detectron2\engine\train_loop.py", line 338, in write_metrics
raise FloatingPointError(
FloatingPointError: Loss became infinite or NaN at iteration=49981!
loss_dict = {'loss_ce': 2.1776671409606934, 'loss_objectness': 0.5378763675689697, 'loss_dice': 1.9592366218566895, 'loss_mask': nan}
问题就是 'loss_mask': nan。我在训练到 iter 约为 40000 时,loss 开始逐渐上涨,主要是 loss_mask 的值,大概会涨到 300 左右,随后就会出现 nan。但是在我自己修改的模型中(仅仅替换了 mask 分支的一个卷积操作)却能正常训练,其他代码都没有改动。请问这是什么问题呢?
The text was updated successfully, but these errors were encountered: