From c963c21f5d93759725f77c4926b38714c2f00650 Mon Sep 17 00:00:00 2001 From: Logan Adams <114770087+loadams@users.noreply.github.com> Date: Thu, 30 Jan 2025 12:03:14 -0800 Subject: [PATCH] Specify torchvision in nv-ds-chat workflow (prevents errors with torch 2.6) (#6982) Fixes #6984. The workflow was pulling the updated torch 2.6, which caused CI failures. This keeps us on torch 2.5 for now: torchvision was being installed later as a dependency, which unintentionally pulled in torch 2.6 with it. This PR also unsets NCCL_DEBUG to avoid a large printout if any errors occur. --- .github/workflows/nv-ds-chat.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nv-ds-chat.yml b/.github/workflows/nv-ds-chat.yml index 7e209cbe4397..5d47519fe204 100644 --- a/.github/workflows/nv-ds-chat.yml +++ b/.github/workflows/nv-ds-chat.yml @@ -37,7 +37,7 @@ jobs: - name: Install pytorch run: | - pip3 install -U --cache-dir $TORCH_CACHE torch --index-url https://download.pytorch.org/whl/cu121 + pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121 python -c "import torch; print('torch:', torch.__version__, torch)" python -c "import torch; print('CUDA available:', torch.cuda.is_available())" @@ -67,6 +67,7 @@ jobs: run: | cd DeepSpeedExamples/applications/DeepSpeed-Chat unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch + unset NCCL_DEBUG cd tests pytest $PYTEST_OPTS ./