Updated the DeepSeek-R1 distilled model configs (GPU/node counts and context lengths in models.csv). Updated README accordingly.

VectorInstitute · Feb 14, 2025 · 9032747
1 parent ad2425f
commit 9032747
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 8 deletions.
diff --git a/vec_inf/models/README.md b/vec_inf/models/README.md
@@ -152,12 +152,12 @@ More profiling metrics coming soon!
 
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
-| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 4x a40 | - tokens/s | - tokens/s |
 | [`DeepSeek-R1-Distill-Llama-8B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | 1x a40 | - tokens/s | - tokens/s |
-| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 2x a40 | - tokens/s | - tokens/s |
-| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 1x a40 | - tokens/s | - tokens/s |
-| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 8x a40 (2 nodes, 4 a40/node) | - tokens/s | - tokens/s |
 | [`DeepSeek-R1-Distill-Qwen-1.5B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 2x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 4x a40 | - tokens/s | - tokens/s |
 
 
 ## Vision Language Models

diff --git a/vec_inf/models/models.csv b/vec_inf/models/models.csv
@@ -77,9 +77,9 @@ Llama-3.3-70B-Instruct,Llama-3.3,70B-Instruct,LLM,4,1,128256,65536,256,true,fals
 InternVL2_5-26B,InternVL2_5,26B,VLM,2,1,92553,32768,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 InternVL2_5-38B,InternVL2_5,38B,VLM,4,1,92553,32768,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 Aya-Expanse-32B,Aya-Expanse,32B,LLM,2,1,256000,8192,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
-DeepSeek-R1-Distill-Llama-70B,DeepSeek-R1,Distill-Llama-70B ,LLM,4,1,128256,65536,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
-DeepSeek-R1-Distill-Llama-8B,DeepSeek-R1,Distill-Llama-8B ,LLM,1,1,128256,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
-DeepSeek-R1-Distill-Qwen-32B,DeepSeek-R1,Distill-Qwen-32B,LLM,2,1,152064,65536,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
-DeepSeek-R1-Distill-Qwen-14B,DeepSeek-R1,Distill-Qwen-14B,LLM,1,1,152064,65536,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Llama-70B,DeepSeek-R1,Distill-Llama-70B,LLM,4,2,128256,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Llama-8B,DeepSeek-R1,Distill-Llama-8B,LLM,1,1,128256,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-32B,DeepSeek-R1,Distill-Qwen-32B,LLM,4,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-14B,DeepSeek-R1,Distill-Qwen-14B,LLM,2,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 DeepSeek-R1-Distill-Qwen-7B,DeepSeek-R1,Distill-Qwen-7B,LLM,1,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 DeepSeek-R1-Distill-Qwen-1.5B,DeepSeek-R1,Distill-Qwen-1.5B,LLM,1,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights