Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding nvlink throughput to nvidia smi monitor #390

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
GPU 0: NVIDIA A100-SXM4-40GB (UUID: GPU-547e49e9-c77f-15a6-b5da-15b9eb0207d3)
Link 0: Data Tx: 1040 KiB
Link 0: Data Rx: 200.3 KiB
Link 1: Data Tx: 1500 KiB
Link 1: Data Rx: 1500 KiB
Link 2: Data Tx: 1500 KiB
Link 2: Data Rx: 1500 KiB
Link 3: Data Tx: 1500 KiB
Link 3: Data Rx: 1500 KiB
Link 4: Data Tx: 1500 KiB
Link 4: Data Rx: 1500 KiB
Link 5: Data Tx: 1500 KiB
Link 5: Data Rx: 1500 KiB
Link 6: Data Tx: 1500 KiB
Link 6: Data Rx: 1500 KiB
Link 7: Data Tx: 1500 KiB
Link 7: Data Rx: 1500 KiB
Link 8: Data Tx: 1500 KiB
Link 8: Data Rx: 1500 KiB
Link 9: Data Tx: 1500 KiB
Link 9: Data Rx: 1500 KiB
Link 10: Data Tx: 1500 KiB
Link 10: Data Rx: 1500 KiB
Link 11: Data Tx: 1500 KiB
Link 11: Data Rx: 1500 KiB
GPU 1: NVIDIA A100-SXM4-40GB (UUID: GPU-dc450a09-2c60-40a5-34fc-77ac7faf167e)
Link 0: Data Tx: 800 KiB
Link 0: Data Rx: 800 KiB
Link 1: Data Tx: 800 KiB
Link 1: Data Rx: 800 KiB
Link 2: Data Tx: 800 KiB
Link 2: Data Rx: 800 KiB
Link 3: Data Tx: 800 KiB
Link 3: Data Rx: 800 KiB
Link 4: Data Tx: 800 KiB
Link 4: Data Rx: 800 KiB
Link 5: Data Tx: 800 KiB
Link 5: Data Rx: 800 KiB
Link 6: Data Tx: 800 KiB
Link 6: Data Rx: 800 KiB
Link 7: Data Tx: 800 KiB
Link 7: Data Rx: 800 KiB
Link 8: Data Tx: 800 KiB
Link 8: Data Rx: 800 KiB
Link 9: Data Tx: 800 KiB
Link 9: Data Rx: 800 KiB
Link 10: Data Tx: 800 KiB
Link 10: Data Rx: 800 KiB
Link 11: Data Tx: 800 KiB
Link 11: Data Rx: 800 KiB
GPU 2: NVIDIA A100-SXM4-40GB (UUID: GPU-38eef22b-8b56-ae96-1f1f-25575b9fc7e7)
Link 0: Data Tx: 500 KiB
Link 0: Data Rx: 500 KiB
Link 1: Data Tx: 500 KiB
Link 1: Data Rx: 500 KiB
Link 2: Data Tx: 500 KiB
Link 2: Data Rx: 500 KiB
Link 3: Data Tx: 500 KiB
Link 3: Data Rx: 500 KiB
Link 4: Data Tx: 500 KiB
Link 4: Data Rx: 500 KiB
Link 5: Data Tx: 500 KiB
Link 5: Data Rx: 500 KiB
Link 6: Data Tx: 500 KiB
Link 6: Data Rx: 500 KiB
Link 7: Data Tx: 500 KiB
Link 7: Data Rx: 500 KiB
Link 8: Data Tx: 500 KiB
Link 8: Data Rx: 500 KiB
Link 9: Data Tx: 500 KiB
Link 9: Data Rx: 500 KiB
Link 10: Data Tx: 500 KiB
Link 10: Data Rx: 500 KiB
Link 11: Data Tx: 500 KiB
Link 11: Data Rx: 500 KiB
GPU 3: NVIDIA A100-SXM4-40GB (UUID: GPU-bb58bf68-496a-a909-f7a6-eb6e8bff5892)
Link 0: Data Tx: 1200 KiB
Link 0: Data Rx: 1200 KiB
Link 1: Data Tx: 1200 KiB
Link 1: Data Rx: 1200 KiB
Link 2: Data Tx: 1200 KiB
Link 2: Data Rx: 1200 KiB
Link 3: Data Tx: 1200 KiB
Link 3: Data Rx: 1200 KiB
Link 4: Data Tx: 1200 KiB
Link 4: Data Rx: 1200 KiB
Link 5: Data Tx: 1200 KiB
Link 5: Data Rx: 1200 KiB
Link 6: Data Tx: 1200 KiB
Link 6: Data Rx: 1200 KiB
Link 7: Data Tx: 1200 KiB
Link 7: Data Rx: 1200 KiB
Link 8: Data Tx: 1200 KiB
Link 8: Data Rx: 1200 KiB
Link 9: Data Tx: 1200 KiB
Link 9: Data Rx: 1200 KiB
Link 10: Data Tx: 1200 KiB
Link 10: Data Rx: 1200 KiB
Link 11: Data Tx: 1200 KiB
Link 11: Data Rx: 1200 KiB
GPU 4: NVIDIA A100-SXM4-40GB (UUID: GPU-e7900065-8d18-a01c-7d45-9ef032d7d1ed)
Link 0: Data Tx: 2000 KiB
Link 0: Data Rx: 2000 KiB
Link 1: Data Tx: 2000 KiB
Link 1: Data Rx: 2000 KiB
Link 2: Data Tx: 2000 KiB
Link 2: Data Rx: 2000 KiB
Link 3: Data Tx: 2000 KiB
Link 3: Data Rx: 2000 KiB
Link 4: Data Tx: 2000 KiB
Link 4: Data Rx: 2000 KiB
Link 5: Data Tx: 2000 KiB
Link 5: Data Rx: 2000 KiB
Link 6: Data Tx: 2000 KiB
Link 6: Data Rx: 2000 KiB
Link 7: Data Tx: 2000 KiB
Link 7: Data Rx: 2000 KiB
Link 8: Data Tx: 2000 KiB
Link 8: Data Rx: 2000 KiB
Link 9: Data Tx: 2000 KiB
Link 9: Data Rx: 2000 KiB
Link 10: Data Tx: 2000 KiB
Link 10: Data Rx: 2000 KiB
Link 11: Data Tx: 2000 KiB
Link 11: Data Rx: 2000 KiB
GPU 5: NVIDIA A100-SXM4-40GB (UUID: GPU-8e000139-4a61-ec47-798b-374ae1cbf96a)
Link 0: Data Tx: 400 KiB
Link 0: Data Rx: 400 KiB
Link 1: Data Tx: 400 KiB
Link 1: Data Rx: 400 KiB
Link 2: Data Tx: 400 KiB
Link 2: Data Rx: 400 KiB
Link 3: Data Tx: 400 KiB
Link 3: Data Rx: 400 KiB
Link 4: Data Tx: 400 KiB
Link 4: Data Rx: 400 KiB
Link 5: Data Tx: 400 KiB
Link 5: Data Rx: 400 KiB
Link 6: Data Tx: 400 KiB
Link 6: Data Rx: 400 KiB
Link 7: Data Tx: 400 KiB
Link 7: Data Rx: 400 KiB
Link 8: Data Tx: 400 KiB
Link 8: Data Rx: 400 KiB
Link 9: Data Tx: 400 KiB
Link 9: Data Rx: 400 KiB
Link 10: Data Tx: 400 KiB
Link 10: Data Rx: 400 KiB
Link 11: Data Tx: 400 KiB
Link 11: Data Rx: 400 KiB
GPU 6: NVIDIA A100-SXM4-40GB (UUID: GPU-53bbb70c-10a4-f0b3-9e6a-0bfc103ed298)
Link 0: Data Tx: 750 KiB
Link 0: Data Rx: 750 KiB
Link 1: Data Tx: 750 KiB
Link 1: Data Rx: 750 KiB
Link 2: Data Tx: 750 KiB
Link 2: Data Rx: 750 KiB
Link 3: Data Tx: 750 KiB
Link 3: Data Rx: 750 KiB
Link 4: Data Tx: 750 KiB
Link 4: Data Rx: 750 KiB
Link 5: Data Tx: 750 KiB
Link 5: Data Rx: 750 KiB
Link 6: Data Tx: 750 KiB
Link 6: Data Rx: 750 KiB
Link 7: Data Tx: 750 KiB
Link 7: Data Rx: 750 KiB
Link 8: Data Tx: 750 KiB
Link 8: Data Rx: 750 KiB
Link 9: Data Tx: 750 KiB
Link 9: Data Rx: 750 KiB
Link 10: Data Tx: 750 KiB
Link 10: Data Rx: 750 KiB
Link 11: Data Tx: 750 KiB
Link 11: Data Rx: 750 KiB
GPU 7: NVIDIA A100-SXM4-40GB (UUID: GPU-f6babbbb-c44f-416a-79ec-8d28350c2ad2)
Link 0: Data Tx: 600 KiB
Link 0: Data Rx: 600 KiB
Link 1: Data Tx: 600 KiB
Link 1: Data Rx: 600 KiB
Link 2: Data Tx: 600 KiB
Link 2: Data Rx: 600 KiB
Link 3: Data Tx: 600 KiB
Link 3: Data Rx: 600 KiB
Link 4: Data Tx: 600 KiB
Link 4: Data Rx: 600 KiB
Link 5: Data Tx: 600 KiB
Link 5: Data Rx: 600 KiB
Link 6: Data Tx: 600 KiB
Link 6: Data Rx: 600 KiB
Link 7: Data Tx: 600 KiB
Link 7: Data Rx: 600 KiB
Link 8: Data Tx: 600 KiB
Link 8: Data Rx: 600 KiB
Link 9: Data Tx: 600 KiB
Link 9: Data Rx: 600 KiB
Link 10: Data Tx: 600 KiB
Link 10: Data Rx: 600 KiB
Link 11: Data Tx: 600 KiB
Link 11: Data Rx: 600 KiB
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Monitors.UnitTests
namespace VirtualClient.Monitors
{
using NUnit.Framework;
using System;
Expand All @@ -27,8 +27,8 @@ public void NvidiaSmiC2CParserParsesMetricsCorrectly()
NvidiaSmiC2CParser testParser = new NvidiaSmiC2CParser(rawText);
IList<Metric> metrics = testParser.Parse();

Assert.AreEqual(10, metrics.Count);
MetricAssert.Exists(metrics, "GPU 0: C2C Link 0 Speed", 44.712, "GB/s");
Assert.AreEqual(10, metrics.Count);
MetricAssert.Exists(metrics, "GPU 0: C2C Link 0 Speed", 44.712, "GB/s");
MetricAssert.Exists(metrics, "GPU 0: C2C Link 1 Speed", 44.712, "GB/s");
MetricAssert.Exists(metrics, "GPU 0: C2C Link 2 Speed", 44.712, "GB/s");
MetricAssert.Exists(metrics, "GPU 0: C2C Link 3 Speed", 44.712, "GB/s");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Monitors
{
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
using VirtualClient.Contracts;

[TestFixture]
[Category("Unit")]
public class NvidiaSmiQueryNvLinkParserUnitTest
{
    /// <summary>
    /// Feeds the example "query-nvlink.txt" output (located under Examples/nvidia-smi next to
    /// the executing test assembly) to <see cref="NvidiaSmiQueryNvLinkParser"/> and checks both
    /// the total number of metrics produced and a sample of individual Tx/Rx metrics.
    /// </summary>
    [Test]
    public void NvidiaSmiNvLinkParserParsesMetricsCorrectly()
    {
        // Example files are resolved relative to the directory of the test assembly itself.
        string assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        string exampleOutput = File.ReadAllText(
            Path.Combine(assemblyDirectory, "Examples", "nvidia-smi", "query-nvlink.txt"));

        IList<Metric> parsedMetrics = new NvidiaSmiQueryNvLinkParser(exampleOutput).Parse();

        // NOTE(review): 192 presumably corresponds to 8 GPUs x 12 links x 2 directions (Tx/Rx)
        // in the example file -- confirm against Examples/nvidia-smi/query-nvlink.txt.
        Assert.AreEqual(192, parsedMetrics.Count);

        // Spot-check one metric per GPU, alternating between Rx and Tx.
        MetricAssert.Exists(parsedMetrics, "GPU 0: NvLink Rx 0 Throughput", 200.3, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 1: NvLink Tx 11 Throughput", 800, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 2: NvLink Rx 9 Throughput", 500, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 3: NvLink Tx 5 Throughput", 1200, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 4: NvLink Rx 1 Throughput", 2000, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 5: NvLink Tx 3 Throughput", 400, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 6: NvLink Rx 2 Throughput", 750, "KiB");
        MetricAssert.Exists(parsedMetrics, "GPU 7: NvLink Tx 10 Throughput", 600, "KiB");
    }
}
}
Loading
Loading