Skip to content

Commit

Permalink
Use sysfs if parsing /proc/cpuinfo doesn't work (#2291)
Browse files Browse the repository at this point in the history
Summary:
On Arm CPUs, such as NVIDIA Grace, /proc/cpuinfo may not contain the physical or core id, which is used to approximate the cache hierarchy. This PR implements a simple work-around: If parsing /proc/cpuinfo does not work, parse /sys/devices instead.

Pull Request resolved: #2291

Test Plan:
Imported from GitHub, without a `Test Plan:` line.

See D62316680 for context.

Confirmed that this addresses the DigestBuilder issue on Grace ARM.

```
buck2 run mode/opt //folly/stats/test:digest_builder_benchmark
============================================================================
[...]stats/test/DigestBuilderBenchmark.cpp     relative  time/iter   iters/s
============================================================================
append(1000x1)                                             18.21ns    54.91M
append(1000x2)                                  99.622%    18.28ns    54.71M
append(1000x4)                                  98.997%    18.39ns    54.36M
append(1000x8)                                  98.705%    18.45ns    54.20M
append(1000x16)                                 98.598%    18.47ns    54.14M
append(1000x32)                                 97.641%    18.65ns    53.62M
----------------------------------------------------------------------------
append(10000x1)                                            13.28ns    75.28M
append(10000x2)                                 99.304%    13.38ns    74.76M
append(10000x4)                                 98.784%    13.45ns    74.36M
append(10000x8)                                 98.265%    13.52ns    73.97M
append(10000x16)                                97.923%    13.57ns    73.72M
append(10000x32)                                96.585%    13.75ns    72.71M
```

CacheLocality.LinuxActual on Grace ARM
**Before** (cpuinfo != sysfs, sysfs is correct)
```
I0911 13:03:23.131605 2626901 CacheLocalityTest.cpp:995] [cpuinfo] numCachesByLevel=[1, 1, 1]
I0911 13:03:23.131691 2626901 CacheLocalityTest.cpp:997] [sysfs] numCachesByLevel=[72, 72, 1]
I0911 13:03:23.131706 2626901 CacheLocalityTest.cpp:1000] [cpuinfo] equivClassesByCpu=[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]
I0911 13:03:23.132083 2626901 CacheLocalityTest.cpp:1002] [sysfs] equivClassesByCpu=[[0, 0, 0], [1, 1, 0], [2, 2, 0], [3, 3, 0], [4, 4, 0], [5, 5, 0], [6, 6, 0], [7, 7, 0], [8, 8, 0], [9, 9, 0], [10, 10, 0], [11, 11, 0], [12, 12, 0], [13, 13, 0], [14, 14, 0], [15, 15, 0], [16, 16, 0], [17, 17, 0], [18, 18, 0], [19, 19, 0], [20, 20, 0], [21, 21, 0], [22, 22, 0], [23, 23, 0], [24, 24, 0], [25, 25, 0], [26, 26, 0], [27, 27, 0], [28, 28, 0], [29, 29, 0], [30, 30, 0], [31, 31, 0], [32, 32, 0], [33, 33, 0], [34, 34, 0], [35, 35, 0], [36, 36, 0], [37, 37, 0], [38, 38, 0], [39, 39, 0], [40, 40, 0], [41, 41, 0], [42, 42, 0], [43, 43, 0], [44, 44, 0], [45, 45, 0], [46, 46, 0], [47, 47, 0], [48, 48, 0], [49, 49, 0], [50, 50, 0], [51, 51, 0], [52, 52, 0], [53, 53, 0], [54, 54, 0], [55, 55, 0], [56, 56, 0], [57, 57, 0], [58, 58, 0], [59, 59, 0], [60, 60, 0], [61, 61, 0], [62, 62, 0], [63, 63, 0], [64, 64, 0], [65, 65, 0], [66, 66, 0], [67, 67, 0], [68, 68, 0], [69, 69, 0], [70, 70, 0], [71, 71, 0]]
```

https://www.internalfb.com/intern/testinfra/testrun/1688850109822279

Reviewed By: yfeldblum, ot

Differential Revision: D62517069

Pulled By: meteorfox

fbshipit-source-id: eced05417e0dd90357c575935a8f50bb2e27d122
  • Loading branch information
krenzland authored and facebook-github-bot committed Sep 17, 2024
1 parent 74edd3b commit f67b93c
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 1 deletion.
20 changes: 20 additions & 0 deletions folly/concurrency/CacheLocality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,18 @@ namespace folly {
/// Returns the CacheLocality information best for this machine
static CacheLocality getSystemLocalityInfo() {
if (kIsLinux) {
// First try to parse /proc/cpuinfo.
// If that fails, then try to parse /sys/devices/.
// The latter is slower but more accurate.
try {
return CacheLocality::readFromProcCpuinfo();
} catch (...) {
// /proc/cpuinfo might be non-standard
// lets try with sysfs /sys/devices/cpu
}

try {
return CacheLocality::readFromSysfs();
} catch (...) {
// keep trying
}
Expand Down Expand Up @@ -221,6 +231,8 @@ std::vector<std::tuple<size_t, size_t, size_t>> parseProcCpuinfoLines(
size_t physicalId = 0;
size_t coreId = 0;
size_t maxCpu = 0;
size_t numberOfPhysicalIds = 0;
size_t numberOfCoreIds = 0;
for (auto iter = lines.rbegin(); iter != lines.rend(); ++iter) {
auto& line = *iter;
if (!procCpuinfoLineRelevant(line)) {
Expand All @@ -240,8 +252,10 @@ std::vector<std::tuple<size_t, size_t, size_t>> parseProcCpuinfoLines(
// the reverse order then we can emit a record.
if (line.find("physical id") == 0) {
physicalId = parseLeadingNumber(arg);
++numberOfPhysicalIds;
} else if (line.find("core id") == 0) {
coreId = parseLeadingNumber(arg);
++numberOfCoreIds;
} else if (line.find("processor") == 0) {
auto cpu = parseLeadingNumber(arg);
maxCpu = std::max(cpu, maxCpu);
Expand All @@ -256,6 +270,12 @@ std::vector<std::tuple<size_t, size_t, size_t>> parseProcCpuinfoLines(
throw std::runtime_error(
"offline CPUs not supported for /proc/cpuinfo cache locality source");
}
if (numberOfPhysicalIds == 0) {
throw std::runtime_error("no physical ids found");
}
if (numberOfCoreIds == 0) {
throw std::runtime_error("no core ids found");
}

return cpus;
}
Expand Down
34 changes: 33 additions & 1 deletion folly/concurrency/test/CacheLocalityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -986,9 +986,39 @@ TEST(CacheLocality, LinuxActual) {
// CPUs), so we can't use _SC_NPROCESSORS_ONLN or std::hardware_concurrency().
auto expectedNumCpus = sysconf(_SC_NPROCESSORS_CONF);

if (kIsArchArm || kIsArchAArch64) {
// As of Linux v6.9, Arm CPU's have a very basic /proc/cpuinfo
// they are missing many fields typically included in x86.
// https://github.com/torvalds/linux/blob/77f587896757708780a7e8792efe62939f25a5ab/arch/arm64/kernel/cpuinfo.c#L193
//
// Example ARM /proc/cpuinfo
// processor : 0
// BogoMIPS : 2000.00
// Features : [..omitted..]
// CPU implementer : 0x41
// CPU architecture: 8
// CPU variant : 0x0
// CPU part : 0xd4f
// CPU revision : 0
//
// ...
//

EXPECT_THROW(CacheLocality::readFromProcCpuinfo(), std::runtime_error);

auto parsed2 = CacheLocality::readFromSysfs();
EXPECT_EQ(parsed2.numCpus, expectedNumCpus);

LOG(INFO) << fmt::format(
"[sysfs] numCachesByLevel={}", parsed2.numCachesByLevel);
LOG(INFO) << fmt::format(
"[sysfs] equivClassesByCpu={}", parsed2.equivClassesByCpu);

return;
}

auto parsed1 = CacheLocality::readFromProcCpuinfo();
EXPECT_EQ(parsed1.numCpus, expectedNumCpus);

auto parsed2 = CacheLocality::readFromSysfs();
EXPECT_EQ(parsed2.numCpus, expectedNumCpus);

Expand Down Expand Up @@ -1042,6 +1072,8 @@ static void logRusageFor(std::string name, F func) {
}

TEST(CacheLocality, BenchmarkProcCpuinfo) {
// See note about Arm in LinuxActual test
SKIP_IF(kIsArchArm || kIsArchAArch64);
logRusageFor("readFromProcCpuinfo", CacheLocality::readFromProcCpuinfo);
}

Expand Down

0 comments on commit f67b93c

Please sign in to comment.