Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Allocation implementation for mpibind modifier #646

Draft
wants to merge 21 commits into
base: develop
Choose a base branch
from
2 changes: 2 additions & 0 deletions experiments/amg2023/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from benchpark.scaling import WeakScaling
from benchpark.scaling import ThroughputScaling
from benchpark.caliper import Caliper
from benchpark.mpibind import Mpibind


class Amg2023(
Expand All @@ -24,6 +25,7 @@ class Amg2023(
WeakScaling,
ThroughputScaling,
Caliper,
Mpibind,
):
variant(
"workload",
Expand Down
2 changes: 2 additions & 0 deletions experiments/kripke/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from benchpark.scaling import WeakScaling
from benchpark.scaling import ThroughputScaling
from benchpark.caliper import Caliper
from benchpark.mpibind import Mpibind


class Kripke(
Expand All @@ -24,6 +25,7 @@ class Kripke(
WeakScaling,
ThroughputScaling,
Caliper,
Mpibind,
):
variant(
"workload",
Expand Down
8 changes: 7 additions & 1 deletion experiments/saxpy/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
from benchpark.caliper import Caliper


class Saxpy(Experiment, OpenMPExperiment, CudaExperiment, ROCmExperiment, Caliper):
class Saxpy(
Experiment,
OpenMPExperiment,
CudaExperiment,
ROCmExperiment,
Caliper,
):
variant(
"workload",
default="problem",
Expand Down
20 changes: 19 additions & 1 deletion lib/benchpark/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,21 @@ class Experiment(ExperimentSystemBase, SingleNode):
description="additional spack specs",
)

# Experiment-level switch for mpibind. The selected value is forwarded
# unchanged as the "mode" of the "allocation" modifier (see
# compute_modifiers_section_wrapper), so every value listed here needs a
# matching mode declared on that modifier. "standard" is the default.
variant(
"mpibind",
default="standard",
values=(
"standard",
"on",
"off",
"v",
"vv",
"greedy:0",
),
# Exactly one value may be chosen at a time.
multi=False,
description="Toggle mpibind and set verbosity",
)

def __init__(self, spec):
self.spec: "benchpark.spec.ConcreteExperimentSpec" = spec
super().__init__()
Expand Down Expand Up @@ -130,7 +145,10 @@ def compute_modifiers_section(self):

def compute_modifiers_section_wrapper(self):
# by default we use the allocation modifier and no others
modifier_list = [{"name": "allocation"}, {"name": "exit-code"}]
modifier_list = [
{"name": "allocation", "mode": self.spec.variants["mpibind"][0]},
{"name": "exit-code"},
]
modifier_list += self.compute_modifiers_section()
for cls in self.helpers:
modifier_list += cls.compute_modifiers_section()
Expand Down
70 changes: 70 additions & 0 deletions modifiers/allocation/modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import math
from enum import Enum
from ramble.modkit import *
import os
from ramble.util.executable import CommandExecutable


class AllocOpt(Enum):
Expand Down Expand Up @@ -230,6 +232,30 @@ class Allocation(BasicModifier):
# know how many CPUs we want)"
mode("standard", description="Standard execution mode for allocation")
default_mode("standard")
# Extra usage modes that select an mpibind setting. Whenever the active
# mode is anything other than "standard", inherit_from_application rewrites
# the application's "mpi_command" through set_mpibind so that the launcher
# receives the matching mpibind option.
mode(
    name="off",
    description="Turn off mpibind",
)

mode(
    name="on",
    description="Turn on mpibind",
)

mode(
    name="v",
    description="Run mpibind in verbose mode",
)

mode(
    name="vv",
    description="Run mpibind in very verbose mode",
)

# "greedy:0" switches mpibind's greedy behaviour OFF; the previous
# description ("very greedy mode") stated the opposite.
mode(
    name="greedy:0",
    description="Run mpibind with greedy mode disabled",
)

# NOTE(review): this is a plain class attribute, not a directive call --
# confirm that something actually reads `depends_on`.
depends_on = ["mpibind"]

def inherit_from_application(self, app):
super().inherit_from_application(app)
Expand All @@ -255,6 +281,10 @@ def inherit_from_application(self, app):
modification="{n_threads_per_proc}",
mode="standard",
)
if self._usage_mode != "standard":
scheduler = v.scheduler
base_string = app.variables.get("mpi_command")
app.variables["mpi_command"] = self.set_mpibind(scheduler, base_string)

def determine_allocation(self, v):
if not v.n_ranks:
Expand Down Expand Up @@ -461,3 +491,43 @@ def determine_scheduler_instructions(self, v):
v.timeout = 120

handler[v.scheduler](v)

def set_mpibind(self, scheduler, base_string):
    """Append the scheduler-specific mpibind option to a launch command.

    Args:
        scheduler: Scheduler name; one of "slurm", "flux", "mpi", "lsf"
            or "pjm".
        base_string: The launch command the option is appended to.

    Returns:
        ``base_string`` followed by a space and the mpibind option built
        from the current usage mode (``self._usage_mode``).

    Raises:
        ValueError: If ``scheduler`` has no known mpibind option. The
            previous code interpolated the text "None" into the command
            in that case, producing a broken launch line.
    """
    # NOTE(review): only the option spelling is recorded here; whether the
    # "mpi", "lsf" and "pjm" launchers really accept --mpibind should be
    # confirmed on the target systems.
    option_prefixes = {
        "slurm": "--mpibind=",
        "flux": "-o mpibind=",
        "mpi": "--mpibind=",
        "lsf": "--mpibind=",
        "pjm": "--mpibind=",
    }
    try:
        option_prefix = option_prefixes[scheduler]
    except KeyError:
        raise ValueError(
            f"mpibind is not supported for scheduler {scheduler!r}; "
            f"expected one of {sorted(option_prefixes)}"
        ) from None

    usage_mode = self._usage_mode
    if scheduler == "flux":
        # Flux spells the verbosity levels differently from the mode names.
        flux_values = {
            "v": "verbose:1",
            "vv": "verbose:2",
            "on": "on",
            "off": "off",
            "greedy:0": "greedy:0",
        }
        # Fall back to the raw mode name so an unmapped mode is passed
        # through verbatim instead of becoming the string "None".
        option_value = flux_values.get(usage_mode, usage_mode)
    else:
        option_value = usage_mode

    return f"{base_string} {option_prefix}{option_value}"

# Declares the `mpibind` method below under the name "mpibind"
# (presumably registering it as an executable modifier -- confirm against
# the ramble modkit).
executable_modifier("mpibind")

def mpibind(self, executable_name, executable, app_inst=None):
    """Build the commands to run around ``executable_name``.

    Nothing is scheduled before the executable. After it, a single
    command runs ``parse_mpibind_output.py`` (looked up in the directory
    of ``self._file_path``) on
    ``{experiment_run_dir}/{experiment_name}.out``; the brace placeholders
    are left as-is here and are presumably expanded later by the caller.

    Args:
        executable_name: Name of the executable being modified; used to
            name the post-run command.
        executable: Unused here.
        app_inst: Unused here.

    Returns:
        A ``(pre_exec, post_exec)`` pair of lists: an empty list and a
        list holding the one ``CommandExecutable`` described above.
    """
    run_log = "{experiment_run_dir}/{experiment_name}.out"
    parser_dir = os.path.dirname(f"{self._file_path}")
    parse_step = CommandExecutable(
        f"parse-stdout-{executable_name}",
        template=[f"python3 {parser_dir}/parse_mpibind_output.py {run_log}"],
    )
    return [], [parse_step]
48 changes: 48 additions & 0 deletions modifiers/allocation/parse_mpibind_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import json
import re
import argparse


def parse_mpibind(input_file, output_file="mpibind_log_file.json"):
    """Extract per-task mpibind bindings from a text log and save them as JSON.

    Only lines that start with ``mpibind:`` are inspected. From each such
    line of the form ``task <n> nths <list> gpus <list> cpus <list>`` the
    thread, GPU and CPU fields are recorded under the key ``"task <n>"``.
    If a task number occurs more than once, the last occurrence wins.

    Args:
        input_file: Path of the text log to read.
        output_file: Path of the JSON file to write. Defaults to
            ``mpibind_log_file.json`` in the current working directory,
            which was previously the only option.

    Returns:
        The mapping that was written, ``{"task <n>": {"cpus", "gpus",
        "nths"}}`` with every value kept as the string found in the log.
    """
    # The cpus field may be a single id ("5"), one range ("0-3") or a
    # comma-separated mix ("0-3,8-11"). The previous pattern required
    # exactly one dash, so single-CPU tasks were dropped and multi-range
    # lists were cut off after the first range.
    binding_pattern = re.compile(
        r"task\s+(\d+)\s+nths\s+([\d,]*)\s+gpus\s+([\d,]*)\s+cpus\s+([\d,-]+)"
    )

    task_map = {}
    with open(input_file, "r") as log:
        # Stream the log instead of loading every line into memory.
        for line in log:
            if not line.startswith("mpibind:"):
                continue
            found = binding_pattern.search(line)
            if found is None:
                continue
            task, nths, gpus, cpus = found.groups()
            task_map["task " + task] = {
                "cpus": cpus,
                "gpus": gpus,
                "nths": nths,
            }

    with open(output_file, "w") as json_file:
        json.dump(task_map, json_file, indent=4)

    return task_map


if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path
    # of the mpibind text log, and hands it to parse_mpibind.
    cli = argparse.ArgumentParser(description="affinity output to JSON")
    cli.add_argument("output_file", type=str, help="mpibind log file (text)")
    parse_mpibind(cli.parse_args().output_file)
Loading