Cost per job #93

Open · wants to merge 3 commits into base: develop
3 changes: 3 additions & 0 deletions gantry/__main__.py
@@ -28,6 +28,7 @@ async def apply_migrations(db: aiosqlite.Connection):
# and not inadvertently added to the migrations folder
("001_initial.sql", 1),
("002_spec_index.sql", 2),
("003_job_cost.sql", 3),
]

# apply migrations that have not been applied
@@ -45,6 +46,8 @@ async def apply_migrations(db: aiosqlite.Connection):
async def init_db(app: web.Application):
db = await aiosqlite.connect(os.environ["DB_FILE"])
await apply_migrations(db)
# ensure foreign key constraints are enabled
await db.execute("PRAGMA foreign_keys = ON")
app["db"] = db
yield
await db.close()
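The referenced 003_job_cost.sql migration is not included in this diff. A minimal sketch of what it might contain, assuming the six new columns mirror the keys returned by get_costs (the column names are hypothetical):

-- hypothetical sketch of 003_job_cost.sql; the real migration is not shown here
ALTER TABLE jobs ADD COLUMN cpu_cost REAL;
ALTER TABLE jobs ADD COLUMN mem_cost REAL;
ALTER TABLE jobs ADD COLUMN cpu_penalty REAL;
ALTER TABLE jobs ADD COLUMN mem_penalty REAL;
ALTER TABLE jobs ADD COLUMN cost_per_cpu REAL;
ALTER TABLE jobs ADD COLUMN cost_per_mem REAL;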
118 changes: 118 additions & 0 deletions gantry/clients/prometheus/job.py
@@ -1,5 +1,7 @@
import json

import aiosqlite

from gantry.clients.prometheus import util
from gantry.util.spec import spec_variants

@@ -152,3 +154,119 @@ async def get_usage(self, pod: str, start: float, end: float) -> dict:
"mem_min": mem_usage["min"],
"mem_stddev": mem_usage["stddev"],
}

async def get_costs(
self,
db: aiosqlite.Connection,
resources: dict,
usage: dict,
start: float,
end: float,
node_id: int,
) -> dict:
"""
Calculates the costs associated with a job.

Objectives:
- we want to measure the cost of a job's submission and execution
- measure efficiency of resource usage to discourage wasted cycles

The cost should be independent of other activity on the node in order
to be comparable against other jobs.

To normalize the cost of resources within instance types, we calculate
the cost of each CPU and memory unit in the node during the lifetime
of the job.

The base cost of a job is computed from its measured usage rather than
its requests. Requests still matter, as they block other jobs from using
resources; they are accounted for separately through the penalty factors
described below.

To account for jobs that use more or less than their requested resources,
we compute a penalty factor that captures the cost imposed on the rest of
the node, or on jobs that could otherwise have been scheduled on the machine.

Job cost and the penalties are stored separately for each resource to
allow for flexibility. When analyzing these costs, instance type should
be factored in, as the cost per job is influenced by the cost per
resource, which varies across instance types.

args:
db: a database connection
resources: job requests and limits
usage: job memory and cpu usage
start: job start time
end: job end time
node_id: the node that the job ran on

returns:
dict of: cpu_cost, mem_cost, cpu_penalty, mem_penalty
"""
costs = {}
async with db.execute(
"""
select capacity_type, instance_type, zone, cores, mem
from nodes where id = ?
""",
(node_id,),
) as cursor:
node = await cursor.fetchone()

if not node:
# this is a temporary condition that will happen during the transition
# to collecting instance metadata for nodes
raise util.IncompleteData(
f"node instance metadata is missing from db. node={node_id}"
)

capacity_type, instance_type, zone, cores, mem = node

# spot instance prices can change, so we avg the cost over the job's runtime
instance_costs = await self.client.query_range(
query={
"metric": "karpenter_cloudprovider_instance_type_offering_price_estimate", # noqa: E501
"filters": {
"capacity_type": capacity_type,
"instance_type": instance_type,
"zone": zone,
},
},
start=start,
end=end,
)
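# illustrative: assuming the client expands `filters` into PromQL label
# matchers, the query above amounts to a range query along the lines of
#   karpenter_cloudprovider_instance_type_offering_price_estimate{
#       capacity_type="spot", instance_type="i3en.6xlarge", zone="us-east-1c"}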

if not instance_costs:
raise util.IncompleteData(f"node cost is missing. node={node_id}")

instance_costs = [float(value) for _, value in instance_costs[0]["values"]]
# average hourly cost of the instance over the job's lifetime
instance_cost = sum(instance_costs) / len(instance_costs)
# node cost over the duration of the job (converted from seconds to hours)
node_cost = instance_cost * ((end - start) / 60 / 60)

# we assume that the cost of the node is split evenly between cpu and memory
# cost of each CPU in the node during the lifetime of the job
cost_per_cpu = (node_cost * 0.5) / cores
# cost of each unit of memory (byte)
cost_per_mem = (node_cost * 0.5) / mem

# base cost of a job is the resources it consumed (usage)
costs["cpu_cost"] = usage["cpu_mean"] * cost_per_cpu
costs["mem_cost"] = usage["mem_mean"] * cost_per_mem

# penalty factors are meant to capture misallocation: the opportunity
# cost of the job's behavior on the cluster/node.
# overallocation blocks resources from other jobs and delays scheduling,
# increasing pipeline duration; underallocation crowds the node and
# interferes with the work of co-located jobs.
# the penalty is the absolute difference between the job's usage and request
costs["cpu_penalty"] = (
abs(usage["cpu_mean"] - resources["cpu_request"]) * cost_per_cpu
)
costs["mem_penalty"] = (
abs(usage["mem_mean"] - resources["mem_request"]) * cost_per_mem
)

# the unit costs are stored so the analysis can be revisited later
costs["cost_per_cpu"] = cost_per_cpu
costs["cost_per_mem"] = cost_per_mem

return costs
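As a sanity check, the cost figures in the updated test fixtures below can be reproduced by hand. A minimal sketch using the fixture values from this PR (the $0.50/hr spot price from VALID_NODE_COST, the 1374-second job from INSERTED_JOB, and the 24-core i3en.6xlarge node):

# worked example; every input below is taken from the test fixtures in this PR
start, end = 1706117046, 1706118420         # job ran for 1374 seconds
instance_cost = 0.5                         # average hourly spot price
cores, mem = 24.0, 196608000000.0           # node capacity (cores, bytes)
cpu_mean, cpu_request = 1.899768349523097, 0.75

node_cost = instance_cost * ((end - start) / 60 / 60)      # ~0.190833
cost_per_cpu = (node_cost * 0.5) / cores                   # ~0.0039757
cost_per_mem = (node_cost * 0.5) / mem                     # ~4.8531e-13
cpu_cost = cpu_mean * cost_per_cpu                         # ~0.0075529
cpu_penalty = abs(cpu_mean - cpu_request) * cost_per_cpu   # ~0.0045711

These match the cpu_cost, cpu_penalty, cost_per_cpu, and cost_per_mem values appended to INSERTED_JOB; the memory terms follow the same pattern with mem_mean and mem_request.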
2 changes: 2 additions & 0 deletions gantry/clients/prometheus/node.py
@@ -53,4 +53,6 @@ async def get_labels(self, hostname: str, time: float) -> dict:
"arch": labels["label_kubernetes_io_arch"],
"os": labels["label_kubernetes_io_os"],
"instance_type": labels["label_node_kubernetes_io_instance_type"],
"capacity_type": labels["label_karpenter_sh_capacity_type"],
"zone": labels["label_topology_kubernetes_io_zone"],
}
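For the node in the test fixtures (see VALID_NODE_LABELS below), the dict returned by get_labels would now include, for example:

# example subset of the return value, derived from the VALID_NODE_LABELS fixture
{
    "arch": "amd64",
    "os": "linux",
    "instance_type": "i3en.6xlarge",
    "capacity_type": "spot",
    "zone": "us-east-1c",
}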
7 changes: 7 additions & 0 deletions gantry/routes/collection.py
@@ -73,6 +73,10 @@ async def fetch_job(
)
usage = await prometheus.job.get_usage(annotations["pod"], job.start, job.end)
node_id = await fetch_node(db_conn, prometheus, node_hostname, job.midpoint)
costs = await prometheus.job.get_costs(
db_conn, resources, usage, job.start, job.end, node_id
)

except aiohttp.ClientError as e:
logger.error(f"Request failed: {e}")
return
@@ -93,6 +97,7 @@
**annotations,
**resources,
**usage,
**costs,
},
)

@@ -139,5 +144,7 @@ async def fetch_node(
"arch": node_labels["arch"],
"os": node_labels["os"],
"instance_type": node_labels["instance_type"],
"capacity_type": node_labels["capacity_type"],
"zone": node_labels["zone"],
},
)
5 changes: 3 additions & 2 deletions gantry/tests/defs/collection.py
@@ -20,8 +20,8 @@

# used to compare successful insertions
# run SELECT * FROM table_name WHERE id = 1; via the python sqlite api and grab the fetchone() result
INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485)
INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge')
INSERTED_JOB = (1, 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 1, 1706117046, 1706118420, 9892514, 'success', 'pr42264_bugfix/mathomp4/hdf5-appleclang15', 'gmsh', '4.8.4', '{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}', 'gcc', '11.4.0', 'linux-ubuntu20.04-x86_64_v3', 'e4s', 16, 0.75, None, 1.899768349523097, 0.2971597591741076, 4.128116379389054, 0.2483743618267752, 1.7602635378120381, 2000000000.0, 48000000000.0, 143698407.6190476, 2785280.0, 594620416.0, 2785280.0, 252073065.82263485, 0.007552898472930367, 6.973888682208994e-05, 0.0045711276395970345, 0.0009008896396536045, 0.003975694444444444, 4.853142632378472e-13)
INSERTED_NODE = (1, 'ec253b04-b1dc-f08b-acac-e23df83b3602', 'ip-192-168-86-107.ec2.internal', 24.0, 196608000000.0, 'amd64', 'linux', 'i3en.6xlarge', 'us-east-1c', 'spot')

# these were obtained by executing the respective queries to Prometheus and capturing the JSON output
# or the raw output of PrometheusClient._query
@@ -32,6 +32,7 @@
VALID_CPU_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'container': 'build', 'cpu': 'total', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117145, '0.2483743618267752'], [1706117146, '0.25650526138466395'], [1706117147, '0.26463616094255266'], [1706117148, '0.2727670605004414'], [1706117149, '0.28089796005833007'], [1706117150, '0.2890288596162188'], [1706117151, '0.2971597591741076'], [1706117357, '3.7319005481816236'], [1706117358, '3.7319005481816236'], [1706117359, '3.7319005481816236'], [1706117360, '3.7319005481816245'], [1706117361, '3.7319005481816245'], [1706118420, '4.128116379389054']]}]}}
VALID_NODE_INFO = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_info', 'container': 'kube-state-metrics', 'container_runtime_version': 'containerd://1.7.2', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'internal_ip': '192.168.86.107', 'job': 'kube-state-metrics', 'kernel_version': '5.10.205-195.804.amzn2.x86_64', 'kubelet_version': 'v1.27.9-eks-5e0fdde', 'kubeproxy_version': 'v1.27.9-eks-5e0fdde', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'os_image': 'Amazon Linux 2', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'provider_id': 'aws:///us-east-1c/i-0fe9d9c99fdb3631d', 'service': 'kube-prometheus-stack-kube-state-metrics', 'system_uuid': 'ec253b04-b1dc-f08b-acac-e23df83b3602'}, 'value': [1706117733, '1']}]}}
VALID_NODE_LABELS = {'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'__name__': 'kube_node_labels', 'container': 'kube-state-metrics', 'endpoint': 'http', 'instance': '192.168.164.84:8080', 'job': 'kube-state-metrics', 'label_beta_kubernetes_io_arch': 'amd64', 'label_beta_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_beta_kubernetes_io_os': 'linux', 'label_failure_domain_beta_kubernetes_io_region': 'us-east-1', 'label_failure_domain_beta_kubernetes_io_zone': 'us-east-1c', 'label_k8s_io_cloud_provider_aws': 'ceb9f9cc8e47252a6f7fe7d6bded2655', 'label_karpenter_k8s_aws_instance_category': 'i', 'label_karpenter_k8s_aws_instance_cpu': '24', 'label_karpenter_k8s_aws_instance_encryption_in_transit_supported': 'true', 'label_karpenter_k8s_aws_instance_family': 'i3en', 'label_karpenter_k8s_aws_instance_generation': '3', 'label_karpenter_k8s_aws_instance_hypervisor': 'nitro', 'label_karpenter_k8s_aws_instance_local_nvme': '15000', 'label_karpenter_k8s_aws_instance_memory': '196608', 'label_karpenter_k8s_aws_instance_network_bandwidth': '25000', 'label_karpenter_k8s_aws_instance_pods': '234', 'label_karpenter_k8s_aws_instance_size': '6xlarge', 'label_karpenter_sh_capacity_type': 'spot', 'label_karpenter_sh_initialized': 'true', 'label_karpenter_sh_provisioner_name': 'glr-x86-64-v4', 'label_kubernetes_io_arch': 'amd64', 'label_kubernetes_io_hostname': 'ip-192-168-86-107.ec2.internal', 'label_kubernetes_io_os': 'linux', 'label_node_kubernetes_io_instance_type': 'i3en.6xlarge', 'label_spack_io_pipeline': 'true', 'label_spack_io_x86_64': 'v4', 'label_topology_ebs_csi_aws_com_zone': 'us-east-1c', 'label_topology_kubernetes_io_region': 'us-east-1', 'label_topology_kubernetes_io_zone': 'us-east-1c', 'namespace': 'monitoring', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'kube-prometheus-stack-kube-state-metrics-dbd66d8c7-6ftw8', 'service': 'kube-prometheus-stack-kube-state-metrics'}, 'value': [1706117733, '1']}]}}
VALID_NODE_COST = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'karpenter_cloudprovider_instance_type_offering_price_estimate', 'capacity_type': 'spot', 'container': 'controller', 'endpoint': 'http-metrics', 'instance': '192.168.240.113:8000', 'instance_type': 'i3en.6xlarge', 'job': 'karpenter', 'namespace': 'karpenter', 'pod': 'karpenter-8488f7f6dc-ml7q8', 'region': 'us-east-1', 'service': 'karpenter', 'zone': 'us-east-1c'}, 'values': [[1723838829, '0.5']]}]}}

# modified version of VALID_MEMORY_USAGE to make the mean/stddev 0
INVALID_MEMORY_USAGE = {'status': 'success', 'data': {'resultType': 'matrix', 'result': [{'metric': {'__name__': 'container_memory_working_set_bytes', 'container': 'build', 'endpoint': 'https-metrics', 'id': '/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podd7aa13e0_998c_4f21_b1d6_62781f4980b0.slice/cri-containerd-48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1.scope', 'image': 'ghcr.io/spack/ubuntu20.04-runner-amd64-gcc-11.4:2023.08.01', 'instance': '192.168.86.107:10250', 'job': 'kubelet', 'metrics_path': '/metrics/cadvisor', 'name': '48a5e9e7d46655e73ba119fa16b65fa94ceed23c55157db8269b0b12f18f55d1', 'namespace': 'pipeline', 'node': 'ip-192-168-86-107.ec2.internal', 'pod': 'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z', 'service': 'kube-prometheus-stack-kubelet'}, 'values': [[1706117115, '0']]}]}}
2 changes: 1 addition & 1 deletion gantry/tests/defs/db.py
@@ -2,4 +2,4 @@
# fmt: off

# valid input into insert_node
NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge"}
NODE_INSERT_DICT = {"uuid": "ec253b04-b1dc-f08b-acac-e23df83b3602", "hostname": "ip-192-168-86-107.ec2.internal", "cores": 24.0, "mem": 196608000000.0, "arch": "amd64", "os": "linux", "instance_type": "i3en.6xlarge", "zone": "us-east-1c", "capacity_type": "spot"}
2 changes: 1 addition & 1 deletion gantry/tests/sql/insert_job.sql
@@ -1 +1 @@
INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671);
INSERT INTO jobs VALUES(1,'runner-hwwb-i3u-project-2-concurrent-1-s10tq41z',2,1706117046,1706118420,9892514,'success','pr42264_bugfix/mathomp4/hdf5-appleclang15','gmsh','4.8.4','{"alglib": true, "cairo": false, "cgns": true, "compression": true, "eigen": false, "external": false, "fltk": true, "gmp": true, "hdf5": false, "ipo": false, "med": true, "metis": true, "mmg": true, "mpi": true, "netgen": true, "oce": true, "opencascade": false, "openmp": false, "petsc": false, "privateapi": false, "shared": true, "slepc": false, "tetgen": true, "voropp": true, "build_system": "cmake", "build_type": "Release", "generator": "make"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',16,0.75,NULL,4.12532286694540495,3.15805864677520409,11.6038107294648877,0.248374361826775191,3.34888880339475214,2000000000.0,48000000000.0,1649868862.72588062,999763968.0,5679742976.0,2785280.0,1378705563.21018671,0.007552898472930367,6.973888682208994e-05,0.0045711276395970345,0.0009008896396536045,0.003975694444444444,4.853142632378472e-13);
2 changes: 1 addition & 1 deletion gantry/tests/sql/insert_node.sql
@@ -1,2 +1,2 @@
-- primary key is set to 2 to set up the test that checks for race conditions
INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge');
INSERT INTO nodes VALUES(2,'ec253b04-b1dc-f08b-acac-e23df83b3602','ip-192-168-86-107.ec2.internal',24.0,196608000000.0,'amd64','linux','i3en.6xlarge','us-east-1c','spot');