Skip to content

Commit

Permalink
Bug 1905938 Support events with no metrics in glean_usage generator (#…
Browse files Browse the repository at this point in the history
…6358)

* Bug 1905938 Support events with no metrics in glean_usage generator

* fix event_error_monitoring

* not

* Apply suggestions from code review

Co-authored-by: Sean Rose <[email protected]>

---------

Co-authored-by: Sean Rose <[email protected]>
  • Loading branch information
BenWu and sean-rose authored Oct 16, 2024
1 parent 8d17916 commit d7c8a12
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 18 deletions.
2 changes: 0 additions & 2 deletions bqetl_project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,6 @@ generate:
- org_mozilla_ios_tiktok_reporter
- org_mozilla_ios_tiktok_reporter_tiktok_reportershare
- org_mozilla_tiktokreporter
- relay_backend # see https://mozilla-hub.atlassian.net/browse/DENG-3720
- monitor_backend # see https://bugzilla.mozilla.org/show_bug.cgi?id=1905444
events_stream:
skip_apps:
- bergamot
Expand Down
21 changes: 21 additions & 0 deletions sql_generators/glean_usage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import re
from collections import namedtuple
from functools import cache
from pathlib import Path

import requests
Expand Down Expand Up @@ -109,6 +110,26 @@ def list_tables(project_id, only_tables, table_filter, table_name="baseline_v1")
]


@cache
def _get_pings_without_metrics():
def metrics_in_schema(fields):
for field in fields:
if field["name"] == "metrics":
return True
return False

return {
(schema.bq_dataset_family, schema.bq_table_unversioned)
for schema in get_stable_table_schemas()
if "glean" in schema.schema_id and not metrics_in_schema(schema.schema)
}


def ping_has_metrics(dataset_family: str, unversioned_table: str) -> bool:
"""Return true if the given stable table schema has a metrics column at the top-level."""
return (dataset_family, unversioned_table) not in _get_pings_without_metrics()


def table_names_from_baseline(baseline_table, include_project_id=True):
"""Return a dict with full table IDs for derived tables and views.
Expand Down
17 changes: 10 additions & 7 deletions sql_generators/glean_usage/event_error_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,31 +45,34 @@ def generate_across_apps(
and s.bq_table == "events_v1"
]

default_events_table = ConfigLoader.get(
default_event_table = ConfigLoader.get(
"generate",
"glean_usage",
"events_monitoring",
"default_event_table",
fallback="events_v1",
)
events_table_overwrites = ConfigLoader.get(
"generate", "glean_usage", "events_monitoring", "event_table", fallback={}
)

# Skip any not-allowed app.
skip_apps = ConfigLoader.get(
"generate", "glean_usage", "events_monitoring", "skip_apps", fallback=[]
)

apps = [app for app in apps if app[0]["app_name"] not in skip_apps]
apps = [
app
for app in apps
if app[0]["app_name"] not in skip_apps
# errors are from metrics in glean-core/js; nothing to monitor for server apps
and "glean-server" not in app[0]["dependencies"]
and "glean-server-metrics-compat" not in app[0]["dependencies"]
]

render_kwargs = dict(
project_id=project_id,
target_table=f"{TARGET_DATASET_CROSS_APP}_derived.{AGGREGATE_TABLE_NAME}",
apps=apps,
prod_datasets=prod_datasets_with_event,
default_events_table=default_events_table,
events_table_overwrites=events_table_overwrites,
default_events_table=default_event_table,
)
render_kwargs.update(self.custom_render_kwargs)

Expand Down
5 changes: 4 additions & 1 deletion sql_generators/glean_usage/events_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re

from bigquery_etl.config import ConfigLoader
from sql_generators.glean_usage.common import GleanTable
from sql_generators.glean_usage.common import GleanTable, ping_has_metrics

TARGET_TABLE_ID = "events_stream_v1"
PREFIX = "events_stream"
Expand Down Expand Up @@ -61,10 +61,13 @@ def generate_per_app_id(
else:
has_profile_group_id = False

unversioned_table_name = re.sub(r"_v[0-9]+$", "", baseline_table.split(".")[-1])

self.custom_render_kwargs = {
"has_profile_group_id": has_profile_group_id,
"has_legacy_telemetry_client_id": has_legacy_telemetry_client_id,
"metrics_as_struct": metrics_as_struct,
"has_metrics": ping_has_metrics(app_id, unversioned_table_name),
}

super().generate_per_app_id(
Expand Down
7 changes: 6 additions & 1 deletion sql_generators/glean_usage/events_unnested.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Generate unnested events queries for Glean apps."""

from sql_generators.glean_usage.common import GleanTable
from sql_generators.glean_usage.common import GleanTable, ping_has_metrics

TARGET_TABLE_ID = "events_unnested_v1"
PREFIX = "events_unnested"
Expand Down Expand Up @@ -34,4 +34,9 @@ def generate_per_app(
"""Generate the events_unnested table query per app_name."""
target_dataset = app_info[0]["app_name"]
if target_dataset not in DATASET_SKIP:
self.custom_render_kwargs = {
"has_metrics": ping_has_metrics(
app_info[0]["bq_dataset_family"], "events"
)
}
super().generate_per_app(project_id, app_info, output_dir, id_token=id_token)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ UNION ALL
SELECT
"{{ dataset }}" AS normalized_app_id,
e.*
EXCEPT (events, metrics)
EXCEPT (events {%- if has_metrics %}, metrics{% endif %})
REPLACE(
{% if app_name == "fenix" -%}
mozfun.norm.fenix_app_info("{{ dataset }}", client_info.app_build).channel AS normalized_channel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@
client_info.app_channel AS channel,
metrics.labeled_counter
FROM
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_stable.{{
default_events_table
if dataset['bq_dataset_family'] not in events_table_overwrites
else events_table_overwrites[dataset['bq_dataset_family']]
}}`
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_stable.{{ default_events_table }}`
WHERE
DATE(submission_timestamp) = @submission_date
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ WITH base AS (
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info
{% if not metrics_as_struct %}
{% if not metrics_as_struct and has_metrics %}
,
metrics_to_json(TO_JSON(metrics)) AS metrics
{% endif %}
Expand Down

0 comments on commit d7c8a12

Please sign in to comment.