-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: create ETL for computing KPIs using the new dau_reporting ping
- Loading branch information
Showing
5 changed files
with
325 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
"""Generate active users aggregates per app.""" | ||
|
||
from dataclasses import asdict, dataclass | ||
from enum import Enum | ||
from os import path | ||
from pathlib import Path | ||
|
||
import click | ||
from jinja2 import Environment, FileSystemLoader | ||
|
||
from bigquery_etl.cli.utils import use_cloud_function_option | ||
from bigquery_etl.format_sql.formatter import reformat | ||
from bigquery_etl.util.common import write_sql | ||
|
||
GENERATOR_ROOT = Path(path.dirname(__file__)) | ||
|
||
HEADER = f"-- Query generated via `{GENERATOR_ROOT.name}` SQL generator." | ||
VERSION = "v1" | ||
TEMPLATES = (("active_users.view.sql"),) | ||
BIGEYE_COLLECTION = "Browser Metrics KPI" | ||
|
||
|
||
@dataclass | ||
class Product: | ||
"""Encapsulation of what we expect a 'Product' to look like in this generator.""" | ||
|
||
friendly_name: str | ||
is_mobile_kpi: bool = False | ||
|
||
|
||
class MobileProducts(Enum): | ||
"""Enumeration with browser names and equivalent dataset names.""" | ||
|
||
fenix = Product( | ||
friendly_name="Fenix", | ||
is_mobile_kpi=True, | ||
) | ||
# focus_android = Product( | ||
# friendly_name="Focus Android", | ||
# is_mobile_kpi=True, | ||
# ) | ||
# klar_android = Product( | ||
# friendly_name="Klar Android", | ||
# ) | ||
# firefox_ios = Product( | ||
# friendly_name="Firefox iOS", | ||
# is_mobile_kpi=True, | ||
# ) | ||
# focus_ios = Product( | ||
# friendly_name="Focus iOS", | ||
# is_mobile_kpi=True, | ||
# ) | ||
# klar_ios = Product( | ||
# friendly_name="Klar iOS", | ||
# ) | ||
|
||
|
||
@click.command() | ||
@click.option( | ||
"--output-dir", | ||
"--output_dir", | ||
help="Output directory generated SQL is written to", | ||
type=click.Path(file_okay=False), | ||
default="sql", | ||
) | ||
@click.option( | ||
"--target-project", | ||
"--target_project", | ||
help="Google Cloud project ID", | ||
default="moz-fx-data-shared-prod", | ||
) | ||
@use_cloud_function_option | ||
def generate(target_project, output_dir, use_cloud_function): | ||
"""Generate per-app queries, views and metadata for active users and search counts aggregates. | ||
The parent folders will be created if not existing and existing files will be overwritten. | ||
""" | ||
env = Environment(loader=FileSystemLoader(str(GENERATOR_ROOT / "templates"))) | ||
output_dir = Path(output_dir) / target_project | ||
|
||
default_template_args = { | ||
"header": HEADER, | ||
"version": VERSION, | ||
"project_id": target_project, | ||
"bigeye_collection": BIGEYE_COLLECTION, | ||
} | ||
|
||
query_support_configs = ( | ||
"checks.sql", | ||
"metadata.yaml", | ||
"schema.yaml", | ||
"bigconfig.yml", | ||
) | ||
|
||
for template_grain, template in TEMPLATES: | ||
for product in MobileProducts: | ||
|
||
target_name, target_filename, target_extension = template.split(".") | ||
|
||
target_dataset = ( | ||
product.name + "_derived" | ||
if target_filename == "query" | ||
else product.name | ||
) | ||
|
||
table_id = f"{target_project}.{target_dataset}.{target_name}" | ||
|
||
full_table_id = ( | ||
table_id + f"_{VERSION}" if target_filename == "query" else table_id | ||
) | ||
|
||
product_args = { | ||
"dataset": product.name, | ||
"target_name": target_name, | ||
"app_name": product.name, | ||
"name": target_name, | ||
} | ||
|
||
sql_template = env.get_template(template) | ||
rendered_sql = reformat( | ||
sql_template.render( | ||
**default_template_args, | ||
**asdict(product.value), | ||
**product_args, | ||
) | ||
) | ||
|
||
write_sql( | ||
output_dir=output_dir, | ||
full_table_id=full_table_id, | ||
basename=f"{target_filename}.{target_extension}", | ||
sql=rendered_sql, | ||
skip_existing=False, | ||
) | ||
|
||
# we only want to copy files defined in query_support_configs for query files. | ||
if target_filename != "query": | ||
continue | ||
|
||
for query_support_config in query_support_configs: | ||
support_config_template = env.get_template( | ||
f"{target_name}.{query_support_config}" | ||
) | ||
support_config_rendered = support_config_template.render( | ||
**default_template_args, | ||
**asdict(product.value), | ||
**product_args, | ||
format=False, | ||
) | ||
|
||
write_sql( | ||
output_dir=output_dir, | ||
full_table_id=full_table_id, | ||
basename=query_support_config, | ||
sql=( | ||
reformat(support_config_rendered) | ||
if query_support_config.endswith(".sql") | ||
else support_config_rendered | ||
), | ||
skip_existing=False, | ||
) | ||
|
||
# we only want to generate a union view inside telemetry for views | ||
if target_filename != "view": | ||
continue | ||
|
||
target_dataset = "telemetry" | ||
|
||
union_target_name = f"mobile_{target_name}" | ||
|
||
union_sql_template = env.get_template("union.view.sql") | ||
union_sql_rendered = union_sql_template.render( | ||
**default_template_args, | ||
dataset=target_dataset, | ||
name=target_name, | ||
target_name=union_target_name, | ||
target_filename=target_filename, | ||
format=False, | ||
products=[ | ||
{ | ||
"name": product.name, | ||
} | ||
for product in MobileProducts | ||
], | ||
) | ||
|
||
write_sql( | ||
output_dir=output_dir, | ||
full_table_id=f"{target_project}.{target_dataset}.{union_target_name}", | ||
basename=f"{target_filename}.{target_extension}", | ||
sql=(reformat(union_sql_rendered)), | ||
skip_existing=False, | ||
) |
59 changes: 59 additions & 0 deletions
59
sql_generators/kpi_reporting/templates/active_user_aggregates.query.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
{{ header }} | ||
SELECT | ||
submission_date, | ||
client_id, | ||
app_name, | ||
normalized_channel, | ||
EXTRACT(YEAR FROM first_seen_date) AS first_seen_year, | ||
submission_date = first_seen_date AS is_new_profile, | ||
IFNULL(country, '??') country, | ||
city, | ||
COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale, NULL) AS locale, | ||
normalized_os, | ||
-- normalized_os_version, | ||
COALESCE( | ||
SAFE_CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(0)], "") AS INTEGER), | ||
0 | ||
) AS os_version_major, | ||
COALESCE( | ||
SAFE_CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(1)], "") AS INTEGER), | ||
0 | ||
) AS os_version_minor, | ||
COALESCE( | ||
SAFE_CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(2)], "") AS INTEGER), | ||
0 | ||
) AS os_version_patch, | ||
app_display_version AS app_version, | ||
device_model, | ||
distribution_id, | ||
activity_segment AS segment, | ||
COUNTIF(is_daily_user) AS daily_users, | ||
COUNTIF(is_weekly_user) AS weekly_users, | ||
COUNTIF(is_monthly_user) AS monthly_users, | ||
COUNTIF(is_dau) AS dau, | ||
COUNTIF(is_wau) AS wau, | ||
COUNTIF(is_mau) AS mau, | ||
-- SUM(uri_count) AS uri_count, | ||
-- SUM(active_hours_sum) AS active_hours, | ||
FROM | ||
`{{ project_id }}.{{ dataset }}.reporting_users` | ||
WHERE | ||
submission_date = @submission_date | ||
GROUP BY | ||
submission_date, | ||
client_id, | ||
app_name, | ||
normalized_channel, | ||
first_seen_year, | ||
is_new_profile, | ||
country, | ||
city, | ||
locale, | ||
normalized_os, | ||
os_version_major, | ||
os_version_minor, | ||
os_version_patch, | ||
app_version, | ||
device_model, | ||
distribution_id, | ||
segment |
8 changes: 8 additions & 0 deletions
8
sql_generators/kpi_reporting/templates/active_user_aggregates.view.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{{ header }} | ||
CREATE OR REPLACE VIEW | ||
`{{ project_id }}.{{ dataset }}.{{ name }}` | ||
AS | ||
SELECT | ||
* | ||
FROM | ||
`{{ project_id }}.{{ dataset }}.{{ name }}_{{ version }}` |
51 changes: 51 additions & 0 deletions
51
sql_generators/kpi_reporting/templates/reporting_users.view.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
{{ header }} | ||
CREATE OR REPLACE VIEW | ||
`{{ project_id }}.{{ dataset }}.{{ name }}` | ||
AS | ||
SELECT | ||
* EXCEPT (isp), | ||
CASE | ||
WHEN LOWER(isp) = "browserstack" | ||
THEN CONCAT("{{ friendly_name }}", " ", isp) | ||
WHEN LOWER(distribution_id) = "mozillaonline" | ||
THEN CONCAT("{{ friendly_name }}", " ", distribution_id) | ||
ELSE "{{ friendly_name }}" | ||
END AS app_name, | ||
-- Activity fields to support metrics built on top of activity | ||
CASE | ||
WHEN BIT_COUNT(days_active_bits) | ||
BETWEEN 1 | ||
AND 6 | ||
THEN "infrequent_user" | ||
WHEN BIT_COUNT(days_active_bits) | ||
BETWEEN 7 | ||
AND 13 | ||
THEN "casual_user" | ||
WHEN BIT_COUNT(days_active_bits) | ||
BETWEEN 14 | ||
AND 20 | ||
THEN "regular_user" | ||
WHEN BIT_COUNT(days_active_bits) >= 21 | ||
THEN "core_user" | ||
ELSE "other" | ||
END AS activity_segment, | ||
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) = 0, FALSE) AS is_dau, | ||
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 7, FALSE) AS is_wau, | ||
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 28, FALSE) AS is_mau, | ||
-- Metrics based on pings sent | ||
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) = 0, FALSE) AS is_daily_user, | ||
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 7, FALSE) AS is_weekly_user, | ||
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 28, FALSE) AS is_monthly_user, | ||
{% if is_mobile_kpi %} | ||
( | ||
LOWER(IFNULL(isp, "")) <> "browserstack" | ||
AND LOWER(IFNULL(distribution_id, "")) <> "mozillaonline" | ||
) | ||
{% else %} | ||
FALSE | ||
{% endif %} AS is_mobile, | ||
-- Adding isp at the end because it's in different column index in baseline table for some products. | ||
-- This is to make sure downstream union works as intended. | ||
isp, | ||
FROM | ||
`{{ project_id }}.{{ dataset }}.dau_reporting_last_seen` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{{ header }} | ||
CREATE OR REPLACE VIEW | ||
`{{ project_id }}.{{ dataset }}.{{ target_name }}` | ||
AS | ||
{% for product in products %} | ||
{% if not loop.first %} | ||
UNION ALL | ||
{% endif %} | ||
SELECT | ||
*, | ||
"{{ product.name }}" AS product_name, | ||
FROM | ||
`{{ project_id }}.{{ product.name }}.{{ name }}` | ||
{% endfor %} |