From 0a2a599e1e3b243feba6b807599afc0c8e0a5b83 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Wed, 17 Jul 2024 18:08:17 +0200 Subject: [PATCH 1/2] DENG-3274 Add login funnel based on event metrics --- sql_generators/funnels/__init__.py | 18 ++- .../configs/login_funnels_by_service_v2.toml | 148 ++++++++++++++++++ sql_generators/funnels/templates/funnel.sql | 4 +- 3 files changed, 163 insertions(+), 7 deletions(-) create mode 100644 sql_generators/funnels/configs/login_funnels_by_service_v2.toml diff --git a/sql_generators/funnels/__init__.py b/sql_generators/funnels/__init__.py index e3db4e7c049..8c400db78ad 100644 --- a/sql_generators/funnels/__init__.py +++ b/sql_generators/funnels/__init__.py @@ -31,6 +31,17 @@ def generate_funnels(target_project, path, output_dir): config = converter.structure(toml.load(config_file), FunnelConfig) table_name = bq_normalize_name(config_file.stem) + match = re.match(r".+_v(\d+)$", table_name) + if match: + # file name ends with version number, check if it matches the version in the config + name_version = match.groups()[0] + if name_version != config.version: + raise ValueError( + f"Version in file name ({table_name}) does not match version in config ({config.version})" + ) + else: + table_name = f"{table_name}_v{config.version}" + env = Environment(loader=FileSystemLoader(TEMPLATES_PATH)) sql_template = env.get_template("funnel.sql") @@ -45,7 +56,7 @@ def generate_funnels(target_project, path, output_dir): ) write_sql( output_dir=output_dir, - full_table_id=f"{target_project}.{config.destination_dataset}.{table_name}_v{config.version}", + full_table_id=f"{target_project}.{config.destination_dataset}.{table_name}", basename="query.sql", sql=funnel_sql, skip_existing=False, @@ -59,10 +70,7 @@ def generate_funnels(target_project, path, output_dir): } ) ( - output_dir - / config.destination_dataset - / f"{table_name}_v{config.version}" - / "metadata.yaml" + output_dir / config.destination_dataset / f"{table_name}" / 
"metadata.yaml" ).write_text(rendered_metadata + "\n") diff --git a/sql_generators/funnels/configs/login_funnels_by_service_v2.toml b/sql_generators/funnels/configs/login_funnels_by_service_v2.toml new file mode 100644 index 00000000000..27b20bc5558 --- /dev/null +++ b/sql_generators/funnels/configs/login_funnels_by_service_v2.toml @@ -0,0 +1,148 @@ +destination_dataset = "accounts_frontend_derived" +platform = "accounts_frontend" +owners = ["ksiegler@mozilla.org"] # optional; users getting notification if funnel run fails +version = "2" +start_date = "2024-01-01" + +[funnels] + +[funnels.login_overall_success_by_service] + +friendly_name = "Login Funnel Conversion by Service" +description = "Overall login funnel conversion rates by Service" +steps = ["login_view", "login_complete"] +dimensions = ["service"] + +[funnels.login_submit_overall_success_by_service] + +friendly_name = "Login Funnel Conversion with Submit Events by Service" +description = "Overall login funnel conversion rates by Service" +steps = ["login_view", "login_submit", "login_success", "login_complete"] +dimensions = ["service"] + +[funnels.login_success_with_email_by_service] + +friendly_name = "Login Funnel Conversion by Service with Email Confirmation Step" +description = "Overall login funnel conversion rates by Service including email confirmation step" +steps = ["login_view", "login_submit", "login_success", "login_email_confirmation_view", "login_email_confirmation_submit", "login_email_confirmation_success", "login_complete"] +dimensions = ["service"] + +[funnels.login_success_with_2fa_by_service] + +friendly_name = "Login Funnel Conversion with 2FA Step by Service" +description = "Funnel steps from Login View through 2FA (no backup codes) by Service" +steps = ["login_view", "login_submit", "login_success", "login_two_factor_view", "login_two_factor_submit", "login_two_factor_success", "login_complete"] +dimensions = ["service"] + +[steps] + +[steps.login_view] +friendly_name = "Login 
View Form" +description = "View of the top of the login funnel" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.view' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_submit] +friendly_name = "Login Submit" +description = "Attempt to submit login form" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.submit' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_success] +friendly_name = "Login Success" +description = "Successful password submission on login form" +data_source = "accounts_backend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.success' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_email_confirmation_view] +friendly_name = "Login Email Confirmation View" +description = "View of the email confirmation form for login" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.email_confirmation_view' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_email_confirmation_submit] +friendly_name = "Login Email Confirmation Submit" +description = "The user successfully attempted to submit the email confirmation form" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.email_confirmation_submit' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + 
+[steps.login_email_confirmation_success] +friendly_name = "Successful Email Confirmation" +description = "The user successfully confirmed their email in login flow" +data_source = "accounts_backend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.email_confirmation_success' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_two_factor_view] +friendly_name = "Login 2FA Form" +description = "View of the login 2FA form" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.totp_form_view' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_two_factor_submit] +friendly_name = "Attempt to submit login 2FA form" +description = "The user successfully authenticated through 2FA" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.totp_code_submit' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_two_factor_success] +friendly_name = "Login 2FA Success" +description = "Successful submission of 2FA form" +data_source = "accounts_frontend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.totp_code_success_view' AND metrics.string.session_flow_id != ''" +aggregation = "count distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[steps.login_complete] +friendly_name = "Successful Login Completion" +description = "The login flow was completed successfully" +data_source = "accounts_backend" +select_expression = "metrics.string.session_flow_id" +where_expression = "event = 'login.complete' AND metrics.string.session_flow_id != ''" +aggregation = "count 
distinct" +join_previous_step_on = "metrics.string.session_flow_id" + +[data_sources] + +[data_sources.accounts_frontend] +from_expression = "mozdata.accounts_frontend.events_stream" +submission_date_column = "DATE(submission_timestamp)" +client_id_column = "client_id" + +[data_sources.accounts_backend] +from_expression = "mozdata.accounts_backend.events_stream" +submission_date_column = "DATE(submission_timestamp)" +client_id_column = "client_id" + +[dimensions] + +[dimensions.service] +data_source = "accounts_frontend" +select_expression = "IF(COALESCE(NULLIF(metrics.string.relying_party_oauth_client_id, ''), NULLIF(metrics.string.relying_party_service, '')) = 'sync', '5882386c6d801776', COALESCE(NULLIF(metrics.string.relying_party_oauth_client_id, ''), NULLIF(metrics.string.relying_party_service, '')))" +friendly_name = "Service" +description = "Oauth Client ID is used to map to service name for which service the user logged in through" +client_id_column = "client_id" diff --git a/sql_generators/funnels/templates/funnel.sql b/sql_generators/funnels/templates/funnel.sql index 28f5d8b8c2b..eb87eb030fc 100644 --- a/sql_generators/funnels/templates/funnel.sql +++ b/sql_generators/funnels/templates/funnel.sql @@ -20,10 +20,10 @@ {% endfor %} {% endif %} {{ data_sources[steps[step_name].data_source].submission_date_column }} AS submission_date, - {{ data_sources[steps[step_name].data_source].client_id_column }} AS client_id, + curr.{{ data_sources[steps[step_name].data_source].client_id_column }} AS client_id, {{ steps[step_name].select_expression }} AS column FROM - {{ data_sources[steps[step_name].data_source].from_expression }} + {{ data_sources[steps[step_name].data_source].from_expression }} AS curr {% if not loop.first and steps[step_name].join_previous_step_on %} INNER JOIN {{ funnel_name }}_{{ loop.previtem }} AS prev ON prev.submission_date = {{ data_sources[steps[step_name].data_source].submission_date_column }} From 57192273d6a9c62b1a2521a8f410dc4db33b928f 
Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Wed, 17 Jul 2024 18:45:17 +0200 Subject: [PATCH 2/2] Fix template --- .../funnels/configs/login_funnels_by_service_v2.toml | 8 ++++---- sql_generators/funnels/templates/funnel.sql | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql_generators/funnels/configs/login_funnels_by_service_v2.toml b/sql_generators/funnels/configs/login_funnels_by_service_v2.toml index 27b20bc5558..77ea7556840 100644 --- a/sql_generators/funnels/configs/login_funnels_by_service_v2.toml +++ b/sql_generators/funnels/configs/login_funnels_by_service_v2.toml @@ -129,14 +129,14 @@ join_previous_step_on = "metrics.string.session_flow_id" [data_sources] [data_sources.accounts_frontend] -from_expression = "mozdata.accounts_frontend.events_stream" +from_expression = "mozdata.accounts_frontend.events_stream AS es" submission_date_column = "DATE(submission_timestamp)" -client_id_column = "client_id" +client_id_column = "es.client_id" [data_sources.accounts_backend] -from_expression = "mozdata.accounts_backend.events_stream" +from_expression = "mozdata.accounts_backend.events_stream AS es" submission_date_column = "DATE(submission_timestamp)" -client_id_column = "client_id" +client_id_column = "es.client_id" [dimensions] diff --git a/sql_generators/funnels/templates/funnel.sql b/sql_generators/funnels/templates/funnel.sql index eb87eb030fc..28f5d8b8c2b 100644 --- a/sql_generators/funnels/templates/funnel.sql +++ b/sql_generators/funnels/templates/funnel.sql @@ -20,10 +20,10 @@ {% endfor %} {% endif %} {{ data_sources[steps[step_name].data_source].submission_date_column }} AS submission_date, - curr.{{ data_sources[steps[step_name].data_source].client_id_column }} AS client_id, + {{ data_sources[steps[step_name].data_source].client_id_column }} AS client_id, {{ steps[step_name].select_expression }} AS column FROM - {{ data_sources[steps[step_name].data_source].from_expression }} AS curr + {{ 
data_sources[steps[step_name].data_source].from_expression }} {% if not loop.first and steps[step_name].join_previous_step_on %} INNER JOIN {{ funnel_name }}_{{ loop.previtem }} AS prev ON prev.submission_date = {{ data_sources[steps[step_name].data_source].submission_date_column }}