Skip to content

Commit

Permalink
Fix the duplicate click records problem
Browse files: browse the repository at this point in the history
  • Loading branch information
doryokujin committed Feb 19, 2024
1 parent bc17ee8 commit 6a276c3
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ td:
url_col: td_url
time_col: time
filter: td_url IS NOT NULL
# use_distinct: false
# use_distinct: true
-
is_audience_table: true
# db:
table: behavior_behv_orders
url_col: td_url
# time_col: timestamp
# filter:
# use_distinct: false
# use_distinct: true

conversions_tables:
507568:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def run(
cdp_audience_db,
input_table_customers,
input_table_clicks,
input_table_master_campaigns,
dest_db,
dest_table,
query_store_table,
Expand Down Expand Up @@ -126,7 +127,31 @@ def run(
f",MAX_BY(utm_content,time) AS utm_content",
f",MAX_BY(utm_connector,time) AS utm_connector",
f",MAX_BY(utm_term,time) AS utm_term",
f"FROM {input_db}.{input_table_clicks}",
f"FROM (",
f"SELECT",
f"time",
f",CAST(activation_step_id AS VARCHAR) AS activation_step_id",
f",cv_name",
f",utm_campaign",
f",utm_medium",
f",utm_source",
f",utm_content",
f",utm_connector",
f",utm_term",
f"FROM {input_db}.{input_table_clicks}",
f"UNION ALL",
f"SELECT",
f"time",
f",CAST(activation_step_id AS VARCHAR) AS activation_step_id",
f",cv_name",
f",utm_campaign",
f",utm_medium",
f",utm_source",
f",utm_content",
f",utm_connector",
f",utm_term",
f"FROM {input_db}.{input_table_master_campaigns}",
f")",
f"GROUP BY 1",
f") s3",
f"ON s1.activation_step_id = s3.activation_step_id",
Expand Down
26 changes: 25 additions & 1 deletion scenarios/cdp_campaign_management/queries/ingest_activations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,31 @@ LEFT OUTER JOIN (
,MAX_BY(utm_content,time) AS utm_content
,MAX_BY(utm_connector,time) AS utm_connector
,MAX_BY(utm_term,time) AS utm_term
FROM ${td.database}.${td.tables.clicks}
FROM (
SELECT
time
,CAST(activation_step_id AS VARCHAR) AS activation_step_id
,cv_name
,utm_campaign
,utm_medium
,utm_source
,utm_content
,utm_connector
,utm_term
FROM ${td.database}.${td.tables.clicks}
UNION ALL
SELECT
time
,CAST(activation_step_id AS VARCHAR) AS activation_step_id
,cv_name
,utm_campaign
,utm_medium
,utm_source
,utm_content
,utm_connector
,utm_term
FROM ${td.database}.${td.tables.master_campaigns}
)
GROUP BY 1
) s2
ON s1.activation_step_id = s2.activation_step_id
4 changes: 3 additions & 1 deletion scenarios/cdp_campaign_management/queries/ingest_clicks.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ FROM
${join_part}
LEFT OUTER JOIN (
SELECT
activation_step_id
CAST(activation_step_id AS VARCHAR) AS activation_step_id
,utm_source
,utm_medium
,utm_campaign
Expand All @@ -52,4 +52,6 @@ ON (
t1.utm_campaign = t3.utm_campaign
AND t1.utm_medium = t3.utm_medium
AND t1.utm_source = t3.utm_source
AND t1.cv_name = t3.cv_name
AND t1.activation_step_id = t3.activation_step_id
)
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
cdp_audience_db: cdp_audience_${ps_id}
input_table_customers: customers
input_table_clicks: ${td.tables.clicks}
input_table_master_campaigns: ${td.tables.master_campaigns}
dest_db: ${td.database}
dest_table: ${td.tables.tmp_activations}
query_store_table: ${td.tables.query_store}
Expand Down

0 comments on commit 6a276c3

Please sign in to comment.