Skip to content

Commit

Permalink
Improvements to allow user_id to be set per ps_id
Browse files (browse the repository at this point in the history)
  • Loading branch information
doryokujin committed Feb 15, 2024
1 parent 26d9145 commit 7670643
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 39 deletions.
2 changes: 1 addition & 1 deletion scenarios/cdp_campaign_management/check_settings.dig
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
+check_settings:
py>: py_scripts.check_settings.run
user_id: ${td.user_id}
user_id: ${user_id}
clicks_tables: ${td.clicks_tables}
conversions_tables: ${td.conversions_tables}
mta_settings: ${td.mta}
Expand Down
4 changes: 2 additions & 2 deletions scenarios/cdp_campaign_management/common/user_settings.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# timezone:
td:
user_id: #required

ps: #required

user_id: #required

activations_tables: #required

clicks_tables: #required
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
timezone: JST
td:
user_id: td_client_id # One of the user identifiers defined in the Parent Segment
ps:
- 507568

user_id:
507568: td_client_id

activations_tables:
507568:
scan_journey_tables: true
Expand All @@ -15,7 +18,7 @@ td:
url_col: td_url
time_col: time
filter: td_url IS NOT NULL
use_distinct: false
# use_distinct: false
-
is_audience_table: true
# db:
Expand All @@ -35,7 +38,7 @@ td:
time_col: time
val_col: 1
acquired_revenue_per_person: 20000
use_distinct: true
# use_distinct: false
-
is_audience_table: true
table: behavior_behv_website
Expand All @@ -44,7 +47,7 @@ td:
# time_col: timestamp
val_col: 1
acquired_revenue_per_person: 20000
use_distinct: false
# use_distinct: false
-
is_audience_table: true
table: behavior_behv_orders
Expand All @@ -65,6 +68,6 @@ td:
session_model:
allowable_time_to_cv: 24*10

# utm_names:
# utm_cv: utm_term
utm_names:
utm_cv: utm_term

9 changes: 4 additions & 5 deletions scenarios/cdp_campaign_management/incremental_ingest.dig
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ _export:

time_column: "${tbl_info.is_audience_table ? 'timestamp' : tbl_info.time_col}"
input_db: "${tbl_info.is_audience_table ? 'cdp_audience_' + ps_id : tbl_info.db}"
user_column: "${tbl_info.is_audience_table ? 't2.' + td.user_id : 't1.' + td.user_id}"
user_column_inner: "${tbl_info.is_audience_table ? 'cdp_customer_id' : td.user_id}"
user_column: "${tbl_info.is_audience_table ? 't2.' + user_id : 't1.' + user_id}"
user_column_inner: "${tbl_info.is_audience_table ? 'cdp_customer_id' : user_id}"
join_part: "${tbl_info.is_audience_table ? 'JOIN cdp_audience_' + ps_id + '.customers t2 ON t1.cdp_customer_id = t2.cdp_customer_id' : ''}"
distinct: "${typeof tbl_info.use_distinct === 'undefined' || !tbl_info.use_distinct ? '' : 'DISTINCT'}"

Expand Down Expand Up @@ -167,8 +167,8 @@ _export:
acquired_revenue_per_person: ${tbl_info.acquired_revenue_per_person}

time_column: "${tbl_info.is_audience_table ? 'timestamp' : tbl_info.time_col}"
user_column: "${tbl_info.is_audience_table ? 't2.' + td.user_id : 't1.' + td.user_id}"
inner_user_column: "${tbl_info.is_audience_table ? 'cdp_customer_id' : td.user_id}"
user_column: "${tbl_info.is_audience_table ? 't2.' + user_id : 't1.' + user_id}"
inner_user_column: "${tbl_info.is_audience_table ? 'cdp_customer_id' : user_id}"
input_db: "${tbl_info.is_audience_table ? 'cdp_audience_' + ps_id : tbl_info.db}"
join_part: "${tbl_info.is_audience_table ? 'JOIN cdp_audience_' + ps_id + '.customers t2 ON t1.cdp_customer_id = t2.cdp_customer_id' : ''}"
distinct: "${typeof tbl_info.use_distinct === 'undefined' || !tbl_info.use_distinct ? '' : 'DISTINCT'}"
Expand Down Expand Up @@ -209,7 +209,6 @@ _export:
_do:
_export:
cv_name: ${tbl_info.cv_name}
user_id: ${td.user_id}
input_db: ${td.database}
input_table_activations: ${td.tables.activations}
input_table_clicks: ${td.tables.clicks}
Expand Down
9 changes: 4 additions & 5 deletions scenarios/cdp_campaign_management/initial_ingest.dig
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ _export:

time_column: "${tbl_info.is_audience_table ? 'timestamp' : tbl_info.time_col}"
input_db: "${tbl_info.is_audience_table ? 'cdp_audience_' + ps_id : tbl_info.db}"
user_column: "${tbl_info.is_audience_table ? 't2.' + td.user_id : 't1.' + td.user_id}"
user_column_inner: "${tbl_info.is_audience_table ? 'cdp_customer_id' : td.user_id}"
user_column: "${tbl_info.is_audience_table ? 't2.' + user_id : 't1.' + user_id}"
user_column_inner: "${tbl_info.is_audience_table ? 'cdp_customer_id' : user_id}"
join_part: "${tbl_info.is_audience_table ? 'JOIN cdp_audience_' + ps_id + '.customers t2 ON t1.cdp_customer_id = t2.cdp_customer_id' : ''}"
distinct: "${typeof tbl_info.use_distinct === 'undefined' || !tbl_info.use_distinct ? '' : 'DISTINCT'}"

Expand Down Expand Up @@ -249,8 +249,8 @@ _export:
acquired_revenue_per_person: ${tbl_info.acquired_revenue_per_person}

time_column: "${tbl_info.is_audience_table ? 'timestamp' : tbl_info.time_col}"
user_column: "${tbl_info.is_audience_table ? 't2.' + td.user_id : 't1.' + td.user_id}"
inner_user_column: "${tbl_info.is_audience_table ? 'cdp_customer_id' : td.user_id}"
user_column: "${tbl_info.is_audience_table ? 't2.' + user_id : 't1.' + user_id}"
inner_user_column: "${tbl_info.is_audience_table ? 'cdp_customer_id' : user_id}"
input_db: "${tbl_info.is_audience_table ? 'cdp_audience_' + ps_id : tbl_info.db}"
join_part: "${tbl_info.is_audience_table ? 'JOIN cdp_audience_' + ps_id + '.customers t2 ON t1.cdp_customer_id = t2.cdp_customer_id' : ''}"
distinct: "${typeof tbl_info.use_distinct === 'undefined' || !tbl_info.use_distinct ? '' : 'DISTINCT'}"
Expand Down Expand Up @@ -313,7 +313,6 @@ _export:
_do:
_export:
cv_name: ${tbl_info.cv_name}
user_id: ${td.user_id}
input_db: ${td.database}
input_table_activations: ${td.tables.activations}
input_table_clicks: ${td.tables.clicks}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ _export:
_export:
td:
database: ${td.base_db_name}_${ps_id}
user_id: ${td.user_id[ps_id]}

+prepare_dbs:
td_ddl>:
Expand Down
1 change: 1 addition & 0 deletions scenarios/cdp_campaign_management/main_initial_ingest.dig
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ _export:
_export:
td:
database: ${td.base_db_name}_${ps_id}
user_id: ${td.user_id[ps_id]}

+prepare_dbs:
td_ddl>:
Expand Down
12 changes: 8 additions & 4 deletions scenarios/cdp_campaign_management/py_scripts/check_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@ def run(
mta_settings):

error_counter = 0
if user_id == "" or user_id == None:
print("user_id is not set.")

user_ids = json.loads(user_id)
print(user_ids)
if len(user_ids) == 0:
print("⚠ error: `user_id` is not set.")
error_counter += 1
else:
print(f"ⓘ {user_id} is used as the conversion journey identifier.")
for ps_id in user_ids:
print(f"ⓘ {user_ids[ps_id]} is used in {ps_id} as the conversion journey identifier.")


click_tables = json.loads(clicks_tables)
for ps_id in click_tables:
idx = 0

for table_setting in click_tables[ps_id]:
idx += 1
for key in ('table','url_col','is_audience_table'):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
DROP TABLE IF EXISTS ${dest_db}.${dest_table};
CREATE TABLE IF NOT EXISTS ${dest_db}.${dest_table} (
time bigint
,${td.user_id} varchar
,${user_id} varchar
,activation_step_id varchar
,syndication_id varchar
,activation_type varchar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS ${dest_db}.${dest_table} (
,time_s varchar
,db_name varchar
,table_name varchar
,${td.user_id} varchar
,${user_id} varchar
,activation_step_id varchar
,cv_name varchar
,utm_campaign varchar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ WITH tbl_base_activations AS
(
SELECT
t1.time
,${td.user_id}
,${user_id}
,cdp_customer_id
,t1.syndication_id
,COALESCE(type,'segment') AS activation_type
Expand All @@ -16,7 +16,7 @@ WITH tbl_base_activations AS
(
SELECT
time
,identifier AS ${td.user_id}
,identifier AS ${user_id}
,audience_id
,NULL AS cdp_customer_id
,CAST(activation_id AS VARCHAR) AS syndication_id
Expand All @@ -25,7 +25,7 @@ WITH tbl_base_activations AS
,activation_name
,integration_type AS connector_type
FROM ${cdp_audience_db}.${td.tables.activation_log}
WHERE identifier_type = '${td.user_id}'
WHERE identifier_type = '${user_id}'
AND CAST(audience_id AS VARCHAR) = '${ps_id}'
AND TD_TIME_RANGE(time,${time_from},${time_to})
) t1
Expand All @@ -43,7 +43,7 @@ WITH tbl_base_activations AS

SELECT
time
,${td.user_id}
,${user_id}
,s1.activation_step_id
,syndication_id
,activation_type
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
WITH tbl_cv_history AS
(
SELECT time, cv_name, ${td.user_id}, ROW_NUMBER()OVER(ORDER BY time) AS cv_id
SELECT time, cv_name, ${user_id}, ROW_NUMBER()OVER(ORDER BY time) AS cv_id
FROM ${td.tables.conversion_journeys}
WHERE cv_flg = 1
AND TD_TIME_RANGE(time, ${time_from}, ${time_to})
Expand All @@ -11,16 +11,16 @@ WITH tbl_cv_history AS
(
SELECT
cv_id
,ROW_NUMBER()OVER(PARTITION BY raw_data.cv_name, raw_data.${td.user_id}, raw_data.time, activation_step_id, type ORDER BY cv_id) AS cv_order
,ROW_NUMBER()OVER(PARTITION BY raw_data.cv_name, raw_data.${user_id}, raw_data.time, activation_step_id, type ORDER BY cv_id) AS cv_order
,cv_history.time AS cv_time
,(cv_history.time - raw_data.time)/3600 AS time_hour_to_cv
, raw_data.*
FROM ${td.tables.conversion_journeys} raw_data
JOIN tbl_cv_history cv_history
ON raw_data.${td.user_id} = cv_history.${td.user_id}
ON raw_data.${user_id} = cv_history.${user_id}
AND raw_data.cv_name = cv_history.cv_name
WHERE raw_data.time <= cv_history.time
AND raw_data.${td.user_id} <= cv_history.${td.user_id}
AND raw_data.${user_id} <= cv_history.${user_id}
AND type <> 'Activation'
)
WHERE cv_order = 1
Expand Down Expand Up @@ -54,7 +54,7 @@ WITH tbl_cv_history AS
ELSE 'Middle Click'
END AS click_type
,type
,${td.user_id}
,${user_id}
,activation_step_id
,utm_source
,utm_medium
Expand All @@ -72,8 +72,8 @@ WITH tbl_cv_history AS
SELECT
time
,cv_time
,${td.user_id}
,TD_MD5( CONCAT(cv_name, CAST(cv_id AS VARCHAR),CAST(cv_time AS VARCHAR),${td.user_id}) ) AS cv_id
,${user_id}
,TD_MD5( CONCAT(cv_name, CAST(cv_id AS VARCHAR),CAST(cv_time AS VARCHAR),${user_id}) ) AS cv_id
,position
,time_hour_to_cv
,time_hour_to_next
Expand Down Expand Up @@ -120,4 +120,4 @@ SELECT

FROM tbl_mta_base
WHERE size_journey > 0
-- ORDER BY ${td.user_id}, cv_id, position
-- ORDER BY ${user_id}, cv_id, position
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
session_unixtime: ${session_unixtime}
time_from: ${time_from}
time_to: ${time_to}
user_id: ${td.user_id}
user_id: ${td.user_id[ps_id]}
input_db: ${td.database}
input_table_master_activations: ${td.tables.master_activations}
input_table_daily_activations_info: ${td.tables.daily_activations_info}
Expand Down
2 changes: 1 addition & 1 deletion scenarios/cdp_campaign_management/write_to_gsheet.dig
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@

+export_mta:
td>:
query: SELECT TD_TIME_STRING(time,'d!','${timezone}') AS date,* FROM ${td.tables.mta_conversion_journeys} ORDER BY cv_name, ${td.user_id}, cv_id, position LIMIT 1000
query: SELECT TD_TIME_STRING(time,'d!','${timezone}') AS date,* FROM ${td.tables.mta_conversion_journeys} ORDER BY cv_name, ${user_id}, cv_id, position LIMIT 1000
result_connection: ${gsheet.result_connection}
result_settings:
spreadsheet_folder: ${gsheet.sheet_folder}
Expand Down

0 comments on commit 7670643

Please sign in to comment.