Skip to content

Commit

Permalink
add segment sync solution
Browse files Browse the repository at this point in the history
  • Loading branch information
tdroberto committed Jan 22, 2025
1 parent 4658ab0 commit 2b1eb76
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 0 deletions.
20 changes: 20 additions & 0 deletions scenarios/segment_sync/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Segment sync (replication)

----
## Overview

This project provides a solution to replicating batch segment to real-time segment and vice versa.

----
## Implementation
1. Copy and paste the code into a custom script in Treasure Workflows.

----
## Considerations

This project doesn't convert "Time Within Past" and "Segment Include/Exclude" segment rules.

----
## Questions

Please feel free to reach out to [email protected] with any questions you have about using this code.
7 changes: 7 additions & 0 deletions scenarios/segment_sync/config/params.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mode: 0 # 0 - batch to rt, 1 - rt to batch
aud_api_ep: https://api-cdp.treasuredata.com
get_seg_list_path: /audiences/{audienceId}/folders/{folderId}/segments
post_new_seg_path: /audiences/{audienceId}/segments
audience: xxx
batch_folder: yyy
rt_folder: zzz
138 changes: 138 additions & 0 deletions scenarios/segment_sync/scripts/seg_rep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import os
import requests

def main(**kwargs):
# Init
td_api_key = os.getenv("TD_API_KEY")
aud_api_ep = kwargs.get("aud_api_ep")
audience = str(kwargs.get("audience"))
batch_folder = str(kwargs.get("batch_folder"))
rt_folder = str(kwargs.get("rt_folder"))
get_seg_list_path = kwargs.get("get_seg_list_path")
post_new_seg_path = kwargs.get("post_new_seg_path")
mode = kwargs.get("mode")
get_b_seg_list_path = aud_api_ep + get_seg_list_path.replace("{audienceId}", audience).replace("{folderId}", batch_folder)
get_rt_seg_list_path = aud_api_ep + get_seg_list_path.replace("{audienceId}", audience).replace("{folderId}", rt_folder)
post_new_seg_path = aud_api_ep + post_new_seg_path.replace("{audienceId}", audience)
b_data_dict = {}
b_name_set = set()
rt_data_dict = {}
rt_name_set = set()

# Get segments data via Audience API
def get_segs():
payload = {}
headers = {
"Authorization": f"TD1 {td_api_key}"
}
batch_segs = requests.request("GET", get_b_seg_list_path, headers = headers, data = payload)
rt_segs = requests.request("GET", get_rt_seg_list_path, headers = headers, data = payload)

if batch_segs.status_code == 200 and rt_segs.status_code == 200:
batch_segs_list = batch_segs.json()
rt_segs_list = rt_segs.json()
elif batch_segs != 200 and rt_segs.status_code != 200:
print(f"Failed to fetch batch segments. Status code: {batch_segs.status_code} - {batch_segs.reason} - {batch_segs.text}")
print(f"Failed to fetch real-time segments. Status code: {rt_segs.status_code} - {rt_segs.reason} - {rt_segs.text}")
exit()
elif batch_segs == 200 and rt_segs.status_code != 200:
print(f"Failed to fetch real-time segments. Status code: {rt_segs.status_code} - {rt_segs.reason} - {rt_segs.text}")
exit()
elif batch_segs != 200 and rt_segs.status_code == 200:
print(f"Failed to fetch batch segments. Status code: {rt_segs.status_code} - {rt_segs.reason} - {rt_segs.text}")
exit()
else:
print("Unknown error.")
exit()

return batch_segs_list, rt_segs_list

# Build segment lists
def build_seg_lists():
for i in range(len(batch_segs_list)):
b_data_dict[batch_segs_list[i]["name"]] = [batch_segs_list[i]["id"], batch_segs_list[i]["description"], batch_segs_list[i]["rule"]]
b_name_set.add(batch_segs_list[i]["name"])
for j in range(len(rt_segs_list)):
rt_data_dict[rt_segs_list[j]["name"]] = [rt_segs_list[j]["id"], rt_segs_list[j]["description"], rt_segs_list[j]["rule"]]
rt_name_set.add(rt_segs_list[j]["name"])

# Post new segment via Audience API
def post_seg(seg_name, headers, payload):
new_seg = requests.request("POST", post_new_seg_path, headers = headers, json = payload)
if new_seg.status_code == 200:
print(f"Segment synced OK: '{seg_name}'")
else:
print(f"Failed to post segment: '{seg_name}' - Status code: {new_seg.status_code} - {new_seg.reason} - {new_seg.text}")

# Segments that have time within past operator or segment reference
def detect_unconvertible(sn, rule):
twp = False
seg_ref = False
for c in rule.get("conditions", []):
if "conditions" in c:
for sc in c["conditions"]:
if str(sc).find("'type': 'TimeWithinPast'") > 0:
print(f"TWP operator found in '{sn}'.")
twp = True
if str(sc).find("'type': 'Reference'") > 0:
print(f"Segment reference found in '{sn}'.")
seg_ref = True
return twp or seg_ref

# Segments that already exist by segment name
def detect_existing(sn, ns2):
seg_exists = False
if sn in ns2:
print(f"Segment '{sn}' already exists.")
seg_exists = True
return seg_exists

# Compare segment lists and create diff
def replicate_diff():
if mode == 0:
folder = rt_folder
kind = 1
name_set1 = b_name_set
name_set2 = rt_name_set
data_dict = b_data_dict
elif mode == 1:
folder = batch_folder
kind = 0
name_set1 = rt_name_set
name_set2 = b_name_set
data_dict = rt_data_dict
else:
print("Configuration error.")
exit()
for sn in name_set1:
if not detect_existing(sn, name_set2) and not detect_unconvertible(sn, data_dict[sn][2]):
payload = {
'audienceId': audience,
'name': sn,
'kind': kind,
'description': data_dict[sn][1] + ' --- [Created from: ' + data_dict[sn][0] + ' ]',
'segmentFolderId': folder,
'rule': data_dict[sn][2]
}
headers = {
"Authorization": f"TD1 {td_api_key}",
"Content-Type": "application/json"
}
post_seg(sn, headers, payload)

batch_segs_list, rt_segs_list = get_segs()
build_seg_lists()
replicate_diff()

#Stats
print("Batch segment count (before): " + str(len(batch_segs_list)))
print("Real-time segment count (before): " + str(len(rt_segs_list)))
batch_segs_list, rt_segs_list = get_segs()
print("Batch segment count (after): " + str(len(batch_segs_list)))
print("Real-time segment count (after): " + str(len(rt_segs_list)))

print("Fin")

# Main
if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions scenarios/segment_sync/segment_replication.dig
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
timezone: Asia/Tokyo

_export:
!include : config/params.yaml

+replicate:
py>: scripts.seg_rep.main
_env:
TD_API_KEY: ${secret:td.apikey}
docker:
image: "digdag/digdag-python:3.10"

0 comments on commit 2b1eb76

Please sign in to comment.