Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move user data export to background tasks #2954

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions admin/sql/create_foreign_keys.sql
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,10 @@ ALTER TABLE background_tasks
REFERENCES "user" (id)
ON DELETE CASCADE;

-- Tie each export row to its owning user; deleting the user row cascades to
-- the user's export rows.
ALTER TABLE user_data_export
    ADD CONSTRAINT user_data_export_user_id_foreign_key
        FOREIGN KEY (user_id)
        REFERENCES "user" (id)
        ON DELETE CASCADE;

COMMIT;
4 changes: 3 additions & 1 deletion admin/sql/create_indexes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ CREATE UNIQUE INDEX caa_id_ndx_release_color ON release_color (caa_id);

CREATE UNIQUE INDEX user_id_ndx_user_setting ON user_setting (user_id);

CREATE INDEX background_tasks_user_id_task_type_idx ON background_tasks (user_id, task);
CREATE UNIQUE INDEX background_tasks_user_id_task_type_uniq_idx ON background_tasks (user_id, task);

-- Non-unique index on the owning user of each export row.
CREATE INDEX user_data_export_user_id_idx ON user_data_export (user_id);

-- Partial unique index: at most one row per (user_id, type) whose status is
-- 'waiting' or 'in_progress'. The update script
-- updates/2024-08-06-add-background-export.sql creates this same index, so it
-- is declared here as well to keep freshly created databases identical to
-- migrated ones.
CREATE UNIQUE INDEX user_data_export_deduplicate_waiting_idx ON user_data_export (user_id, type) WHERE status = 'waiting' OR status = 'in_progress';

COMMIT;
2 changes: 2 additions & 0 deletions admin/sql/create_primary_keys.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ ALTER TABLE recommendation.do_not_recommend ADD CONSTRAINT rec_do_not_recommend_

ALTER TABLE background_tasks ADD CONSTRAINT background_tasks_id_pkey PRIMARY KEY (id);

-- Primary key of user_data_export is its identity column.
ALTER TABLE user_data_export
    ADD CONSTRAINT user_data_export_id_pkey
        PRIMARY KEY (id);

COMMIT;
14 changes: 13 additions & 1 deletion admin/sql/create_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,19 @@ CREATE TABLE background_tasks (
id INTEGER GENERATED BY DEFAULT AS IDENTITY NOT NULL,
user_id INTEGER NOT NULL,
task background_tasks_type NOT NULL,
created TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
created TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
metadata JSONB
);

-- Bookkeeping table for user data exports.
-- progress, filename and available_until are nullable and have no default;
-- created defaults to the insertion time.
CREATE TABLE user_data_export (
    id              INTEGER GENERATED ALWAYS AS IDENTITY,
    user_id         INTEGER NOT NULL,
    type            user_data_export_type_type NOT NULL,
    status          user_data_export_status_type NOT NULL,
    progress        TEXT,
    filename        TEXT,
    available_until TIMESTAMP WITH TIME ZONE,
    created         TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- The following line is now executed by the init-db action from manage.py. If you create a DB without the init-db function
Expand Down
6 changes: 5 additions & 1 deletion admin/sql/create_types.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,8 @@ CREATE TYPE user_stats_type AS ENUM('artists', 'releases', 'recordings', 'daily_

CREATE TYPE do_not_recommend_entity_type AS ENUM ('artist', 'release', 'release_group', 'recording');

CREATE TYPE background_tasks_type AS ENUM ('delete_listens', 'delete_user');
CREATE TYPE background_tasks_type AS ENUM ('delete_listens', 'delete_user', 'export_all_user_data');

-- Status values an export row can carry (label order is significant for
-- enum comparison, so it is kept as declared).
CREATE TYPE user_data_export_status_type AS ENUM (
    'in_progress',
    'waiting',
    'completed',
    'failed'
);

-- Kinds of export; a single value is defined at present.
CREATE TYPE user_data_export_type_type AS ENUM (
    'export_all_user_data'
);
34 changes: 34 additions & 0 deletions admin/sql/updates/2024-08-06-add-background-export.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Migration: adds the 'export_all_user_data' background task, a metadata column on
-- background_tasks, and the user_data_export table with its keys and indexes.

-- The enum/type statements run before BEGIN, i.e. outside the explicit transaction.
-- NOTE(review): presumably because ALTER TYPE ... ADD VALUE is restricted inside a
-- transaction block on some PostgreSQL versions — confirm against the deployed version.
-- These three statements are therefore not rolled back if the transaction below fails.
ALTER TYPE background_tasks_type ADD VALUE 'export_all_user_data';
CREATE TYPE user_data_export_type_type AS ENUM ('export_all_user_data');
CREATE TYPE user_data_export_status_type AS ENUM ('in_progress', 'waiting', 'completed', 'failed');

BEGIN;

-- Replace the non-unique (user_id, task) index with a unique one.
-- NOTE(review): creating the unique index fails if background_tasks already contains
-- duplicate (user_id, task) rows — check existing data before running.
DROP INDEX background_tasks_user_id_task_type_idx;
CREATE UNIQUE INDEX background_tasks_user_id_task_type_uniq_idx ON background_tasks (user_id, task);

-- Nullable JSONB column; existing rows get NULL.
ALTER TABLE background_tasks ADD COLUMN metadata JSONB;

-- Same definition as in create_tables.sql.
CREATE TABLE user_data_export (
id INTEGER GENERATED ALWAYS AS IDENTITY,
user_id INTEGER NOT NULL,
type user_data_export_type_type NOT NULL,
status user_data_export_status_type NOT NULL,
progress TEXT,
filename TEXT,
available_until TIMESTAMPTZ,
created TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

ALTER TABLE user_data_export ADD CONSTRAINT user_data_export_id_pkey PRIMARY KEY (id);

-- Deleting a user cascades to that user's export rows.
ALTER TABLE user_data_export
ADD CONSTRAINT user_data_export_user_id_foreign_key
FOREIGN KEY (user_id)
REFERENCES "user" (id)
ON DELETE CASCADE;

CREATE INDEX user_data_export_user_id_idx ON user_data_export (user_id);
-- Partial unique index: at most one row per (user_id, type) whose status is
-- 'waiting' or 'in_progress'; rows in other statuses are not constrained.
CREATE UNIQUE INDEX user_data_export_deduplicate_waiting_idx ON user_data_export (user_id, type) WHERE status = 'waiting' OR status = 'in_progress';

COMMIT;
3 changes: 3 additions & 0 deletions consul_config.py.ctmpl
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,6 @@ REJECT_LISTENS_WITHOUT_USER_EMAIL = {{template "KEY_JSON" "reject_listens_withou

# If set to True, do not allow new users without email to register and warn existing without email
REJECT_NEW_USERS_WITHOUT_EMAIL = {{template "KEY_JSON" "reject_new_users_without_email"}}

# Base directory for user data exports, read from the "user_data_export_base_dir" key.
# NOTE(review): the value is emitted unquoted. If the KEY template renders the raw
# stored string (e.g. a filesystem path), the generated line would not be a valid
# Python string literal — confirm whether the template call should be wrapped in quotes.
USER_DATA_EXPORT_BASE_DIR = {{template "KEY" "user_data_export_base_dir"}}
3 changes: 3 additions & 0 deletions docker/services/cron/crontab
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,6 @@ MAILTO=""

# run weekly cron job to update popularity datasets
0 10 * * 0 root /usr/local/bin/python /code/listenbrainz/manage.py spark request_popularity >> /logs/popularity_datasets.log 2>&1

# delete old and expired user data exports: runs as root every day at 11:00,
# appending both stdout and stderr to /logs/user_data_exports.log
0 11 * * * root /usr/local/bin/python /code/listenbrainz/manage.py delete-old-user-data-exports >> /logs/user_data_exports.log 2>&1
Loading
Loading