Skip to content

Commit

Permalink
Add unstable2sqlelf script
Browse files Browse the repository at this point in the history
Add a new script that pulls every tagged Debian unstable Docker image
listed in debian-unstable-tags.txt and converts each one into a SQLite database.
  • Loading branch information
fzakaria committed Apr 6, 2024
1 parent 28b83af commit 48da661
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 8 deletions.
97 changes: 97 additions & 0 deletions tools/debian-unstable-tags.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
unstable-20240330
unstable-20240311
unstable-20240211
unstable-20240130
unstable-20240110
unstable-20231218
unstable-20231120
unstable-20231030
unstable-20231009
unstable-20230919
unstable-20230904
unstable-20230814
unstable-20230725
unstable-20230703
unstable-20230612
unstable-20230522
unstable-20230502
unstable-20230411
unstable-20230320
unstable-20230227
unstable-20230208
unstable-20230202
unstable-20230109
unstable-20221219
unstable-20221205
unstable-20221114
unstable-20221024
unstable-20221004
unstable-20220912
unstable-20220822
unstable-20220801
unstable-20220711
unstable-20220622
unstable-20220527
unstable-20220509
unstable-20220418
unstable-20220328
unstable-20220316
unstable-20220228
unstable-20220125
unstable-20211220
unstable-20211201
unstable-20211115
unstable-20211011
unstable-20210927
unstable-20210902
unstable-20210816
unstable-20210721
unstable-20210621
unstable-20210511
unstable-20210408
unstable-20210329
unstable-20210326
unstable-20210311
unstable-20210208
unstable-20210111
unstable-20201209
unstable-20201117
unstable-20201012
unstable-20200908
unstable-20200803
unstable-20200720
unstable-20200607
unstable-20200514
unstable-20200511
unstable-20200422
unstable-20200414
unstable-20200327
unstable-20200224
unstable-20200130
unstable-20191224
unstable-20191118
unstable-20191014
unstable-20190910
unstable-20190812
unstable-20190708
unstable-20190610
unstable-20190506
unstable-20190326
unstable-20190228
unstable-20190204
unstable-20190122
unstable-20181226
unstable-20181112
unstable-20181011
unstable-20180831
unstable-20180716
unstable-20180625
unstable-20180426
unstable-20180312
unstable-20180213
unstable-20171210
unstable-20171009
unstable-20170907
unstable-20170723
unstable-20170620
unstable-20170606
26 changes: 18 additions & 8 deletions tools/docker2sqlelf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@
LOG = logging.getLogger(__name__)


def docker2sqelf(image_name: str, keep_temp_dir: bool = False) -> str:
def docker2sqlelf(image_name: str, keep_temp_dir: bool, database_path: str) -> None:
"""Given a docker image, convert it to a sqlelf database.
Args:
image_name: The docker image name
keep_temp_dir: Whether to keep the temporary directory
database_path: The path to export the database to
"""
client = docker.from_env()

temp_dir = tempfile.mkdtemp()
Expand All @@ -31,6 +38,7 @@ def cleanup() -> None:

atexit.register(cleanup)

client.images.pull(image_name)
container = client.containers.create(image_name)
LOG.info(f"Created container with ID {container.id}")

Expand All @@ -47,8 +55,6 @@ def cleanup() -> None:
container.remove() # pyright: ignore
LOG.info(f"Removed container {container.id}")

modified_image_name = image_name.replace(":", "-")

filenames: list[str] = reduce(
lambda a, b: a + b,
map(
Expand All @@ -71,11 +77,9 @@ def cleanup() -> None:
engine = sql.make_sql_engine(filenames, cache_flags=elf.CacheFlag.ALL())

LOG.info("Dumping the sqlite database")
database_filename = f"{modified_image_name}.sqlite"
engine.dump(database_filename)
engine.dump(database_path)

LOG.info(f"Created database {database_filename}")
return database_filename
LOG.info(f"Created database {database_path}")


if __name__ == "__main__":
Expand All @@ -92,6 +96,12 @@ def cleanup() -> None:
parser.add_argument(
"-k", "--keep", help="Keep temporary directory", action="store_true"
)
parser.add_argument(
"-d",
"--database",
help="Database path to export to",
default="database.sqlite",
)
args = parser.parse_args()

docker2sqelf(args.image_name, args.keep)
docker2sqlelf(args.image_name, args.keep, args.database)
61 changes: 61 additions & 0 deletions tools/unstable2sqlelf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#! /usr/bin/env python3
"""
Run this script like so:
python -m tools.unstable2sqlelf unstable-sqlite
"""
import argparse
import logging
import os
import re
import sqlite3

from tools.docker2sqlelf import docker2sqlelf

LOG = logging.getLogger(__name__)


def add_distribution_timestamp(database_path: str, docker_tag: str) -> None:
    """Stamp every row of ELF_HEADERS with the date encoded in a Docker tag.

    The digits following ``unstable-`` in ``docker_tag`` are written, as a
    string, into a ``DistributionTimestamp`` column on the ``ELF_HEADERS``
    table. The column is created (without a declared type) if it does not
    exist yet, so calling this function again on the same database simply
    overwrites the value instead of failing.

    Args:
        database_path: Path to the sqlite database to modify.
        docker_tag: Tag containing ``unstable-<digits>``,
            e.g. ``unstable-20240330``.

    Raises:
        ValueError: If ``docker_tag`` does not contain ``unstable-<digits>``.
        sqlite3.OperationalError: If the database has no ``ELF_HEADERS`` table.
    """
    LOG.info(f"Adding distribution timestamp to database: {database_path}")
    match = re.search(r"unstable-(\d+)", docker_tag)
    if match is None:
        raise ValueError(f"Invalid docker tag: {docker_tag}")
    timestamp = match.group(1)

    conn = sqlite3.connect(database_path)
    try:
        # PRAGMA table_info yields one row per column; index 1 is the column
        # name. SQLite column names are case-insensitive, hence the lower().
        existing_columns = {
            row[1].lower() for row in conn.execute("PRAGMA table_info(ELF_HEADERS)")
        }
        if "distributiontimestamp" not in existing_columns:
            conn.execute("ALTER TABLE ELF_HEADERS ADD COLUMN DistributionTimestamp")
        conn.execute(
            "UPDATE ELF_HEADERS SET DistributionTimestamp = ?", (timestamp,)
        )
        conn.commit()
    finally:
        # Always release the file handle, even when a statement above fails.
        conn.close()


if __name__ == "__main__":
    # Setup the logging config
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    )
    parser = argparse.ArgumentParser(
        description="Download all unstable Debian distributions as sqlite databases."
    )
    parser.add_argument(
        "output_directory",
        # Without nargs="?" argparse treats a positional as required and the
        # default below would never be used.
        nargs="?",
        help="Output directory to store the files.",
        default="unstable-sqlite",
    )
    args = parser.parse_args()

    LOG.info(f"Creating output directory: {args.output_directory}")
    os.makedirs(args.output_directory, exist_ok=True)

    # The tag list lives next to this script, one tag per line.
    tags_path = os.path.join(os.path.dirname(__file__), "debian-unstable-tags.txt")
    with open(tags_path, "r") as file:
        # Skip blank lines so that a trailing newline or stray whitespace does
        # not turn into a request for the malformed image name "debian:".
        tags = [line.strip() for line in file if line.strip()]

    # Convert each tagged image into its own database, then record which
    # snapshot the database came from.
    for tag in tags:
        LOG.info(f"Processing tag: {tag}")
        db_path = os.path.join(args.output_directory, f"debian-{tag}.sqlite")
        docker2sqlelf(f"debian:{tag}", False, db_path)
        add_distribution_timestamp(db_path, tag)

0 comments on commit 48da661

Please sign in to comment.