From 9effb17436076f57e1a272d6cd5b272380bea3d1 Mon Sep 17 00:00:00 2001 From: Tyler Erickson Date: Tue, 5 Mar 2024 02:22:02 -0800 Subject: [PATCH] Make consistent: STAC, PgSTAC, pyPgSTAC (#244) * Make caps consistent: PgSTAC, pyPgSTAC * Capitalization consistency: STAC --- .devcontainer/devcontainer.json | 2 +- CONTRIBUTING.md | 4 +-- README.md | 2 +- docs/src/pgstac.md | 26 +++++++++---------- docs/src/pypgstac.md | 18 ++++++------- src/pypgstac/README.md | 2 +- src/pypgstac/python/pypgstac/__init__.py | 2 +- src/pypgstac/python/pypgstac/db.py | 10 +++---- src/pypgstac/python/pypgstac/migrate.py | 2 +- src/pypgstac/python/pypgstac/pypgstac.py | 12 ++++----- .../tests/hydration/test_hydrate_pg.py | 4 +-- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9172e493..0535e52c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "PGStac", + "name": "PgSTAC", "dockerComposeFile": "../docker-compose.yml", "service": "pgstac", "workspaceFolder": "/opt/src" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 82fbfd74..316594b7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Development - Contributing -PGStac uses a dockerized development environment. However, +PgSTAC uses a dockerized development environment. However, it still needs a local install of pypgstac to allow an editable install inside the docker container. This is installed automatically if you have set up a virtual environment for the project. Otherwise @@ -58,7 +58,7 @@ This will create a base migration for the new version and will create incrementa All changes to SQL should only be made in the `/src/pgstac/sql` directory. SQL Files will be run in alphabetical order. ### Adding Tests -PGStac tests can be written using PGTap or basic SQL output comparisons. Additional testing is available using PyTest in the PyPgSTAC module. 
Tests can be run using the `scripts/test` command. +PgSTAC tests can be written using PGTap or basic SQL output comparisons. Additional testing is available using PyTest in the pyPgSTAC module. Tests can be run using the `scripts/test` command. PGTap tests can be written using [PGTap](https://pgtap.org/) syntax. Tests should be added to the `/src/pgstac/tests/pgtap` directory. Any new sql files added to this directory must be added to `/src/pgstac/tests/pgtap.sql`. diff --git a/README.md b/README.md index ea504b70..21d85e46 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ PgSTAC provides functionality for STAC Filters, CQL2 search, and utilities to help manage the indexing and partitioning of STAC Collections and Items. -PgSTAC is used in production to scale to hundreds of millions of STAC items. PgSTAC implements core data models and functions to provide a STAC API from a PostgreSQL database. PgSTAC is entirely within the database and does not provide an HTTP-facing API. The [Stac FastAPI](https://github.com/stac-utils/stac-fastapi) PgSTAC backend and [Franklin](https://github.com/azavea/franklin) can be used to expose a PgSTAC catalog. Integrating PgSTAC with any other language with PostgreSQL drivers is also possible. +PgSTAC is used in production to scale to hundreds of millions of STAC items. PgSTAC implements core data models and functions to provide a STAC API from a PostgreSQL database. PgSTAC is entirely within the database and does not provide an HTTP-facing API. The [STAC FastAPI](https://github.com/stac-utils/stac-fastapi) PgSTAC backend and [Franklin](https://github.com/azavea/franklin) can be used to expose a PgSTAC catalog. Integrating PgSTAC with any other language with PostgreSQL drivers is also possible. 
PgSTAC Documentation: https://stac-utils.github.io/pgstac/pgstac diff --git a/docs/src/pgstac.md b/docs/src/pgstac.md index 2e68185d..34cb3839 100644 --- a/docs/src/pgstac.md +++ b/docs/src/pgstac.md @@ -1,15 +1,15 @@ PGDatabase Schema and Functions for Storing and Accessing STAC collections and items in PostgreSQL -STAC Client that uses PGStac available in [STAC-FastAPI](https://github.com/stac-utils/stac-fastapi) +STAC Client that uses PgSTAC available in [STAC-FastAPI](https://github.com/stac-utils/stac-fastapi) -PGStac requires **Postgresql>=13** and **PostGIS>=3**. Best performance will be had using PostGIS>=3.1. +PgSTAC requires **Postgresql>=13** and **PostGIS>=3**. Best performance will be had using PostGIS>=3.1. -### PGStac Settings -PGStac installs everything into the pgstac schema in the database. This schema must be in the search_path in the postgresql session while using pgstac. +### PgSTAC Settings +PgSTAC installs everything into the pgstac schema in the database. This schema must be in the search_path in the postgresql session while using pgstac. -#### PGStac Users +#### PgSTAC Users The pgstac_admin role is the owner of all the objects within pgstac and should be used when running things such as migrations. The pgstac_ingest role has read/write privileges on all tables and should be used for data ingest or if using the transactions extension with stac-fastapi-pgstac. @@ -28,7 +28,7 @@ To grant pgstac permissions to a current postgresql user: GRANT pgstac_read TO ; ``` -#### PGStac Search Path +#### PgSTAC Search Path The search_path can be set at the database level or role level or by setting within the current session. The search_path is already set if you are directly using one of the pgstac users. 
If you are not logging in directly as one of the pgstac users, you will need to set the search_path by adding it to the search_path of the user you are using: ```sql ALTER ROLE SET SEARCH_PATH TO pgstac, public; @@ -45,13 +45,13 @@ kwargs={ } ``` -#### PGStac Settings Variables +#### PgSTAC Settings Variables There are additional variables that control the settings used for calculating and displaying context (total row count) for a search, as well as a variable to set the filter language (cql-json or cql-json2). The context is "off" by default, and the default filter language is set to "cql2-json". Variables can be set either by passing them in via the connection options using your connection library, setting them in the pgstac_settings table or by setting them on the Role that is used to log in to the database. -Turning "context" on can be **very** expensive on larger databases. Much of what PGStac does is to optimize the search of items sorted by time where only fewer than 10,000 records are returned at a time. It does this by searching for the data in chunks and is able to "short circuit" and return as soon as it has the number of records requested. Calculating the context (the total count for a query) requires a scan of all records that match the query parameters and can take a very long time. Setting "context" to auto will use database statistics to estimate the number of rows much more quickly, but for some queries, the estimate may be quite a bit off. +Turning "context" on can be **very** expensive on larger databases. Much of what PgSTAC does is to optimize the search of items sorted by time where only fewer than 10,000 records are returned at a time. It does this by searching for the data in chunks and is able to "short circuit" and return as soon as it has the number of records requested. Calculating the context (the total count for a query) requires a scan of all records that match the query parameters and can take a very long time. 
Setting "context" to auto will use database statistics to estimate the number of rows much more quickly, but for some queries, the estimate may be quite a bit off. Example for updating the pgstac_settings table with a new value: ```sql @@ -92,19 +92,19 @@ The nohydrate conf item returns an unhydrated item bypassing the CPU intensive s SELECT search('{"conf":{"nohydrate"=true}}'); ``` -#### PGStac Partitioning -By default PGStac partitions data by collection (note: this is a change starting with version 0.5.0). Each collection can further be partitioned by either year or month. **Partitioning must be set up prior to loading any data!** Partitioning can be configured by setting the partition_trunc flag on a collection in the database. +#### PgSTAC Partitioning +By default PgSTAC partitions data by collection (note: this is a change starting with version 0.5.0). Each collection can further be partitioned by either year or month. **Partitioning must be set up prior to loading any data!** Partitioning can be configured by setting the partition_trunc flag on a collection in the database. ```sql UPDATE collections set partition_trunc='month' WHERE id=''; ``` In general, you should aim to keep each partition less than a few hundred thousand rows. Further partitioning (ie setting everything to 'month' when not needed to keep the partitions below a few hundred thousand rows) can be detrimental. -#### PGStac Indexes / Queryables +#### PgSTAC Indexes / Queryables -By default, PGStac includes indexes on the id, datetime, collection, and geometry. Further indexing can be added for additional properties globally or only on particular collections by modifications to the queryables table. +By default, PgSTAC includes indexes on the id, datetime, collection, and geometry. Further indexing can be added for additional properties globally or only on particular collections by modifications to the queryables table. 
-The `queryables` table controls the indexes that PGStac will build as well as the metadata that is returned from a [STAC Queryables endpoint](https://github.com/stac-api-extensions/filter#queryables). +The `queryables` table controls the indexes that PgSTAC will build as well as the metadata that is returned from a [STAC Queryables endpoint](https://github.com/stac-api-extensions/filter#queryables). | Column | Description | Type | Example | |-----------------------|--------------------------------------------------------------------------|------------|--------------------------------------------------------------------------------------------------------------------| diff --git a/docs/src/pypgstac.md b/docs/src/pypgstac.md index 95cbd495..25654904 100644 --- a/docs/src/pypgstac.md +++ b/docs/src/pypgstac.md @@ -2,12 +2,12 @@ PgSTAC includes a Python utility for bulk data loading and managing migrations. -PyPGStac is available on PyPI +pyPgSTAC is available on PyPI ``` pip install pypgstac ``` -By default, PyPGStac does not install the `psycopg` dependency. If you want the database driver installed, use: +By default, pyPgSTAC does not install the `psycopg` dependency. If you want the database driver installed, use: ``` pip install pypgstac[psycopg] @@ -39,7 +39,7 @@ Commands: version Get version from a pgstac database. ``` -PyPGStac will get the database connection settings from the **standard PG environment variables**: +pyPgSTAC will get the database connection settings from the **standard PG environment variables**: - PGHOST=0.0.0.0 - PGPORT=5432 @@ -50,18 +50,18 @@ PyPGStac will get the database connection settings from the **standard PG enviro It can also take a DSN database url "postgresql://..." via the **--dsn** flag. ### Migrations -PyPGStac has a utility to help apply migrations to an existing PGStac instance to bring it up to date. +pyPgSTAC has a utility to help apply migrations to an existing PgSTAC instance to bring it up to date. 
There are two types of migrations: - - **Base migrations** install PGStac into a database with no current PGStac installation. These migrations follow the file pattern `"pgstac.[version].sql"` - - **Incremental migrations** are used to move PGStac from one version to the next. These migrations follow the file pattern `"pgstac.[version].[fromversion].sql"` + - **Base migrations** install PgSTAC into a database with no current PgSTAC installation. These migrations follow the file pattern `"pgstac.[version].sql"` + - **Incremental migrations** are used to move PgSTAC from one version to the next. These migrations follow the file pattern `"pgstac.[version].[fromversion].sql"` -Migrations are stored in ```pypgstac/pypgstac/migration`s``` and are distributed with the PyPGStac package. +Migrations are stored in `pypgstac/pypgstac/migrations` and are distributed with the pyPgSTAC package. ### Running Migrations -PyPGStac has a utility for checking the version of an existing PGStac database and applying the appropriate migrations in the correct order. It can also be used to setup a database from scratch. +pyPgSTAC has a utility for checking the version of an existing PgSTAC database and applying the appropriate migrations in the correct order. It can also be used to set up a database from scratch. 
-To create an initial PGStac database or bring an existing one up to date, check you have the pypgstac version installed you want to migrate to and run: +To create an initial PgSTAC database or bring an existing one up to date, check you have the pypgstac version installed you want to migrate to and run: ``` pypgstac migrate ``` diff --git a/src/pypgstac/README.md b/src/pypgstac/README.md index df448ab1..c99451db 100644 --- a/src/pypgstac/README.md +++ b/src/pypgstac/README.md @@ -1,3 +1,3 @@ # pypgstac -Python tools for working with PGStac +Python tools for working with PgSTAC diff --git a/src/pypgstac/python/pypgstac/__init__.py b/src/pypgstac/python/pypgstac/__init__.py index 91d4db34..9886933d 100644 --- a/src/pypgstac/python/pypgstac/__init__.py +++ b/src/pypgstac/python/pypgstac/__init__.py @@ -1,4 +1,4 @@ -"""PyPGStac Version.""" +"""pyPgSTAC Version.""" from pypgstac.version import __version__ __all__ = ["__version__"] diff --git a/src/pypgstac/python/pypgstac/db.py b/src/pypgstac/python/pypgstac/db.py index e5c4d3d3..61da1df5 100644 --- a/src/pypgstac/python/pypgstac/db.py +++ b/src/pypgstac/python/pypgstac/db.py @@ -1,4 +1,4 @@ -"""Base library for database interaction with PgStac.""" +"""Base library for database interaction with PgSTAC.""" import atexit import logging import time @@ -53,7 +53,7 @@ class Settings(BaseSettings): class PgstacDB: - """Base class for interacting with PgStac Database.""" + """Base class for interacting with PgSTAC Database.""" def __init__( self, @@ -260,7 +260,7 @@ def version(self) -> Optional[str]: if isinstance(version, str): return version except psycopg.errors.UndefinedTable: - logger.debug("PGStac is not installed.") + logger.debug("PgSTAC is not installed.") if self.connection is not None: self.connection.rollback() return None @@ -278,7 +278,7 @@ def pg_version(self) -> str: version = version.decode() if isinstance(version, str): if int(version.split(".")[0]) < 13: - raise Exception("PGStac requires PostgreSQL 13+") 
+ raise Exception("PgSTAC requires PostgreSQL 13+") return version else: if self.connection is not None: @@ -299,5 +299,5 @@ def func(self, function_name: str, *args: Any) -> Generator: return self.query(base_query, cleaned_args) def search(self, query: Union[dict, str, psycopg.types.json.Jsonb] = "{}") -> str: - """Search PgStac.""" + """Search PgSTAC.""" return dumps(next(self.func("search", query))[0]) diff --git a/src/pypgstac/python/pypgstac/migrate.py b/src/pypgstac/python/pypgstac/migrate.py index e5d256f9..6855ff36 100644 --- a/src/pypgstac/python/pypgstac/migrate.py +++ b/src/pypgstac/python/pypgstac/migrate.py @@ -123,7 +123,7 @@ def run_migration(self, toversion: Optional[str] = None) -> str: toversion = "unreleased" pg_version = self.db.pg_version - logger.info(f"Migrating PGStac on PostgreSQL Version {pg_version}") + logger.info(f"Migrating PgSTAC on PostgreSQL Version {pg_version}") oldversion = self.db.version if oldversion == toversion: logger.info(f"Target database already at version: {toversion}") diff --git a/src/pypgstac/python/pypgstac/pypgstac.py b/src/pypgstac/python/pypgstac/pypgstac.py index 1cfbdb38..a4684f6a 100644 --- a/src/pypgstac/python/pypgstac/pypgstac.py +++ b/src/pypgstac/python/pypgstac/pypgstac.py @@ -13,7 +13,7 @@ class PgstacCLI: - """CLI for PgStac.""" + """CLI for PgSTAC.""" def __init__( self, @@ -22,7 +22,7 @@ def __init__( debug: bool = False, usequeue: bool = False, ): - """Initialize PgStac CLI.""" + """Initialize PgSTAC CLI.""" if version: sys.exit(0) @@ -39,7 +39,7 @@ def initversion(self) -> str: @property def version(self) -> Optional[str]: - """Get PGStac version installed on database.""" + """Get PgSTAC version installed on database.""" return self._db.version @property @@ -52,11 +52,11 @@ def pgready(self) -> None: self._db.wait() def search(self, query: str) -> str: - """Search PgStac.""" + """Search PgSTAC.""" return self._db.search(query) def migrate(self, toversion: Optional[str] = None) -> str: - """Migrate 
PgStac Database.""" + """Migrate PgSTAC Database.""" migrator = Migrate(self._db) return migrator.run_migration(toversion=toversion) @@ -68,7 +68,7 @@ def load( dehydrated: Optional[bool] = False, chunksize: Optional[int] = 10000, ) -> None: - """Load collections or items into PGStac.""" + """Load collections or items into PgSTAC.""" loader = Loader(db=self._db) if table == "collections": loader.load_collections(file, method) diff --git a/src/pypgstac/tests/hydration/test_hydrate_pg.py b/src/pypgstac/tests/hydration/test_hydrate_pg.py index 22b9f7c6..7f7ddc05 100644 --- a/src/pypgstac/tests/hydration/test_hydrate_pg.py +++ b/src/pypgstac/tests/hydration/test_hydrate_pg.py @@ -1,4 +1,4 @@ -"""Test Hydration in PGStac.""" +"""Test Hydration in PgSTAC.""" import os from contextlib import contextmanager from typing import Any, Dict, Generator @@ -12,7 +12,7 @@ class TestHydratePG(THydrate): - """Test hydration using PGStac.""" + """Test hydration using PgSTAC.""" @contextmanager def db(self) -> Generator[PgstacDB, None, None]: