diff --git a/.editorconfig b/.editorconfig index bdc9b52ce69b2..b3084d3f3aa07 100644 --- a/.editorconfig +++ b/.editorconfig @@ -40,6 +40,9 @@ indent_size = 2 [*.ts] indent_size = 2 +[*.tsx] +indent_size = 2 + [*.css] indent_size = 2 diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index 360de1e96236d..1af41df176597 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -37,16 +37,10 @@ labelPRBasedOnFilePath: - providers/apache/cassandra/** provider:apache-drill: - - providers/src/airflow/providers/apache/drill/**/* - - docs/apache-airflow-providers-apache-drill/**/* - - providers/tests/apache/drill/**/* - - providers/tests/system/apache/drill/**/* + - providers/apache/drill/** provider:apache-druid: - - providers/src/airflow/providers/apache/druid/**/* - - docs/apache-airflow-providers-apache-druid/**/* - - providers/tests/apache/druid/**/* - - providers/tests/system/apache/druid/**/* + - providers/apache/druid/** provider:apache-flink: - providers/src/airflow/providers/apache/flink/**/* @@ -147,10 +141,7 @@ labelPRBasedOnFilePath: - providers/datadog/** provider:dbt-cloud: - - providers/src/airflow/providers/dbt/cloud/**/* - - docs/apache-airflow-providers-dbt-cloud/**/* - - providers/tests/dbt/cloud/**/* - - providers/tests/system/dbt/cloud/**/* + - providers/dbt/cloud/** provider:dingding: - providers/src/airflow/providers/dingding/**/* @@ -168,10 +159,7 @@ labelPRBasedOnFilePath: - providers/edge/** provider:elasticsearch: - - providers/src/airflow/providers/elasticsearch/**/* - - docs/apache-airflow-providers-elasticsearch/**/* - - providers/tests/elasticsearch/**/* - - providers/tests/system/elasticsearch/**/* + - providers/elasticsearch/** provider:exasol: - providers/exasol/** @@ -188,10 +176,7 @@ labelPRBasedOnFilePath: - providers/ftp/** provider:github: - - providers/src/airflow/providers/github/**/* - - docs/apache-airflow-providers-github/**/* - - providers/tests/github/**/* - - providers/tests/system/github/**/* + - providers/github/** provider:google: - providers/src/airflow/providers/google/**/* @@ -270,10 +255,7 @@ labelPRBasedOnFilePath: - providers/openlineage/** provider:opensearch: - - providers/src/airflow/providers/opensearch/**/* - - docs/apache-airflow-providers-opensearch/**/* - - providers/tests/opensearch/**/* - - providers/tests/system/opensearch/**/* + - providers/opensearch/** provider:opsgenie: - providers/opsgenie/** diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 37fa3978c6b79..62ac87721a3a3 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -85,7 +85,7 @@ jobs: - run: uv tool run --from apache-airflow-breeze pytest -n auto --color=yes working-directory: ./dev/breeze/ tests-ui: - timeout-minutes: 10 + timeout-minutes: 15 name: React UI tests runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} if: inputs.run-ui-tests == 'true' @@ -115,7 +115,7 @@ jobs: with: path: airflow/ui/node_modules/ key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }} - id: restore-eslint-cache + id: restore-eslint-cache-ui - run: cd airflow/ui && pnpm install --frozen-lockfile - run: cd airflow/ui && pnpm test env: @@ -127,7 +127,29 @@ jobs: key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }} if-no-files-found: 'error' retention-days: '2' - if: steps.restore-eslint-cache.outputs.stash-hit != 'true' + if: steps.restore-eslint-cache-ui.outputs.stash-hit != 'true' + - name: 
"Restore eslint cache (simple auth manager UI)" + uses: apache/infrastructure-actions/stash/restore@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 + with: + path: airflow/auth/managers/simple/ui/node_modules/ + key: > + cache-simple-am-ui-node-modules-v1- + ${{ runner.os }}-${{ hashFiles('airflow/auth/managers/simple/ui/**/pnpm-lock.yaml') }} + id: restore-eslint-cache-simple-am-ui + - run: cd airflow/auth/managers/simple/ui && pnpm install --frozen-lockfile + - run: cd airflow/auth/managers/simple/ui && pnpm test + env: + FORCE_COLOR: 2 + - name: "Save eslint cache (ui)" + uses: apache/infrastructure-actions/stash/save@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 + with: + path: airflow/auth/managers/simple/ui/node_modules/ + key: > + cache-simple-am-ui-node-modules-v1- + ${{ runner.os }}-${{ hashFiles('airflow/auth/managers/simple/ui/**/pnpm-lock.yaml') }} + if-no-files-found: 'error' + retention-days: '2' + if: steps.restore-eslint-cache-simple-am-ui.outputs.stash-hit != 'true' tests-www: timeout-minutes: 10 diff --git a/Dockerfile.ci b/Dockerfile.ci index 23b743165d3b0..396c04b3fd94e 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -935,17 +935,10 @@ function check_boto_upgrade() { # We need to include few dependencies to pass pip check with other dependencies: # * oss2 as dependency as otherwise jmespath will be bumped (sync with alibaba provider) # * cryptography is kept for snowflake-connector-python limitation (sync with snowflake provider) - # * requests needs to be limited to be compatible with apache beam (sync with apache-beam provider) - # * yandexcloud requirements for requests does not match those of apache.beam and latest botocore - # Both requests and yandexcloud exclusion above might be removed after - # https://github.com/apache/beam/issues/32080 is addressed - # This is already addressed and planned for 2.59.0 release. - # When you remove yandexcloud and opensearch from the above list, you can also remove the - # optional providers_dependencies exclusions from "test_example_dags.py" in "tests/always". 
set -x # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade boto3 botocore \ - "oss2>=2.14.0" "cryptography<43.0.0" "requests!=2.32.*,<3.0.0,>=2.24.0" + "oss2>=2.14.0" "cryptography<43.0.0" "opensearch-py" set +x pip check } diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index 360ecdf277e76..b3aa88d96f589 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -60,6 +60,7 @@ class Meta: priority_weight = auto_field() operator = auto_field() queued_dttm = auto_field(data_key="queued_when") + scheduled_dttm = auto_field(data_key="scheduled_when") pid = auto_field() executor = auto_field() executor_config = auto_field() @@ -102,6 +103,7 @@ class Meta: priority_weight = auto_field() operator = auto_field() queued_dttm = auto_field(data_key="queued_when") + scheduled_dttm = auto_field(data_key="scheduled_when") pid = auto_field() executor = auto_field() executor_config = auto_field() diff --git a/airflow/api_fastapi/core_api/base.py b/airflow/api_fastapi/core_api/base.py index fc1c68884c881..d88ec1757eb60 100644 --- a/airflow/api_fastapi/core_api/base.py +++ b/airflow/api_fastapi/core_api/base.py @@ -27,3 +27,15 @@ class BaseModel(PydanticBaseModel): """ model_config = ConfigDict(from_attributes=True, populate_by_name=True) + + +class StrictBaseModel(BaseModel): + """ + StrictBaseModel is a base Pydantic model for REST API that does not allow any extra fields. + + Use this class for models that should not have any extra fields in the payload. + + :meta private: + """ + + model_config = ConfigDict(from_attributes=True, populate_by_name=True, extra="forbid") diff --git a/airflow/api_fastapi/core_api/datamodels/assets.py b/airflow/api_fastapi/core_api/datamodels/assets.py index c7b7bec034c06..fd8f7cef2d415 100644 --- a/airflow/api_fastapi/core_api/datamodels/assets.py +++ b/airflow/api_fastapi/core_api/datamodels/assets.py @@ -21,11 +21,11 @@ from pydantic import Field, field_validator -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.utils.log.secrets_masker import redact -class DagScheduleAssetReference(BaseModel): +class DagScheduleAssetReference(StrictBaseModel): """DAG schedule reference serializer for assets.""" dag_id: str @@ -33,7 +33,7 @@ class DagScheduleAssetReference(BaseModel): updated_at: datetime -class TaskOutletAssetReference(BaseModel): +class TaskOutletAssetReference(StrictBaseModel): """Task outlet reference serializer for assets.""" dag_id: str @@ -84,7 +84,7 @@ class AssetAliasCollectionResponse(BaseModel): total_entries: int -class DagRunAssetReference(BaseModel): +class DagRunAssetReference(StrictBaseModel): """DAGRun serializer for asset responses.""" run_id: str @@ -141,7 +141,7 @@ class QueuedEventCollectionResponse(BaseModel): total_entries: int -class CreateAssetEventsBody(BaseModel): +class CreateAssetEventsBody(StrictBaseModel): """Create asset events request.""" asset_id: int diff --git a/airflow/api_fastapi/core_api/datamodels/backfills.py b/airflow/api_fastapi/core_api/datamodels/backfills.py index e36e50ea3b8d7..c74a7e2020313 100644 --- a/airflow/api_fastapi/core_api/datamodels/backfills.py +++ b/airflow/api_fastapi/core_api/datamodels/backfills.py @@ -19,11 +19,11 @@ from datetime import datetime -from airflow.api_fastapi.core_api.base import BaseModel +from 
airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.models.backfill import ReprocessBehavior -class BackfillPostBody(BaseModel): +class BackfillPostBody(StrictBaseModel): """Object used for create backfill request.""" dag_id: str diff --git a/airflow/api_fastapi/core_api/datamodels/common.py b/airflow/api_fastapi/core_api/datamodels/common.py index 4af5356f2ed3a..3c7a04255c963 100644 --- a/airflow/api_fastapi/core_api/datamodels/common.py +++ b/airflow/api_fastapi/core_api/datamodels/common.py @@ -27,7 +27,7 @@ from pydantic import Discriminator, Field, Tag -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel # Common Bulk Data Models T = TypeVar("T") @@ -57,7 +57,7 @@ class BulkActionNotOnExistence(enum.Enum): SKIP = "skip" -class BulkBaseAction(BaseModel, Generic[T]): +class BulkBaseAction(StrictBaseModel, Generic[T]): """Base class for bulk actions.""" action: BulkAction = Field(..., description="The action to be performed on the entities.") @@ -88,7 +88,7 @@ def _action_discriminator(action: Any) -> str: return BulkAction(action["action"]).value -class BulkBody(BaseModel, Generic[T]): +class BulkBody(StrictBaseModel, Generic[T]): """Serializer for bulk entity operations.""" actions: list[ diff --git a/airflow/api_fastapi/core_api/datamodels/config.py b/airflow/api_fastapi/core_api/datamodels/config.py index c16aa98093fb1..42a327431ceed 100644 --- a/airflow/api_fastapi/core_api/datamodels/config.py +++ b/airflow/api_fastapi/core_api/datamodels/config.py @@ -16,10 +16,10 @@ # under the License. from __future__ import annotations -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import StrictBaseModel -class ConfigOption(BaseModel): +class ConfigOption(StrictBaseModel): """Config option.""" key: str @@ -32,7 +32,7 @@ def text_format(self): return f"{self.key} = {self.value}" -class ConfigSection(BaseModel): +class ConfigSection(StrictBaseModel): """Config Section Schema.""" name: str @@ -53,7 +53,7 @@ def text_format(self): return f"[{self.name}]\n" + "\n".join(option.text_format for option in self.options) + "\n" -class Config(BaseModel): +class Config(StrictBaseModel): """List of config sections with their options.""" sections: list[ConfigSection] diff --git a/airflow/api_fastapi/core_api/datamodels/connections.py b/airflow/api_fastapi/core_api/datamodels/connections.py index 00d075bc4ace4..4650e1354dc1e 100644 --- a/airflow/api_fastapi/core_api/datamodels/connections.py +++ b/airflow/api_fastapi/core_api/datamodels/connections.py @@ -22,7 +22,7 @@ from pydantic import Field, field_validator from pydantic_core.core_schema import ValidationInfo -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.utils.log.secrets_masker import redact @@ -76,7 +76,7 @@ class ConnectionTestResponse(BaseModel): # Request Models -class ConnectionBody(BaseModel): +class ConnectionBody(StrictBaseModel): """Connection Serializer for requests body.""" connection_id: str = Field(serialization_alias="conn_id", max_length=200, pattern=r"^[\w.-]+$") diff --git a/airflow/api_fastapi/core_api/datamodels/dag_run.py b/airflow/api_fastapi/core_api/datamodels/dag_run.py index 48c92d2a83cb0..78e0254f62240 100644 --- a/airflow/api_fastapi/core_api/datamodels/dag_run.py +++ b/airflow/api_fastapi/core_api/datamodels/dag_run.py @@ -22,7 +22,7 @@ from pydantic import AwareDatetime, 
Field, NonNegativeInt, computed_field, model_validator -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.models import DagRun from airflow.utils import timezone from airflow.utils.state import DagRunState @@ -37,14 +37,14 @@ class DAGRunPatchStates(str, Enum): FAILED = DagRunState.FAILED -class DAGRunPatchBody(BaseModel): +class DAGRunPatchBody(StrictBaseModel): """DAG Run Serializer for PATCH requests.""" state: DAGRunPatchStates | None = None note: str | None = Field(None, max_length=1000) -class DAGRunClearBody(BaseModel): +class DAGRunClearBody(StrictBaseModel): """DAG Run serializer for clear endpoint body.""" dry_run: bool = True @@ -78,7 +78,7 @@ class DAGRunCollectionResponse(BaseModel): total_entries: int -class TriggerDAGRunPostBody(BaseModel): +class TriggerDAGRunPostBody(StrictBaseModel): """Trigger DAG Run Serializer for POST body.""" dag_run_id: str | None = None @@ -109,7 +109,7 @@ def logical_date(self) -> datetime: return timezone.utcnow() -class DAGRunsBatchBody(BaseModel): +class DAGRunsBatchBody(StrictBaseModel): """List DAG Runs body for batch endpoint.""" order_by: str | None = None diff --git a/airflow/api_fastapi/core_api/datamodels/dags.py b/airflow/api_fastapi/core_api/datamodels/dags.py index 30399b42f8d12..504a41683232c 100644 --- a/airflow/api_fastapi/core_api/datamodels/dags.py +++ b/airflow/api_fastapi/core_api/datamodels/dags.py @@ -31,7 +31,7 @@ field_validator, ) -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.api_fastapi.core_api.datamodels.dag_tags import DagTagResponse from airflow.configuration import conf @@ -92,7 +92,7 @@ def file_token(self) -> str: return serializer.dumps(self.fileloc) -class DAGPatchBody(BaseModel): +class DAGPatchBody(StrictBaseModel): """Dag Serializer for updatable bodies.""" is_paused: bool diff --git a/airflow/api_fastapi/core_api/datamodels/pools.py b/airflow/api_fastapi/core_api/datamodels/pools.py index 0040c49a1efda..096e357dfaf1d 100644 --- a/airflow/api_fastapi/core_api/datamodels/pools.py +++ b/airflow/api_fastapi/core_api/datamodels/pools.py @@ -21,7 +21,7 @@ from pydantic import BeforeValidator, ConfigDict, Field -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel def _call_function(function: Callable[[], int]) -> int: @@ -60,7 +60,7 @@ class PoolCollectionResponse(BaseModel): total_entries: int -class PoolPatchBody(BaseModel): +class PoolPatchBody(StrictBaseModel): """Pool serializer for patch bodies.""" model_config = ConfigDict(populate_by_name=True, from_attributes=True) @@ -71,7 +71,7 @@ class PoolPatchBody(BaseModel): include_deferred: bool | None = None -class PoolBody(BasePool): +class PoolBody(BasePool, StrictBaseModel): """Pool serializer for post bodies.""" pool: str = Field(alias="name", max_length=256) diff --git a/airflow/api_fastapi/core_api/datamodels/task_instances.py b/airflow/api_fastapi/core_api/datamodels/task_instances.py index 7cecb96ca42ce..eaebe589613f8 100644 --- a/airflow/api_fastapi/core_api/datamodels/task_instances.py +++ b/airflow/api_fastapi/core_api/datamodels/task_instances.py @@ -32,7 +32,7 @@ model_validator, ) -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.api_fastapi.core_api.datamodels.job import JobResponse from 
airflow.api_fastapi.core_api.datamodels.trigger import TriggerResponse from airflow.utils.state import TaskInstanceState @@ -64,6 +64,7 @@ class TaskInstanceResponse(BaseModel): priority_weight: int | None operator: str | None queued_dttm: datetime | None = Field(alias="queued_when") + scheduled_dttm: datetime | None = Field(alias="scheduled_when") pid: int | None executor: str | None executor_config: Annotated[str, BeforeValidator(str)] @@ -97,7 +98,7 @@ class TaskDependencyCollectionResponse(BaseModel): dependencies: list[TaskDependencyResponse] -class TaskInstancesBatchBody(BaseModel): +class TaskInstancesBatchBody(StrictBaseModel): """Task Instance body for get batch.""" dag_ids: list[str] | None = None @@ -147,6 +148,7 @@ class TaskInstanceHistoryResponse(BaseModel): priority_weight: int | None operator: str | None queued_dttm: datetime | None = Field(alias="queued_when") + scheduled_dttm: datetime | None = Field(alias="scheduled_when") pid: int | None executor: str | None executor_config: Annotated[str, BeforeValidator(str)] @@ -159,7 +161,7 @@ class TaskInstanceHistoryCollectionResponse(BaseModel): total_entries: int -class ClearTaskInstancesBody(BaseModel): +class ClearTaskInstancesBody(StrictBaseModel): """Request body for Clear Task Instances endpoint.""" dry_run: bool = True @@ -195,7 +197,7 @@ def validate_model(cls, data: Any) -> Any: return data -class PatchTaskInstanceBody(BaseModel): +class PatchTaskInstanceBody(StrictBaseModel): """Request body for Clear Task Instances endpoint.""" new_state: TaskInstanceState | None = None diff --git a/airflow/api_fastapi/core_api/datamodels/variables.py b/airflow/api_fastapi/core_api/datamodels/variables.py index 8307809bc5f5b..82cfdbb130523 100644 --- a/airflow/api_fastapi/core_api/datamodels/variables.py +++ b/airflow/api_fastapi/core_api/datamodels/variables.py @@ -21,7 +21,7 @@ from pydantic import ConfigDict, Field, model_validator -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.models.base import ID_LEN from airflow.typing_compat import Self from airflow.utils.log.secrets_masker import redact @@ -52,7 +52,7 @@ def redact_val(self) -> Self: return self -class VariableBody(BaseModel): +class VariableBody(StrictBaseModel): """Variable serializer for bodies.""" key: str = Field(max_length=ID_LEN) diff --git a/airflow/api_fastapi/core_api/datamodels/xcom.py b/airflow/api_fastapi/core_api/datamodels/xcom.py index 3a819b317d760..b63db3ff87d15 100644 --- a/airflow/api_fastapi/core_api/datamodels/xcom.py +++ b/airflow/api_fastapi/core_api/datamodels/xcom.py @@ -52,8 +52,8 @@ def value_to_string(cls, v): return str(v) if v is not None else None -class XComCollection(BaseModel): - """List of XCom items.""" +class XComCollectionResponse(BaseModel): + """XCom Collection serializer for responses.""" xcom_entries: list[XComResponse] total_entries: int diff --git a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml index a85947c6451c2..8bf3b1540fc55 100644 --- a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml @@ -258,6 +258,14 @@ paths: type: boolean default: false title: External Dependencies + - name: dag_version + in: query + required: false + schema: + anyOf: + - type: integer + - type: 'null' + title: Dag Version responses: '200': description: Successful Response @@ -4426,7 +4434,7 @@ paths: content: application/json: 
schema: - $ref: '#/components/schemas/XComCollection' + $ref: '#/components/schemas/XComCollectionResponse' '401': content: application/json: @@ -6773,6 +6781,7 @@ components: type: integer title: Max Active Runs default: 10 + additionalProperties: false type: object required: - dag_id @@ -6910,6 +6919,7 @@ components: - $ref: '#/components/schemas/BulkDeleteAction_ConnectionBody_' type: array title: Actions + additionalProperties: false type: object required: - actions @@ -6924,6 +6934,7 @@ components: - $ref: '#/components/schemas/BulkDeleteAction_PoolBody_' type: array title: Actions + additionalProperties: false type: object required: - actions @@ -6938,6 +6949,7 @@ components: - $ref: '#/components/schemas/BulkDeleteAction_VariableBody_' type: array title: Actions + additionalProperties: false type: object required: - actions @@ -6956,6 +6968,7 @@ components: action_on_existence: $ref: '#/components/schemas/BulkActionOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -6975,6 +6988,7 @@ components: action_on_existence: $ref: '#/components/schemas/BulkActionOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -6994,6 +7008,7 @@ components: action_on_existence: $ref: '#/components/schemas/BulkActionOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7013,6 +7028,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7032,6 +7048,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7051,6 +7068,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7103,6 +7121,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7122,6 +7141,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7141,6 +7161,7 @@ components: action_on_non_existence: $ref: '#/components/schemas/BulkActionNotOnExistence' default: fail + additionalProperties: false type: object required: - action @@ -7211,6 +7232,7 @@ components: type: boolean title: Include Past default: false + additionalProperties: false type: object title: ClearTaskInstancesBody description: Request body for Clear Task Instances endpoint. 
@@ -7221,6 +7243,7 @@ components: $ref: '#/components/schemas/ConfigSection' type: array title: Sections + additionalProperties: false type: object required: - sections @@ -7241,6 +7264,7 @@ components: maxItems: 2 minItems: 2 title: Value + additionalProperties: false type: object required: - key @@ -7343,6 +7367,7 @@ components: $ref: '#/components/schemas/ConfigOption' type: array title: Options + additionalProperties: false type: object required: - name @@ -7394,6 +7419,7 @@ components: - type: string - type: 'null' title: Extra + additionalProperties: false type: object required: - connection_id @@ -7733,6 +7759,7 @@ components: is_paused: type: boolean title: Is Paused + additionalProperties: false type: object required: - is_paused @@ -7880,6 +7907,7 @@ components: type: boolean title: Only Failed default: false + additionalProperties: false type: object title: DAGRunClearBody description: DAG Run serializer for clear endpoint body. @@ -7911,6 +7939,7 @@ components: maxLength: 1000 - type: 'null' title: Note + additionalProperties: false type: object title: DAGRunPatchBody description: DAG Run Serializer for PATCH requests. @@ -8121,6 +8150,7 @@ components: format: date-time - type: 'null' title: End Date Lte + additionalProperties: false type: object title: DAGRunsBatchBody description: List DAG Runs body for batch endpoint. @@ -8405,6 +8435,7 @@ components: type: string format: date-time title: Data Interval End + additionalProperties: false type: object required: - run_id @@ -8468,6 +8499,7 @@ components: type: string format: date-time title: Updated At + additionalProperties: false type: object required: - dag_id @@ -9128,6 +9160,7 @@ components: type: boolean title: Include Past default: false + additionalProperties: false type: object title: PatchTaskInstanceBody description: Request body for Clear Task Instances endpoint. @@ -9233,6 +9266,7 @@ components: type: boolean title: Include Deferred default: false + additionalProperties: false type: object required: - name @@ -9277,6 +9311,7 @@ components: - type: boolean - type: 'null' title: Include Deferred + additionalProperties: false type: object title: PoolPatchBody description: Pool serializer for patch bodies. @@ -9608,6 +9643,12 @@ components: format: date-time - type: 'null' title: Queued When + scheduled_when: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Scheduled When pid: anyOf: - type: integer @@ -9642,6 +9683,7 @@ components: - priority_weight - operator - queued_when + - scheduled_when - pid - executor - executor_config @@ -9735,6 +9777,12 @@ components: format: date-time - type: 'null' title: Queued When + scheduled_when: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Scheduled When pid: anyOf: - type: integer @@ -9793,6 +9841,7 @@ components: - priority_weight - operator - queued_when + - scheduled_when - pid - executor - executor_config @@ -9994,6 +10043,7 @@ components: - type: string - type: 'null' title: Order By + additionalProperties: false type: object title: TaskInstancesBatchBody description: Task Instance body for get batch. @@ -10029,6 +10079,7 @@ components: type: string format: date-time title: Updated At + additionalProperties: false type: object required: - dag_id @@ -10254,6 +10305,7 @@ components: - type: string - type: 'null' title: Note + additionalProperties: false type: object title: TriggerDAGRunPostBody description: Trigger DAG Run Serializer for POST body. 
@@ -10339,6 +10391,7 @@ components: - type: string - type: 'null' title: Description + additionalProperties: false type: object required: - key @@ -10401,7 +10454,7 @@ components: - git_version title: VersionInfo description: Version information serializer for responses. - XComCollection: + XComCollectionResponse: properties: xcom_entries: items: @@ -10415,8 +10468,8 @@ components: required: - xcom_entries - total_entries - title: XComCollection - description: List of XCom items. + title: XComCollectionResponse + description: XCom Collection serializer for responses. XComResponse: properties: key: diff --git a/airflow/api_fastapi/core_api/routes/public/xcom.py b/airflow/api_fastapi/core_api/routes/public/xcom.py index 524d0722d1caa..b8fa6456e570d 100644 --- a/airflow/api_fastapi/core_api/routes/public/xcom.py +++ b/airflow/api_fastapi/core_api/routes/public/xcom.py @@ -26,7 +26,7 @@ from airflow.api_fastapi.common.parameters import QueryLimit, QueryOffset from airflow.api_fastapi.common.router import AirflowRouter from airflow.api_fastapi.core_api.datamodels.xcom import ( - XComCollection, + XComCollectionResponse, XComResponseNative, XComResponseString, ) @@ -112,7 +112,7 @@ def get_xcom_entries( session: SessionDep, xcom_key: Annotated[str | None, Query()] = None, map_index: Annotated[int | None, Query(ge=-1)] = None, -) -> XComCollection: +) -> XComCollectionResponse: """ Get all XCom entries. @@ -140,4 +140,4 @@ def get_xcom_entries( ) query = query.order_by(XCom.dag_id, XCom.task_id, XCom.run_id, XCom.map_index, XCom.key) xcoms = session.scalars(query) - return XComCollection(xcom_entries=xcoms, total_entries=total_entries) + return XComCollectionResponse(xcom_entries=xcoms, total_entries=total_entries) diff --git a/airflow/api_fastapi/core_api/routes/ui/structure.py b/airflow/api_fastapi/core_api/routes/ui/structure.py index e143100b259a1..44a2d9f2f5bd6 100644 --- a/airflow/api_fastapi/core_api/routes/ui/structure.py +++ b/airflow/api_fastapi/core_api/routes/ui/structure.py @@ -16,7 +16,8 @@ # under the License. 
from __future__ import annotations -from fastapi import HTTPException, Request, status +from fastapi import HTTPException, status +from sqlalchemy import select from airflow.api_fastapi.common.db.common import SessionDep from airflow.api_fastapi.common.parameters import QueryIncludeDownstream, QueryIncludeUpstream @@ -24,6 +25,7 @@ from airflow.api_fastapi.core_api.datamodels.ui.structure import StructureDataResponse from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.api_fastapi.core_api.services.ui.structure import get_upstream_assets +from airflow.models.dag_version import DagVersion from airflow.models.serialized_dag import SerializedDagModel from airflow.utils.dag_edges import dag_edges from airflow.utils.task_group import task_group_to_dict @@ -38,17 +40,33 @@ def structure_data( session: SessionDep, dag_id: str, - request: Request, include_upstream: QueryIncludeUpstream = False, include_downstream: QueryIncludeDownstream = False, root: str | None = None, external_dependencies: bool = False, + dag_version: int | None = None, ) -> StructureDataResponse: """Get Structure Data.""" - dag = request.app.state.dag_bag.get_dag(dag_id) - - if dag is None: - raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id} was not found") + if dag_version is None: + dag_version_model = DagVersion.get_latest_version(dag_id) + if dag_version_model is None: + raise HTTPException( + status.HTTP_404_NOT_FOUND, + f"Dag with id {dag_id} was not found", + ) + dag_version = dag_version_model.version_number + + serialized_dag: SerializedDagModel = session.scalar( + select(SerializedDagModel) + .join(DagVersion) + .where(SerializedDagModel.dag_id == dag_id, DagVersion.version_number == dag_version) + ) + if serialized_dag is None: + raise HTTPException( + status.HTTP_404_NOT_FOUND, + f"Dag with id {dag_id} and version {dag_version} was not found", + ) + dag = serialized_dag.dag if root: dag = dag.partial_subset( diff --git a/airflow/api_fastapi/execution_api/datamodels/taskinstance.py b/airflow/api_fastapi/execution_api/datamodels/taskinstance.py index e427cac5f3db4..675a76d431392 100644 --- a/airflow/api_fastapi/execution_api/datamodels/taskinstance.py +++ b/airflow/api_fastapi/execution_api/datamodels/taskinstance.py @@ -32,7 +32,7 @@ ) from airflow.api_fastapi.common.types import UtcDateTime -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel from airflow.api_fastapi.execution_api.datamodels.asset import AssetProfile from airflow.api_fastapi.execution_api.datamodels.connection import ConnectionResponse from airflow.api_fastapi.execution_api.datamodels.variable import VariableResponse @@ -42,7 +42,7 @@ AwareDatetimeAdapter = TypeAdapter(AwareDatetime) -class TIEnterRunningPayload(BaseModel): +class TIEnterRunningPayload(StrictBaseModel): """Schema for updating TaskInstance to 'RUNNING' state with minimal required fields.""" state: Annotated[ @@ -60,7 +60,7 @@ class TIEnterRunningPayload(BaseModel): """When the task started executing""" -class TITerminalStatePayload(BaseModel): +class TITerminalStatePayload(StrictBaseModel): """Schema for updating TaskInstance to a terminal state except SUCCESS state.""" state: Literal[ @@ -74,7 +74,7 @@ class TITerminalStatePayload(BaseModel): """When the task completed executing""" -class TISuccessStatePayload(BaseModel): +class TISuccessStatePayload(StrictBaseModel): """Schema for updating TaskInstance to success state.""" state: 
Annotated[ @@ -96,13 +96,13 @@ class TISuccessStatePayload(BaseModel): outlet_events: Annotated[list[Any], Field(default_factory=list)] -class TITargetStatePayload(BaseModel): +class TITargetStatePayload(StrictBaseModel): """Schema for updating TaskInstance to a target state, excluding terminal and running states.""" state: IntermediateTIState -class TIDeferredStatePayload(BaseModel): +class TIDeferredStatePayload(StrictBaseModel): """Schema for updating TaskInstance to a deferred state.""" state: Annotated[ @@ -128,7 +128,7 @@ def validate_moment(cls, v): return v -class TIRescheduleStatePayload(BaseModel): +class TIRescheduleStatePayload(StrictBaseModel): """Schema for updating TaskInstance to a up_for_reschedule state.""" state: Annotated[ @@ -146,7 +146,7 @@ class TIRescheduleStatePayload(BaseModel): end_date: UtcDateTime -def ti_state_discriminator(v: dict[str, str] | BaseModel) -> str: +def ti_state_discriminator(v: dict[str, str] | StrictBaseModel) -> str: """ Determine the discriminator key for TaskInstance state transitions. @@ -185,7 +185,7 @@ def ti_state_discriminator(v: dict[str, str] | BaseModel) -> str: ] -class TIHeartbeatInfo(BaseModel): +class TIHeartbeatInfo(StrictBaseModel): """Schema for TaskInstance heartbeat endpoint.""" hostname: str @@ -194,7 +194,7 @@ class TIHeartbeatInfo(BaseModel): # This model is not used in the API, but it is included in generated OpenAPI schema # for use in the client SDKs. -class TaskInstance(BaseModel): +class TaskInstance(StrictBaseModel): """Schema for TaskInstance model with minimal required fields needed for Runtime.""" id: uuid.UUID @@ -207,7 +207,7 @@ class TaskInstance(BaseModel): hostname: str | None = None -class DagRun(BaseModel): +class DagRun(StrictBaseModel): """Schema for DagRun model with minimal required fields needed for Runtime.""" # TODO: `dag_id` and `run_id` are duplicated from TaskInstance diff --git a/airflow/api_fastapi/execution_api/datamodels/variable.py b/airflow/api_fastapi/execution_api/datamodels/variable.py index 6c597524763aa..73361908a810f 100644 --- a/airflow/api_fastapi/execution_api/datamodels/variable.py +++ b/airflow/api_fastapi/execution_api/datamodels/variable.py @@ -19,7 +19,7 @@ from pydantic import Field -from airflow.api_fastapi.core_api.base import BaseModel, ConfigDict +from airflow.api_fastapi.core_api.base import BaseModel, ConfigDict, StrictBaseModel class VariableResponse(BaseModel): @@ -29,7 +29,7 @@ class VariableResponse(BaseModel): val: str | None = Field(alias="value") -class VariablePostBody(BaseModel): +class VariablePostBody(StrictBaseModel): """Request body schema for creating variables.""" model_config = ConfigDict(extra="forbid") diff --git a/airflow/auth/managers/simple/datamodels/login.py b/airflow/auth/managers/simple/datamodels/login.py index 9d8eef55d23ba..ff4883625eccd 100644 --- a/airflow/auth/managers/simple/datamodels/login.py +++ b/airflow/auth/managers/simple/datamodels/login.py @@ -19,7 +19,7 @@ from pydantic import Field -from airflow.api_fastapi.core_api.base import BaseModel +from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel class LoginResponse(BaseModel): @@ -28,7 +28,7 @@ class LoginResponse(BaseModel): jwt_token: str -class LoginBody(BaseModel): +class LoginBody(StrictBaseModel): """Login serializer for post bodies.""" username: str = Field() diff --git a/airflow/auth/managers/simple/openapi/v1-generated.yaml b/airflow/auth/managers/simple/openapi/v1-generated.yaml index 4ae5c70876149..d0efc68864d17 100644 --- 
a/airflow/auth/managers/simple/openapi/v1-generated.yaml +++ b/airflow/auth/managers/simple/openapi/v1-generated.yaml @@ -75,6 +75,7 @@ components: password: type: string title: Password + additionalProperties: false type: object required: - username diff --git a/airflow/auth/managers/simple/ui/src/test-utils.tsx b/airflow/auth/managers/simple/ui/src/test-utils.tsx index 366492299d1f5..c9428a94452ce 100644 --- a/airflow/auth/managers/simple/ui/src/test-utils.tsx +++ b/airflow/auth/managers/simple/ui/src/test-utils.tsx @@ -18,23 +18,29 @@ */ import React, { PropsWithChildren } from "react"; -import { ChakraProvider } from "@chakra-ui/react"; +import { ChakraProvider, defaultSystem } from "@chakra-ui/react"; import { MemoryRouter, MemoryRouterProps } from "react-router-dom"; -import { QueryClientProvider } from "@tanstack/react-query"; -import { queryClient } from "./queryClient"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; interface WrapperProps extends PropsWithChildren { initialEntries?: MemoryRouterProps["initialEntries"]; } export const Wrapper = ({ initialEntries, children }: WrapperProps) => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { + staleTime: Infinity, + }, + }, + }); return ( - <ChakraProvider> - <QueryClientProvider client={queryClient}> - <MemoryRouter initialEntries={initialEntries}> - {children} - </MemoryRouter> - </QueryClientProvider> - </ChakraProvider> + <ChakraProvider value={defaultSystem}> + <QueryClientProvider client={queryClient}> + <MemoryRouter initialEntries={initialEntries}> - {children} - </MemoryRouter> + </QueryClientProvider> + </ChakraProvider> ); }; diff --git a/airflow/cli/commands/remote_commands/task_command.py b/airflow/cli/commands/remote_commands/task_command.py index b1f9182e4c9c3..011f629d13f56 100644 --- a/airflow/cli/commands/remote_commands/task_command.py +++ b/airflow/cli/commands/remote_commands/task_command.py @@ -28,6 +28,7 @@ import textwrap from collections.abc import Generator from contextlib import contextmanager, redirect_stderr, redirect_stdout, suppress +from pathlib import Path from typing import TYPE_CHECKING, Protocol, cast import pendulum @@ -45,8 +46,8 @@ from airflow.models import TaskInstance from airflow.models.dag import DAG, _run_inline_trigger from airflow.models.dagrun import DagRun -from airflow.models.param import ParamsDict from airflow.models.taskinstance import TaskReturnCode +from airflow.sdk.definitions.param import ParamsDict from airflow.settings import IS_EXECUTOR_CONTAINER, IS_K8S_EXECUTOR_POD from airflow.ti_deps.dep_context import DepContext from airflow.ti_deps.dependencies_deps import SCHEDULER_QUEUED_DEPS @@ -299,7 +300,9 @@ def _run_task_by_executor(args, dag: DAG, ti: TaskInstance) -> None: if executor.queue_workload.__func__ is not BaseExecutor.queue_workload: # type: ignore[attr-defined] from airflow.executors import workloads - workload = workloads.ExecuteTask.make(ti, dag_rel_path=dag.relative_fileloc) + if TYPE_CHECKING: + assert dag.relative_fileloc + workload = workloads.ExecuteTask.make(ti, dag_rel_path=Path(dag.relative_fileloc)) with create_session() as session: executor.queue_workload(workload, session) else: diff --git a/airflow/dag_processing/collection.py b/airflow/dag_processing/collection.py index 7eb700de89b64..c8bac5cef964a 100644 --- a/airflow/dag_processing/collection.py +++ b/airflow/dag_processing/collection.py @@ -210,6 +210,7 @@ def _serialize_dag_capturing_errors( except Exception: log.exception("Failed to write serialized DAG dag_id=%s fileloc=%s", dag.dag_id, dag.fileloc) dagbag_import_error_traceback_depth = conf.getint("core", "dagbag_import_error_traceback_depth") + # todo AIP-66: this needs to use bundle name / rel fileloc instead return [(dag.fileloc, traceback.format_exc(limit=-dagbag_import_error_traceback_depth))] @@ -422,6 +423,7 @@ def update_dags( for
dag_id, dm in sorted(orm_dags.items()): dag = self.dags[dag_id] dm.fileloc = dag.fileloc + dm.relative_fileloc = dag.relative_fileloc dm.owners = dag.owner or conf.get("operators", "default_owner") dm.is_active = True dm.has_import_errors = False diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 014104c2cba65..fa68cb4fab1ba 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -32,10 +32,11 @@ import zipfile from collections import defaultdict, deque from collections.abc import Callable, Iterator +from dataclasses import dataclass, field from datetime import datetime, timedelta from importlib import import_module from pathlib import Path -from typing import TYPE_CHECKING, Any, NamedTuple +from typing import TYPE_CHECKING, Any, NamedTuple, cast import attrs import structlog @@ -97,11 +98,19 @@ class DagFileStat: log = logging.getLogger("airflow.processor_manager") -class DagFileInfo(NamedTuple): +@dataclass(frozen=True) +class DagFileInfo: """Information about a DAG file.""" - path: str # absolute path of the file + rel_path: Path bundle_name: str + bundle_path: Path | None = field(compare=False, default=None) + + @property + def absolute_path(self) -> Path: + if not self.bundle_path: + raise ValueError("bundle_path not set") + return self.bundle_path / self.rel_path def _config_int_factory(section: str, key: str): @@ -238,25 +247,27 @@ def _scan_stale_dags(self): elapsed_time_since_refresh = now - self._last_deactivate_stale_dags_time if elapsed_time_since_refresh > self.parsing_cleanup_interval: last_parsed = { - fp: stat.last_finish_time for fp, stat in self._file_stats.items() if stat.last_finish_time + file_info: stat.last_finish_time + for file_info, stat in self._file_stats.items() + if stat.last_finish_time } - self.deactivate_stale_dags( - last_parsed=last_parsed, - stale_dag_threshold=self.stale_dag_threshold, - ) + self.deactivate_stale_dags(last_parsed=last_parsed) self._last_deactivate_stale_dags_time = time.monotonic() @provide_session def deactivate_stale_dags( self, last_parsed: dict[DagFileInfo, datetime | None], - stale_dag_threshold: int, session: Session = NEW_SESSION, ): """Detect and deactivate DAGs which are no longer present in files.""" to_deactivate = set() query = select( - DagModel.dag_id, DagModel.bundle_name, DagModel.fileloc, DagModel.last_parsed_time + DagModel.dag_id, + DagModel.bundle_name, + DagModel.fileloc, + DagModel.last_parsed_time, + DagModel.relative_fileloc, ).where(DagModel.is_active) # TODO: AIP-66 by bundle! dags_parsed = session.execute(query) @@ -266,14 +277,11 @@ def deactivate_stale_dags( # last_parsed_time is the processor_timeout. Longer than that indicates that the DAG is # no longer present in the file. 
We have a stale_dag_threshold configured to prevent a # significant delay in deactivation of stale dags when a large timeout is configured - dag_file_path = DagFileInfo(path=dag.fileloc, bundle_name=dag.bundle_name) - if ( - dag_file_path in last_parsed - and (dag.last_parsed_time + timedelta(seconds=stale_dag_threshold)) - < last_parsed[dag_file_path] - ): - self.log.info("DAG %s is missing and will be deactivated.", dag.dag_id) - to_deactivate.add(dag.dag_id) + file_info = DagFileInfo(rel_path=Path(dag.relative_fileloc), bundle_name=dag.bundle_name) + if last_finish_time := last_parsed.get(file_info, None): + if dag.last_parsed_time + timedelta(seconds=self.stale_dag_threshold) < last_finish_time: + self.log.info("DAG %s is missing and will be deactivated.", dag.dag_id) + to_deactivate.add(dag.dag_id) if to_deactivate: deactivated_dagmodel = session.execute( @@ -480,29 +488,30 @@ def _refresh_dag_bundles(self): "Version changed for %s, new version: %s", bundle.name, version_after_refresh ) - bundle_file_paths = self._find_files_in_bundle(bundle) + found_file_infos = [ + DagFileInfo(rel_path=p, bundle_name=bundle.name, bundle_path=bundle.path) + for p in self._find_files_in_bundle(bundle) + ] new_file_paths = [f for f in self._file_paths if f.bundle_name != bundle.name] - new_file_paths.extend( - DagFileInfo(path=path, bundle_name=bundle.name) for path in bundle_file_paths - ) + new_file_paths.extend(found_file_infos) self.set_file_paths(new_file_paths) - self.deactivate_deleted_dags(bundle_file_paths) + self.deactivate_deleted_dags(active_files=found_file_infos) self.clear_nonexistent_import_errors() self._bundle_versions[bundle.name] = bundle.get_current_version() - def _find_files_in_bundle(self, bundle: BaseDagBundle) -> list[str]: - """Refresh file paths from bundle dir.""" + def _find_files_in_bundle(self, bundle: BaseDagBundle) -> list[Path]: + """Get relative file paths from bundle dir.""" # Build up a list of Python files that could contain DAGs self.log.info("Searching for files in %s at %s", bundle.name, bundle.path) - file_paths = list_py_file_paths(bundle.path) + file_paths = [Path(x).relative_to(bundle.path) for x in list_py_file_paths(bundle.path)] self.log.info("Found %s files for bundle %s", len(file_paths), bundle.name) return file_paths - def deactivate_deleted_dags(self, file_paths: set[str]) -> None: + def deactivate_deleted_dags(self, active_files: list[DagFileInfo]) -> None: """Deactivate DAGs that come from files that are no longer present.""" def _iter_dag_filelocs(fileloc: str) -> Iterator[str]: @@ -522,10 +531,20 @@ def _iter_dag_filelocs(fileloc: str) -> Iterator[str]: except zipfile.BadZipFile: self.log.exception("There was an error accessing ZIP file %s %s", fileloc) - dag_filelocs = {full_loc for path in file_paths for full_loc in _iter_dag_filelocs(path)} + active_subpaths: set[tuple[str, str]] = set() + """ + 'subpath' here means bundle + modified rel path. What does modified rel path mean? + Well, '_iter_dag_filelocs' walks through zip files and may return a "path" that is the + rel path to the zip plus the rel path within the zip. Since this is a bit different + from most uses of the word "rel path", I wanted to call it something different. + A set is used since many dags can be in one file. + """ + + for info in active_files: + for path in _iter_dag_filelocs(str(info.absolute_path)): + active_subpaths.add((info.bundle_name, path)) - # TODO: AIP-66: make bundle aware, as fileloc won't be unique long term.
- DagModel.deactivate_deleted_dags(dag_filelocs) + DagModel.deactivate_deleted_dags(active_subpaths) def _print_stat(self): """Occasionally print out stats about how fast the files are getting processed.""" @@ -549,7 +568,8 @@ def clear_nonexistent_import_errors(self, session=NEW_SESSION): if self._file_paths: query = query.where( tuple_(ParseImportError.filename, ParseImportError.bundle_name).notin_( - [(f.path, f.bundle_name) for f in self._file_paths] + # todo AIP-66: ParseImportError should have rel fileloc + bundle name + [(str(f.absolute_path), f.bundle_name) for f in self._file_paths] ), ) @@ -594,7 +614,7 @@ def _log_file_processing_stats(self, known_file_paths): proc = self._processors.get(file_path) num_dags = stat.num_dags num_errors = stat.import_errors - file_name = Path(file_path.path).stem + file_name = Path(file_path.rel_path).stem processor_pid = proc.pid if proc else None processor_start_time = proc.start_time if proc else None runtime = (now - processor_start_time) if processor_start_time else None @@ -746,7 +766,7 @@ def _render_log_filename(self, dag_file: DagFileInfo) -> str: self._latest_log_symlink_date = datetime.today() bundle = next(b for b in self._dag_bundles if b.name == dag_file.bundle_name) - relative_path = Path(dag_file.path).relative_to(bundle.path) + relative_path = Path(dag_file.rel_path) return os.path.join(self._get_log_dir(), bundle.name, f"{relative_path}.log") def _get_logger_for_dag_file(self, dag_file: DagFileInfo): @@ -764,7 +784,8 @@ def _create_process(self, dag_file: DagFileInfo) -> DagFileProcessorProcess: return DagFileProcessorProcess.start( id=id, - path=dag_file.path, + path=dag_file.absolute_path, + bundle_path=cast(Path, dag_file.bundle_path), callbacks=callback_to_execute_for_file, selector=self.selector, logger=self._get_logger_for_dag_file(dag_file), @@ -815,7 +836,7 @@ def prepare_file_path_queue(self): for file_path in self._file_paths: if is_mtime_mode: try: - files_with_mtime[file_path] = os.path.getmtime(file_path.path) + files_with_mtime[file_path] = os.path.getmtime(file_path.absolute_path) except FileNotFoundError: self.log.warning("Skipping processing of missing file: %s", file_path) self._file_stats.pop(file_path, None) @@ -841,7 +862,7 @@ def prepare_file_path_queue(self): if is_mtime_mode: file_paths = sorted(files_with_mtime, key=files_with_mtime.get, reverse=True) elif list_mode == "alphabetical": - file_paths.sort() + file_paths.sort(key=lambda f: f.rel_path) elif list_mode == "random_seeded_by_host": # Shuffle the list seeded by hostname so multiple DAG processors can work on different # set of files. Since we set the seed, the sort order will remain same per host diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 5d73c50e7d736..2a822e1bae97f 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -19,6 +19,7 @@ import os import sys import traceback +from pathlib import Path from typing import TYPE_CHECKING, Annotated, Callable, Literal, Union import attrs @@ -73,6 +74,7 @@ def _parse_file(msg: DagFileParseRequest, log: FilteringBoundLogger) -> DagFileP # TODO: Set known_pool names on DagBag!
bag = DagBag( dag_folder=msg.file, + bundle_path=msg.bundle_path, include_examples=False, safe_mode=True, load_op_links=False, @@ -159,6 +161,10 @@ class DagFileParseRequest(BaseModel): """ file: str + + bundle_path: Path + """Passing bundle path around lets us figure out relative file path.""" + requests_fd: int callback_requests: list[CallbackRequest] = Field(default_factory=list) type: Literal["DagFileParseRequest"] = "DagFileParseRequest" @@ -205,17 +211,24 @@ def start( # type: ignore[override] cls, *, path: str | os.PathLike[str], + bundle_path: Path, callbacks: list[CallbackRequest], target: Callable[[], None] = _parse_file_entrypoint, **kwargs, ) -> Self: proc: Self = super().start(target=target, **kwargs) - proc._on_child_started(callbacks, path) + proc._on_child_started(callbacks, path, bundle_path) return proc - def _on_child_started(self, callbacks: list[CallbackRequest], path: str | os.PathLike[str]) -> None: + def _on_child_started( + self, + callbacks: list[CallbackRequest], + path: str | os.PathLike[str], + bundle_path: Path, + ) -> None: msg = DagFileParseRequest( file=os.fspath(path), + bundle_path=bundle_path, requests_fd=self._requests_fd, callback_requests=callbacks, ) diff --git a/airflow/example_dags/example_params_trigger_ui.py b/airflow/example_dags/example_params_trigger_ui.py index e47ceae556501..ece4056764567 100644 --- a/airflow/example_dags/example_params_trigger_ui.py +++ b/airflow/example_dags/example_params_trigger_ui.py @@ -27,7 +27,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.models.param import Param, ParamsDict +from airflow.sdk import Param, ParamsDict from airflow.utils.trigger_rule import TriggerRule # [START params_trigger] diff --git a/airflow/example_dags/example_params_ui_tutorial.py b/airflow/example_dags/example_params_ui_tutorial.py index b64e777bed144..0bf9994c95c70 100644 --- a/airflow/example_dags/example_params_ui_tutorial.py +++ b/airflow/example_dags/example_params_ui_tutorial.py @@ -29,7 +29,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.models.param import Param, ParamsDict +from airflow.sdk import Param, ParamsDict with ( DAG( diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 92b7c2b0010ed..898bfe9c95ea4 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -775,7 +775,7 @@ def process_executor_events( "TaskInstance Finished: dag_id=%s, task_id=%s, run_id=%s, map_index=%s, " "run_start_date=%s, run_end_date=%s, " "run_duration=%s, state=%s, executor=%s, executor_state=%s, try_number=%s, max_tries=%s, " - "pool=%s, queue=%s, priority_weight=%d, operator=%s, queued_dttm=%s, " + "pool=%s, queue=%s, priority_weight=%d, operator=%s, queued_dttm=%s, scheduled_dttm=%s," "queued_by_job_id=%s, pid=%s" ) cls.logger().info( @@ -797,6 +797,7 @@ def process_executor_events( ti.priority_weight, ti.operator, ti.queued_dttm, + ti.scheduled_dttm, ti.queued_by_job_id, ti.pid, ) @@ -1808,6 +1809,7 @@ def _reschedule_stuck_task(self, ti: TaskInstance, session: Session): .values( state=TaskInstanceState.SCHEDULED, queued_dttm=None, + scheduled_dttm=timezone.utcnow(), ) .execution_options(synchronize_session=False) ) @@ -1962,6 +1964,7 @@ def check_trigger_timeouts( state=TaskInstanceState.SCHEDULED, next_method=TRIGGER_FAIL_REPR, next_kwargs={"error": TriggerFailureReason.TRIGGER_TIMEOUT}, + scheduled_dttm=timezone.utcnow(), trigger_id=None, ) ).rowcount diff --git 
a/airflow/migrations/versions/0056_3_0_0_add_relative_fileloc_column.py b/airflow/migrations/versions/0056_3_0_0_add_relative_fileloc_column.py new file mode 100644 index 0000000000000..3e96f198b939e --- /dev/null +++ b/airflow/migrations/versions/0056_3_0_0_add_relative_fileloc_column.py @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Add relative fileloc column. + +Revision ID: 8ea135928435 +Revises: e39a26ac59f6 +Create Date: 2025-01-24 13:17:13.444341 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +revision = "8ea135928435" +down_revision = "e39a26ac59f6" +branch_labels = None +depends_on = None +airflow_version = "3.0.0" + + +def upgrade(): + """Apply Add relative fileloc column.""" + with op.batch_alter_table("dag", schema=None) as batch_op: + batch_op.add_column(sa.Column("relative_fileloc", sa.String(length=2000), nullable=True)) + + +def downgrade(): + """Unapply Add relative fileloc column.""" + with op.batch_alter_table("dag", schema=None) as batch_op: + batch_op.drop_column("relative_fileloc") diff --git a/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py b/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py new file mode 100644 index 0000000000000..3f464e45d7c60 --- /dev/null +++ b/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +add new task_instance field scheduled_dttm. + +Revision ID: 33b04e4bfa19 +Revises: 8ea135928435 +Create Date: 2025-01-22 11:22:01.272681 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +from airflow.utils.sqlalchemy import UtcDateTime + +# revision identifiers, used by Alembic. 
+revision = "33b04e4bfa19" +down_revision = "8ea135928435" +branch_labels = None +depends_on = None +airflow_version = "3.0.0" + + +def upgrade(): + """Apply add new task_instance field scheduled_dttm.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("task_instance", schema=None) as batch_op: + batch_op.add_column(sa.Column("scheduled_dttm", UtcDateTime(timezone=True), nullable=True)) + + with op.batch_alter_table("task_instance_history", schema=None) as batch_op: + batch_op.add_column(sa.Column("scheduled_dttm", UtcDateTime(timezone=True), nullable=True)) + + # ### end Alembic commands ### + + +def downgrade(): + """Unapply add new task_instance field scheduled_dttm.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("task_instance_history", schema=None) as batch_op: + batch_op.drop_column("scheduled_dttm") + + with op.batch_alter_table("task_instance", schema=None) as batch_op: + batch_op.drop_column("scheduled_dttm") + + # ### end Alembic commands ### diff --git a/airflow/models/__init__.py b/airflow/models/__init__.py index ae0fa3040e181..6bd3883b139af 100644 --- a/airflow/models/__init__.py +++ b/airflow/models/__init__.py @@ -99,7 +99,7 @@ def __getattr__(name): "Log": "airflow.models.log", "MappedOperator": "airflow.models.mappedoperator", "Operator": "airflow.models.operator", - "Param": "airflow.models.param", + "Param": "airflow.sdk.definitions.param", "Pool": "airflow.models.pool", "RenderedTaskInstanceFields": "airflow.models.renderedtifields", "SkipMixin": "airflow.models.skipmixin", @@ -128,7 +128,6 @@ def __getattr__(name): from airflow.models.log import Log from airflow.models.mappedoperator import MappedOperator from airflow.models.operator import Operator - from airflow.models.param import Param from airflow.models.pool import Pool from airflow.models.renderedtifields import RenderedTaskInstanceFields from airflow.models.skipmixin import SkipMixin @@ -138,3 +137,4 @@ def __getattr__(name): from airflow.models.trigger import Trigger from airflow.models.variable import Variable from airflow.models.xcom import XCom + from airflow.sdk.definitions.param import Param diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 9b88e6d70f71c..a0f6e901dc3e9 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -21,11 +21,10 @@ import copy import functools import logging -import pathlib import sys import time from collections import defaultdict -from collections.abc import Collection, Container, Generator, Iterable, Sequence +from collections.abc import Collection, Generator, Iterable, Sequence from contextlib import ExitStack from datetime import datetime, timedelta from functools import cache @@ -735,20 +734,6 @@ def dag_id(self, value: str) -> None: def timetable_summary(self) -> str: return self.timetable.summary - @property - def relative_fileloc(self) -> pathlib.Path: - """File location of the importable dag 'file' relative to the configured DAGs folder.""" - path = pathlib.Path(self.fileloc) - try: - rel_path = path.relative_to(self._processor_dags_folder or settings.DAGS_FOLDER) - if rel_path == pathlib.Path("."): - return path - else: - return rel_path - except ValueError: - # Not relative to DAGS_FOLDER. 
- return path - @provide_session def get_concurrency_reached(self, session=NEW_SESSION) -> bool: """Return a boolean indicating whether the max_active_tasks limit for this DAG has been reached.""" @@ -1681,6 +1666,7 @@ def add_logger_if_needed(ti: TaskInstance): if s.state != TaskInstanceState.UP_FOR_RESCHEDULE: s.try_number += 1 s.state = TaskInstanceState.SCHEDULED + s.scheduled_dttm = timezone.utcnow() session.commit() # triggerer may mark tasks scheduled so we read from DB all_tis = set(dr.get_task_instances(session=session)) @@ -2045,6 +2031,7 @@ class DagModel(Base): # packaged DAG, it will point to the subpath of the DAG within the # associated zip. fileloc = Column(String(2000)) + relative_fileloc = Column(String(2000)) bundle_name = Column(StringID(), ForeignKey("dag_bundle.name"), nullable=True) # The version of the bundle the last time the DAG was processed bundle_version = Column(String(200), nullable=True) @@ -2214,18 +2201,6 @@ def get_default_view(self) -> str: def safe_dag_id(self): return self.dag_id.replace(".", "__dot__") - @property - def relative_fileloc(self) -> pathlib.Path | None: - """File location of the importable dag 'file' relative to the configured DAGs folder.""" - if self.fileloc is None: - return None - path = pathlib.Path(self.fileloc) - try: - return path.relative_to(settings.DAGS_FOLDER) - except ValueError: - # Not relative to DAGS_FOLDER. - return path - @provide_session def set_is_paused(self, is_paused: bool, session=NEW_SESSION) -> None: """ @@ -2266,25 +2241,25 @@ def dag_display_name(self) -> str: @provide_session def deactivate_deleted_dags( cls, - alive_dag_filelocs: Container[str], + active: set[tuple[str, str]], session: Session = NEW_SESSION, ) -> None: """ Set ``is_active=False`` on the DAGs for which the DAG files have been removed. 
- :param alive_dag_filelocs: file paths of alive DAGs + :param active: set of (bundle name, relative fileloc) tuples for DAGs whose files still exist :param session: ORM Session """ log.debug("Deactivating DAGs (for which DAG files are deleted) from %s table ", cls.__tablename__) dag_models = session.scalars( select(cls).where( - cls.fileloc.is_not(None), + cls.relative_fileloc.is_not(None), ) ) - for dag_model in dag_models: - if dag_model.fileloc not in alive_dag_filelocs: - dag_model.is_active = False + for dm in dag_models: + if (dm.bundle_name, dm.relative_fileloc) not in active: + dm.is_active = False @classmethod def dags_needing_dagruns(cls, session: Session) -> tuple[Query, dict[str, tuple[datetime, datetime]]]: diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index 11efc362d6d9a..a18b133f8e67e 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -119,16 +119,17 @@ class DagBag(LoggingMixin): def __init__( self, - dag_folder: str | Path | None = None, + dag_folder: str | Path | None = None, # todo AIP-66: rename this to path include_examples: bool | ArgNotSet = NOTSET, safe_mode: bool | ArgNotSet = NOTSET, read_dags_from_db: bool = False, load_op_links: bool = True, collect_dags: bool = True, known_pools: set[str] | None = None, + bundle_path: Path | None = None, ): super().__init__() - + self.bundle_path: Path | None = bundle_path include_examples = ( include_examples if isinstance(include_examples, bool) @@ -482,6 +483,10 @@ def _process_modules(self, filepath, mods, file_last_changed_on_disk): for dag, mod in top_level_dags: dag.fileloc = mod.__file__ + if self.bundle_path: + dag.relative_fileloc = str(Path(mod.__file__).relative_to(self.bundle_path)) + else: + dag.relative_fileloc = dag.fileloc try: dag.validate() self.bag_dag(dag=dag) diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 35d8af4322c49..727746b9b0333 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -1662,6 +1662,7 @@ def schedule_tis( ) .values( state=TaskInstanceState.SCHEDULED, + scheduled_dttm=timezone.utcnow(), try_number=case( ( or_(TI.state.is_(None), TI.state != TaskInstanceState.UP_FOR_RESCHEDULE), diff --git a/airflow/models/errors.py b/airflow/models/errors.py index 21c2236e2c18b..748d56c46b462 100644 --- a/airflow/models/errors.py +++ b/airflow/models/errors.py @@ -29,6 +29,6 @@ class ParseImportError(Base): __tablename__ = "import_error" id = Column(Integer, primary_key=True) timestamp = Column(UtcDateTime) - filename = Column(String(1024)) + filename = Column(String(1024)) # todo AIP-66: make this bundle and relative fileloc bundle_name = Column(StringID()) stacktrace = Column(Text) diff --git a/airflow/models/param.py b/airflow/models/param.py index cd3ccec26a48a..01886f6e585ab 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -14,340 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License.
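The ``DagBag`` and ``DagModel`` hunks above switch DAG bookkeeping from absolute ``fileloc`` paths to a ``(bundle_name, relative_fileloc)`` pair. A minimal sketch of how that pair is derived and used for deactivation, assuming a hypothetical bundle rooted at ``/opt/bundles/my_bundle`` (all names and paths below are illustrative, not taken from this PR):

.. code-block:: python

    from pathlib import Path

    bundle_name = "my_bundle"                      # hypothetical bundle name
    bundle_path = Path("/opt/bundles/my_bundle")   # hypothetical bundle root
    dag_file = bundle_path / "teams/etl/daily.py"  # hypothetical DAG file

    # Mirrors DagBag._process_modules above: when a bundle_path is known,
    # the DAG file path is stored relative to the bundle root.
    relative_fileloc = str(dag_file.relative_to(bundle_path))  # "teams/etl/daily.py"

    # deactivate_deleted_dags receives a set of such tuples; any DagModel whose
    # (bundle_name, relative_fileloc) pair is absent from the set is marked inactive.
    active = {(bundle_name, relative_fileloc)}
    assert ("my_bundle", "teams/etl/daily.py") in active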
-from __future__ import annotations - -import contextlib -import copy -import json -import logging -from collections.abc import ItemsView, Iterable, MutableMapping, ValuesView -from typing import TYPE_CHECKING, Any, ClassVar - -from airflow.exceptions import AirflowException, ParamValidationError -from airflow.sdk.definitions._internal.mixins import ResolveMixin -from airflow.utils.types import NOTSET, ArgNotSet - -if TYPE_CHECKING: - from airflow.sdk.definitions.context import Context - from airflow.sdk.definitions.dag import DAG - from airflow.sdk.types import Operator - -logger = logging.getLogger(__name__) - - -class Param: - """ - Class to hold the default value of a Param and rule set to do the validations. - - Without the rule set it always validates and returns the default value. - - :param default: The value this Param object holds - :param description: Optional help text for the Param - :param schema: The validation schema of the Param, if not given then all kwargs except - default & description will form the schema - """ - - __version__: ClassVar[int] = 1 - - CLASS_IDENTIFIER = "__class" - - def __init__(self, default: Any = NOTSET, description: str | None = None, **kwargs): - if default is not NOTSET: - self._check_json(default) - self.value = default - self.description = description - self.schema = kwargs.pop("schema") if "schema" in kwargs else kwargs - - def __copy__(self) -> Param: - return Param(self.value, self.description, schema=self.schema) - - @staticmethod - def _check_json(value): - try: - json.dumps(value) - except Exception: - raise ParamValidationError( - "All provided parameters must be json-serializable. " - f"The value '{value}' is not serializable." - ) - - def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: - """ - Run the validations and returns the Param's final value. - - May raise ValueError on failed validations, or TypeError - if no value is passed and no value already exists. - We first check that value is json-serializable; if not, warn. - In future release we will require the value to be json-serializable. - - :param value: The value to be updated for the Param - :param suppress_exception: To raise an exception or not when the validations fails. - If true and validations fails, the return value would be None. 
- """ - import jsonschema - from jsonschema import FormatChecker - from jsonschema.exceptions import ValidationError - - if value is not NOTSET: - self._check_json(value) - final_val = self.value if value is NOTSET else value - if isinstance(final_val, ArgNotSet): - if suppress_exception: - return None - raise ParamValidationError("No value passed and Param has no default value") - try: - jsonschema.validate(final_val, self.schema, format_checker=FormatChecker()) - except ValidationError as err: - if suppress_exception: - return None - raise ParamValidationError(err) from None - self.value = final_val - return final_val - - def dump(self) -> dict: - """Dump the Param as a dictionary.""" - out_dict: dict[str, str | None] = { - self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}" - } - out_dict.update(self.__dict__) - # Ensure that not set is translated to None - if self.value is NOTSET: - out_dict["value"] = None - return out_dict - - @property - def has_value(self) -> bool: - return self.value is not NOTSET and self.value is not None - - def serialize(self) -> dict: - return {"value": self.value, "description": self.description, "schema": self.schema} - - @staticmethod - def deserialize(data: dict[str, Any], version: int) -> Param: - if version > Param.__version__: - raise TypeError("serialized version > class version") - - return Param(default=data["value"], description=data["description"], schema=data["schema"]) - - -class ParamsDict(MutableMapping[str, Any]): - """ - Class to hold all params for dags or tasks. - - All the keys are strictly string and values are converted into Param's object - if they are not already. This class is to replace param's dictionary implicitly - and ideally not needed to be used directly. - - - :param dict_obj: A dict or dict like object to init ParamsDict - :param suppress_exception: Flag to suppress value exceptions while initializing the ParamsDict - """ - - __version__: ClassVar[int] = 1 - __slots__ = ["__dict", "suppress_exception"] - - def __init__(self, dict_obj: MutableMapping | None = None, suppress_exception: bool = False): - params_dict: dict[str, Param] = {} - dict_obj = dict_obj or {} - for k, v in dict_obj.items(): - if not isinstance(v, Param): - params_dict[k] = Param(v) - else: - params_dict[k] = v - self.__dict = params_dict - self.suppress_exception = suppress_exception - - def __bool__(self) -> bool: - return bool(self.__dict) - - def __eq__(self, other: Any) -> bool: - if isinstance(other, ParamsDict): - return self.dump() == other.dump() - if isinstance(other, dict): - return self.dump() == other - return NotImplemented - - def __copy__(self) -> ParamsDict: - return ParamsDict(self.__dict, self.suppress_exception) - - def __deepcopy__(self, memo: dict[int, Any] | None) -> ParamsDict: - return ParamsDict(copy.deepcopy(self.__dict, memo), self.suppress_exception) - - def __contains__(self, o: object) -> bool: - return o in self.__dict - def __len__(self) -> int: - return len(self.__dict) +"""Re exporting the new param module from Task SDK for backward compatibility.""" - def __delitem__(self, v: str) -> None: - del self.__dict[v] - - def __iter__(self): - return iter(self.__dict) - - def __repr__(self): - return repr(self.dump()) - - def __setitem__(self, key: str, value: Any) -> None: - """ - Override for dictionary's ``setitem`` method to ensure all values are of Param's type only. - - :param key: A key which needs to be inserted or updated in the dict - :param value: A value which needs to be set against the key. 
It could be of any - type but will be converted and stored as a Param object eventually. - """ - if isinstance(value, Param): - param = value - elif key in self.__dict: - param = self.__dict[key] - try: - param.resolve(value=value, suppress_exception=self.suppress_exception) - except ParamValidationError as ve: - raise ParamValidationError(f"Invalid input for param {key}: {ve}") from None - else: - # if the key isn't there already and if the value isn't of Param type create a new Param object - param = Param(value) - - self.__dict[key] = param - - def __getitem__(self, key: str) -> Any: - """ - Override for dictionary's ``getitem`` method to call the resolve method after fetching the key. - - :param key: The key to fetch - """ - param = self.__dict[key] - return param.resolve(suppress_exception=self.suppress_exception) - - def get_param(self, key: str) -> Param: - """Get the internal :class:`.Param` object for this key.""" - return self.__dict[key] - - def items(self): - return ItemsView(self.__dict) - - def values(self): - return ValuesView(self.__dict) - - def update(self, *args, **kwargs) -> None: - if len(args) == 1 and not kwargs and isinstance(args[0], ParamsDict): - return super().update(args[0].__dict) - super().update(*args, **kwargs) - - def dump(self) -> dict[str, Any]: - """Dump the ParamsDict object as a dictionary, while suppressing exceptions.""" - return {k: v.resolve(suppress_exception=True) for k, v in self.items()} - - def validate(self) -> dict[str, Any]: - """Validate & returns all the Params object stored in the dictionary.""" - resolved_dict = {} - try: - for k, v in self.items(): - resolved_dict[k] = v.resolve(suppress_exception=self.suppress_exception) - except ParamValidationError as ve: - raise ParamValidationError(f"Invalid input for param {k}: {ve}") from None - - return resolved_dict - - def serialize(self) -> dict[str, Any]: - return self.dump() - - @staticmethod - def deserialize(data: dict, version: int) -> ParamsDict: - if version > ParamsDict.__version__: - raise TypeError("serialized version > class version") - - return ParamsDict(data) - - -class DagParam(ResolveMixin): - """ - DAG run parameter reference. - - This binds a simple Param object to a name within a DAG instance, so that it - can be resolved during the runtime via the ``{{ context }}`` dictionary. The - ideal use case of this class is to implicitly convert args passed to a - method decorated by ``@dag``. - - It can be used to parameterize a DAG. You can overwrite its value by setting - it on conf when you trigger your DagRun. - - This can also be used in templates by accessing ``{{ context.params }}``. - - **Example**: - - with DAG(...) as dag: - EmailOperator(subject=dag.param('subject', 'Hi from Airflow!')) - - :param current_dag: Dag being used for parameter. - :param name: key value which is used to set the parameter - :param default: Default value used if no parameter was set. - """ - - def __init__(self, current_dag: DAG, name: str, default: Any = NOTSET): - if default is not NOTSET: - current_dag.params[name] = default - self._name = name - self._default = default - self.current_dag = current_dag - - def iter_references(self) -> Iterable[tuple[Operator, str]]: - return () - - def resolve(self, context: Context, *, include_xcom: bool = True) -> Any: - """Pull DagParam value from DagRun context. 
This method is run during ``op.execute()``.""" - with contextlib.suppress(KeyError): - if context["dag_run"].conf: - return context["dag_run"].conf[self._name] - if self._default is not NOTSET: - return self._default - with contextlib.suppress(KeyError): - return context["params"][self._name] - raise AirflowException(f"No value could be resolved for parameter {self._name}") - - def serialize(self) -> dict: - """Serialize the DagParam object into a dictionary.""" - return { - "dag_id": self.current_dag.dag_id, - "name": self._name, - "default": self._default, - } - - @classmethod - def deserialize(cls, data: dict, dags: dict) -> DagParam: - """ - Deserializes the dictionary back into a DagParam object. - - :param data: The serialized representation of the DagParam. - :param dags: A dictionary of available DAGs to look up the DAG. - """ - dag_id = data["dag_id"] - # Retrieve the current DAG from the provided DAGs dictionary - current_dag = dags.get(dag_id) - if not current_dag: - raise ValueError(f"DAG with id {dag_id} not found.") - - return cls(current_dag=current_dag, name=data["name"], default=data["default"]) - - -def process_params( - dag: DAG, - task: Operator, - dagrun_conf: dict[str, Any] | None, - *, - suppress_exception: bool, -) -> dict[str, Any]: - """Merge, validate params, and convert them into a simple dict.""" - from airflow.configuration import conf +from __future__ import annotations - dagrun_conf = dagrun_conf or {} +from airflow.sdk.definitions.param import Param, ParamsDict - params = ParamsDict(suppress_exception=suppress_exception) - with contextlib.suppress(AttributeError): - params.update(dag.params) - if task.params: - params.update(task.params) - if conf.getboolean("core", "dag_run_conf_overrides_params") and dagrun_conf: - logger.debug("Updating task params (%s) with DagRun.conf (%s)", params, dagrun_conf) - params.update(dagrun_conf) - return params.validate() +__all__ = ["Param", "ParamsDict"] diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 69b6d147eadb3..2aea43eb423bf 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -32,6 +32,7 @@ from datetime import timedelta from enum import Enum from functools import cache +from pathlib import Path from typing import TYPE_CHECKING, Any, Callable from urllib.parse import quote @@ -98,7 +99,6 @@ from airflow.models.base import Base, StringID, TaskInstanceDependencies, _sentinel from airflow.models.dagbag import DagBag from airflow.models.log import Log -from airflow.models.param import process_params from airflow.models.renderedtifields import get_serialized_template_fields from airflow.models.taskinstancekey import TaskInstanceKey from airflow.models.taskmap import TaskMap @@ -108,6 +108,7 @@ from airflow.sdk.api.datamodels._generated import AssetProfile from airflow.sdk.definitions._internal.templater import SandboxedEnvironment from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetNameRef, AssetUniqueKey, AssetUriRef +from airflow.sdk.definitions.param import process_params from airflow.sdk.definitions.taskgroup import MappedTaskGroup from airflow.sentry import Sentry from airflow.settings import task_instance_mutation_hook @@ -823,6 +824,7 @@ def _set_ti_attrs(target, source, include_dag_run=False): target.operator = source.operator target.custom_operator_name = source.custom_operator_name target.queued_dttm = source.queued_dttm + target.scheduled_dttm = source.scheduled_dttm target.queued_by_job_id = source.queued_by_job_id 
target.last_heartbeat_at = source.last_heartbeat_at target.pid = source.pid @@ -1711,6 +1713,7 @@ class TaskInstance(Base, LoggingMixin): operator = Column(String(1000)) custom_operator_name = Column(String(1000)) queued_dttm = Column(UtcDateTime) + scheduled_dttm = Column(UtcDateTime) queued_by_job_id = Column(Integer) last_heartbeat_at = Column(UtcDateTime) @@ -1946,7 +1949,9 @@ def _command_as_list( if dag is None: raise ValueError("DagModel is empty") - path = dag.relative_fileloc + path = None + if dag.relative_fileloc: + path = Path(dag.relative_fileloc) if path: if not path.is_absolute(): @@ -2702,23 +2707,24 @@ def emit_state_change_metric(self, new_state: TaskInstanceState) -> None: timing = timezone.utcnow() - self.queued_dttm elif new_state == TaskInstanceState.QUEUED: metric_name = "scheduled_duration" - if self.start_date is None: - # This check does not work correctly before fields like `scheduled_dttm` are implemented. - # TODO: Change the level to WARNING once it's viable. - # see #30612 #34493 and #34771 for more details - self.log.debug( + if self.scheduled_dttm is None: + self.log.warning( "cannot record %s for task %s because previous state change time has not been saved", metric_name, self.task_id, ) return - timing = timezone.utcnow() - self.start_date + timing = timezone.utcnow() - self.scheduled_dttm else: raise NotImplementedError("no metric emission setup for state %s", new_state) # send metric twice, once (legacy) with tags in the name and once with tags as tags Stats.timing(f"dag.{self.dag_id}.{self.task_id}.{metric_name}", timing) - Stats.timing(f"task.{metric_name}", timing, tags={"task_id": self.task_id, "dag_id": self.dag_id}) + Stats.timing( + f"task.{metric_name}", + timing, + tags={"task_id": self.task_id, "dag_id": self.dag_id, "queue": self.queue}, + ) def clear_next_method_args(self) -> None: """Ensure we unset next_method and next_kwargs to ensure that any retries don't reuse them.""" diff --git a/airflow/models/taskinstancehistory.py b/airflow/models/taskinstancehistory.py index 9ac11cad7dba5..e97e6de22ec9a 100644 --- a/airflow/models/taskinstancehistory.py +++ b/airflow/models/taskinstancehistory.py @@ -77,6 +77,7 @@ class TaskInstanceHistory(Base): operator = Column(String(1000)) custom_operator_name = Column(String(1000)) queued_dttm = Column(UtcDateTime) + scheduled_dttm = Column(UtcDateTime) queued_by_job_id = Column(Integer) pid = Column(Integer) executor = Column(String(1000)) diff --git a/airflow/models/trigger.py b/airflow/models/trigger.py index 2e0fe9f7f2bbe..ce139c3134135 100644 --- a/airflow/models/trigger.py +++ b/airflow/models/trigger.py @@ -281,6 +281,7 @@ def submit_failure(cls, trigger_id, exc=None, session: Session = NEW_SESSION) -> task_instance.trigger_id = None # Finally, mark it as scheduled so it gets re-queued task_instance.state = TaskInstanceState.SCHEDULED + task_instance.scheduled_dttm = timezone.utcnow() @classmethod @provide_session diff --git a/airflow/serialization/schema.json b/airflow/serialization/schema.json index 15e46925d0d45..292415ce11e60 100644 --- a/airflow/serialization/schema.json +++ b/airflow/serialization/schema.json @@ -165,6 +165,7 @@ }, "catchup": { "type": "boolean" }, "fileloc": { "type" : "string"}, + "relative_fileloc": { "type" : "string"}, "_processor_dags_folder": { "anyOf": [ { "type": "null" }, diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index b7e08a45aed74..9c5f43c0c0b5e 100644 --- a/airflow/serialization/serialized_objects.py +++ 
b/airflow/serialization/serialized_objects.py @@ -47,7 +47,6 @@ create_expand_input, get_map_type_key, ) -from airflow.models.param import Param, ParamsDict from airflow.models.taskinstance import SimpleTaskInstance from airflow.models.taskinstancekey import TaskInstanceKey from airflow.providers_manager import ProvidersManager @@ -64,6 +63,7 @@ ) from airflow.sdk.definitions.baseoperator import BaseOperator as TaskSDKBaseOperator from airflow.sdk.definitions.mappedoperator import MappedOperator +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.sdk.definitions.taskgroup import MappedTaskGroup, TaskGroup from airflow.sdk.definitions.xcom_arg import XComArg, deserialize_xcom_arg, serialize_xcom_arg from airflow.sdk.execution_time.context import OutletEventAccessor, OutletEventAccessors @@ -985,7 +985,7 @@ def _serialize_params_dict(cls, params: ParamsDict | dict) -> list[tuple[str, di class_identity = f"{v.__module__}.{v.__class__.__name__}" except AttributeError: class_identity = "" - if class_identity == "airflow.models.param.Param": + if class_identity == "airflow.sdk.definitions.param.Param": serialized_params.append((k, cls._serialize_param(v))) else: # Auto-box other values into Params object like it is done by DAG parsing as well diff --git a/airflow/triggers/base.py b/airflow/triggers/base.py index cf71f1a426ddd..4e88465d533a7 100644 --- a/airflow/triggers/base.py +++ b/airflow/triggers/base.py @@ -25,6 +25,7 @@ from airflow.callbacks.callback_requests import TaskCallbackRequest from airflow.callbacks.database_callback_sink import DatabaseCallbackSink +from airflow.utils import timezone from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import TaskInstanceState @@ -172,6 +173,7 @@ def handle_submit(self, *, task_instance: TaskInstance, session: Session = NEW_S # Set the state of the task instance to scheduled task_instance.state = TaskInstanceState.SCHEDULED + task_instance.scheduled_dttm = timezone.utcnow() class BaseTaskEndEvent(TriggerEvent): diff --git a/airflow/ui/openapi-gen/queries/common.ts b/airflow/ui/openapi-gen/queries/common.ts index 0e137620a4e6d..3994fb685e7c2 100644 --- a/airflow/ui/openapi-gen/queries/common.ts +++ b/airflow/ui/openapi-gen/queries/common.ts @@ -359,12 +359,14 @@ export const useStructureServiceStructureDataKey = "StructureServiceStructureDat export const UseStructureServiceStructureDataKeyFn = ( { dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, root, }: { dagId: string; + dagVersion?: number; externalDependencies?: boolean; includeDownstream?: boolean; includeUpstream?: boolean; @@ -373,7 +375,7 @@ export const UseStructureServiceStructureDataKeyFn = ( queryKey?: Array, ) => [ useStructureServiceStructureDataKey, - ...(queryKey ?? [{ dagId, externalDependencies, includeDownstream, includeUpstream, root }]), + ...(queryKey ?? 
[{ dagId, dagVersion, externalDependencies, includeDownstream, includeUpstream, root }]), ]; export type BackfillServiceListBackfillsDefaultResponse = Awaited< ReturnType diff --git a/airflow/ui/openapi-gen/queries/prefetch.ts b/airflow/ui/openapi-gen/queries/prefetch.ts index 63a949aca94e6..923e62fe46834 100644 --- a/airflow/ui/openapi-gen/queries/prefetch.ts +++ b/airflow/ui/openapi-gen/queries/prefetch.ts @@ -475,6 +475,7 @@ export const prefetchUseDashboardServiceHistoricalMetrics = ( * @param data.includeDownstream * @param data.root * @param data.externalDependencies + * @param data.dagVersion * @returns StructureDataResponse Successful Response * @throws ApiError */ @@ -482,12 +483,14 @@ export const prefetchUseStructureServiceStructureData = ( queryClient: QueryClient, { dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, root, }: { dagId: string; + dagVersion?: number; externalDependencies?: boolean; includeDownstream?: boolean; includeUpstream?: boolean; @@ -497,6 +500,7 @@ export const prefetchUseStructureServiceStructureData = ( queryClient.prefetchQuery({ queryKey: Common.UseStructureServiceStructureDataKeyFn({ dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, @@ -505,6 +509,7 @@ export const prefetchUseStructureServiceStructureData = ( queryFn: () => StructureService.structureData({ dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, @@ -2123,7 +2128,7 @@ export const prefetchUseXcomServiceGetXcomEntry = ( * @param data.mapIndex * @param data.limit * @param data.offset - * @returns XComCollection Successful Response + * @returns XComCollectionResponse Successful Response * @throws ApiError */ export const prefetchUseXcomServiceGetXcomEntries = ( diff --git a/airflow/ui/openapi-gen/queries/queries.ts b/airflow/ui/openapi-gen/queries/queries.ts index c4d201767e17d..3f340422446d8 100644 --- a/airflow/ui/openapi-gen/queries/queries.ts +++ b/airflow/ui/openapi-gen/queries/queries.ts @@ -585,6 +585,7 @@ export const useDashboardServiceHistoricalMetrics = < * @param data.includeDownstream * @param data.root * @param data.externalDependencies + * @param data.dagVersion * @returns StructureDataResponse Successful Response * @throws ApiError */ @@ -595,12 +596,14 @@ export const useStructureServiceStructureData = < >( { dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, root, }: { dagId: string; + dagVersion?: number; externalDependencies?: boolean; includeDownstream?: boolean; includeUpstream?: boolean; @@ -611,12 +614,13 @@ export const useStructureServiceStructureData = < ) => useQuery({ queryKey: Common.UseStructureServiceStructureDataKeyFn( - { dagId, externalDependencies, includeDownstream, includeUpstream, root }, + { dagId, dagVersion, externalDependencies, includeDownstream, includeUpstream, root }, queryKey, ), queryFn: () => StructureService.structureData({ dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, @@ -2503,7 +2507,7 @@ export const useXcomServiceGetXcomEntry = < * @param data.mapIndex * @param data.limit * @param data.offset - * @returns XComCollection Successful Response + * @returns XComCollectionResponse Successful Response * @throws ApiError */ export const useXcomServiceGetXcomEntries = < diff --git a/airflow/ui/openapi-gen/queries/suspense.ts b/airflow/ui/openapi-gen/queries/suspense.ts index d2f3bfc937c72..8653b189b867a 100644 --- a/airflow/ui/openapi-gen/queries/suspense.ts +++ 
b/airflow/ui/openapi-gen/queries/suspense.ts @@ -564,6 +564,7 @@ export const useDashboardServiceHistoricalMetricsSuspense = < * @param data.includeDownstream * @param data.root * @param data.externalDependencies + * @param data.dagVersion * @returns StructureDataResponse Successful Response * @throws ApiError */ @@ -574,12 +575,14 @@ export const useStructureServiceStructureDataSuspense = < >( { dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, root, }: { dagId: string; + dagVersion?: number; externalDependencies?: boolean; includeDownstream?: boolean; includeUpstream?: boolean; @@ -590,12 +593,13 @@ export const useStructureServiceStructureDataSuspense = < ) => useSuspenseQuery({ queryKey: Common.UseStructureServiceStructureDataKeyFn( - { dagId, externalDependencies, includeDownstream, includeUpstream, root }, + { dagId, dagVersion, externalDependencies, includeDownstream, includeUpstream, root }, queryKey, ), queryFn: () => StructureService.structureData({ dagId, + dagVersion, externalDependencies, includeDownstream, includeUpstream, @@ -2482,7 +2486,7 @@ export const useXcomServiceGetXcomEntrySuspense = < * @param data.mapIndex * @param data.limit * @param data.offset - * @returns XComCollection Successful Response + * @returns XComCollectionResponse Successful Response * @throws ApiError */ export const useXcomServiceGetXcomEntriesSuspense = < diff --git a/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow/ui/openapi-gen/requests/schemas.gen.ts index 170b977beb0ed..d30aacd6be03e 100644 --- a/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -413,6 +413,7 @@ export const $BackfillPostBody = { default: 10, }, }, + additionalProperties: false, type: "object", required: ["dag_id", "from_date", "to_date"], title: "BackfillPostBody", @@ -587,6 +588,7 @@ export const $BulkBody_ConnectionBody_ = { title: "Actions", }, }, + additionalProperties: false, type: "object", required: ["actions"], title: "BulkBody[ConnectionBody]", @@ -612,6 +614,7 @@ export const $BulkBody_PoolBody_ = { title: "Actions", }, }, + additionalProperties: false, type: "object", required: ["actions"], title: "BulkBody[PoolBody]", @@ -637,6 +640,7 @@ export const $BulkBody_VariableBody_ = { title: "Actions", }, }, + additionalProperties: false, type: "object", required: ["actions"], title: "BulkBody[VariableBody]", @@ -661,6 +665,7 @@ export const $BulkCreateAction_ConnectionBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkCreateAction[ConnectionBody]", @@ -685,6 +690,7 @@ export const $BulkCreateAction_PoolBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkCreateAction[PoolBody]", @@ -709,6 +715,7 @@ export const $BulkCreateAction_VariableBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkCreateAction[VariableBody]", @@ -733,6 +740,7 @@ export const $BulkDeleteAction_ConnectionBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkDeleteAction[ConnectionBody]", @@ -757,6 +765,7 @@ export const $BulkDeleteAction_PoolBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkDeleteAction[PoolBody]", @@ -781,6 +790,7 @@ export const $BulkDeleteAction_VariableBody_ = { default: 
"fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkDeleteAction[VariableBody]", @@ -850,6 +860,7 @@ export const $BulkUpdateAction_ConnectionBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkUpdateAction[ConnectionBody]", @@ -874,6 +885,7 @@ export const $BulkUpdateAction_PoolBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkUpdateAction[PoolBody]", @@ -898,6 +910,7 @@ export const $BulkUpdateAction_VariableBody_ = { default: "fail", }, }, + additionalProperties: false, type: "object", required: ["action", "entities"], title: "BulkUpdateAction[VariableBody]", @@ -1012,6 +1025,7 @@ export const $ClearTaskInstancesBody = { default: false, }, }, + additionalProperties: false, type: "object", title: "ClearTaskInstancesBody", description: "Request body for Clear Task Instances endpoint.", @@ -1027,6 +1041,7 @@ export const $Config = { title: "Sections", }, }, + additionalProperties: false, type: "object", required: ["sections"], title: "Config", @@ -1061,6 +1076,7 @@ export const $ConfigOption = { title: "Value", }, }, + additionalProperties: false, type: "object", required: ["key", "value"], title: "ConfigOption", @@ -1191,6 +1207,7 @@ export const $ConfigSection = { title: "Options", }, }, + additionalProperties: false, type: "object", required: ["name", "options"], title: "ConfigSection", @@ -1287,6 +1304,7 @@ export const $ConnectionBody = { title: "Extra", }, }, + additionalProperties: false, type: "object", required: ["connection_id", "conn_type"], title: "ConnectionBody", @@ -1839,6 +1857,7 @@ export const $DAGPatchBody = { title: "Is Paused", }, }, + additionalProperties: false, type: "object", required: ["is_paused"], title: "DAGPatchBody", @@ -2074,6 +2093,7 @@ export const $DAGRunClearBody = { default: false, }, }, + additionalProperties: false, type: "object", title: "DAGRunClearBody", description: "DAG Run serializer for clear endpoint body.", @@ -2124,6 +2144,7 @@ export const $DAGRunPatchBody = { title: "Note", }, }, + additionalProperties: false, type: "object", title: "DAGRunPatchBody", description: "DAG Run Serializer for PATCH requests.", @@ -2464,6 +2485,7 @@ export const $DAGRunsBatchBody = { title: "End Date Lte", }, }, + additionalProperties: false, type: "object", title: "DAGRunsBatchBody", description: "List DAG Runs body for batch endpoint.", @@ -2891,6 +2913,7 @@ export const $DagRunAssetReference = { title: "Data Interval End", }, }, + additionalProperties: false, type: "object", required: [ "run_id", @@ -2948,6 +2971,7 @@ export const $DagScheduleAssetReference = { title: "Updated At", }, }, + additionalProperties: false, type: "object", required: ["dag_id", "created_at", "updated_at"], title: "DagScheduleAssetReference", @@ -3956,6 +3980,7 @@ export const $PatchTaskInstanceBody = { default: false, }, }, + additionalProperties: false, type: "object", title: "PatchTaskInstanceBody", description: "Request body for Clear Task Instances endpoint.", @@ -4101,6 +4126,7 @@ export const $PoolBody = { default: false, }, }, + additionalProperties: false, type: "object", required: ["name", "slots"], title: "PoolBody", @@ -4174,6 +4200,7 @@ export const $PoolPatchBody = { title: "Include Deferred", }, }, + additionalProperties: false, type: "object", title: "PoolPatchBody", description: "Pool serializer for patch bodies.", @@ -4640,6 +4667,18 @@ export const 
$TaskInstanceHistoryResponse = { ], title: "Queued When", }, + scheduled_when: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Scheduled When", + }, pid: { anyOf: [ { @@ -4688,6 +4727,7 @@ export const $TaskInstanceHistoryResponse = { "priority_weight", "operator", "queued_when", + "scheduled_when", "pid", "executor", "executor_config", @@ -4855,6 +4895,18 @@ export const $TaskInstanceResponse = { ], title: "Queued When", }, + scheduled_when: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Scheduled When", + }, pid: { anyOf: [ { @@ -4952,6 +5004,7 @@ export const $TaskInstanceResponse = { "priority_weight", "operator", "queued_when", + "scheduled_when", "pid", "executor", "executor_config", @@ -5286,6 +5339,7 @@ export const $TaskInstancesBatchBody = { title: "Order By", }, }, + additionalProperties: false, type: "object", title: "TaskInstancesBatchBody", description: "Task Instance body for get batch.", @@ -5336,6 +5390,7 @@ export const $TaskOutletAssetReference = { title: "Updated At", }, }, + additionalProperties: false, type: "object", required: ["dag_id", "task_id", "created_at", "updated_at"], title: "TaskOutletAssetReference", @@ -5738,6 +5793,7 @@ export const $TriggerDAGRunPostBody = { title: "Note", }, }, + additionalProperties: false, type: "object", title: "TriggerDAGRunPostBody", description: "Trigger DAG Run Serializer for POST body.", @@ -5864,6 +5920,7 @@ export const $VariableBody = { title: "Description", }, }, + additionalProperties: false, type: "object", required: ["key", "value"], title: "VariableBody", @@ -5946,7 +6003,7 @@ export const $VersionInfo = { description: "Version information serializer for responses.", } as const; -export const $XComCollection = { +export const $XComCollectionResponse = { properties: { xcom_entries: { items: { @@ -5962,8 +6019,8 @@ export const $XComCollection = { }, type: "object", required: ["xcom_entries", "total_entries"], - title: "XComCollection", - description: "List of XCom items.", + title: "XComCollectionResponse", + description: "XCom Collection serializer for responses.", } as const; export const $XComResponse = { diff --git a/airflow/ui/openapi-gen/requests/services.gen.ts b/airflow/ui/openapi-gen/requests/services.gen.ts index 1d64d9d03999f..14ea58826d122 100644 --- a/airflow/ui/openapi-gen/requests/services.gen.ts +++ b/airflow/ui/openapi-gen/requests/services.gen.ts @@ -740,6 +740,7 @@ export class StructureService { * @param data.includeDownstream * @param data.root * @param data.externalDependencies + * @param data.dagVersion * @returns StructureDataResponse Successful Response * @throws ApiError */ @@ -753,6 +754,7 @@ export class StructureService { include_downstream: data.includeDownstream, root: data.root, external_dependencies: data.externalDependencies, + dag_version: data.dagVersion, }, errors: { 404: "Not Found", @@ -2976,7 +2978,7 @@ export class XcomService { * @param data.mapIndex * @param data.limit * @param data.offset - * @returns XComCollection Successful Response + * @returns XComCollectionResponse Successful Response * @throws ApiError */ public static getXcomEntries(data: GetXcomEntriesData): CancelablePromise { diff --git a/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow/ui/openapi-gen/requests/types.gen.ts index 9300f92898a95..17ef596ce2f07 100644 --- a/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow/ui/openapi-gen/requests/types.gen.ts @@ -1223,6 +1223,7 @@ export type 
TaskInstanceHistoryResponse = { priority_weight: number | null; operator: string | null; queued_when: string | null; + scheduled_when: string | null; pid: number | null; executor: string | null; executor_config: string; @@ -1253,6 +1254,7 @@ export type TaskInstanceResponse = { priority_weight: number | null; operator: string | null; queued_when: string | null; + scheduled_when: string | null; pid: number | null; executor: string | null; executor_config: string; @@ -1469,9 +1471,9 @@ export type VersionInfo = { }; /** - * List of XCom items. + * XCom Collection serializer for responses. */ -export type XComCollection = { +export type XComCollectionResponse = { xcom_entries: Array; total_entries: number; }; @@ -1665,6 +1667,7 @@ export type HistoricalMetricsResponse = HistoricalMetricDataResponse; export type StructureDataData = { dagId: string; + dagVersion?: number | null; externalDependencies?: boolean; includeDownstream?: boolean; includeUpstream?: boolean; @@ -2300,7 +2303,7 @@ export type GetXcomEntriesData = { xcomKey?: string | null; }; -export type GetXcomEntriesResponse = XComCollection; +export type GetXcomEntriesResponse = XComCollectionResponse; export type GetTasksData = { dagId: string; @@ -4690,7 +4693,7 @@ export type $OpenApiTs = { /** * Successful Response */ - 200: XComCollection; + 200: XComCollectionResponse; /** * Bad Request */ diff --git a/airflow/ui/package.json b/airflow/ui/package.json index 0526f20d4d040..322623e9db68b 100644 --- a/airflow/ui/package.json +++ b/airflow/ui/package.json @@ -39,6 +39,7 @@ "react-chartjs-2": "^5.2.0", "react-dom": "^18.3.1", "react-hook-form": "^7.20.0", + "react-hotkeys-hook": "^4.6.1", "react-icons": "^5.4.0", "react-json-view": "^1.21.3", "react-markdown": "^9.0.1", diff --git a/airflow/ui/pnpm-lock.yaml b/airflow/ui/pnpm-lock.yaml index 5b89d9517c81f..20ae58594252c 100644 --- a/airflow/ui/pnpm-lock.yaml +++ b/airflow/ui/pnpm-lock.yaml @@ -77,6 +77,9 @@ importers: react-hook-form: specifier: ^7.20.0 version: 7.53.1(react@18.3.1) + react-hotkeys-hook: + specifier: ^4.6.1 + version: 4.6.1(react-dom@18.3.1(react@18.3.1))(react@18.3.1) react-icons: specifier: ^5.4.0 version: 5.4.0(react@18.3.1) @@ -3441,6 +3444,12 @@ packages: peerDependencies: react: ^16.8.0 || ^17 || ^18 || ^19 + react-hotkeys-hook@4.6.1: + resolution: {integrity: sha512-XlZpbKUj9tkfgPgT9gA+1p7Ey6vFIZHttUjPqpTdyT5nqQ8mHL7elxvSbaC+dpSiHUSmr21Ya1mDxBZG3aje4Q==} + peerDependencies: + react: '>=16.8.1' + react-dom: '>=16.8.1' + react-icons@5.4.0: resolution: {integrity: sha512-7eltJxgVt7X64oHh6wSWNwwbKTCtMfK35hcjvJS0yxEAhPM8oUKdS3+kqaW1vicIltw+kR2unHaa12S9pPALoQ==} peerDependencies: @@ -8475,6 +8484,11 @@ snapshots: dependencies: react: 18.3.1 + react-hotkeys-hook@4.6.1(react-dom@18.3.1(react@18.3.1))(react@18.3.1): + dependencies: + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + react-icons@5.4.0(react@18.3.1): dependencies: react: 18.3.1 diff --git a/airflow/ui/src/components/ErrorAlert.tsx b/airflow/ui/src/components/ErrorAlert.tsx index 538cb230f7da1..f5d9ac15cb48b 100644 --- a/airflow/ui/src/components/ErrorAlert.tsx +++ b/airflow/ui/src/components/ErrorAlert.tsx @@ -23,7 +23,7 @@ import type { HTTPExceptionResponse, HTTPValidationError } from "openapi-gen/req import { Alert } from "./ui"; type ExpandedApiError = { - body: HTTPExceptionResponse | HTTPValidationError; + body: HTTPExceptionResponse | HTTPValidationError | undefined; } & ApiError; type Props = { @@ -37,7 +37,7 @@ export const ErrorAlert = ({ error: err }: Props) => { return undefined; } - const 
details = error.body.detail; + const details = error.body?.detail; let detailMessage; if (details !== undefined) { diff --git a/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx b/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx index 942e3e5a848dc..83974fd090985 100644 --- a/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx +++ b/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx @@ -16,25 +16,37 @@ * specific language governing permissions and limitations * under the License. */ -import { Button, Box } from "@chakra-ui/react"; +import { Button, Box, Kbd } from "@chakra-ui/react"; import { useState } from "react"; +import { useHotkeys } from "react-hotkeys-hook"; import { MdSearch } from "react-icons/md"; import { Dialog } from "src/components/ui"; +import { getMetaKey } from "src/utils"; import { SearchDags } from "./SearchDags"; export const SearchDagsButton = () => { const [isOpen, setIsOpen] = useState(false); + const metaKey = getMetaKey(); const onOpenChange = () => { setIsOpen(false); }; + useHotkeys( + "mod+k", + () => { + setIsOpen(true); + }, + [isOpen], + { preventDefault: true }, + ); + return ( diff --git a/airflow/ui/src/layouts/Nav/AdminButton.tsx b/airflow/ui/src/layouts/Nav/AdminButton.tsx index 7889f4ede56cd..53ad5c9d4d752 100644 --- a/airflow/ui/src/layouts/Nav/AdminButton.tsx +++ b/airflow/ui/src/layouts/Nav/AdminButton.tsx @@ -32,6 +32,10 @@ const links = [ href: "/pools", title: "Pools", }, + { + href: "/providers", + title: "Providers", + }, ]; export const AdminButton = () => ( diff --git a/airflow/ui/src/pages/Providers.tsx b/airflow/ui/src/pages/Providers.tsx new file mode 100644 index 0000000000000..8c05f434d5d23 --- /dev/null +++ b/airflow/ui/src/pages/Providers.tsx @@ -0,0 +1,83 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import { Box, Heading, Link } from "@chakra-ui/react"; +import type { ColumnDef } from "@tanstack/react-table"; + +import { useProviderServiceGetProviders } from "openapi/queries"; +import type { ProviderResponse } from "openapi/requests/types.gen"; +import { DataTable } from "src/components/DataTable"; +import { ErrorAlert } from "src/components/ErrorAlert"; + +const columns: Array> = [ + { + accessorKey: "package_name", + cell: ({ row: { original } }) => ( + + {original.package_name} + + ), + header: "Package Name", + }, + { + accessorKey: "version", + cell: ({ row: { original } }) => original.version, + header: () => "Version", + }, + { + accessorKey: "description", + cell: ({ row: { original } }) => { + const urlRegex = /http(s)?:\/\/[\w.-]+(\.?:[\w.-]+)*([#/?][\w!#$%&'()*+,./:;=?@[\]~-]*)?/gu; + const urls = original.description.match(urlRegex); + const cleanText = original.description.replaceAll(/\n(?:and)?/gu, " ").split(" "); + + return cleanText.map((part) => + urls?.includes(part) ? ( + + {part} + + ) : ( + `${part} ` + ), + ); + }, + header: "Description", + }, +]; + +export const Providers = () => { + const { data, error } = useProviderServiceGetProviders(); + + return ( + + Providers + } + total={data?.total_entries} + /> + + ); +}; diff --git a/airflow/ui/src/router.tsx b/airflow/ui/src/router.tsx index 5e5f1b3a576a7..24a5c7240cb50 100644 --- a/airflow/ui/src/router.tsx +++ b/airflow/ui/src/router.tsx @@ -31,6 +31,7 @@ import { DagsList } from "src/pages/DagsList"; import { Dashboard } from "src/pages/Dashboard"; import { ErrorPage } from "src/pages/Error"; import { Events } from "src/pages/Events"; +import { Providers } from "src/pages/Providers"; import { Run } from "src/pages/Run"; import { Details as DagRunDetails } from "src/pages/Run/Details"; import { TaskInstances } from "src/pages/Run/TaskInstances"; @@ -70,6 +71,10 @@ export const routerConfig = [ element: , path: "pools", }, + { + element: , + path: "providers", + }, { children: [ { element: , index: true }, diff --git a/airflow/ui/src/utils/getMetaKey.ts b/airflow/ui/src/utils/getMetaKey.ts new file mode 100644 index 0000000000000..4e1e867c2c268 --- /dev/null +++ b/airflow/ui/src/utils/getMetaKey.ts @@ -0,0 +1,20 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +export const getMetaKey = () => (navigator.appVersion.includes("Mac") ? 
"⌘" : "Ctrl"); diff --git a/airflow/ui/src/utils/index.ts b/airflow/ui/src/utils/index.ts index bdfe8ac8d8002..60357e9470aba 100644 --- a/airflow/ui/src/utils/index.ts +++ b/airflow/ui/src/utils/index.ts @@ -20,3 +20,4 @@ export { capitalize } from "./capitalize"; export { pluralize } from "./pluralize"; export { getDuration } from "./datetime_utils"; +export { getMetaKey } from "./getMetaKey"; diff --git a/airflow/utils/context.py b/airflow/utils/context.py index 6ed1399fe63f6..0415542c6ca8c 100644 --- a/airflow/utils/context.py +++ b/airflow/utils/context.py @@ -303,7 +303,7 @@ def context_update_for_unmapped(context: Context, task: BaseOperator) -> None: :meta private: """ - from airflow.models.param import process_params + from airflow.sdk.definitions.param import process_params context["task"] = context["ti"].task = task context["params"] = process_params( diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 1a1eb6f4d3500..0dc4dbb1b61f4 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -94,7 +94,7 @@ class MappedClassProtocol(Protocol): "2.9.2": "686269002441", "2.10.0": "22ed7efa9da2", "2.10.3": "5f2621c13b39", - "3.0.0": "e39a26ac59f6", + "3.0.0": "33b04e4bfa19", } diff --git a/docs/.gitignore b/docs/.gitignore index 1f80c74e825a2..1828bb5fc3576 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -4,6 +4,8 @@ apache-airflow-providers-airbyte apache-airflow-providers-alibaba apache-airflow-providers-apache-beam apache-airflow-providers-apache-cassandra +apache-airflow-providers-apache-drill +apache-airflow-providers-apache-druid apache-airflow-providers-apache-iceberg apache-airflow-providers-apache-kafka apache-airflow-providers-apache-kylin @@ -20,12 +22,15 @@ apache-airflow-providers-common-compat apache-airflow-providers-common-io apache-airflow-providers-common-sql apache-airflow-providers-datadog +apache-airflow-providers-dbt-cloud apache-airflow-providers-discord apache-airflow-providers-docker apache-airflow-providers-edge +apache-airflow-providers-elasticsearch apache-airflow-providers-exasol apache-airflow-providers-facebook apache-airflow-providers-ftp +apache-airflow-providers-github apache-airflow-providers-http apache-airflow-providers-influxdb apache-airflow-providers-mongo @@ -35,6 +40,7 @@ apache-airflow-providers-imap apache-airflow-providers-neo4j apache-airflow-providers-openai apache-airflow-providers-openfaas +apache-airflow-providers-opensearch apache-airflow-providers-opsgenie apache-airflow-providers-papermill apache-airflow-providers-pgvector diff --git a/docs/apache-airflow-providers-apache-drill/changelog.rst b/docs/apache-airflow-providers-apache-drill/changelog.rst deleted file mode 100644 index 79971613d2f63..0000000000000 --- a/docs/apache-airflow-providers-apache-drill/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/apache/drill/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-apache-druid/changelog.rst b/docs/apache-airflow-providers-apache-druid/changelog.rst deleted file mode 100644 index 652948c8ee8c2..0000000000000 --- a/docs/apache-airflow-providers-apache-druid/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/apache/druid/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/changelog.rst b/docs/apache-airflow-providers-dbt-cloud/changelog.rst deleted file mode 100644 index be4203ad0c942..0000000000000 --- a/docs/apache-airflow-providers-dbt-cloud/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. 
include:: ../../providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-elasticsearch/changelog.rst b/docs/apache-airflow-providers-elasticsearch/changelog.rst deleted file mode 100644 index 840359e3b0a4e..0000000000000 --- a/docs/apache-airflow-providers-elasticsearch/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/elasticsearch/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-github/changelog.rst b/docs/apache-airflow-providers-github/changelog.rst deleted file mode 100644 index 231425db490d2..0000000000000 --- a/docs/apache-airflow-providers-github/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/github/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-google/operators/cloud/automl.rst b/docs/apache-airflow-providers-google/operators/cloud/automl.rst index 4eb461409fa3c..4cb6a9724cadb 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/automl.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/automl.rst @@ -163,25 +163,21 @@ You can find example on how to use VertexAI operators here: :end-before: [END how_to_cloud_vertex_ai_delete_model_operator] .. _howto/operator:AutoMLPredictOperator: -.. 
_howto/operator:AutoMLBatchPredictOperator: Making Predictions ^^^^^^^^^^^^^^^^^^ To obtain predictions from Google Cloud AutoML model you can use -:class:`~airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator` or -:class:`~airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator`. In the first case +:class:`~airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator`. In the first case the model must be deployed. -Th :class:`~airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator` deprecated for tables, -video intelligence, vision and natural language is deprecated and will be removed after 31.03.2024. -Please use +For tables, video intelligence, vision and natural language you can use the following operators: + :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.CreateBatchPredictionJobOperator`, :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.GetBatchPredictionJobOperator`, :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.ListBatchPredictionJobsOperator`, -:class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`, -instead. +:class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`. You can find examples on how to use VertexAI operators here: .. exampleinclude:: /../../providers/tests/system/google/cloud/vertex_ai/example_vertex_ai_batch_prediction_job.py diff --git a/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst b/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst index 3213aec60690e..6dd405ce93213 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst @@ -54,9 +54,6 @@ There are several ways to run a Dataflow pipeline depending on your environment, command-line tool to build and save the Flex Template spec file in Cloud Storage. See: :ref:`howto/operator:DataflowStartFlexTemplateOperator` -- **SQL pipeline**: Developer can write pipeline as SQL statement and then execute it in Dataflow. See: - :ref:`howto/operator:DataflowStartSqlJobOperator` - It is a good idea to test your pipeline using the non-templated pipeline, and then run the pipeline in production using the templates. @@ -283,29 +280,6 @@ Also for this action you can use the operator in the deferrable mode: :start-after: [START howto_operator_start_flex_template_job_deferrable] :end-before: [END howto_operator_start_flex_template_job_deferrable] -.. _howto/operator:DataflowStartSqlJobOperator: - -Dataflow SQL -"""""""""""" -Dataflow SQL supports a variant of the ZetaSQL query syntax and includes additional streaming -extensions for running Dataflow streaming jobs. - -Here is an example of running Dataflow SQL job with -:class:`~airflow.providers.google.cloud.operators.dataflow.DataflowStartSqlJobOperator`: - -.. exampleinclude:: /../../providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_start_sql_job] - :end-before: [END howto_operator_start_sql_job] - -.. warning:: - This operator requires ``gcloud`` command (Google Cloud SDK) must be installed on the Airflow worker - `__ - -See the `Dataflow SQL reference -`_. - .. 
_howto/operator:DataflowStartYamlJobOperator: Dataflow YAML diff --git a/docs/apache-airflow-providers-opensearch/changelog.rst b/docs/apache-airflow-providers-opensearch/changelog.rst deleted file mode 100644 index 21f39a6c1f6ea..0000000000000 --- a/docs/apache-airflow-providers-opensearch/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/opensearch/CHANGELOG.rst diff --git a/docs/apache-airflow/core-concepts/params.rst b/docs/apache-airflow/core-concepts/params.rst index b54026ccb22fc..8c1c98cd1724c 100644 --- a/docs/apache-airflow/core-concepts/params.rst +++ b/docs/apache-airflow/core-concepts/params.rst @@ -22,8 +22,8 @@ Params Params enable you to provide runtime configuration to tasks. You can configure default Params in your DAG code and supply additional Params, or overwrite Param values, at runtime when you trigger a DAG. -:class:`~airflow.models.param.Param` values are validated with JSON Schema. For scheduled DAG runs, -default :class:`~airflow.models.param.Param` values are used. +:class:`~airflow.sdk.definitions.param.Param` values are validated with JSON Schema. For scheduled DAG runs, +default :class:`~airflow.sdk.definitions.param.Param` values are used. Also defined Params are used to render a nice UI when triggering manually. When you trigger a DAG manually, you can modify its Params before the dagrun starts. @@ -33,14 +33,14 @@ DAG-level Params ---------------- To add Params to a :class:`~airflow.models.dag.DAG`, initialize it with the ``params`` kwarg. -Use a dictionary that maps Param names to either a :class:`~airflow.models.param.Param` or an object indicating the parameter's default value. +Use a dictionary that maps Param names to either a :class:`~airflow.sdk.definitions.param.Param` or an object indicating the parameter's default value. .. code-block:: :emphasize-lines: 7-10 from airflow import DAG from airflow.decorators import task - from airflow.models.param import Param + from airflow.sdk import Param with DAG( "the_dag", @@ -127,7 +127,7 @@ You can change this by setting ``render_template_as_native_obj=True`` while init ): -This way, the :class:`~airflow.models.param.Param`'s type is respected when it's provided to your task: +This way, the :class:`~airflow.sdk.definitions.param.Param`'s type is respected when it's provided to your task: .. code-block:: @@ -160,7 +160,7 @@ Another way to access your param is via a task's ``context`` kwarg. 
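The hunks above switch the documented import path for ``Param`` to ``airflow.sdk``. A minimal sketch of a DAG putting the pieces together, assuming that import path as shown in this change; the DAG id, task name and Param attributes below are illustrative, not taken from the Airflow docs:

.. code-block:: python

    from airflow import DAG
    from airflow.decorators import task
    from airflow.sdk import Param  # new import path shown in this change

    with DAG(
        "params_sketch",  # illustrative DAG id
        params={
            # Plain scalar: auto-boxed into a Param and rendered as an optional string field.
            "greeting": "hello",
            # Explicit Param: the JSON Schema keywords below are validated at trigger time.
            "retries": Param(3, type="integer", minimum=0, maximum=10, title="Retries"),
        },
    ) as dag:

        @task
        def show(**context):
            # Defaults merged with any values supplied when the run was triggered.
            print(context["params"]["greeting"], context["params"]["retries"])

        show()

Triggering this DAG with ``{"retries": 99}`` would be rejected by JSON Schema validation because of the ``maximum`` keyword.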
JSON Schema Validation ---------------------- -:class:`~airflow.models.param.Param` makes use of `JSON Schema `_, so you can use the full JSON Schema specifications mentioned at https://json-schema.org/draft/2020-12/json-schema-validation.html to define ``Param`` objects. +:class:`~airflow.sdk.definitions.param.Param` makes use of `JSON Schema `_, so you can use the full JSON Schema specifications mentioned at https://json-schema.org/draft/2020-12/json-schema-validation.html to define ``Param`` objects. .. code-block:: @@ -195,8 +195,8 @@ JSON Schema Validation at time of trigger. .. note:: - As of now, for security reasons, one can not use :class:`~airflow.models.param.Param` objects derived out of custom classes. We are - planning to have a registration system for custom :class:`~airflow.models.param.Param` classes, just like we've for Operator ExtraLinks. + As of now, for security reasons, one can not use :class:`~airflow.sdk.definitions.param.Param` objects derived out of custom classes. We are + planning to have a registration system for custom :class:`~airflow.sdk.definitions.param.Param` classes, just like we've for Operator ExtraLinks. Use Params to Provide a Trigger UI Form --------------------------------------- @@ -207,21 +207,21 @@ Use Params to Provide a Trigger UI Form This form is provided when a user clicks on the "Trigger DAG" button. The Trigger UI Form is rendered based on the pre-defined DAG Params. If the DAG has no params defined, the trigger form is skipped. -The form elements can be defined with the :class:`~airflow.models.param.Param` class and attributes define how a form field is displayed. +The form elements can be defined with the :class:`~airflow.sdk.definitions.param.Param` class and attributes define how a form field is displayed. The following features are supported in the Trigger UI Form: -- Direct scalar values (boolean, int, string, lists, dicts) from top-level DAG params are auto-boxed into :class:`~airflow.models.param.Param` objects. +- Direct scalar values (boolean, int, string, lists, dicts) from top-level DAG params are auto-boxed into :class:`~airflow.sdk.definitions.param.Param` objects. From the native Python data type the ``type`` attribute is auto detected. So these simple types render to a corresponding field type. The name of the parameter is used as label and no further validation is made, all values are treated as optional. -- If you use the :class:`~airflow.models.param.Param` class as definition of the parameter value, the following attributes can be added: +- If you use the :class:`~airflow.sdk.definitions.param.Param` class as definition of the parameter value, the following attributes can be added: - - The :class:`~airflow.models.param.Param` attribute ``title`` is used to render the form field label of the entry box. + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``title`` is used to render the form field label of the entry box. If no ``title`` is defined the parameter name/key is used instead. - - The :class:`~airflow.models.param.Param` attribute ``description`` is rendered below an entry field as help text in gray color. + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``description`` is rendered below an entry field as help text in gray color. If you want to provide special formatting or links you need to use the Param attribute ``description_md``. See tutorial DAG :ref:`Params UI example DAG ` for an example. 
- - The :class:`~airflow.models.param.Param` attribute ``type`` influences how a field is rendered. The following types are supported: + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``type`` influences how a field is rendered. The following types are supported: .. list-table:: :header-rows: 1 diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256 index b058ae34b3783..e3f420e62c1d6 100644 --- a/docs/apache-airflow/img/airflow_erd.sha256 +++ b/docs/apache-airflow/img/airflow_erd.sha256 @@ -1 +1 @@ -eb25e0718c9382cdbb02368c9c3e29c90da06ddaba8e8e92d9fc53417b714039 \ No newline at end of file +829be35e333798f7c33c5fe0130ed12fad481c92145abc398ae23b815dd7b6ed \ No newline at end of file diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index 92ab9bfb7855c..d20fa37b8ea66 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ b/docs/apache-airflow/img/airflow_erd.svg @@ -649,24 +649,24 @@ dagrun_asset_event - -dagrun_asset_event - -dag_run_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +dagrun_asset_event + +dag_run_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_event--dagrun_asset_event - -0..N + +0..N 1 @@ -709,687 +709,695 @@ task_instance - -task_instance - -id - - [UUID] - NOT NULL - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -last_heartbeat_at - - [TIMESTAMP] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSON] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance + +id + + [UUID] + NOT NULL + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +last_heartbeat_at + + [TIMESTAMP] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSON] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + 
+try_number + + [INTEGER] + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] trigger--task_instance - -0..N + +0..N {0,1} task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -try_number - - [INTEGER] - NOT NULL + +task_reschedule + +id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +try_number + + [INTEGER] + NOT NULL task_instance--task_reschedule - -0..N -1 + +0..N +1 task_instance--task_reschedule - -0..N -1 + +0..N +1 task_instance--task_reschedule - -0..N -1 + +0..N +1 task_instance--task_reschedule - -0..N -1 + +0..N +1 rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSON] - -length - - [INTEGER] - NOT NULL + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSON] + +length + + [INTEGER] + NOT NULL task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 task_instance--task_map - -0..N -1 + +0..N +1 xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [JSONB] + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [JSONB] task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 task_instance--xcom - -0..N -1 + +0..N +1 task_instance_note - -task_instance_note - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id 
- - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] task_instance--task_instance_note - -0..N -1 + +0..N +1 task_instance--task_instance_note - -0..N -1 + +0..N +1 task_instance--task_instance_note - -0..N -1 + +0..N +1 task_instance--task_instance_note - -0..N -1 + +0..N +1 task_instance_history - -task_instance_history - -id - - [INTEGER] - NOT NULL - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSON] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance_history + +id + + [INTEGER] + NOT NULL + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSON] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 task_instance--task_instance_history - -0..N -1 + +0..N +1 @@ -1417,108 +1425,112 @@ dag - -dag - -dag_id - - [VARCHAR(250)] - NOT NULL - -asset_expression - - [JSON] - -bundle_name - - [VARCHAR(250)] - -bundle_version - - [VARCHAR(200)] - -dag_display_name - - [VARCHAR(2000)] - 
-default_view - - [VARCHAR(25)] - -description - - [TEXT] - -fileloc - - [VARCHAR(2000)] - -has_import_errors - - [BOOLEAN] - -has_task_concurrency_limits - - [BOOLEAN] - NOT NULL - -is_active - - [BOOLEAN] - -is_paused - - [BOOLEAN] - -last_expired - - [TIMESTAMP] - -last_parsed_time - - [TIMESTAMP] - -max_active_runs - - [INTEGER] - -max_active_tasks - - [INTEGER] - NOT NULL - -max_consecutive_failed_dag_runs - - [INTEGER] - NOT NULL - -next_dagrun - - [TIMESTAMP] - -next_dagrun_create_after - - [TIMESTAMP] - -next_dagrun_data_interval_end - - [TIMESTAMP] - -next_dagrun_data_interval_start - - [TIMESTAMP] - -owners - - [VARCHAR(2000)] - -timetable_description - - [VARCHAR(1000)] - -timetable_summary - - [TEXT] + +dag + +dag_id + + [VARCHAR(250)] + NOT NULL + +asset_expression + + [JSON] + +bundle_name + + [VARCHAR(250)] + +bundle_version + + [VARCHAR(200)] + +dag_display_name + + [VARCHAR(2000)] + +default_view + + [VARCHAR(25)] + +description + + [TEXT] + +fileloc + + [VARCHAR(2000)] + +has_import_errors + + [BOOLEAN] + +has_task_concurrency_limits + + [BOOLEAN] + NOT NULL + +is_active + + [BOOLEAN] + +is_paused + + [BOOLEAN] + +last_expired + + [TIMESTAMP] + +last_parsed_time + + [TIMESTAMP] + +max_active_runs + + [INTEGER] + +max_active_tasks + + [INTEGER] + NOT NULL + +max_consecutive_failed_dag_runs + + [INTEGER] + NOT NULL + +next_dagrun + + [TIMESTAMP] + +next_dagrun_create_after + + [TIMESTAMP] + +next_dagrun_data_interval_end + + [TIMESTAMP] + +next_dagrun_data_interval_start + + [TIMESTAMP] + +owners + + [VARCHAR(2000)] + +relative_fileloc + + [VARCHAR(2000)] + +timetable_description + + [VARCHAR(1000)] + +timetable_summary + + [TEXT] @@ -1530,16 +1542,16 @@ dag--dag_schedule_asset_alias_reference - + 0..N -1 +1 dag--dag_schedule_asset_reference - + 0..N -1 +1 @@ -1551,9 +1563,9 @@ dag--asset_dag_run_queue - + 0..N -1 +1 @@ -1733,162 +1745,162 @@ dag--dag_warning - + 0..N -1 +1 deadline - -deadline - -id - - [UUID] - NOT NULL - -callback - - [VARCHAR(500)] - NOT NULL - -callback_kwargs - - [JSON] - -dag_id - - [VARCHAR(250)] - -dagrun_id - - [INTEGER] - -deadline - - [TIMESTAMP] - NOT NULL + +deadline + +id + + [UUID] + NOT NULL + +callback + + [VARCHAR(500)] + NOT NULL + +callback_kwargs + + [JSON] + +dag_id + + [VARCHAR(250)] + +dagrun_id + + [INTEGER] + +deadline + + [TIMESTAMP] + NOT NULL dag--deadline - -0..N + +0..N {0,1} dag_version--task_instance - -0..N + +0..N {0,1} dag_run - -dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - -bundle_version - - [VARCHAR(250)] - -clear_number - - [INTEGER] - NOT NULL - -conf - - [JSONB] - -creating_job_id - - [INTEGER] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -data_interval_end - - [TIMESTAMP] - -data_interval_start - - [TIMESTAMP] - -end_date - - [TIMESTAMP] - -external_trigger - - [BOOLEAN] - -last_scheduling_decision - - [TIMESTAMP] - -log_template_id - - [INTEGER] - -logical_date - - [TIMESTAMP] - NOT NULL - -queued_at - - [TIMESTAMP] - -run_id - - [VARCHAR(250)] - NOT NULL - -run_type - - [VARCHAR(50)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(50)] - -triggered_by - - [VARCHAR(50)] - -updated_at - - [TIMESTAMP] + +dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + +bundle_version + + [VARCHAR(250)] + +clear_number + + [INTEGER] + NOT NULL + +conf + + [JSONB] + +creating_job_id + + [INTEGER] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +data_interval_end + + [TIMESTAMP] + +data_interval_start + + [TIMESTAMP] + 
+end_date + + [TIMESTAMP] + +external_trigger + + [BOOLEAN] + +last_scheduling_decision + + [TIMESTAMP] + +log_template_id + + [INTEGER] + +logical_date + + [TIMESTAMP] + NOT NULL + +queued_at + + [TIMESTAMP] + +run_id + + [VARCHAR(250)] + NOT NULL + +run_type + + [VARCHAR(50)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(50)] + +triggered_by + + [VARCHAR(50)] + +updated_at + + [TIMESTAMP] dag_version--dag_run - -0..N + +0..N {0,1} @@ -1988,121 +2000,121 @@ dag_run--dagrun_asset_event - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 dag_run--task_instance - -0..N -1 + +0..N +1 dag_run--deadline - -0..N -{0,1} + +0..N +{0,1} backfill_dag_run - -backfill_dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - NOT NULL - -dag_run_id - - [INTEGER] - -exception_reason - - [VARCHAR(250)] - -logical_date - - [TIMESTAMP] - NOT NULL - -sort_ordinal - - [INTEGER] - NOT NULL + +backfill_dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + NOT NULL + +dag_run_id + + [INTEGER] + +exception_reason + + [VARCHAR(250)] + +logical_date + + [TIMESTAMP] + NOT NULL + +sort_ordinal + + [INTEGER] + NOT NULL dag_run--backfill_dag_run - -0..N -{0,1} + +0..N +{0,1} dag_run_note - -dag_run_note - -dag_run_id - - [INTEGER] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +dag_run_note + +dag_run_id + + [INTEGER] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] dag_run--dag_run_note - -1 -1 + +1 +1 dag_run--task_reschedule - -0..N -1 + +0..N +1 dag_run--task_reschedule - -0..N -1 + +0..N +1 @@ -2133,9 +2145,9 @@ log_template--dag_run - -0..N -{0,1} + +0..N +{0,1} @@ -2199,50 +2211,50 @@ backfill--dag_run - -0..N -{0,1} + +0..N +{0,1} backfill--backfill_dag_run - -0..N -1 + +0..N +1 session - -session - -id - - [INTEGER] - NOT NULL - -data - - [BYTEA] - -expiry - - [TIMESTAMP] - -session_id - - [VARCHAR(255)] + +session + +id + + [INTEGER] + NOT NULL + +data + + [BYTEA] + +expiry + + [TIMESTAMP] + +session_id + + [VARCHAR(255)] alembic_version - -alembic_version - -version_num - - [VARCHAR(32)] - NOT NULL + +alembic_version + +version_num + + [VARCHAR(32)] + NOT NULL diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst index 62013ff8f799c..633c2338f4064 100644 --- a/docs/apache-airflow/migrations-ref.rst +++ b/docs/apache-airflow/migrations-ref.rst @@ -39,7 +39,11 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``e39a26ac59f6`` (head) | ``38770795785f`` | ``3.0.0`` | remove pickled data from dagrun table. | +| ``33b04e4bfa19`` (head) | ``8ea135928435`` | ``3.0.0`` | add new task_instance field scheduled_dttm. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``8ea135928435`` | ``e39a26ac59f6`` | ``3.0.0`` | Add relative fileloc column. 
| ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``e39a26ac59f6`` | ``38770795785f`` | ``3.0.0`` | remove pickled data from dagrun table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``38770795785f`` | ``5c9c0231baa2`` | ``3.0.0`` | Add asset reference models. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/docs/apache-airflow/public-airflow-interface.rst b/docs/apache-airflow/public-airflow-interface.rst index 2853c6fbe2e1b..d1ac63eb5b3a9 100644 --- a/docs/apache-airflow/public-airflow-interface.rst +++ b/docs/apache-airflow/public-airflow-interface.rst @@ -62,7 +62,7 @@ DAGs The DAG is Airflow's core entity that represents a recurring workflow. You can create a DAG by instantiating the :class:`~airflow.models.dag.DAG` class in your DAG file. You can also instantiate them via :class:`~airflow.models.dagbag.DagBag` class that reads DAGs from a file or a folder. DAGs -can also have parameters specified via :class:`~airflow.models.param.Param` class. +can also have parameters specified via :class:`~airflow.sdk.definitions.param.Param` class. Airflow has a set of example DAGs that you can use to learn how to write DAGs diff --git a/docs/docker-stack/recipes.rst b/docs/docker-stack/recipes.rst index 3402acb1019ca..7666fa892f747 100644 --- a/docs/docker-stack/recipes.rst +++ b/docs/docker-stack/recipes.rst @@ -26,8 +26,7 @@ Google Cloud SDK installation ----------------------------- Some operators, such as :class:`~airflow.providers.google.cloud.operators.kubernetes_engine.GKEStartPodOperator`, -:class:`~airflow.providers.google.cloud.operators.dataflow.DataflowStartSqlJobOperator`, require -the installation of `Google Cloud SDK `__ (includes ``gcloud``). +require the installation of `Google Cloud SDK `__ (includes ``gcloud``). You can also run these commands with BashOperator. Create a new Dockerfile like the one shown below. diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 8b8d983f8295f..0cf28a5b40cc7 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -76,6 +76,7 @@ "deps": [ "apache-airflow>=2.9.0", "apache-beam>=2.53.0", + "numpy>=1.26.0", "pyarrow>=14.0.1" ], "devel-deps": [], diff --git a/providers/apache/beam/README.rst b/providers/apache/beam/README.rst index 30a339f02b55a..7f61ac4e600bd 100644 --- a/providers/apache/beam/README.rst +++ b/providers/apache/beam/README.rst @@ -57,6 +57,7 @@ PIP package Version required ``apache-airflow`` ``>=2.9.0`` ``apache-beam`` ``>=2.53.0`` ``pyarrow`` ``>=14.0.1`` +``numpy`` ``>=1.26.0`` ================== ================== Cross provider package dependencies diff --git a/providers/apache/beam/pyproject.toml b/providers/apache/beam/pyproject.toml index ac5fb3bee884a..9425bdfa5d3a1 100644 --- a/providers/apache/beam/pyproject.toml +++ b/providers/apache/beam/pyproject.toml @@ -59,6 +59,7 @@ dependencies = [ # Apache Beam > 2.53.0 and pyarrow > 14.0.1 fix https://nvd.nist.gov/vuln/detail/CVE-2023-47248. 
"apache-beam>=2.53.0", "pyarrow>=14.0.1", + "numpy>=1.26.0", ] # The optional dependencies should be modified in place in the generated file diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py index cb39402c5067b..479cfbeb41ba0 100644 --- a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py +++ b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py @@ -93,7 +93,7 @@ def get_provider_info(): "python-modules": ["airflow.providers.apache.beam.triggers.beam"], } ], - "dependencies": ["apache-airflow>=2.9.0", "apache-beam>=2.53.0", "pyarrow>=14.0.1"], + "dependencies": ["apache-airflow>=2.9.0", "apache-beam>=2.53.0", "pyarrow>=14.0.1", "numpy>=1.26.0"], "optional-dependencies": { "google": ["apache-beam[gcp]"], "common.compat": ["apache-airflow-providers-common-compat"], diff --git a/providers/apache/drill/README.rst b/providers/apache/drill/README.rst new file mode 100644 index 0000000000000..0a6a843e9e19e --- /dev/null +++ b/providers/apache/drill/README.rst @@ -0,0 +1,82 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-apache-drill`` + +Release: ``3.0.0`` + + +`Apache Drill `__. + + +Provider package +---------------- + +This is a provider package for ``apache.drill`` provider. All classes for this provider package +are in ``airflow.providers.apache.drill`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-drill`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``sqlalchemy-drill`` ``>=1.1.0`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. 
+ +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-drill[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================ ============== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/apache/drill/.latest-doc-only-change.txt b/providers/apache/drill/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/drill/.latest-doc-only-change.txt rename to providers/apache/drill/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/apache/drill/CHANGELOG.rst b/providers/apache/drill/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/drill/CHANGELOG.rst rename to providers/apache/drill/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-drill/commits.rst b/providers/apache/drill/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/commits.rst rename to providers/apache/drill/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-drill/connections/drill.rst b/providers/apache/drill/docs/connections/drill.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/connections/drill.rst rename to providers/apache/drill/docs/connections/drill.rst diff --git a/docs/apache-airflow-providers-apache-drill/index.rst b/providers/apache/drill/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/index.rst rename to providers/apache/drill/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-drill/installing-providers-from-sources.rst b/providers/apache/drill/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/installing-providers-from-sources.rst rename to providers/apache/drill/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/apache/drill.png b/providers/apache/drill/docs/integration-logos/drill.png similarity index 100% rename from docs/integration-logos/apache/drill.png rename to providers/apache/drill/docs/integration-logos/drill.png diff --git a/docs/apache-airflow-providers-apache-drill/operators.rst b/providers/apache/drill/docs/operators.rst similarity index 95% rename from docs/apache-airflow-providers-apache-drill/operators.rst rename to providers/apache/drill/docs/operators.rst index 47784b67fedc3..396964ef70e13 100644 --- a/docs/apache-airflow-providers-apache-drill/operators.rst +++ b/providers/apache/drill/docs/operators.rst @@ -39,7 +39,7 @@ The ``sql`` parameter can be templated and be an external ``.sql`` file. Using the operator """""""""""""""""" -.. exampleinclude:: /../../providers/tests/system/apache/drill/example_drill_dag.py +.. 
exampleinclude:: /../../providers/apache/drill/tests/system/apache/drill/example_drill_dag.py :language: python :dedent: 4 :start-after: [START howto_operator_drill] diff --git a/docs/apache-airflow-providers-apache-drill/security.rst b/providers/apache/drill/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/security.rst rename to providers/apache/drill/docs/security.rst diff --git a/providers/src/airflow/providers/apache/drill/provider.yaml b/providers/apache/drill/provider.yaml similarity index 91% rename from providers/src/airflow/providers/apache/drill/provider.yaml rename to providers/apache/drill/provider.yaml index 3ea76623b1dca..89007829985b3 100644 --- a/providers/src/airflow/providers/apache/drill/provider.yaml +++ b/providers/apache/drill/provider.yaml @@ -53,17 +53,12 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - sqlalchemy-drill>=1.1.0 - integrations: - integration-name: Apache Drill external-doc-url: https://drill.apache.org/ how-to-guide: - /docs/apache-airflow-providers-apache-drill/operators.rst - logo: /integration-logos/apache/drill.png + logo: /docs/integration-logos/drill.png tags: [apache] hooks: diff --git a/providers/apache/drill/pyproject.toml b/providers/apache/drill/pyproject.toml new file mode 100644 index 0000000000000..6dfb0c9caab63 --- /dev/null +++ b/providers/apache/drill/pyproject.toml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-drill" +version = "3.0.0" +description = "Provider package apache-airflow-providers-apache-drill for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.drill", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "sqlalchemy-drill>=1.1.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-drill/3.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-drill/3.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.drill.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.drill" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE b/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
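The ``apache_airflow_provider`` entry point declared in the ``pyproject.toml`` above points at the ``get_provider_info`` module added below. As a hedged illustration of how such an entry point can be resolved — standard-library only, Python 3.10+ shown, and not how Airflow's providers manager is actually implemented, just the generic mechanism:

.. code-block:: python

    from importlib.metadata import entry_points

    # Iterate over every installed package that registered this entry-point group.
    for ep in entry_points(group="apache_airflow_provider"):
        info = ep.load()()  # calls the provider's get_provider_info()
        print(info["package-name"], info["versions"][0])

For the package added in this change, the loop would print ``apache-airflow-providers-apache-drill 3.0.0``.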
diff --git a/providers/src/airflow/providers/apache/drill/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/__init__.py diff --git a/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py b/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py new file mode 100644 index 0000000000000..e0553925d547d --- /dev/null +++ b/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-drill", + "name": "Apache Drill", + "description": "`Apache Drill `__.\n", + "state": "ready", + "source-date-epoch": 1734527755, + "versions": [ + "3.0.0", + "2.8.1", + "2.8.0", + "2.7.3", + "2.7.2", + "2.7.1", + "2.7.0", + "2.6.1", + "2.6.0", + "2.5.0", + "2.4.4", + "2.4.3", + "2.4.2", + "2.4.1", + "2.4.0", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.1", + "2.2.0", + "2.1.0", + "2.0.0", + "1.0.4", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Apache Drill", + "external-doc-url": "https://drill.apache.org/", + "how-to-guide": ["/docs/apache-airflow-providers-apache-drill/operators.rst"], + "logo": "/docs/integration-logos/drill.png", + "tags": ["apache"], + } + ], + "hooks": [ + { + "integration-name": "Apache Drill", + "python-modules": ["airflow.providers.apache.drill.hooks.drill"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.apache.drill.hooks.drill.DrillHook", + "connection-type": "drill", + } + ], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "sqlalchemy-drill>=1.1.0", + ], + } diff --git a/providers/src/airflow/providers/apache/drill/hooks/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/hooks/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/drill/hooks/drill.py b/providers/apache/drill/src/airflow/providers/apache/drill/hooks/drill.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/hooks/drill.py rename to 
providers/apache/drill/src/airflow/providers/apache/drill/hooks/drill.py diff --git a/providers/src/airflow/providers/apache/drill/operators/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/operators/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/operators/__init__.py diff --git a/providers/apache/drill/tests/conftest.py b/providers/apache/drill/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/apache/drill/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/apache/drill/tests/provider_tests/__init__.py b/providers/apache/drill/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/drill/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/drill/tests/provider_tests/apache/__init__.py b/providers/apache/drill/tests/provider_tests/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/drill/tests/provider_tests/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/src/airflow/providers/apache/druid/hooks/__init__.py b/providers/apache/drill/tests/provider_tests/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/hooks/__init__.py rename to providers/apache/drill/tests/provider_tests/apache/drill/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/operators/__init__.py b/providers/apache/drill/tests/provider_tests/apache/drill/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/operators/__init__.py rename to providers/apache/drill/tests/provider_tests/apache/drill/hooks/__init__.py diff --git a/providers/tests/apache/drill/hooks/test_drill.py b/providers/apache/drill/tests/provider_tests/apache/drill/hooks/test_drill.py similarity index 100% rename from providers/tests/apache/drill/hooks/test_drill.py rename to providers/apache/drill/tests/provider_tests/apache/drill/hooks/test_drill.py diff --git a/providers/src/airflow/providers/apache/druid/transfers/__init__.py b/providers/apache/drill/tests/system/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/transfers/__init__.py rename to providers/apache/drill/tests/system/apache/drill/__init__.py diff --git a/providers/tests/system/apache/drill/example_drill_dag.py b/providers/apache/drill/tests/system/apache/drill/example_drill_dag.py similarity index 100% rename from providers/tests/system/apache/drill/example_drill_dag.py rename to providers/apache/drill/tests/system/apache/drill/example_drill_dag.py diff --git a/providers/apache/druid/README.rst b/providers/apache/druid/README.rst new file mode 100644 index 0000000000000..ee723b1cf434f --- /dev/null +++ b/providers/apache/druid/README.rst @@ -0,0 +1,83 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. 
IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-apache-druid`` + +Release: ``4.0.0`` + + +`Apache Druid `__. + + +Provider package +---------------- + +This is a provider package for ``apache.druid`` provider. All classes for this provider package +are in ``airflow.providers.apache.druid`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-druid`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``pydruid`` ``>=0.4.1`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-druid[apache.hive] + + +============================================================================================================== =============== +Dependent package Extra +============================================================================================================== =============== +`apache-airflow-providers-apache-hive `_ ``apache.hive`` +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================== =============== + +The changelog for the provider package can be found in the +`changelog `_. 
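Editor's note: the README above states that all classes for the ``apache.druid`` provider live in the ``airflow.providers.apache.druid`` package. As a rough illustration of what consuming the package looks like (a minimal sketch, not part of this patch; the DAG id, connection id, and index-spec path are placeholders), a DAG could submit a Druid ingestion task via the provider's ``DruidOperator``:

.. code-block:: python

    from __future__ import annotations

    import datetime

    from airflow import DAG
    from airflow.providers.apache.druid.operators.druid import DruidOperator

    with DAG(
        dag_id="example_druid_ingest",  # hypothetical DAG id
        start_date=datetime.datetime(2025, 1, 1),
        schedule=None,
        catchup=False,
    ) as dag:
        submit_ingest = DruidOperator(
            task_id="submit_ingest",
            json_index_file="ingestion_spec.json",  # placeholder path to an ingestion spec
            druid_ingest_conn_id="druid_ingest_default",  # assumed connection id
        )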
diff --git a/providers/src/airflow/providers/apache/druid/.latest-doc-only-change.txt b/providers/apache/druid/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/druid/.latest-doc-only-change.txt rename to providers/apache/druid/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/apache/druid/CHANGELOG.rst b/providers/apache/druid/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/druid/CHANGELOG.rst rename to providers/apache/druid/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-druid/commits.rst b/providers/apache/druid/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/commits.rst rename to providers/apache/druid/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-druid/index.rst b/providers/apache/druid/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/index.rst rename to providers/apache/druid/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-druid/installing-providers-from-sources.rst b/providers/apache/druid/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/installing-providers-from-sources.rst rename to providers/apache/druid/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/apache/druid-1.png b/providers/apache/druid/docs/integration-logos/druid-1.png similarity index 100% rename from docs/integration-logos/apache/druid-1.png rename to providers/apache/druid/docs/integration-logos/druid-1.png diff --git a/docs/apache-airflow-providers-apache-druid/operators.rst b/providers/apache/druid/docs/operators.rst similarity index 95% rename from docs/apache-airflow-providers-apache-druid/operators.rst rename to providers/apache/druid/docs/operators.rst index 758c51c538538..1c0566202c235 100644 --- a/docs/apache-airflow-providers-apache-druid/operators.rst +++ b/providers/apache/druid/docs/operators.rst @@ -38,7 +38,7 @@ For parameter definition take a look at :class:`~airflow.providers.apache.druid. Using the operator """""""""""""""""" -.. exampleinclude:: /../../providers/tests/system/apache/druid/example_druid_dag.py +.. 
exampleinclude:: /../../providers/apache/druid/tests/system/apache/druid/example_druid_dag.py :language: python :dedent: 4 :start-after: [START howto_operator_druid_submit] diff --git a/docs/apache-airflow-providers-apache-druid/security.rst b/providers/apache/druid/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/security.rst rename to providers/apache/druid/docs/security.rst diff --git a/providers/src/airflow/providers/apache/druid/provider.yaml b/providers/apache/druid/provider.yaml similarity index 93% rename from providers/src/airflow/providers/apache/druid/provider.yaml rename to providers/apache/druid/provider.yaml index 7d71784796f6f..f87180f5820de 100644 --- a/providers/src/airflow/providers/apache/druid/provider.yaml +++ b/providers/apache/druid/provider.yaml @@ -60,15 +60,10 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - pydruid>=0.4.1 - integrations: - integration-name: Apache Druid external-doc-url: https://druid.apache.org/ - logo: /integration-logos/apache/druid-1.png + logo: /docs/integration-logos/druid-1.png how-to-guide: - /docs/apache-airflow-providers-apache-druid/operators.rst tags: [apache] diff --git a/providers/apache/druid/pyproject.toml b/providers/apache/druid/pyproject.toml new file mode 100644 index 0000000000000..baa5ede29fed4 --- /dev/null +++ b/providers/apache/druid/pyproject.toml @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-druid" +version = "4.0.0" +description = "Provider package apache-airflow-providers-apache-druid for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.druid", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "pydruid>=0.4.1", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +"apache.hive" = [ + "apache-airflow-providers-apache-hive" +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/4.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/4.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.druid.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.druid" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE b/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
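Editor's note: the ``pyproject.toml`` files in this patch register each provider under the ``apache_airflow_provider`` entry-point group, pointing at the generated ``get_provider_info()`` function. The sketch below is an illustration only (not Airflow's actual ``ProvidersManager`` code) of how such entry points can be enumerated and their metadata read; it assumes Python 3.10+ for the ``group=`` keyword of ``entry_points()``.

.. code-block:: python

    from __future__ import annotations

    from importlib.metadata import entry_points


    def installed_provider_packages() -> dict[str, list[str]]:
        """Map package-name -> dependencies for installed providers (illustrative helper)."""
        providers: dict[str, list[str]] = {}
        for ep in entry_points(group="apache_airflow_provider"):
            # e.g. resolves airflow.providers.apache.druid.get_provider_info:get_provider_info
            info = ep.load()()
            providers[info["package-name"]] = info.get("dependencies", [])
        return providers


    if __name__ == "__main__":
        for name, deps in installed_provider_packages().items():
            print(name, deps)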
diff --git a/providers/src/airflow/providers/apache/druid/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/__init__.py diff --git a/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py b/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py new file mode 100644 index 0000000000000..33088aafa69bf --- /dev/null +++ b/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-druid", + "name": "Apache Druid", + "description": "`Apache Druid `__.\n", + "state": "ready", + "source-date-epoch": 1734527841, + "versions": [ + "4.0.0", + "3.12.1", + "3.12.0", + "3.11.0", + "3.10.2", + "3.10.1", + "3.10.0", + "3.9.0", + "3.8.1", + "3.8.0", + "3.7.0", + "3.6.0", + "3.5.0", + "3.4.2", + "3.4.1", + "3.4.0", + "3.3.1", + "3.3.0", + "3.2.1", + "3.2.0", + "3.1.0", + "3.0.0", + "2.3.3", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.0", + "2.1.0", + "2.0.2", + "2.0.1", + "2.0.0", + "1.1.0", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Apache Druid", + "external-doc-url": "https://druid.apache.org/", + "logo": "/docs/integration-logos/druid-1.png", + "how-to-guide": ["/docs/apache-airflow-providers-apache-druid/operators.rst"], + "tags": ["apache"], + } + ], + "operators": [ + { + "integration-name": "Apache Druid", + "python-modules": ["airflow.providers.apache.druid.operators.druid"], + } + ], + "hooks": [ + { + "integration-name": "Apache Druid", + "python-modules": ["airflow.providers.apache.druid.hooks.druid"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.apache.druid.hooks.druid.DruidDbApiHook", + "connection-type": "druid", + } + ], + "transfers": [ + { + "source-integration-name": "Apache Hive", + "target-integration-name": "Apache Druid", + "python-module": "airflow.providers.apache.druid.transfers.hive_to_druid", + } + ], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "pydruid>=0.4.1", + ], + "optional-dependencies": {"apache.hive": ["apache-airflow-providers-apache-hive"]}, + } diff --git a/providers/src/airflow/providers/dbt/cloud/sensors/__init__.py 
b/providers/apache/druid/src/airflow/providers/apache/druid/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/sensors/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/hooks/druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/hooks/druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/hooks/druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/hooks/druid.py diff --git a/providers/src/airflow/providers/elasticsearch/hooks/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/hooks/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/operators/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/operators/druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/operators/druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/operators/druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/operators/druid.py diff --git a/providers/src/airflow/providers/dbt/cloud/hooks/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/transfers/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/hooks/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/transfers/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/transfers/hive_to_druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/transfers/hive_to_druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/transfers/hive_to_druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/transfers/hive_to_druid.py diff --git a/providers/apache/druid/tests/conftest.py b/providers/apache/druid/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/apache/druid/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/apache/druid/tests/provider_tests/__init__.py b/providers/apache/druid/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/druid/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/druid/tests/provider_tests/apache/__init__.py b/providers/apache/druid/tests/provider_tests/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/druid/tests/provider_tests/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/apache/drill/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/__init__.py similarity index 100% rename from providers/tests/apache/drill/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/__init__.py diff --git a/providers/tests/apache/drill/hooks/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/hooks/__init__.py similarity index 100% rename from providers/tests/apache/drill/hooks/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/hooks/__init__.py diff --git a/providers/tests/apache/druid/hooks/test_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/hooks/test_druid.py similarity index 100% rename from providers/tests/apache/druid/hooks/test_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/hooks/test_druid.py diff --git a/providers/tests/apache/druid/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/operators/__init__.py similarity index 100% rename from providers/tests/apache/druid/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/operators/__init__.py diff --git a/providers/tests/apache/druid/operators/test_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/operators/test_druid.py similarity index 100% rename from providers/tests/apache/druid/operators/test_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/operators/test_druid.py diff --git a/providers/src/airflow/providers/dbt/cloud/operators/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/transfers/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/operators/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/transfers/__init__.py diff --git a/providers/tests/apache/druid/transfers/test_hive_to_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/transfers/test_hive_to_druid.py similarity index 100% rename from providers/tests/apache/druid/transfers/test_hive_to_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/transfers/test_hive_to_druid.py diff --git a/providers/src/airflow/providers/dbt/cloud/triggers/__init__.py b/providers/apache/druid/tests/system/apache/druid/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/triggers/__init__.py rename to providers/apache/druid/tests/system/apache/druid/__init__.py diff --git a/providers/tests/system/apache/druid/example_druid_dag.py b/providers/apache/druid/tests/system/apache/druid/example_druid_dag.py similarity index 100% rename from providers/tests/system/apache/druid/example_druid_dag.py rename to providers/apache/druid/tests/system/apache/druid/example_druid_dag.py diff --git a/providers/dbt/cloud/README.rst b/providers/dbt/cloud/README.rst new file mode 100644 index 0000000000000..4c85eebd9f0c9 --- /dev/null +++ b/providers/dbt/cloud/README.rst @@ -0,0 +1,84 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-dbt-cloud`` + +Release: ``4.0.0`` + + +`dbt Cloud `__ + + +Provider package +---------------- + +This is a provider package for ``dbt.cloud`` provider. All classes for this provider package +are in ``airflow.providers.dbt.cloud`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-dbt-cloud`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================================= ================== +PIP package Version required +================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-http`` +``asgiref`` ``>=2.3.0`` +``aiohttp`` ``>=3.9.2`` +================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-dbt-cloud[http] + + +============================================================================================================== =============== +Dependent package Extra +============================================================================================================== =============== +`apache-airflow-providers-http `_ ``http`` +`apache-airflow-providers-openlineage `_ ``openlineage`` +============================================================================================================== =============== + +The changelog for the provider package can be found in the +`changelog `_. 
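Editor's note: as with the Druid package above, the classes for this provider live under ``airflow.providers.dbt.cloud``. A minimal sketch of triggering a dbt Cloud job and then waiting for its run to finish (illustrative only; the account id, job id, and connection id are placeholders) could look like this:

.. code-block:: python

    from __future__ import annotations

    import datetime

    from airflow import DAG
    from airflow.providers.dbt.cloud.operators.dbt import DbtCloudRunJobOperator
    from airflow.providers.dbt.cloud.sensors.dbt import DbtCloudJobRunSensor

    with DAG(
        dag_id="example_dbt_cloud_minimal",  # hypothetical DAG id
        start_date=datetime.datetime(2025, 1, 1),
        schedule=None,
        catchup=False,
        default_args={"dbt_cloud_conn_id": "dbt_cloud_default", "account_id": 12345},  # placeholders
    ) as dag:
        trigger_job = DbtCloudRunJobOperator(
            task_id="trigger_dbt_cloud_job",
            job_id=48617,  # placeholder job id
            wait_for_termination=False,  # hand waiting off to the sensor below
        )

        wait_for_job = DbtCloudJobRunSensor(
            task_id="wait_for_dbt_cloud_job",
            run_id=trigger_job.output,  # run id returned by the trigger task via XCom
            timeout=3600,
        )

        trigger_job >> wait_for_job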
diff --git a/providers/src/airflow/providers/dbt/cloud/.latest-doc-only-change.txt b/providers/dbt/cloud/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/.latest-doc-only-change.txt rename to providers/dbt/cloud/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst b/providers/dbt/cloud/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst rename to providers/dbt/cloud/docs/changelog.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/commits.rst b/providers/dbt/cloud/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/commits.rst rename to providers/dbt/cloud/docs/commits.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/connections.rst b/providers/dbt/cloud/docs/connections.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/connections.rst rename to providers/dbt/cloud/docs/connections.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/index.rst b/providers/dbt/cloud/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/index.rst rename to providers/dbt/cloud/docs/index.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/installing-providers-from-sources.rst b/providers/dbt/cloud/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/installing-providers-from-sources.rst rename to providers/dbt/cloud/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/dbt/dbt.png b/providers/dbt/cloud/docs/integration-logos/dbt.png similarity index 100% rename from docs/integration-logos/dbt/dbt.png rename to providers/dbt/cloud/docs/integration-logos/dbt.png diff --git a/docs/apache-airflow-providers-dbt-cloud/operators.rst b/providers/dbt/cloud/docs/operators.rst similarity index 93% rename from docs/apache-airflow-providers-dbt-cloud/operators.rst rename to providers/dbt/cloud/docs/operators.rst index eaa285f6d4082..3eeb5b04dbb59 100644 --- a/docs/apache-airflow-providers-dbt-cloud/operators.rst +++ b/providers/dbt/cloud/docs/operators.rst @@ -67,7 +67,7 @@ The below examples demonstrate how to instantiate DbtCloudRunJobOperator tasks w asynchronous waiting for run termination, respectively. To note, the ``account_id`` for the operators is referenced within the ``default_args`` of the example DAG. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job] @@ -76,7 +76,7 @@ referenced within the ``default_args`` of the example DAG. This next example also shows how to pass in custom runtime configuration (in this case for ``threads_override``) via the ``additional_run_config`` dictionary. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_async] @@ -95,7 +95,7 @@ In the example below, the ``run_id`` value in the example below comes from the o DbtCloudRunJobOperator task by utilizing the ``.output`` property exposed for all operators. 
Also, to note, the ``account_id`` for the task is referenced within the ``default_args`` of the example DAG. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_sensor] @@ -104,7 +104,7 @@ the ``account_id`` for the task is referenced within the ``default_args`` of the Also, you can poll for status of the job run asynchronously using ``deferrable`` mode. In this mode, worker slots are freed up while the sensor is running. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_sensor_deferred] @@ -125,7 +125,7 @@ downloaded. For more information on dbt Cloud artifacts, reference `this documentation `__. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_get_artifact] @@ -146,7 +146,7 @@ If a ``project_id`` is supplied, only jobs pertaining to this project id will be For more information on dbt Cloud list jobs, reference `this documentation `__. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_list_jobs] diff --git a/docs/apache-airflow-providers-dbt-cloud/security.rst b/providers/dbt/cloud/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/security.rst rename to providers/dbt/cloud/docs/security.rst diff --git a/providers/src/airflow/providers/dbt/cloud/provider.yaml b/providers/dbt/cloud/provider.yaml similarity index 85% rename from providers/src/airflow/providers/dbt/cloud/provider.yaml rename to providers/dbt/cloud/provider.yaml index dd6dc42b3b75f..99dc44927ff97 100644 --- a/providers/src/airflow/providers/dbt/cloud/provider.yaml +++ b/providers/dbt/cloud/provider.yaml @@ -59,23 +59,10 @@ versions: - 1.0.2 - 1.0.1 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-http - - asgiref>=2.3.0 - - aiohttp>=3.9.2 - -additional-extras: - # pip install apache-airflow-providers-dbt-cloud[openlineage] - - name: openlineage - description: Install compatible OpenLineage dependencies - dependencies: - - apache-airflow-providers-openlineage>=1.7.0 - integrations: - integration-name: dbt Cloud external-doc-url: https://docs.getdbt.com/docs/dbt-cloud/cloud-overview - logo: /integration-logos/dbt/dbt.png + logo: /docs/integration-logos/dbt.png how-to-guide: - /docs/apache-airflow-providers-dbt-cloud/operators.rst tags: [dbt] diff --git a/providers/dbt/cloud/pyproject.toml b/providers/dbt/cloud/pyproject.toml new file mode 100644 index 0000000000000..a74cf457016cc --- /dev/null +++ b/providers/dbt/cloud/pyproject.toml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-dbt-cloud" +version = "4.0.0" +description = "Provider package apache-airflow-providers-dbt-cloud for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "dbt.cloud", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-http", + "asgiref>=2.3.0", + "aiohttp>=3.9.2", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +# pip install apache-airflow-providers-dbt-cloud[openlineage] +"openlineage" = [ + "apache-airflow-providers-openlineage>=1.7.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/4.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/4.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.dbt.cloud.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.dbt.cloud" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ 
b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/providers/src/airflow/providers/dbt/cloud/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/__init__.py diff --git a/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py new file mode 100644 index 0000000000000..de75fc08c4c6b --- /dev/null +++ b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-dbt-cloud", + "name": "dbt Cloud", + "description": "`dbt Cloud `__\n", + "state": "ready", + "source-date-epoch": 1734533324, + "versions": [ + "4.0.0", + "3.11.2", + "3.11.1", + "3.11.0", + "3.10.1", + "3.10.0", + "3.9.0", + "3.8.1", + "3.8.0", + "3.7.1", + "3.7.0", + "3.6.1", + "3.6.0", + "3.5.1", + "3.5.0", + "3.4.1", + "3.4.0", + "3.3.0", + "3.2.3", + "3.2.2", + "3.2.1", + "3.2.0", + "3.1.1", + "3.1.0", + "3.0.0", + "2.3.1", + "2.3.0", + "2.2.0", + "2.1.0", + "2.0.1", + "2.0.0", + "1.0.2", + "1.0.1", + ], + "integrations": [ + { + "integration-name": "dbt Cloud", + "external-doc-url": "https://docs.getdbt.com/docs/dbt-cloud/cloud-overview", + "logo": "/docs/integration-logos/dbt.png", + "how-to-guide": ["/docs/apache-airflow-providers-dbt-cloud/operators.rst"], + "tags": ["dbt"], + } + ], + "operators": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.operators.dbt"]} + ], + "sensors": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.sensors.dbt"]} + ], + "hooks": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.hooks.dbt"]} + ], + "triggers": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.triggers.dbt"]} + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.dbt.cloud.hooks.dbt.DbtCloudHook", + "connection-type": "dbt_cloud", + } + ], + "extra-links": ["airflow.providers.dbt.cloud.operators.dbt.DbtCloudRunJobOperatorLink"], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-http", + "asgiref>=2.3.0", + "aiohttp>=3.9.2", + ], + "optional-dependencies": {"openlineage": ["apache-airflow-providers-openlineage>=1.7.0"]}, + } diff --git a/providers/src/airflow/providers/dbt/cloud/utils/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/utils/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/hooks/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/hooks/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/dbt.py diff --git a/providers/src/airflow/providers/elasticsearch/log/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/operators/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/operators/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/dbt.py diff --git a/providers/tests/apache/druid/hooks/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/__init__.py similarity index 100% rename from providers/tests/apache/druid/hooks/__init__.py rename to 
providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/sensors/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/sensors/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/dbt.py diff --git a/providers/src/airflow/providers/github/hooks/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/hooks/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/triggers/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/triggers/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/dbt.py diff --git a/providers/src/airflow/providers/github/operators/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/operators/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/utils/openlineage.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/openlineage.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/utils/openlineage.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/openlineage.py diff --git a/providers/dbt/cloud/tests/conftest.py b/providers/dbt/cloud/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/dbt/cloud/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/dbt/cloud/tests/provider_tests/__init__.py b/providers/dbt/cloud/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/dbt/cloud/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/src/airflow/providers/github/sensors/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/sensors/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/__init__.py diff --git a/providers/src/airflow/providers/opensearch/hooks/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/hooks/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/__init__.py diff --git a/providers/tests/dbt/cloud/hooks/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/hooks/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/test_dbt.py diff --git a/providers/src/airflow/providers/opensearch/log/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/__init__.py diff --git a/providers/tests/dbt/cloud/operators/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/operators/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/test_dbt.py diff --git a/providers/src/airflow/providers/opensearch/operators/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/operators/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/__init__.py diff --git a/providers/tests/dbt/cloud/sensors/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/sensors/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/test_dbt.py diff --git a/providers/tests/apache/druid/transfers/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/__init__.py similarity index 100% rename from providers/tests/apache/druid/transfers/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/__init__.py diff --git a/providers/tests/dbt/cloud/test_data/catalog.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/catalog.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/catalog.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/catalog.json diff --git a/providers/tests/dbt/cloud/test_data/job_run.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/job_run.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/job_run.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/job_run.json diff --git a/providers/tests/dbt/cloud/test_data/manifest.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/manifest.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/manifest.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/manifest.json diff --git a/providers/tests/dbt/cloud/test_data/run_results.json 
b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/run_results.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/run_results.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/run_results.json diff --git a/providers/tests/dbt/cloud/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/__init__.py diff --git a/providers/tests/dbt/cloud/triggers/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/triggers/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/test_dbt.py diff --git a/providers/tests/dbt/cloud/hooks/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/hooks/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/__init__.py diff --git a/providers/tests/dbt/cloud/utils/test_openlineage.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/test_openlineage.py similarity index 100% rename from providers/tests/dbt/cloud/utils/test_openlineage.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/test_openlineage.py diff --git a/providers/tests/dbt/cloud/operators/__init__.py b/providers/dbt/cloud/tests/system/dbt/cloud/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/operators/__init__.py rename to providers/dbt/cloud/tests/system/dbt/cloud/__init__.py diff --git a/providers/tests/system/dbt/cloud/example_dbt_cloud.py b/providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py similarity index 100% rename from providers/tests/system/dbt/cloud/example_dbt_cloud.py rename to providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py diff --git a/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py b/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py index 418164832576d..777a85ef2dd99 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py +++ b/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py @@ -30,10 +30,10 @@ from airflow.exceptions import AirflowNotFoundException from airflow.hooks.base import BaseHook from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.models.variable import Variable from airflow.operators.empty import EmptyOperator from airflow.providers.common.compat.standard.operators import PythonOperator +from airflow.sdk import Param from airflow.utils.trigger_rule import TriggerRule try: diff --git a/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py b/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py index da50fff8b96ee..a3b229a28f722 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py +++ b/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py @@ -34,7 +34,7 @@ from airflow.models import BaseOperator from airflow.models.dag import DAG -from airflow.models.param import Param +from airflow.sdk import Param if TYPE_CHECKING: from airflow.utils.context import Context diff --git a/providers/edge/src/airflow/providers/edge/example_dags/win_test.py b/providers/edge/src/airflow/providers/edge/example_dags/win_test.py index 
3a730009d50c3..630092180b590 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/win_test.py +++ b/providers/edge/src/airflow/providers/edge/example_dags/win_test.py @@ -37,9 +37,9 @@ from airflow.hooks.base import BaseHook from airflow.models import BaseOperator from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.models.variable import Variable from airflow.operators.empty import EmptyOperator +from airflow.sdk import Param from airflow.utils.operator_helpers import context_to_airflow_vars from airflow.utils.trigger_rule import TriggerRule from airflow.utils.types import ArgNotSet diff --git a/providers/elasticsearch/README.rst b/providers/elasticsearch/README.rst new file mode 100644 index 0000000000000..65252da3eb32c --- /dev/null +++ b/providers/elasticsearch/README.rst @@ -0,0 +1,82 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-elasticsearch`` + +Release: ``6.0.0`` + + +`Elasticsearch `__ + + +Provider package +---------------- + +This is a provider package for ``elasticsearch`` provider. All classes for this provider package +are in ``airflow.providers.elasticsearch`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-elasticsearch`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``elasticsearch`` ``>=8.10,<9`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. 
code-block:: bash + + pip install apache-airflow-providers-elasticsearch[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================ ============== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/elasticsearch/.latest-doc-only-change.txt b/providers/elasticsearch/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/elasticsearch/.latest-doc-only-change.txt rename to providers/elasticsearch/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/elasticsearch/CHANGELOG.rst b/providers/elasticsearch/docs/changelog.rst similarity index 99% rename from providers/src/airflow/providers/elasticsearch/CHANGELOG.rst rename to providers/elasticsearch/docs/changelog.rst index 1c9dc79d348ac..6ee3bfc59b6df 100644 --- a/providers/src/airflow/providers/elasticsearch/CHANGELOG.rst +++ b/providers/elasticsearch/docs/changelog.rst @@ -104,7 +104,7 @@ Misc ~~~~ * ``Generalize caching of connection in DbApiHook to improve performance (#40751)`` -* ``filename template arg in providers file task handlers backward compitability support (#41633)`` +* ``filename template arg in providers file task handlers backward compatibility support (#41633)`` * ``Remove deprecated log handler argument filename_template (#41552)`` diff --git a/docs/apache-airflow-providers-elasticsearch/commits.rst b/providers/elasticsearch/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/commits.rst rename to providers/elasticsearch/docs/commits.rst diff --git a/docs/apache-airflow-providers-elasticsearch/configurations-ref.rst b/providers/elasticsearch/docs/configurations-ref.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/configurations-ref.rst rename to providers/elasticsearch/docs/configurations-ref.rst diff --git a/docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst b/providers/elasticsearch/docs/connections/elasticsearch.rst similarity index 95% rename from docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst rename to providers/elasticsearch/docs/connections/elasticsearch.rst index 8097b8bd61c4f..7d52bbcfb7f90 100644 --- a/docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst +++ b/providers/elasticsearch/docs/connections/elasticsearch.rst @@ -72,7 +72,7 @@ For example: export AIRFLOW_CONN_ELASTICSEARCH_DEFAULT='elasticsearch://elasticsearchlogin:elasticsearchpassword@elastic.co:80/http' -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. 
exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :dedent: 4 :start-after: [START howto_elasticsearch_query] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst b/providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst similarity index 93% rename from docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst rename to providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst index 537b4973b41cf..d1a9e5300fe13 100644 --- a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst +++ b/providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst @@ -36,7 +36,7 @@ es_conn_args Usage Example --------------------- -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :start-after: [START howto_elasticsearch_python_hook] :end-before: [END howto_elasticsearch_python_hook] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst b/providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst similarity index 91% rename from docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst rename to providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst index 084d445cb0bad..658a5f59ae3c7 100644 --- a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst +++ b/providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst @@ -26,7 +26,7 @@ Elasticsearch Hook that interact with Elasticsearch through the elasticsearch-db Usage Example --------------------- -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. 
exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :start-after: [START howto_elasticsearch_query] :end-before: [END howto_elasticsearch_query] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/index.rst b/providers/elasticsearch/docs/hooks/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/hooks/index.rst rename to providers/elasticsearch/docs/hooks/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/index.rst b/providers/elasticsearch/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/index.rst rename to providers/elasticsearch/docs/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/installing-providers-from-sources.rst b/providers/elasticsearch/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/installing-providers-from-sources.rst rename to providers/elasticsearch/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/elasticsearch/Elasticsearch.png b/providers/elasticsearch/docs/integration-logos/Elasticsearch.png similarity index 100% rename from docs/integration-logos/elasticsearch/Elasticsearch.png rename to providers/elasticsearch/docs/integration-logos/Elasticsearch.png diff --git a/docs/apache-airflow-providers-elasticsearch/logging/index.rst b/providers/elasticsearch/docs/logging/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/logging/index.rst rename to providers/elasticsearch/docs/logging/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/redirects.txt b/providers/elasticsearch/docs/redirects.txt similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/redirects.txt rename to providers/elasticsearch/docs/redirects.txt diff --git a/docs/apache-airflow-providers-elasticsearch/security.rst b/providers/elasticsearch/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/security.rst rename to providers/elasticsearch/docs/security.rst diff --git a/providers/src/airflow/providers/elasticsearch/provider.yaml b/providers/elasticsearch/provider.yaml similarity index 97% rename from providers/src/airflow/providers/elasticsearch/provider.yaml rename to providers/elasticsearch/provider.yaml index 88ebba2a510c4..3e76e6ff76d8a 100644 --- a/providers/src/airflow/providers/elasticsearch/provider.yaml +++ b/providers/elasticsearch/provider.yaml @@ -70,15 +70,10 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - elasticsearch>=8.10,<9 - integrations: - integration-name: Elasticsearch external-doc-url: https://www.elastic.co/elasticsearch - logo: /integration-logos/elasticsearch/Elasticsearch.png + logo: /docs/integration-logos/Elasticsearch.png tags: [software] hooks: diff --git a/providers/elasticsearch/pyproject.toml b/providers/elasticsearch/pyproject.toml new file mode 100644 index 0000000000000..63f28a026ee10 --- /dev/null +++ b/providers/elasticsearch/pyproject.toml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-elasticsearch" +version = "6.0.0" +description = "Provider package apache-airflow-providers-elasticsearch for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "elasticsearch", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "elasticsearch>=8.10,<9", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/6.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/6.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.elasticsearch.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.elasticsearch" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE b/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
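The ``[project.entry-points."apache_airflow_provider"]`` table in the ``pyproject.toml`` above points at the generated ``get_provider_info`` module added in the next hunk. As a hedged illustration (not Airflow's actual ProvidersManager logic), the snippet below shows how such an entry point can be resolved and called with the standard library; it assumes Python 3.10+ for the ``group=`` selection in ``importlib.metadata``.

.. code-block:: python

   from importlib.metadata import entry_points


   def load_provider_infos() -> dict[str, dict]:
       """Collect {package-name: provider info dict} for every installed provider."""
       infos = {}
       for ep in entry_points(group="apache_airflow_provider"):
           # ep.value looks like
           # "airflow.providers.elasticsearch.get_provider_info:get_provider_info"
           get_info = ep.load()
           info = get_info()
           infos[info["package-name"]] = info
       return infos


   if __name__ == "__main__":
       for name, info in load_provider_infos().items():
           print(name, info["versions"][0])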
diff --git a/providers/src/airflow/providers/elasticsearch/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/__init__.py diff --git a/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py new file mode 100644 index 0000000000000..8b1ef52d8ea50 --- /dev/null +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py @@ -0,0 +1,221 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-elasticsearch", + "name": "Elasticsearch", + "description": "`Elasticsearch `__\n", + "state": "ready", + "source-date-epoch": 1734533734, + "versions": [ + "6.0.0", + "5.5.3", + "5.5.2", + "5.5.1", + "5.5.0", + "5.4.2", + "5.4.1", + "5.4.0", + "5.3.4", + "5.3.3", + "5.3.2", + "5.3.1", + "5.3.0", + "5.2.0", + "5.1.1", + "5.1.0", + "5.0.2", + "5.0.1", + "5.0.0", + "4.5.1", + "4.5.0", + "4.4.0", + "4.3.3", + "4.3.2", + "4.3.1", + "4.3.0", + "4.2.1", + "4.2.0", + "4.1.0", + "4.0.0", + "3.0.3", + "3.0.2", + "3.0.1", + "3.0.0", + "2.2.0", + "2.1.0", + "2.0.3", + "2.0.2", + "2.0.1", + "1.0.4", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Elasticsearch", + "external-doc-url": "https://www.elastic.co/elasticsearch", + "logo": "/docs/integration-logos/Elasticsearch.png", + "tags": ["software"], + } + ], + "hooks": [ + { + "integration-name": "Elasticsearch", + "python-modules": ["airflow.providers.elasticsearch.hooks.elasticsearch"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchSQLHook", + "connection-type": "elasticsearch", + } + ], + "logging": ["airflow.providers.elasticsearch.log.es_task_handler.ElasticsearchTaskHandler"], + "config": { + "elasticsearch": { + "description": None, + "options": { + "host": { + "description": "Elasticsearch host\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "", + }, + "log_id_template": { + "description": "Format of the log_id, which is used to query for a given tasks logs\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "is_template": True, + "default": "{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}", + }, 
+ "end_of_log_mark": { + "description": "Used to mark the end of a log stream for a task\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "end_of_log", + }, + "frontend": { + "description": "Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id\nCode will construct log_id using the log_id template from the argument above.\nNOTE: scheme will default to https if one is not provided\n", + "version_added": "1.10.4", + "type": "string", + "example": "http://localhost:5601/app/kibana#/discover?_a=(columns:!(message),query:(language:kuery,query:'log_id: \"{log_id}\"'),sort:!(log.offset,asc))", + "default": "", + }, + "write_stdout": { + "description": "Write the task logs to the stdout of the worker, rather than the default files\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "False", + }, + "write_to_es": { + "description": "Write the task logs to the ElasticSearch\n", + "version_added": "5.5.4", + "type": "string", + "example": None, + "default": "False", + }, + "target_index": { + "description": "Name of the index to write to, when enabling writing the task logs to the ElasticSearch\n", + "version_added": "5.5.4", + "type": "string", + "example": None, + "default": "airflow-logs", + }, + "json_format": { + "description": "Instead of the default log formatter, write the log lines as JSON\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "False", + }, + "json_fields": { + "description": "Log fields to also attach to the json output, if enabled\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "asctime, filename, lineno, levelname, message", + }, + "host_field": { + "description": "The field where host name is stored (normally either `host` or `host.name`)\n", + "version_added": "2.1.1", + "type": "string", + "example": None, + "default": "host", + }, + "offset_field": { + "description": "The field where offset is stored (normally either `offset` or `log.offset`)\n", + "version_added": "2.1.1", + "type": "string", + "example": None, + "default": "offset", + }, + "index_patterns": { + "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\nThe index_patterns_callable takes precedence over this.\n", + "version_added": "2.6.0", + "type": "string", + "example": "something-*", + "default": "_all", + }, + "index_patterns_callable": { + "description": "A string representing the full path to the Python callable path which accept TI object and\nreturn comma separated list of index patterns. 
This will takes precedence over index_patterns.\n", + "version_added": "5.5.0", + "type": "string", + "example": "module.callable", + "default": "", + }, + }, + }, + "elasticsearch_configs": { + "description": None, + "options": { + "http_compress": { + "description": None, + "version_added": "1.10.5", + "type": "string", + "example": None, + "default": "False", + }, + "verify_certs": { + "description": None, + "version_added": "1.10.5", + "type": "string", + "example": None, + "default": "True", + }, + }, + }, + }, + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "elasticsearch>=8.10,<9", + ], + } diff --git a/providers/tests/apache/druid/operators/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/__init__.py similarity index 100% rename from providers/tests/apache/druid/operators/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/__init__.py diff --git a/providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py similarity index 99% rename from providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py index 70c60d78f9a1d..ab1bc433d94a4 100644 --- a/providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING, Any from urllib import parse -from elasticsearch import Elasticsearch - from airflow.hooks.base import BaseHook from airflow.providers.common.sql.hooks.sql import DbApiHook +from elasticsearch import Elasticsearch if TYPE_CHECKING: from elastic_transport import ObjectApiResponse diff --git a/providers/tests/dbt/cloud/sensors/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/sensors/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/__init__.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_json_formatter.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_json_formatter.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_json_formatter.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_json_formatter.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_response.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_response.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_response.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_response.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_task_handler.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_task_handler.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py index 15904e7ebf3b4..5343ba46618ea 100644 --- a/providers/src/airflow/providers/elasticsearch/log/es_task_handler.py +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py @@ -31,12 +31,10 @@ from typing import TYPE_CHECKING, Any, Callable, Literal from 
urllib.parse import quote, urlparse -# Using `from elasticsearch import *` would break elasticsearch mocking used in unit test. -import elasticsearch import pendulum -from elasticsearch import helpers -from elasticsearch.exceptions import NotFoundError +# Using `from elasticsearch import *` would break elasticsearch mocking used in unit test. +import elasticsearch from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.models.dagrun import DagRun @@ -48,6 +46,8 @@ from airflow.utils.log.logging_mixin import ExternalLoggingMixin, LoggingMixin from airflow.utils.module_loading import import_string from airflow.utils.session import create_session +from elasticsearch import helpers +from elasticsearch.exceptions import NotFoundError if TYPE_CHECKING: from datetime import datetime diff --git a/providers/src/airflow/providers/elasticsearch/version_compat.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/version_compat.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/version_compat.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/version_compat.py diff --git a/providers/elasticsearch/tests/conftest.py b/providers/elasticsearch/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/elasticsearch/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/elasticsearch/tests/provider_tests/__init__.py b/providers/elasticsearch/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/elasticsearch/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/elasticsearch/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/__init__.py similarity index 100% rename from providers/tests/elasticsearch/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/__init__.py diff --git a/providers/tests/elasticsearch/hooks/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/__init__.py similarity index 100% rename from providers/tests/elasticsearch/hooks/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/__init__.py diff --git a/providers/tests/elasticsearch/hooks/test_elasticsearch.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/test_elasticsearch.py similarity index 100% rename from providers/tests/elasticsearch/hooks/test_elasticsearch.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/test_elasticsearch.py diff --git a/providers/tests/dbt/cloud/test_data/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/test_data/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/__init__.py diff --git a/providers/tests/elasticsearch/log/elasticmock/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py similarity index 97% rename from providers/tests/elasticsearch/log/elasticmock/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py index 44e242d114574..912d754a966c7 100644 --- a/providers/tests/elasticsearch/log/elasticmock/__init__.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py @@ -1,3 +1,5 @@ +"""Elastic mock module used for testing""" + # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -38,12 +40,11 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""Elastic mock module used for testing""" from functools import wraps from unittest.mock import patch from urllib.parse import unquote, urlparse -from providers.tests.elasticsearch.log.elasticmock.fake_elasticsearch import FakeElasticsearch +from provider_tests.elasticsearch.log.elasticmock.fake_elasticsearch import FakeElasticsearch ELASTIC_INSTANCES: dict[str, FakeElasticsearch] = {} diff --git a/providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py similarity index 99% rename from providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py index 1d975ee718c3f..39aa0fc66082c 100644 --- a/providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py @@ -22,7 +22,7 @@ from elasticsearch import Elasticsearch from elasticsearch.exceptions import NotFoundError -from providers.tests.elasticsearch.log.elasticmock.utilities import ( +from provider_tests.elasticsearch.log.elasticmock.utilities import ( MissingIndexException, get_random_id, query_params, diff --git a/providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py similarity index 99% rename from providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py index f5a6c14dba2aa..62fef03473aa6 100644 --- a/providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py @@ -1,3 +1,5 @@ +"""Utilities for Elastic mock""" + # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -38,7 +40,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""Utilities for Elastic mock""" import base64 import random import string diff --git a/providers/tests/elasticsearch/log/test_es_json_formatter.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_json_formatter.py similarity index 100% rename from providers/tests/elasticsearch/log/test_es_json_formatter.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_json_formatter.py diff --git a/providers/tests/elasticsearch/log/test_es_response.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_response.py similarity index 100% rename from providers/tests/elasticsearch/log/test_es_response.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_response.py diff --git a/providers/tests/elasticsearch/log/test_es_task_handler.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py similarity index 99% rename from providers/tests/elasticsearch/log/test_es_task_handler.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py index f6b3f79395009..af17f151b9be0 100644 --- a/providers/tests/elasticsearch/log/test_es_task_handler.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py @@ -43,9 +43,9 @@ from airflow.utils import timezone from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.timezone import datetime +from provider_tests.elasticsearch.log.elasticmock import elasticmock +from provider_tests.elasticsearch.log.elasticmock.utilities import SearchFailedException -from providers.tests.elasticsearch.log.elasticmock import elasticmock -from providers.tests.elasticsearch.log.elasticmock.utilities import SearchFailedException from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS diff --git a/providers/tests/dbt/cloud/triggers/__init__.py b/providers/elasticsearch/tests/system/elasticsearch/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/triggers/__init__.py rename to providers/elasticsearch/tests/system/elasticsearch/__init__.py diff --git a/providers/tests/system/elasticsearch/example_elasticsearch_query.py b/providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py similarity index 100% rename from providers/tests/system/elasticsearch/example_elasticsearch_query.py rename to providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py diff --git a/providers/github/README.rst b/providers/github/README.rst new file mode 100644 index 0000000000000..fae79eec7dc6d --- /dev/null +++ b/providers/github/README.rst @@ -0,0 +1,62 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-github`` + +Release: ``2.8.0`` + + +`GitHub `__ + + +Provider package +---------------- + +This is a provider package for ``github`` provider. All classes for this provider package +are in ``airflow.providers.github`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-github`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``PyGithub`` ``>=2.1.1`` +================== ================== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/github/.latest-doc-only-change.txt b/providers/github/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/github/.latest-doc-only-change.txt rename to providers/github/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/github/CHANGELOG.rst b/providers/github/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/github/CHANGELOG.rst rename to providers/github/docs/changelog.rst diff --git a/docs/apache-airflow-providers-github/commits.rst b/providers/github/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-github/commits.rst rename to providers/github/docs/commits.rst diff --git a/docs/apache-airflow-providers-github/connections/github.rst b/providers/github/docs/connections/github.rst similarity index 100% rename from docs/apache-airflow-providers-github/connections/github.rst rename to providers/github/docs/connections/github.rst diff --git a/docs/apache-airflow-providers-github/index.rst b/providers/github/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-github/index.rst rename to providers/github/docs/index.rst diff --git a/docs/apache-airflow-providers-github/installing-providers-from-sources.rst b/providers/github/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-github/installing-providers-from-sources.rst rename to providers/github/docs/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-github/operators/index.rst b/providers/github/docs/operators/index.rst similarity index 90% rename from docs/apache-airflow-providers-github/operators/index.rst rename to providers/github/docs/operators/index.rst index 448fb8dc3dc20..e8d3126f90bbc 100644 --- a/docs/apache-airflow-providers-github/operators/index.rst +++ b/providers/github/docs/operators/index.rst @@ -33,7 +33,7 @@ You can further process the result using An example of Listing all Repositories owned by a user, **client.get_user().get_repos()** can be implemented as following: -.. 
exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_operator_list_repos_github] @@ -43,7 +43,7 @@ An example of Listing all Repositories owned by a user, **client.get_user().get_ An example of Listing Tags in a Repository, **client.get_repo(full_name_or_id='apache/airflow').get_tags()** can be implemented as following: -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_operator_list_tags_github] @@ -64,7 +64,7 @@ a Tag in `GitHub `__. An example for tag **v1.0**: -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_tag_sensor_github] @@ -73,7 +73,7 @@ An example for tag **v1.0**: Similar Functionality can be achieved by directly using :class:`~from airflow.providers.github.sensors.github.GithubSensor`. -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_sensor_github] diff --git a/docs/apache-airflow-providers-github/security.rst b/providers/github/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-github/security.rst rename to providers/github/docs/security.rst diff --git a/providers/src/airflow/providers/github/provider.yaml b/providers/github/provider.yaml similarity index 96% rename from providers/src/airflow/providers/github/provider.yaml rename to providers/github/provider.yaml index f154c68f47df5..6087dc01f3304 100644 --- a/providers/src/airflow/providers/github/provider.yaml +++ b/providers/github/provider.yaml @@ -22,10 +22,6 @@ name: Github description: | `GitHub `__ -dependencies: - - apache-airflow>=2.9.0 - - PyGithub>=2.1.1 - state: ready source-date-epoch: 1734533986 # note that those versions are maintained by release manager - do not update them manually diff --git a/providers/github/pyproject.toml b/providers/github/pyproject.toml new file mode 100644 index 0000000000000..b0e9ff3648b52 --- /dev/null +++ b/providers/github/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-github" +version = "2.8.0" +description = "Provider package apache-airflow-providers-github for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "github", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "PyGithub>=2.1.1", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-github/2.8.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-github/2.8.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.github.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.github" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/github/src/airflow/providers/github/LICENSE b/providers/github/src/airflow/providers/github/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/github/src/airflow/providers/github/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
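For orientation: the ``operators/index.rst`` hunks above re-point the GithubOperator howtos at ``providers/github/tests/system/github/example_github.py``. The list-repos pattern those howtos include looks roughly like the sketch below (the ``github_method`` and ``result_processor`` parameter names come from the provider's operator API and are assumptions here, since the example file itself is only renamed, not shown, in this diff):

import logging

from airflow.providers.github.operators.github import GithubOperator

# Calls the GithubHook client's get_user() and hands the result to result_processor,
# i.e. the client.get_user().get_repos() listing described in the howto text above.
github_list_repos = GithubOperator(
    task_id="github_list_repos",
    github_method="get_user",
    result_processor=lambda user: logging.info(list(user.get_repos())),
)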
diff --git a/providers/src/airflow/providers/github/__init__.py b/providers/github/src/airflow/providers/github/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/__init__.py rename to providers/github/src/airflow/providers/github/__init__.py diff --git a/providers/github/src/airflow/providers/github/get_provider_info.py b/providers/github/src/airflow/providers/github/get_provider_info.py new file mode 100644 index 0000000000000..37ca8ed6e950c --- /dev/null +++ b/providers/github/src/airflow/providers/github/get_provider_info.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-github", + "name": "Github", + "description": "`GitHub `__\n", + "state": "ready", + "source-date-epoch": 1734533986, + "versions": [ + "2.8.0", + "2.7.0", + "2.6.2", + "2.6.1", + "2.6.0", + "2.5.1", + "2.5.0", + "2.4.0", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.1", + "2.2.0", + "2.1.0", + "2.0.0", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Github", + "external-doc-url": "https://www.github.com/", + "tags": ["software"], + } + ], + "hooks": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.hooks.github"]} + ], + "operators": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.operators.github"]} + ], + "sensors": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.sensors.github"]} + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.github.hooks.github.GithubHook", + "connection-type": "github", + } + ], + "dependencies": ["apache-airflow>=2.9.0", "PyGithub>=2.1.1"], + } diff --git a/providers/tests/dbt/cloud/utils/__init__.py b/providers/github/src/airflow/providers/github/hooks/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/utils/__init__.py rename to providers/github/src/airflow/providers/github/hooks/__init__.py diff --git a/providers/src/airflow/providers/github/hooks/github.py b/providers/github/src/airflow/providers/github/hooks/github.py similarity index 99% rename from providers/src/airflow/providers/github/hooks/github.py rename to providers/github/src/airflow/providers/github/hooks/github.py index 6be50fd31fec4..fb2b2b0416008 100644 --- a/providers/src/airflow/providers/github/hooks/github.py +++ b/providers/github/src/airflow/providers/github/hooks/github.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING -from 
github import Github as GithubClient - from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from github import Github as GithubClient class GithubHook(BaseHook): diff --git a/providers/tests/elasticsearch/log/__init__.py b/providers/github/src/airflow/providers/github/operators/__init__.py similarity index 100% rename from providers/tests/elasticsearch/log/__init__.py rename to providers/github/src/airflow/providers/github/operators/__init__.py diff --git a/providers/src/airflow/providers/github/operators/github.py b/providers/github/src/airflow/providers/github/operators/github.py similarity index 99% rename from providers/src/airflow/providers/github/operators/github.py rename to providers/github/src/airflow/providers/github/operators/github.py index 82996d3ecedb4..3889335628d03 100644 --- a/providers/src/airflow/providers/github/operators/github.py +++ b/providers/github/src/airflow/providers/github/operators/github.py @@ -19,11 +19,10 @@ from typing import TYPE_CHECKING, Any, Callable -from github import GithubException - from airflow.exceptions import AirflowException from airflow.models import BaseOperator from airflow.providers.github.hooks.github import GithubHook +from github import GithubException if TYPE_CHECKING: try: diff --git a/providers/tests/github/__init__.py b/providers/github/src/airflow/providers/github/sensors/__init__.py similarity index 100% rename from providers/tests/github/__init__.py rename to providers/github/src/airflow/providers/github/sensors/__init__.py diff --git a/providers/src/airflow/providers/github/sensors/github.py b/providers/github/src/airflow/providers/github/sensors/github.py similarity index 99% rename from providers/src/airflow/providers/github/sensors/github.py rename to providers/github/src/airflow/providers/github/sensors/github.py index cacaef9e32fb8..b40420ce207ab 100644 --- a/providers/src/airflow/providers/github/sensors/github.py +++ b/providers/github/src/airflow/providers/github/sensors/github.py @@ -19,11 +19,10 @@ from typing import TYPE_CHECKING, Any, Callable -from github import GithubException - from airflow.exceptions import AirflowException from airflow.providers.github.hooks.github import GithubHook from airflow.sensors.base import BaseSensorOperator +from github import GithubException if TYPE_CHECKING: try: diff --git a/providers/github/tests/conftest.py b/providers/github/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/github/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/github/tests/provider_tests/__init__.py b/providers/github/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/github/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/github/hooks/__init__.py b/providers/github/tests/provider_tests/github/__init__.py similarity index 100% rename from providers/tests/github/hooks/__init__.py rename to providers/github/tests/provider_tests/github/__init__.py diff --git a/providers/tests/github/operators/__init__.py b/providers/github/tests/provider_tests/github/hooks/__init__.py similarity index 100% rename from providers/tests/github/operators/__init__.py rename to providers/github/tests/provider_tests/github/hooks/__init__.py diff --git a/providers/tests/github/hooks/test_github.py b/providers/github/tests/provider_tests/github/hooks/test_github.py similarity index 100% rename from providers/tests/github/hooks/test_github.py rename to providers/github/tests/provider_tests/github/hooks/test_github.py diff --git a/providers/tests/github/sensors/__init__.py b/providers/github/tests/provider_tests/github/operators/__init__.py similarity index 100% rename from providers/tests/github/sensors/__init__.py rename to providers/github/tests/provider_tests/github/operators/__init__.py diff --git a/providers/tests/github/operators/test_github.py b/providers/github/tests/provider_tests/github/operators/test_github.py similarity index 100% rename from providers/tests/github/operators/test_github.py rename to providers/github/tests/provider_tests/github/operators/test_github.py diff --git a/providers/tests/opensearch/__init__.py b/providers/github/tests/provider_tests/github/sensors/__init__.py similarity index 100% rename from providers/tests/opensearch/__init__.py rename to providers/github/tests/provider_tests/github/sensors/__init__.py diff --git a/providers/tests/github/sensors/test_github.py b/providers/github/tests/provider_tests/github/sensors/test_github.py similarity index 100% rename from providers/tests/github/sensors/test_github.py rename to 
providers/github/tests/provider_tests/github/sensors/test_github.py diff --git a/providers/tests/opensearch/hooks/__init__.py b/providers/github/tests/system/github/__init__.py similarity index 100% rename from providers/tests/opensearch/hooks/__init__.py rename to providers/github/tests/system/github/__init__.py diff --git a/providers/tests/system/github/example_github.py b/providers/github/tests/system/github/example_github.py similarity index 99% rename from providers/tests/system/github/example_github.py rename to providers/github/tests/system/github/example_github.py index b076647177cc9..c3d1d3a4c7e4e 100644 --- a/providers/tests/system/github/example_github.py +++ b/providers/github/tests/system/github/example_github.py @@ -21,12 +21,11 @@ from datetime import datetime from typing import Any -from github import GithubException - from airflow.exceptions import AirflowException from airflow.models.dag import DAG from airflow.providers.github.operators.github import GithubOperator from airflow.providers.github.sensors.github import GithubSensor, GithubTagSensor +from github import GithubException ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID") DAG_ID = "example_github_operator" diff --git a/providers/mongo/src/airflow/providers/mongo/hooks/mongo.py b/providers/mongo/src/airflow/providers/mongo/hooks/mongo.py index 1deb7d01a268b..e69bdbbc9be39 100644 --- a/providers/mongo/src/airflow/providers/mongo/hooks/mongo.py +++ b/providers/mongo/src/airflow/providers/mongo/hooks/mongo.py @@ -193,6 +193,9 @@ def get_conn(self) -> MongoClient: self.client = MongoClient(self.uri, **options) return self.client + def close(self): + self.client.close() + def _create_uri(self) -> str: """ Create URI string from the given credentials. diff --git a/providers/mongo/tests/provider_tests/mongo/hooks/test_mongo.py b/providers/mongo/tests/provider_tests/mongo/hooks/test_mongo.py index 9207fdce71f6e..0d646e6e6cbc6 100644 --- a/providers/mongo/tests/provider_tests/mongo/hooks/test_mongo.py +++ b/providers/mongo/tests/provider_tests/mongo/hooks/test_mongo.py @@ -97,6 +97,9 @@ def setup_method(self): self.hook = MongoHookTest(mongo_conn_id="mongo_default") self.conn = self.hook.get_conn() + def teardown_method(self): + self.conn.close() + def test_mongo_conn_id(self): # Use default "mongo_default" assert MongoHook().mongo_conn_id == "mongo_default" diff --git a/providers/opensearch/README.rst b/providers/opensearch/README.rst new file mode 100644 index 0000000000000..640015fdeacbb --- /dev/null +++ b/providers/opensearch/README.rst @@ -0,0 +1,62 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. 
IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-opensearch`` + +Release: ``1.6.0`` + + +`OpenSearch `__ + + +Provider package +---------------- + +This is a provider package for ``opensearch`` provider. All classes for this provider package +are in ``airflow.providers.opensearch`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-opensearch`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``opensearch-py`` ``>=2.2.0`` +================== ================== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/opensearch/.latest-doc-only-change.txt b/providers/opensearch/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/opensearch/.latest-doc-only-change.txt rename to providers/opensearch/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/opensearch/CHANGELOG.rst b/providers/opensearch/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/opensearch/CHANGELOG.rst rename to providers/opensearch/docs/changelog.rst diff --git a/docs/apache-airflow-providers-opensearch/commits.rst b/providers/opensearch/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/commits.rst rename to providers/opensearch/docs/commits.rst diff --git a/docs/apache-airflow-providers-opensearch/configurations-ref.rst b/providers/opensearch/docs/configurations-ref.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/configurations-ref.rst rename to providers/opensearch/docs/configurations-ref.rst diff --git a/docs/apache-airflow-providers-opensearch/connections/index.rst b/providers/opensearch/docs/connections/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/connections/index.rst rename to providers/opensearch/docs/connections/index.rst diff --git a/docs/apache-airflow-providers-opensearch/connections/opensearch.rst b/providers/opensearch/docs/connections/opensearch.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/connections/opensearch.rst rename to providers/opensearch/docs/connections/opensearch.rst diff --git a/docs/apache-airflow-providers-opensearch/index.rst b/providers/opensearch/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/index.rst rename to providers/opensearch/docs/index.rst diff --git a/docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst b/providers/opensearch/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst rename to providers/opensearch/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/opensearch/opensearch.png 
b/providers/opensearch/docs/integration-logos/opensearch.png similarity index 100% rename from docs/integration-logos/opensearch/opensearch.png rename to providers/opensearch/docs/integration-logos/opensearch.png diff --git a/docs/apache-airflow-providers-opensearch/logging/index.rst b/providers/opensearch/docs/logging/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/logging/index.rst rename to providers/opensearch/docs/logging/index.rst diff --git a/docs/apache-airflow-providers-opensearch/operators/index.rst b/providers/opensearch/docs/operators/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/operators/index.rst rename to providers/opensearch/docs/operators/index.rst diff --git a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst b/providers/opensearch/docs/operators/opensearch.rst similarity index 88% rename from docs/apache-airflow-providers-opensearch/operators/opensearch.rst rename to providers/opensearch/docs/operators/opensearch.rst index b85a014ebefee..fc66fa0548842 100644 --- a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst +++ b/providers/opensearch/docs/operators/opensearch.rst @@ -35,7 +35,7 @@ to create a new index in an OpenSearch domain. -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_create_index] :dedent: 4 @@ -50,7 +50,7 @@ Add a Document to an Index on OpenSearch Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchAddDocumentOperator` to add single documents to an OpenSearch Index -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_add_document] :dedent: 4 @@ -65,7 +65,7 @@ Run a query against an OpenSearch Index Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchQueryOperator` to run a query against an OpenSearch index. -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. 
exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_query] :dedent: 4 diff --git a/docs/apache-airflow-providers-opensearch/security.rst b/providers/opensearch/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/security.rst rename to providers/opensearch/docs/security.rst diff --git a/providers/src/airflow/providers/opensearch/provider.yaml b/providers/opensearch/provider.yaml similarity index 97% rename from providers/src/airflow/providers/opensearch/provider.yaml rename to providers/opensearch/provider.yaml index 04ce301b905a6..288d39dca4a56 100644 --- a/providers/src/airflow/providers/opensearch/provider.yaml +++ b/providers/opensearch/provider.yaml @@ -36,16 +36,12 @@ versions: - 1.1.0 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - opensearch-py>=2.2.0 - integrations: - integration-name: OpenSearch external-doc-url: https://opensearch.org/ how-to-guide: - /docs/apache-airflow-providers-opensearch/operators/opensearch.rst - logo: /integration-logos/opensearch/opensearch.png + logo: /docs/integration-logos/opensearch.png tags: [software] hooks: diff --git a/providers/opensearch/pyproject.toml b/providers/opensearch/pyproject.toml new file mode 100644 index 0000000000000..3015634ad4293 --- /dev/null +++ b/providers/opensearch/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-opensearch" +version = "1.6.0" +description = "Provider package apache-airflow-providers-opensearch for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "opensearch", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "opensearch-py>=2.2.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-opensearch/1.6.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-opensearch/1.6.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.opensearch.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.opensearch" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/opensearch/src/airflow/providers/opensearch/LICENSE b/providers/opensearch/src/airflow/providers/opensearch/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/opensearch/src/airflow/providers/opensearch/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
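The apache_airflow_provider entry point declared in the generated pyproject.toml above is what makes the relocated provider package discoverable at runtime: Airflow's providers manager scans installed distributions for that entry point group and calls the referenced get_provider_info() callable. The following is a minimal, illustrative sketch of that discovery step using only the standard library; it assumes the provider distribution is installed and a Python 3.10+ interpreter (for the group= filter), and it is not a copy of Airflow's actual ProvidersManager code.

from __future__ import annotations

from importlib.metadata import entry_points


def discover_provider_info() -> dict[str, dict]:
    """Collect metadata from every installed "apache_airflow_provider" entry point."""
    provider_infos: dict[str, dict] = {}
    for entry_point in entry_points(group="apache_airflow_provider"):
        # For this package the entry point value is
        # "airflow.providers.opensearch.get_provider_info:get_provider_info",
        # so load() returns that function and calling it yields the metadata dict.
        get_provider_info = entry_point.load()
        info = get_provider_info()
        provider_infos[info["package-name"]] = info
    return provider_infos


if __name__ == "__main__":
    # Hypothetical usage: print the latest released version of the opensearch provider.
    info = discover_provider_info().get("apache-airflow-providers-opensearch", {})
    print(info.get("versions", ["unknown"])[0])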
diff --git a/providers/src/airflow/providers/opensearch/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/__init__.py diff --git a/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py b/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py new file mode 100644 index 0000000000000..feb4f5639507a --- /dev/null +++ b/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py @@ -0,0 +1,220 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-opensearch", + "name": "OpenSearch", + "description": "`OpenSearch `__\n", + "state": "ready", + "source-date-epoch": 1734536033, + "versions": [ + "1.6.0", + "1.5.0", + "1.4.0", + "1.3.0", + "1.2.1", + "1.2.0", + "1.1.2", + "1.1.1", + "1.1.0", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "OpenSearch", + "external-doc-url": "https://opensearch.org/", + "how-to-guide": ["/docs/apache-airflow-providers-opensearch/operators/opensearch.rst"], + "logo": "/docs/integration-logos/opensearch.png", + "tags": ["software"], + } + ], + "hooks": [ + { + "integration-name": "OpenSearch", + "python-modules": ["airflow.providers.opensearch.hooks.opensearch"], + } + ], + "operators": [ + { + "integration-name": "OpenSearch", + "python-modules": ["airflow.providers.opensearch.operators.opensearch"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.opensearch.hooks.opensearch.OpenSearchHook", + "connection-type": "opensearch", + } + ], + "logging": ["airflow.providers.opensearch.log.os_task_handler.OpensearchTaskHandler"], + "config": { + "opensearch": { + "description": None, + "options": { + "host": { + "description": "Opensearch host\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "", + }, + "port": { + "description": "The port number of Opensearch host\n", + "version_added": "1.5.0", + "type": "integer", + "example": None, + "default": "", + }, + "username": { + "description": "The username for connecting to Opensearch\n", + "version_added": "1.5.0", + "type": "string", + "sensitive": True, + "example": None, + "default": "", + }, + "password": { + "description": "The password for connecting to Opensearch\n", + "version_added": "1.5.0", + "type": "string", + "sensitive": True, + "example": None, + 
"default": "", + }, + "log_id_template": { + "description": "Format of the log_id, which is used to query for a given tasks logs\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "is_template": True, + "default": "{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}", + }, + "end_of_log_mark": { + "description": "Used to mark the end of a log stream for a task\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "end_of_log", + }, + "write_stdout": { + "description": "Write the task logs to the stdout of the worker, rather than the default files\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "json_format": { + "description": "Instead of the default log formatter, write the log lines as JSON\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "json_fields": { + "description": "Log fields to also attach to the json output, if enabled\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "asctime, filename, lineno, levelname, message", + }, + "host_field": { + "description": "The field where host name is stored (normally either `host` or `host.name`)\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "host", + }, + "offset_field": { + "description": "The field where offset is stored (normally either `offset` or `log.offset`)\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "offset", + }, + "index_patterns": { + "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\nThe index_patterns_callable takes precedence over this.\n", + "version_added": "1.5.0", + "type": "string", + "example": "something-*", + "default": "_all", + }, + "index_patterns_callable": { + "description": "A string representing the full path to the Python callable path which accept TI object and\nreturn comma separated list of index patterns. 
This will takes precedence over index_patterns.\n", + "version_added": "1.5.0", + "type": "string", + "example": "module.callable", + "default": "", + }, + }, + }, + "opensearch_configs": { + "description": None, + "options": { + "http_compress": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "use_ssl": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "verify_certs": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ssl_assert_hostname": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ssl_show_warn": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ca_certs": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "", + }, + }, + }, + }, + "dependencies": ["apache-airflow>=2.9.0", "opensearch-py>=2.2.0"], + } diff --git a/providers/tests/opensearch/log/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/hooks/__init__.py similarity index 100% rename from providers/tests/opensearch/log/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/hooks/__init__.py diff --git a/providers/src/airflow/providers/opensearch/hooks/opensearch.py b/providers/opensearch/src/airflow/providers/opensearch/hooks/opensearch.py similarity index 100% rename from providers/src/airflow/providers/opensearch/hooks/opensearch.py rename to providers/opensearch/src/airflow/providers/opensearch/hooks/opensearch.py diff --git a/providers/tests/opensearch/operators/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/log/__init__.py similarity index 100% rename from providers/tests/opensearch/operators/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/log/__init__.py diff --git a/providers/src/airflow/providers/opensearch/log/os_json_formatter.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_json_formatter.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_json_formatter.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_json_formatter.py diff --git a/providers/src/airflow/providers/opensearch/log/os_response.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_response.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_response.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_response.py diff --git a/providers/src/airflow/providers/opensearch/log/os_task_handler.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_task_handler.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_task_handler.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_task_handler.py diff --git a/providers/tests/system/apache/drill/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/operators/__init__.py similarity index 100% rename from providers/tests/system/apache/drill/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/operators/__init__.py diff --git a/providers/src/airflow/providers/opensearch/operators/opensearch.py b/providers/opensearch/src/airflow/providers/opensearch/operators/opensearch.py 
similarity index 100% rename from providers/src/airflow/providers/opensearch/operators/opensearch.py rename to providers/opensearch/src/airflow/providers/opensearch/operators/opensearch.py diff --git a/providers/src/airflow/providers/opensearch/version_compat.py b/providers/opensearch/src/airflow/providers/opensearch/version_compat.py similarity index 100% rename from providers/src/airflow/providers/opensearch/version_compat.py rename to providers/opensearch/src/airflow/providers/opensearch/version_compat.py diff --git a/providers/opensearch/tests/conftest.py b/providers/opensearch/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/opensearch/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/opensearch/tests/provider_tests/__init__.py b/providers/opensearch/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/opensearch/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/system/apache/druid/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/__init__.py similarity index 100% rename from providers/tests/system/apache/druid/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/__init__.py diff --git a/providers/tests/opensearch/conftest.py b/providers/opensearch/tests/provider_tests/opensearch/conftest.py similarity index 100% rename from providers/tests/opensearch/conftest.py rename to providers/opensearch/tests/provider_tests/opensearch/conftest.py diff --git a/providers/tests/system/dbt/cloud/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/hooks/__init__.py similarity index 100% rename from providers/tests/system/dbt/cloud/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/hooks/__init__.py diff --git a/providers/tests/opensearch/hooks/test_opensearch.py b/providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py similarity index 99% rename from providers/tests/opensearch/hooks/test_opensearch.py rename to providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py index 53364a50072f7..79ca0a34a363d 100644 --- a/providers/tests/opensearch/hooks/test_opensearch.py +++ b/providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py @@ -20,14 +20,13 @@ from unittest import mock import pytest - -opensearchpy = pytest.importorskip("opensearchpy") from opensearchpy import Urllib3HttpConnection from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/system/elasticsearch/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/log/__init__.py similarity index 100% rename from providers/tests/system/elasticsearch/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/log/__init__.py diff --git a/providers/tests/opensearch/log/test_os_json_formatter.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py similarity index 99% rename from providers/tests/opensearch/log/test_os_json_formatter.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py index bae039e199bdb..e85f02b56beea 100644 --- a/providers/tests/opensearch/log/test_os_json_formatter.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py @@ -24,12 +24,11 @@ import pendulum import pytest -opensearchpy = pytest.importorskip("opensearchpy") - from airflow.providers.opensearch.log.os_task_handler import ( OpensearchJSONFormatter, ) +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/opensearch/log/test_os_response.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py similarity index 99% rename from providers/tests/opensearch/log/test_os_response.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py index 5b2f36d3c21b9..31af433754ff4 100644 --- a/providers/tests/opensearch/log/test_os_response.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py @@ -22,8 +22,6 @@ import pytest -opensearchpy = pytest.importorskip("opensearchpy") - from 
airflow.providers.opensearch.log.os_response import ( AttributeList, Hit, @@ -32,6 +30,7 @@ ) from airflow.providers.opensearch.log.os_task_handler import OpensearchTaskHandler +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/opensearch/log/test_os_task_handler.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py similarity index 99% rename from providers/tests/opensearch/log/test_os_task_handler.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py index 4d7c9eb53a790..cbe9f086950cc 100644 --- a/providers/tests/opensearch/log/test_os_task_handler.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py @@ -29,8 +29,6 @@ import pendulum import pytest - -opensearchpy = pytest.importorskip("opensearchpy") from opensearchpy.exceptions import NotFoundError from airflow.configuration import conf @@ -43,12 +41,13 @@ from airflow.utils import timezone from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.timezone import datetime +from provider_tests.opensearch.conftest import MockClient -from providers.tests.opensearch.conftest import MockClient from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test AIRFLOW_SOURCES_ROOT_DIR = Path(__file__).parents[4].resolve() diff --git a/providers/tests/system/github/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/operators/__init__.py similarity index 100% rename from providers/tests/system/github/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/operators/__init__.py diff --git a/providers/tests/opensearch/operators/test_opensearch.py b/providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py similarity index 99% rename from providers/tests/opensearch/operators/test_opensearch.py rename to providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py index 63ad7eafe48de..fd42a91a71dd6 100644 --- a/providers/tests/opensearch/operators/test_opensearch.py +++ b/providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py @@ -17,9 +17,6 @@ from __future__ import annotations import pytest - -opensearchpy = pytest.importorskip("opensearchpy") - from opensearchpy import Document, Keyword, Text from airflow.models import DAG @@ -30,6 +27,7 @@ ) from airflow.utils.timezone import datetime +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/system/opensearch/__init__.py b/providers/opensearch/tests/system/opensearch/__init__.py similarity index 100% rename from providers/tests/system/opensearch/__init__.py rename to providers/opensearch/tests/system/opensearch/__init__.py diff --git a/providers/tests/system/opensearch/example_opensearch.py b/providers/opensearch/tests/system/opensearch/example_opensearch.py similarity index 100% rename from providers/tests/system/opensearch/example_opensearch.py rename to providers/opensearch/tests/system/opensearch/example_opensearch.py diff --git a/providers/src/airflow/providers/amazon/aws/links/datasync.py b/providers/src/airflow/providers/amazon/aws/links/datasync.py new file mode 100644 index 0000000000000..f9a643aa99e18 --- /dev/null +++ 
b/providers/src/airflow/providers/amazon/aws/links/datasync.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from airflow.providers.amazon.aws.links.base_aws import BASE_AWS_CONSOLE_LINK, BaseAwsLink + + +class DataSyncTaskLink(BaseAwsLink): + """Helper class for constructing AWS DataSync Task console link.""" + + name = "DataSync Task" + key = "datasync_task" + format_str = BASE_AWS_CONSOLE_LINK + "/datasync/home?region={region_name}#" + "/tasks/{task_id}" + + +class DataSyncTaskExecutionLink(BaseAwsLink): + """Helper class for constructing AWS DataSync TaskExecution console link.""" + + name = "DataSync Task Execution" + key = "datasync_task_execution" + format_str = ( + BASE_AWS_CONSOLE_LINK + "/datasync/home?region={region_name}#/history/{task_id}/{task_execution_id}" + ) diff --git a/providers/src/airflow/providers/amazon/aws/operators/datasync.py b/providers/src/airflow/providers/amazon/aws/operators/datasync.py index d5c97843f16fb..7b2b7282efca7 100644 --- a/providers/src/airflow/providers/amazon/aws/operators/datasync.py +++ b/providers/src/airflow/providers/amazon/aws/operators/datasync.py @@ -25,6 +25,7 @@ from airflow.exceptions import AirflowException, AirflowTaskTimeout from airflow.providers.amazon.aws.hooks.datasync import DataSyncHook +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskExecutionLink, DataSyncTaskLink from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator from airflow.providers.amazon.aws.utils.mixins import aws_template_fields @@ -130,6 +131,8 @@ class DataSyncOperator(AwsBaseOperator[DataSyncHook]): } ui_color = "#44b5e2" + operator_extra_links = (DataSyncTaskLink(), DataSyncTaskExecutionLink()) + def __init__( self, *, @@ -215,6 +218,23 @@ def execute(self, context: Context): if not self.task_arn: raise AirflowException("DataSync TaskArn could not be identified or created.") + task_id = self.task_arn.split("/")[-1] + + task_url = DataSyncTaskLink.format_str.format( + aws_domain=DataSyncTaskLink.get_aws_domain(self.hook.conn_partition), + region_name=self.hook.conn_region_name, + task_id=task_id, + ) + + DataSyncTaskLink.persist( + context=context, + operator=self, + region_name=self.hook.conn_region_name, + aws_partition=self.hook.conn_partition, + task_id=task_id, + ) + self.log.info("You can view this DataSync task at %s", task_url) + self.log.info("Using DataSync TaskArn %s", self.task_arn) # Update the DataSync Task @@ -222,7 +242,7 @@ def execute(self, context: Context): self._update_datasync_task() # Execute the DataSync Task - self._execute_datasync_task() + self._execute_datasync_task(context=context) if not self.task_execution_arn: raise AirflowException("Nothing was executed") @@ -327,7 +347,7 @@ def 
_update_datasync_task(self) -> None: self.hook.update_task(self.task_arn, **self.update_task_kwargs) self.log.info("Updated TaskArn %s", self.task_arn) - def _execute_datasync_task(self) -> None: + def _execute_datasync_task(self, context: Context) -> None: """Create and monitor an AWS DataSync TaskExecution for a Task.""" if not self.task_arn: raise AirflowException("Missing TaskArn") @@ -337,6 +357,24 @@ def _execute_datasync_task(self) -> None: self.task_execution_arn = self.hook.start_task_execution(self.task_arn, **self.task_execution_kwargs) self.log.info("Started TaskExecutionArn %s", self.task_execution_arn) + # Create the execution extra link + execution_url = DataSyncTaskExecutionLink.format_str.format( + aws_domain=DataSyncTaskExecutionLink.get_aws_domain(self.hook.conn_partition), + region_name=self.hook.conn_region_name, + task_id=self.task_arn.split("/")[-1], + task_execution_id=self.task_execution_arn.split("/")[-1], + ) + DataSyncTaskExecutionLink.persist( + context=context, + operator=self, + region_name=self.hook.conn_region_name, + aws_partition=self.hook.conn_partition, + task_id=self.task_arn.split("/")[-1], + task_execution_id=self.task_execution_arn.split("/")[-1], + ) + + self.log.info("You can view this DataSync task execution at %s", execution_url) + if not self.wait_for_completion: return diff --git a/providers/src/airflow/providers/amazon/provider.yaml b/providers/src/airflow/providers/amazon/provider.yaml index 824c9b08dee66..43569a28827ab 100644 --- a/providers/src/airflow/providers/amazon/provider.yaml +++ b/providers/src/airflow/providers/amazon/provider.yaml @@ -889,6 +889,8 @@ extra-links: - airflow.providers.amazon.aws.links.step_function.StateMachineExecutionsDetailsLink - airflow.providers.amazon.aws.links.comprehend.ComprehendPiiEntitiesDetectionLink - airflow.providers.amazon.aws.links.comprehend.ComprehendDocumentClassifierLink + - airflow.providers.amazon.aws.links.datasync.DataSyncTaskLink + - airflow.providers.amazon.aws.links.datasync.DataSyncTaskExecutionLink connection-types: diff --git a/providers/src/airflow/providers/google/CHANGELOG.rst b/providers/src/airflow/providers/google/CHANGELOG.rst index 8b8a8bc83ff9d..de464fb4849da 100644 --- a/providers/src/airflow/providers/google/CHANGELOG.rst +++ b/providers/src/airflow/providers/google/CHANGELOG.rst @@ -27,6 +27,37 @@ Changelog --------- +13.0.0 +...... + +.. note:: + This release of the provider is only available for Airflow 2.9+ as explained in the + `Apache Airflow providers support policy `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +.. warning:: + Deprecated classes, parameters and features have been removed from the Google provider package. + The following breaking changes were introduced: + + * Operators + + * Removed ``AutoMLBatchPredictOperator``. Please use the operators from ``airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job`` instead + * Removed ``DataflowStartSqlJobOperator``. Please use ``DataflowStartYamlJobOperator`` instead + * Removed ``PromptLanguageModelOperator``. Please use ``TextGenerationModelPredictOperator`` instead + * Removed ``GenerateTextEmbeddingsOperator``. Please use ``TextEmbeddingModelGetEmbeddingsOperator`` instead + * Removed ``PromptMultimodalModelOperator``. Please use ``GenerativeModelGenerateContentOperator`` instead + * Removed ``PromptMultimodalModelWithMediaOperator``. Please use ``GenerativeModelGenerateContentOperator`` instead + + * Hooks + + * Removed ``GenerativeModelHook.prompt_multimodal_model_with_media()``.
Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.prompt_multimodal_model()``. Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.get_generative_model_part()``. Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.prompt_language_model()``. Please use ``GenerativeModelHook.text_generation_model_predict()`` instead + * Removed ``GenerativeModelHook.generate_text_embeddings()``. Please use ``GenerativeModelHook.text_generation_model_predict()`` instead + 12.0.0 ...... diff --git a/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py b/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py index 7e506641484b3..8f06d4974e137 100644 --- a/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +++ b/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py @@ -24,7 +24,7 @@ from typing import TYPE_CHECKING import vertexai -from vertexai.generative_models import GenerativeModel, Part +from vertexai.generative_models import GenerativeModel from vertexai.language_models import TextEmbeddingModel, TextGenerationModel from vertexai.preview.caching import CachedContent from vertexai.preview.evaluation import EvalResult, EvalTask @@ -100,186 +100,6 @@ def get_cached_context_model( cached_context_model = preview_generative_model.from_cached_content(cached_content) return cached_context_model - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="Part objects included in contents parameter of " - "airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - def get_generative_model_part(self, content_gcs_path: str, content_mime_type: str | None = None) -> Part: - """Return a Generative Model Part object.""" - part = Part.from_uri(content_gcs_path, mime_type=content_mime_type) - return part - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.text_generation_model_predict", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_language_model( - self, - prompt: str, - pretrained_model: str, - temperature: float, - max_output_tokens: int, - top_p: float, - top_k: int, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response. - :param pretrained_model: A pre-trained model optimized for performing natural - language tasks such as classification, summarization, extraction, content - creation, and ideation. - :param temperature: Temperature controls the degree of randomness in token - selection. - :param max_output_tokens: Token limit determines the maximum amount of text - output. - :param top_p: Tokens are selected from most probable to least until the sum - of their probabilities equals the top_p value. Defaults to 0.8. 
- :param top_k: A top_k of 1 means the selected token is the most probable - among all tokens. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - parameters = { - "temperature": temperature, - "max_output_tokens": max_output_tokens, - "top_p": top_p, - "top_k": top_k, - } - - model = self.get_text_generation_model(pretrained_model) - - response = model.predict( - prompt=prompt, - **parameters, - ) - return response.text - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.text_embedding_model_get_embeddings", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def generate_text_embeddings( - self, - prompt: str, - pretrained_model: str, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> list: - """ - Use the Vertex AI PaLM API to generate text embeddings. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response. - :param pretrained_model: A pre-trained model optimized for generating text embeddings. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - model = self.get_text_embedding_model(pretrained_model) - - response = model.get_embeddings([prompt])[0] # single prompt - - return response.values - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_multimodal_model( - self, - prompt: str, - location: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro", - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model(pretrained_model) - response = model.generate_content( - contents=[prompt], generation_config=generation_config, safety_settings=safety_settings - ) - - return response.text - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." 
- "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_multimodal_model_with_media( - self, - prompt: str, - location: str, - media_gcs_path: str, - mime_type: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro-vision", - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro-vision`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param media_gcs_path: A GCS path to a content file such as an image or a video. - Can be passed to the multi-modal model as part of the prompt. Used with vision models. - :param mime_type: Validates the media type presented by the file in the media_gcs_path. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model(pretrained_model) - part = self.get_generative_model_part(media_gcs_path, mime_type) - response = model.generate_content( - contents=[prompt, part], generation_config=generation_config, safety_settings=safety_settings - ) - - return response.text - @deprecated( planned_removal_date="April 09, 2025", use_instead="GenerativeModelHook.generative_model_generate_content", diff --git a/providers/src/airflow/providers/google/cloud/operators/automl.py b/providers/src/airflow/providers/google/cloud/operators/automl.py index 7ef0716615126..2a683938ed9ac 100644 --- a/providers/src/airflow/providers/google/cloud/operators/automl.py +++ b/providers/src/airflow/providers/google/cloud/operators/automl.py @@ -26,7 +26,6 @@ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.cloud.automl_v1beta1 import ( - BatchPredictResult, ColumnSpec, Dataset, Model, @@ -322,145 +321,6 @@ def execute(self, context: Context): return PredictResponse.to_dict(result) -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job", - category=AirflowProviderDeprecationWarning, -) -class AutoMLBatchPredictOperator(GoogleCloudBaseOperator): - """ - Perform a batch prediction on Google Cloud AutoML. - - .. warning:: - AutoMLBatchPredictOperator for tables, video intelligence, vision and natural language has been deprecated - and no longer available. 
Please use - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.CreateBatchPredictionJobOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.GetBatchPredictionJobOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.ListBatchPredictionJobsOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`, - instead. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:AutoMLBatchPredictOperator` - - :param project_id: ID of the Google Cloud project where model will be created if None then - default project_id is used. - :param location: The location of the project. - :param model_id: Name of the model_id requested to serve the batch prediction. - :param input_config: Required. The input configuration for batch prediction. - If a dict is provided, it must be of the same form as the protobuf message - `google.cloud.automl_v1beta1.types.BatchPredictInputConfig` - :param output_config: Required. The Configuration specifying where output predictions should be - written. If a dict is provided, it must be of the same form as the protobuf message - `google.cloud.automl_v1beta1.types.BatchPredictOutputConfig` - :param prediction_params: Additional domain-specific parameters for the predictions, - any string must be up to 25000 characters long. - :param project_id: ID of the Google Cloud project where model is located if None then - default project_id is used. - :param location: The location of the project. - :param retry: A retry object used to retry requests. If `None` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - `retry` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "model_id", - "input_config", - "output_config", - "location", - "project_id", - "impersonation_chain", - ) - operator_extra_links = (TranslationLegacyModelPredictLink(),) - - def __init__( - self, - *, - model_id: str, - input_config: dict, - output_config: dict, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - prediction_params: dict[str, str] | None = None, - metadata: MetaData = (), - timeout: float | None = None, - retry: Retry | _MethodDefault = DEFAULT, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.model_id = model_id - self.location = location - self.project_id = project_id - self.prediction_params = prediction_params - self.metadata = metadata - self.timeout = timeout - self.retry = retry - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - self.input_config = input_config - self.output_config = output_config - - @cached_property - def hook(self) -> CloudAutoMLHook: - return CloudAutoMLHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - @cached_property - def model(self) -> Model: - return self.hook.get_model( - model_id=self.model_id, - location=self.location, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - def execute(self, context: Context): - self.log.info("Fetch batch prediction.") - operation = self.hook.batch_predict( - model_id=self.model_id, - input_config=self.input_config, - output_config=self.output_config, - project_id=self.project_id, - location=self.location, - params=self.prediction_params, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - operation_result = self.hook.wait_for_operation(timeout=self.timeout, operation=operation) - result = BatchPredictResult.to_dict(operation_result) - self.log.info("Batch prediction is ready.") - project_id = self.project_id or self.hook.project_id - if project_id: - TranslationLegacyModelPredictLink.persist( - context=context, - task_instance=self, - model_id=self.model_id, - project_id=project_id, - dataset_id=self.model.dataset_id, - ) - return result - - @deprecated( planned_removal_date="September 30, 2025", use_instead="airflow.providers.google.cloud.operators.vertex_ai.dataset.CreateDatasetOperator, " diff --git a/providers/src/airflow/providers/google/cloud/operators/dataflow.py b/providers/src/airflow/providers/google/cloud/operators/dataflow.py index 3fcbc7f67b784..c881853374ead 100644 --- a/providers/src/airflow/providers/google/cloud/operators/dataflow.py +++ b/providers/src/airflow/providers/google/cloud/operators/dataflow.py @@ -28,7 +28,7 @@ from googleapiclient.errors import HttpError from airflow.configuration import conf -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.dataflow import ( DEFAULT_DATAFLOW_LOCATION, DataflowHook, @@ -40,7 +40,6 @@ TemplateJobStartTrigger, ) from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME -from airflow.providers.google.common.deprecated import deprecated from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID if TYPE_CHECKING: @@ -654,116 +653,6 @@ def on_kill(self) -> None: ) -@deprecated( - planned_removal_date="January 31, 2025", - 
use_instead="DataflowStartYamlJobOperator", - category=AirflowProviderDeprecationWarning, -) -class DataflowStartSqlJobOperator(GoogleCloudBaseOperator): - """ - Starts Dataflow SQL query. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:DataflowStartSqlJobOperator` - - .. warning:: - This operator requires ``gcloud`` command (Google Cloud SDK) must be installed on the Airflow worker - `__ - - :param job_name: The unique name to assign to the Cloud Dataflow job. - :param query: The SQL query to execute. - :param options: Job parameters to be executed. It can be a dictionary with the following keys. - - For more information, look at: - `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query - `__ - command reference - - :param location: The location of the Dataflow job (for example europe-west1) - :param project_id: The ID of the GCP project that owns the job. - If set to ``None`` or missing, the default project_id from the GCP connection is used. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud - Platform. - :param drain_pipeline: Optional, set to True if want to stop streaming job by draining it - instead of canceling during killing task instance. See: - https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "job_name", - "query", - "options", - "location", - "project_id", - "gcp_conn_id", - ) - template_fields_renderers = {"query": "sql"} - - def __init__( - self, - job_name: str, - query: str, - options: dict[str, Any], - location: str = DEFAULT_DATAFLOW_LOCATION, - project_id: str = PROVIDE_PROJECT_ID, - gcp_conn_id: str = "google_cloud_default", - drain_pipeline: bool = False, - impersonation_chain: str | Sequence[str] | None = None, - *args, - **kwargs, - ) -> None: - super().__init__(*args, **kwargs) - self.job_name = job_name - self.query = query - self.options = options - self.location = location - self.project_id = project_id - self.gcp_conn_id = gcp_conn_id - self.drain_pipeline = drain_pipeline - self.impersonation_chain = impersonation_chain - self.job = None - self.hook: DataflowHook | None = None - - def execute(self, context: Context): - self.hook = DataflowHook( - gcp_conn_id=self.gcp_conn_id, - drain_pipeline=self.drain_pipeline, - impersonation_chain=self.impersonation_chain, - ) - - def set_current_job(current_job): - self.job = current_job - - job = self.hook.start_sql_job( - job_name=self.job_name, - query=self.query, - options=self.options, - location=self.location, - project_id=self.project_id, - on_new_job_callback=set_current_job, - ) - - return job - - def on_kill(self) -> None: - self.log.info("On kill.") - if self.job: - self.hook.cancel_job( - job_id=self.job.get("id"), - project_id=self.job.get("projectId"), - location=self.job.get("location"), - ) - - class DataflowStartYamlJobOperator(GoogleCloudBaseOperator): """ Launch a Dataflow YAML job and return the result. diff --git a/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py b/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py index 42e4fdc588e43..71af5659552e2 100644 --- a/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +++ b/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py @@ -31,328 +31,6 @@ from airflow.utils.context import Context -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="TextGenerationModelPredictOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptLanguageModelOperator(GoogleCloudBaseOperator): - """ - Uses the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response (templated). - :param pretrained_model: By default uses the pre-trained model `text-bison`, - optimized for performing natural language tasks such as classification, - summarization, extraction, content creation, and ideation. - :param temperature: Temperature controls the degree of randomness in token - selection. Defaults to 0.0. - :param max_output_tokens: Token limit determines the maximum amount of text - output. Defaults to 256. - :param top_p: Tokens are selected from most probable to least until the sum - of their probabilities equals the top_p value. Defaults to 0.8. - :param top_k: A top_k of 1 means the selected token is the most probable - among all tokens. Defaults to 0.4. 
- :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - pretrained_model: str = "text-bison", - temperature: float = 0.0, - max_output_tokens: int = 256, - top_p: float = 0.8, - top_k: int = 40, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.pretrained_model = pretrained_model - self.temperature = temperature - self.max_output_tokens = max_output_tokens - self.top_p = top_p - self.top_k = top_k - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - self.log.info("Submitting prompt") - response = self.hook.prompt_language_model( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="TextEmbeddingModelGetEmbeddingsOperator", - category=AirflowProviderDeprecationWarning, -) -class GenerateTextEmbeddingsOperator(GoogleCloudBaseOperator): - """ - Uses the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response (templated). - :param pretrained_model: By default uses the pre-trained model `textembedding-gecko`, - optimized for performing text embeddings. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. 
- If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - pretrained_model: str = "textembedding-gecko", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - self.log.info("Generating text embeddings") - response = self.hook.generate_text_embeddings( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="GenerativeModelGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptMultimodalModelOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response (templated). - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.generation_config = generation_config - self.safety_settings = safety_settings - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.prompt_multimodal_model( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="GenerativeModelGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptMultimodalModelWithMediaOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response (templated). - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro-vision`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param media_gcs_path: A GCS path to a media file such as an image or a video. - Can be passed to the multi-modal model as part of the prompt. Used with vision models. - :param mime_type: Validates the media type presented by the file in the media_gcs_path. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - media_gcs_path: str, - mime_type: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro-vision", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.generation_config = generation_config - self.safety_settings = safety_settings - self.pretrained_model = pretrained_model - self.media_gcs_path = media_gcs_path - self.mime_type = mime_type - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.prompt_multimodal_model_with_media( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - @deprecated( planned_removal_date="April 09, 2025", use_instead="GenerativeModelGenerateContentOperator", diff --git a/providers/tests/amazon/aws/links/test_datasync.py b/providers/tests/amazon/aws/links/test_datasync.py new file mode 100644 index 0000000000000..9ff1610ac30c8 --- /dev/null +++ b/providers/tests/amazon/aws/links/test_datasync.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskExecutionLink, DataSyncTaskLink + +from providers.tests.amazon.aws.links.test_base_aws import BaseAwsLinksTestCase + +TASK_ID = "task-0b36221bf94ad2bdd" +EXECUTION_ID = "exec-00000000000000004" + + +class TestDataSyncTaskLink(BaseAwsLinksTestCase): + link_class = DataSyncTaskLink + + def test_extra_link(self): + task_id = TASK_ID + self.assert_extra_link_url( + expected_url=(f"https://console.aws.amazon.com/datasync/home?region=us-east-1#/tasks/{TASK_ID}"), + region_name="us-east-1", + aws_partition="aws", + task_id=task_id, + ) + + +class TestDataSyncTaskExecutionLink(BaseAwsLinksTestCase): + link_class = DataSyncTaskExecutionLink + + def test_extra_link(self): + self.assert_extra_link_url( + expected_url=( + f"https://console.aws.amazon.com/datasync/home?region=us-east-1#/history/{TASK_ID}/{EXECUTION_ID}" + ), + region_name="us-east-1", + aws_partition="aws", + task_id=TASK_ID, + task_execution_id=EXECUTION_ID, + ) diff --git a/providers/tests/amazon/aws/operators/test_datasync.py b/providers/tests/amazon/aws/operators/test_datasync.py index 6b6b64caa130e..81c8ea7445dbf 100644 --- a/providers/tests/amazon/aws/operators/test_datasync.py +++ b/providers/tests/amazon/aws/operators/test_datasync.py @@ -25,6 +25,7 @@ from airflow.exceptions import AirflowException from airflow.models import DAG, DagRun, TaskInstance from airflow.providers.amazon.aws.hooks.datasync import DataSyncHook +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskLink from airflow.providers.amazon.aws.operators.datasync import DataSyncOperator from airflow.utils import timezone from airflow.utils.state import DagRunState @@ -748,6 +749,27 @@ def test_init_fails(self, mock_get_conn): # ### Check mocks: mock_get_conn.assert_not_called() + def test_task_extra_links(self, mock_get_conn): + mock_get_conn.return_value = self.client + self.set_up_operator() + + region = "us-east-1" + aws_domain = DataSyncTaskLink.get_aws_domain("aws") + task_id = self.task_arn.split("/")[-1] + + base_url = f"https://console.{aws_domain}/datasync/home?region={region}#" + task_url = f"{base_url}/tasks/{task_id}" + + with mock.patch.object(self.datasync.log, "info") as mock_logging: + result = self.datasync.execute(None) + task_execution_arn = result["TaskExecutionArn"] + execution_id = task_execution_arn.split("/")[-1] + execution_url = f"{base_url}/history/{task_id}/{execution_id}" + + assert self.datasync.task_arn == self.task_arn + mock_logging.assert_any_call("You can view this DataSync task at %s", task_url) + mock_logging.assert_any_call("You can view this DataSync task execution at %s", execution_url) + def test_execute_task(self, mock_get_conn): # ### Set up mocks: mock_get_conn.return_value = self.client diff --git a/providers/tests/cncf/kubernetes/test_template_rendering.py b/providers/tests/cncf/kubernetes/test_template_rendering.py index 95ad2bf6c2787..7c587de7b9280 100644 --- a/providers/tests/cncf/kubernetes/test_template_rendering.py +++ b/providers/tests/cncf/kubernetes/test_template_rendering.py @@ -76,7 +76,7 @@ def test_render_k8s_pod_yaml(pod_mutation_hook, create_task_instance): "op1", "test_run_id", "--subdir", - __file__, + mock.ANY, ], "name": "base", "env": [{"name": "AIRFLOW_IS_K8S_EXECUTOR_POD", "value": "True"}], diff --git a/providers/tests/cncf/kubernetes/utils/test_pod_manager.py b/providers/tests/cncf/kubernetes/utils/test_pod_manager.py index 6964e1b4fa157..24ea794c2bffd 100644 --- 
a/providers/tests/cncf/kubernetes/utils/test_pod_manager.py +++ b/providers/tests/cncf/kubernetes/utils/test_pod_manager.py @@ -404,7 +404,8 @@ def test_start_pod_raises_informative_error_on_timeout(self): startup_timeout=0, ) - def test_start_pod_startup_interval_seconds(self): + @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.time.sleep") + def test_start_pod_startup_interval_seconds(self, mock_time_sleep): pod_info_pending = mock.MagicMock(**{"status.phase": PodPhase.PENDING}) pod_info_succeeded = mock.MagicMock(**{"status.phase": PodPhase.SUCCEEDED}) @@ -414,21 +415,15 @@ def pod_state_gen(): while True: yield pod_info_succeeded - import time - - # Avoid race condition when we can run to a lot of sleeps when mock takes no time at all - original_time_sleep = time.sleep - with mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.time.sleep") as mock_time_sleep: - mock_time_sleep.side_effect = lambda _: original_time_sleep(0.2) - self.mock_kube_client.read_namespaced_pod.side_effect = pod_state_gen() - startup_check_interval = 10 # Any value is fine, as time.sleep is mocked to do almost nothing - mock_pod = MagicMock() - self.pod_manager.await_pod_start( - pod=mock_pod, - startup_timeout=60, # Never hit, any value is fine, as time.sleep is mocked to do nothing - startup_check_interval=startup_check_interval, - ) - mock_time_sleep.assert_called_with(startup_check_interval) + self.mock_kube_client.read_namespaced_pod.side_effect = pod_state_gen() + startup_check_interval = 10 # Any value is fine, as time.sleep is mocked to do nothing + mock_pod = MagicMock() + self.pod_manager.await_pod_start( + pod=mock_pod, + startup_timeout=60, # Never hit, any value is fine, as time.sleep is mocked to do nothing + startup_check_interval=startup_check_interval, + ) + mock_time_sleep.assert_called_with(startup_check_interval) assert mock_time_sleep.call_count == 2 @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.container_is_running") diff --git a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py index e745d3d655bdc..d0aecf20f92a6 100644 --- a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py +++ b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py @@ -22,8 +22,8 @@ from airflow.models.dag import DagModel from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.providers.fab.www.security import permissions +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import create_session from airflow.utils.state import DagRunState diff --git a/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py b/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py index 21741a617ea92..762958d621a58 100644 --- a/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py +++ b/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py @@ -148,61 +148,6 @@ def setup_method(self): self.hook = GenerativeModelHook(gcp_conn_id=TEST_GCP_CONN_ID) self.hook.get_credentials = self.dummy_get_credentials - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_generation_model")) - def test_prompt_language_model(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_language_model( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - 
prompt=TEST_PROMPT, - pretrained_model=TEST_LANGUAGE_PRETRAINED_MODEL, - temperature=TEST_TEMPERATURE, - max_output_tokens=TEST_MAX_OUTPUT_TOKENS, - top_p=TEST_TOP_P, - top_k=TEST_TOP_K, - ) - assert_warning("text_generation_model_predict", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_embedding_model")) - def test_generate_text_embeddings(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.generate_text_embeddings( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - pretrained_model=TEST_TEXT_EMBEDDING_MODEL, - ) - assert_warning("text_embedding_model_get_embeddings", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_prompt_multimodal_model(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_multimodal_model( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - ) - assert_warning("generative_model_generate_content", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model_part")) - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_prompt_multimodal_model_with_media(self, mock_model, mock_part) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_multimodal_model_with_media( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_VISION_PROMPT, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - pretrained_model=TEST_MULTIMODAL_VISION_MODEL, - media_gcs_path=TEST_MEDIA_GCS_PATH, - mime_type=TEST_MIME_TYPE, - ) - assert_warning("generative_model_generate_content", warnings) - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_generation_model")) def test_text_generation_model_predict(self, mock_model) -> None: with pytest.warns(AirflowProviderDeprecationWarning) as warnings: diff --git a/providers/tests/google/cloud/links/test_translate.py b/providers/tests/google/cloud/links/test_translate.py index 69c860a8c53fb..1d3822ad32d3e 100644 --- a/providers/tests/google/cloud/links/test_translate.py +++ b/providers/tests/google/cloud/links/test_translate.py @@ -22,19 +22,14 @@ # For no Pydantic environment, we need to skip the tests pytest.importorskip("google.cloud.aiplatform_v1") -from google.cloud.automl_v1beta1 import Model - -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.google.cloud.links.translate import ( TRANSLATION_BASE_LINK, TranslationDatasetListLink, TranslationLegacyDatasetLink, TranslationLegacyModelLink, - TranslationLegacyModelPredictLink, TranslationLegacyModelTrainLink, ) from airflow.providers.google.cloud.operators.automl import ( - AutoMLBatchPredictOperator, AutoMLCreateDatasetOperator, AutoMLListDatasetOperator, AutoMLTrainModelOperator, @@ -137,36 +132,3 @@ def test_get_link(self, create_task_instance_of_operator, session): ) actual_url = link.get_link(operator=ti.task, ti_key=ti.key) assert actual_url == expected_url - - -class TestTranslationLegacyModelPredictLink: - @pytest.mark.db_test - def test_get_link(self, create_task_instance_of_operator, session): - expected_url = ( - 
f"{TRANSLATION_BASE_LINK}/locations/{GCP_LOCATION}/datasets/{DATASET}/" - f"predict;modelId={MODEL}?project={GCP_PROJECT_ID}" - ) - link = TranslationLegacyModelPredictLink() - with pytest.warns(AirflowProviderDeprecationWarning): - ti = create_task_instance_of_operator( - AutoMLBatchPredictOperator, - dag_id="test_legacy_model_predict_link_dag", - task_id="test_legacy_model_predict_link_task", - model_id=MODEL, - project_id=GCP_PROJECT_ID, - location=GCP_LOCATION, - input_config="input_config", - output_config="input_config", - ) - ti.task.model = Model(dataset_id=DATASET, display_name=MODEL) - session.add(ti) - session.commit() - link.persist( - context={"ti": ti}, - task_instance=ti.task, - model_id=MODEL, - project_id=GCP_PROJECT_ID, - dataset_id=DATASET, - ) - actual_url = link.get_link(operator=ti.task, ti_key=ti.key) - assert actual_url == expected_url diff --git a/providers/tests/google/cloud/operators/test_automl.py b/providers/tests/google/cloud/operators/test_automl.py index 94dca98be917b..7ae70c83c9ed3 100644 --- a/providers/tests/google/cloud/operators/test_automl.py +++ b/providers/tests/google/cloud/operators/test_automl.py @@ -26,13 +26,12 @@ pytest.importorskip("google.cloud.aiplatform_v1") from google.api_core.gapic_v1.method import DEFAULT -from google.cloud.automl_v1beta1 import BatchPredictResult, Dataset, Model, PredictResponse +from google.cloud.automl_v1beta1 import Dataset, Model, PredictResponse from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.hooks.vertex_ai.prediction_service import PredictionServiceHook from airflow.providers.google.cloud.operators.automl import ( - AutoMLBatchPredictOperator, AutoMLCreateDatasetOperator, AutoMLDeleteDatasetOperator, AutoMLDeleteModelOperator, @@ -125,73 +124,6 @@ def test_templating(self, create_task_instance_of_operator, session): assert task.impersonation_chain == "impersonation_chain" -class TestAutoMLBatchPredictOperator: - @mock.patch("airflow.providers.google.cloud.links.translate.TranslationLegacyModelPredictLink.persist") - @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") - def test_execute(self, mock_hook, mock_link_persist): - mock_hook.return_value.batch_predict.return_value.result.return_value = BatchPredictResult() - mock_hook.return_value.extract_object_id = extract_object_id - mock_hook.return_value.wait_for_operation.return_value = BatchPredictResult() - mock_hook.return_value.get_model.return_value = mock.MagicMock(**MODEL) - mock_context = {"ti": mock.MagicMock()} - with pytest.warns(AirflowProviderDeprecationWarning): - op = AutoMLBatchPredictOperator( - model_id=MODEL_ID, - location=GCP_LOCATION, - project_id=GCP_PROJECT_ID, - input_config=INPUT_CONFIG, - output_config=OUTPUT_CONFIG, - task_id=TASK_ID, - prediction_params={}, - ) - op.execute(context=mock_context) - mock_hook.return_value.batch_predict.assert_called_once_with( - input_config=INPUT_CONFIG, - location=GCP_LOCATION, - metadata=(), - model_id=MODEL_ID, - output_config=OUTPUT_CONFIG, - params={}, - project_id=GCP_PROJECT_ID, - retry=DEFAULT, - timeout=None, - ) - mock_link_persist.assert_called_once_with( - context=mock_context, - task_instance=op, - model_id=MODEL_ID, - project_id=GCP_PROJECT_ID, - dataset_id=DATASET_ID, - ) - - @pytest.mark.db_test - def test_templating(self, create_task_instance_of_operator, session): - with pytest.warns(AirflowProviderDeprecationWarning): - ti = 
create_task_instance_of_operator( - AutoMLBatchPredictOperator, - # Templated fields - model_id="{{ 'model' }}", - input_config="{{ 'input-config' }}", - output_config="{{ 'output-config' }}", - location="{{ 'location' }}", - project_id="{{ 'project-id' }}", - impersonation_chain="{{ 'impersonation-chain' }}", - # Other parameters - dag_id="test_template_body_templating_dag", - task_id="test_template_body_templating_task", - ) - session.add(ti) - session.commit() - ti.render_templates() - task: AutoMLBatchPredictOperator = ti.task - assert task.model_id == "model" - assert task.input_config == "input-config" - assert task.output_config == "output-config" - assert task.location == "location" - assert task.project_id == "project-id" - assert task.impersonation_chain == "impersonation-chain" - - class TestAutoMLPredictOperator: @mock.patch("airflow.providers.google.cloud.links.translate.TranslationLegacyModelPredictLink.persist") @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") diff --git a/providers/tests/google/cloud/operators/test_dataflow.py b/providers/tests/google/cloud/operators/test_dataflow.py index 83b33eaccf001..89b5f9180838f 100644 --- a/providers/tests/google/cloud/operators/test_dataflow.py +++ b/providers/tests/google/cloud/operators/test_dataflow.py @@ -17,14 +17,13 @@ # under the License. from __future__ import annotations -from copy import deepcopy from unittest import mock import httplib2 import pytest from googleapiclient.errors import HttpError -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.dataflow import ( DEFAULT_DATAFLOW_LOCATION, DataflowJobStatus, @@ -34,7 +33,6 @@ DataflowDeletePipelineOperator, DataflowRunPipelineOperator, DataflowStartFlexTemplateOperator, - DataflowStartSqlJobOperator, DataflowStartYamlJobOperator, DataflowStopJobOperator, DataflowTemplatedJobStartOperator, @@ -348,40 +346,6 @@ def test_execute_with_deferrable_mode(self, mock_hook, mock_defer_method, deferr mock_defer_method.assert_called_once() -class TestDataflowStartSqlJobOperator: - @mock.patch("airflow.providers.google.cloud.operators.dataflow.DataflowHook") - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - start_sql = DataflowStartSqlJobOperator( - task_id="start_sql_query", - job_name=TEST_SQL_JOB_NAME, - query=TEST_SQL_QUERY, - options=deepcopy(TEST_SQL_OPTIONS), - location=TEST_LOCATION, - do_xcom_push=True, - ) - start_sql.execute(mock.MagicMock()) - - mock_hook.assert_called_once_with( - gcp_conn_id="google_cloud_default", - drain_pipeline=False, - impersonation_chain=None, - ) - mock_hook.return_value.start_sql_job.assert_called_once_with( - job_name=TEST_SQL_JOB_NAME, - query=TEST_SQL_QUERY, - options=TEST_SQL_OPTIONS, - location=TEST_LOCATION, - project_id=None, - on_new_job_callback=mock.ANY, - ) - start_sql.job = TEST_SQL_JOB - start_sql.on_kill() - mock_hook.return_value.cancel_job.assert_called_once_with( - job_id="test-job-id", project_id=None, location=None - ) - - class TestDataflowStartYamlJobOperator: @pytest.fixture def sync_operator(self): diff --git a/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py b/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py index 709e5d1f78402..8712830c6eee3 100644 --- a/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py +++ 
b/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py @@ -35,11 +35,7 @@ CountTokensOperator, CreateCachedContentOperator, GenerateFromCachedContentOperator, - GenerateTextEmbeddingsOperator, GenerativeModelGenerateContentOperator, - PromptLanguageModelOperator, - PromptMultimodalModelOperator, - PromptMultimodalModelWithMediaOperator, RunEvaluationOperator, SupervisedFineTuningTrainOperator, TextEmbeddingModelGetEmbeddingsOperator, @@ -59,224 +55,6 @@ def assert_warning(msg: str, warnings): assert any(msg in str(w) for w in warnings) -class TestVertexAIPromptLanguageModelOperator: - prompt = "In 10 words or less, what is Apache Airflow?" - pretrained_model = "text-bison" - temperature = 0.0 - max_output_tokens = 256 - top_p = 0.8 - top_k = 40 - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptLanguageModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("TextGenerationModelPredictOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptLanguageModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_language_model.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - ) - - -class TestVertexAIGenerateTextEmbeddingsOperator: - prompt = "In 10 words or less, what is Apache Airflow?" 
- pretrained_model = "textembedding-gecko" - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - GenerateTextEmbeddingsOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("TextEmbeddingModelGetEmbeddingsOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = GenerateTextEmbeddingsOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.generate_text_embeddings.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - ) - - -class TestVertexAIPromptMultimodalModelOperator: - prompt = "In 10 words or less, what is Apache Airflow?" - pretrained_model = "gemini-pro" - safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - } - generation_config = {"max_output_tokens": 256, "top_p": 0.8, "temperature": 0.0} - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptMultimodalModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("GenerativeModelGenerateContentOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptMultimodalModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_multimodal_model.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - ) - - -class TestVertexAIPromptMultimodalModelWithMediaOperator: - pretrained_model = "gemini-pro-vision" - vision_prompt = "In 10 words or less, describe this content." 
- media_gcs_path = "gs://download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg" - mime_type = "image/jpeg" - safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - } - generation_config = {"max_output_tokens": 256, "top_p": 0.8, "temperature": 0.0} - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptMultimodalModelWithMediaOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("GenerativeModelGenerateContentOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptMultimodalModelWithMediaOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_multimodal_model_with_media.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - ) - - class TestVertexAITextGenerationModelPredictOperator: prompt = "In 10 words or less, what is Apache Airflow?" 
pretrained_model = "text-bison" diff --git a/providers/tests/microsoft/azure/operators/test_container_instances.py b/providers/tests/microsoft/azure/operators/test_container_instances.py index 99e54663941b0..8985927ecfb18 100644 --- a/providers/tests/microsoft/azure/operators/test_container_instances.py +++ b/providers/tests/microsoft/azure/operators/test_container_instances.py @@ -453,30 +453,24 @@ def test_execute_fails_with_incorrect_restart_policy(self, aci_mock): ) @mock.patch("airflow.providers.microsoft.azure.operators.container_instances.AzureContainerInstanceHook") - def test_execute_correct_sleep_cycle(self, aci_mock): + @mock.patch("airflow.providers.microsoft.azure.operators.container_instances.time.sleep") + def test_execute_correct_sleep_cycle(self, sleep_mock, aci_mock): expected_cg1 = make_mock_container(state="Running", exit_code=0, detail_status="test") expected_cg2 = make_mock_container(state="Terminated", exit_code=0, detail_status="test") - import time - - original_time_sleep = time.sleep - with mock.patch( - "airflow.providers.microsoft.azure.operators.container_instances.time.sleep" - ) as sleep_mock: - sleep_mock.side_effect = lambda _: original_time_sleep(0.1) - aci_mock.return_value.get_state.side_effect = [expected_cg1, expected_cg1, expected_cg2] - aci_mock.return_value.exists.return_value = False + aci_mock.return_value.get_state.side_effect = [expected_cg1, expected_cg1, expected_cg2] + aci_mock.return_value.exists.return_value = False - aci = AzureContainerInstancesOperator( - ci_conn_id=None, - registry_conn_id=None, - resource_group="resource-group", - name="container-name", - image="container-image", - region="region", - task_id="task", - ) - aci.execute(None) + aci = AzureContainerInstancesOperator( + ci_conn_id=None, + registry_conn_id=None, + resource_group="resource-group", + name="container-name", + image="container-image", + region="region", + task_id="task", + ) + aci.execute(None) # sleep is called at the end of cycles. Thus, the Terminated call does not trigger sleep assert sleep_mock.call_count == 2 diff --git a/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py b/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py deleted file mode 100644 index 2ba0bf0534c59..0000000000000 --- a/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py +++ /dev/null @@ -1,149 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" -Example Airflow DAG for Google Cloud Dataflow service -""" - -from __future__ import annotations - -import os -from datetime import datetime - -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.bigquery import ( - BigQueryCreateEmptyDatasetOperator, - BigQueryCreateEmptyTableOperator, - BigQueryDeleteDatasetOperator, - BigQueryDeleteTableOperator, - BigQueryInsertJobOperator, -) -from airflow.providers.google.cloud.operators.dataflow import DataflowStartSqlJobOperator -from airflow.utils.trigger_rule import TriggerRule - -from providers.tests.system.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID - -PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID -ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default") -DAG_ID = "dataflow_sql" -LOCATION = "europe-west3" -DATAFLOW_SQL_JOB_NAME = f"{DAG_ID}_{ENV_ID}".replace("_", "-") -BQ_SQL_DATASET = f"{DAG_ID}_{ENV_ID}".replace("-", "_") -BQ_SQL_TABLE_INPUT = f"input_{ENV_ID}".replace("-", "_") -BQ_SQL_TABLE_OUTPUT = f"output_{ENV_ID}".replace("-", "_") -INSERT_ROWS_QUERY = ( - f"INSERT {BQ_SQL_DATASET}.{BQ_SQL_TABLE_INPUT} VALUES " - "('John Doe', 900), " - "('Alice Storm', 1200)," - "('Bob Max', 1000)," - "('Peter Jackson', 800)," - "('Mia Smith', 1100);" -) - - -with DAG( - dag_id=DAG_ID, - start_date=datetime(2021, 1, 1), - schedule="@once", - catchup=False, - tags=["example", "dataflow-sql"], -) as dag: - create_bq_dataset = BigQueryCreateEmptyDatasetOperator( - task_id="create_bq_dataset", - dataset_id=BQ_SQL_DATASET, - location=LOCATION, - ) - - create_bq_table = BigQueryCreateEmptyTableOperator( - task_id="create_bq_table", - dataset_id=BQ_SQL_DATASET, - table_id=BQ_SQL_TABLE_INPUT, - schema_fields=[ - {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}, - ], - ) - - insert_query_job = BigQueryInsertJobOperator( - task_id="insert_query_job", - configuration={ - "query": { - "query": INSERT_ROWS_QUERY, - "useLegacySql": False, - "priority": "BATCH", - } - }, - location=LOCATION, - ) - - # [START howto_operator_start_sql_job] - start_sql = DataflowStartSqlJobOperator( - task_id="start_sql_query", - job_name=DATAFLOW_SQL_JOB_NAME, - query=f""" - SELECT - emp_name as employee, - salary as employee_salary - FROM - bigquery.table.`{PROJECT_ID}`.`{BQ_SQL_DATASET}`.`{BQ_SQL_TABLE_INPUT}` - WHERE salary >= 1000; - """, - options={ - "bigquery-project": PROJECT_ID, - "bigquery-dataset": BQ_SQL_DATASET, - "bigquery-table": BQ_SQL_TABLE_OUTPUT, - }, - location=LOCATION, - do_xcom_push=True, - ) - # [END howto_operator_start_sql_job] - - delete_bq_table = BigQueryDeleteTableOperator( - task_id="delete_bq_table", - deletion_dataset_table=f"{PROJECT_ID}.{BQ_SQL_DATASET}.{BQ_SQL_TABLE_INPUT}", - trigger_rule=TriggerRule.ALL_DONE, - ) - - delete_bq_dataset = BigQueryDeleteDatasetOperator( - task_id="delete_bq_dataset", - dataset_id=BQ_SQL_DATASET, - delete_contents=True, - trigger_rule=TriggerRule.ALL_DONE, - ) - - ( - # TEST SETUP - create_bq_dataset - >> create_bq_table - >> insert_query_job - # TEST BODY - >> start_sql - # TEST TEARDOWN - >> delete_bq_table - >> delete_bq_dataset - ) - - from tests_common.test_utils.watcher import watcher - - # This test needs watcher in order to properly mark success/failure - # when "tearDown" task with trigger rule is part of the DAG - list(dag.tasks) >> watcher() - -from tests_common.test_utils.system_tests import get_test_run # noqa: E402 - -# Needed to run the example 
DAG with pytest (see: tests/system/README.md#run_via_pytest) -test_run = get_test_run(dag) diff --git a/pyproject.toml b/pyproject.toml index 8f3418fc5a409..52144ab2b4e82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -625,6 +625,8 @@ dev = [ "apache-airflow-providers-alibaba", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", + "apache-airflow-providers-apache-drill", + "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-iceberg", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", @@ -642,11 +644,14 @@ dev = [ "apache-airflow-providers-common-sql", "apache-airflow-providers-docker", "apache-airflow-providers-datadog", + "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-discord", "apache-airflow-providers-edge", + "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", + "apache-airflow-providers-github", "apache-airflow-providers-http", "apache-airflow-providers-influxdb", "apache-airflow-providers-mongo", @@ -656,6 +661,7 @@ dev = [ "apache-airflow-providers-neo4j", "apache-airflow-providers-openai", "apache-airflow-providers-openfaas", + "apache-airflow-providers-opensearch", "apache-airflow-providers-opsgenie", "apache-airflow-providers-papermill", "apache-airflow-providers-pgvector", @@ -694,6 +700,8 @@ apache-airflow-providers-airbyte = {workspace = true} apache-airflow-providers-alibaba = { workspace = true } apache-airflow-providers-apache-beam = { workspace = true } apache-airflow-providers-apache-cassandra = { workspace = true } +apache-airflow-providers-apache-drill = { workspace = true } +apache-airflow-providers-apache-druid = { workspace = true } apache-airflow-providers-apache-iceberg = {workspace = true} apache-airflow-providers-apache-kafka = { workspace = true } apache-airflow-providers-apache-kylin = { workspace = true } @@ -710,12 +718,15 @@ apache-airflow-providers-common-compat = { workspace = true } apache-airflow-providers-common-io = { workspace = true } apache-airflow-providers-common-sql = { workspace = true } apache-airflow-providers-datadog = { workspace = true } +apache-airflow-providers-dbt-cloud = { workspace = true } apache-airflow-providers-discord = { workspace = true } apache-airflow-providers-docker = { workspace = true } apache-airflow-providers-edge = {workspace = true} +apache-airflow-providers-elasticsearch = { workspace = true } apache-airflow-providers-exasol = { workspace = true } apache-airflow-providers-facebook = { workspace = true } apache-airflow-providers-ftp = { workspace = true } +apache-airflow-providers-github = { workspace = true } apache-airflow-providers-http = { workspace = true } apache-airflow-providers-influxdb = { workspace = true } apache-airflow-providers-mongo = { workspace = true } @@ -727,6 +738,7 @@ apache-airflow-providers-openai = { workspace = true } apache-airflow-providers-jenkins = { workspace = true } apache-airflow-providers-mysql = { workspace = true } apache-airflow-providers-odbc = { workspace = true } +apache-airflow-providers-opensearch = { workspace = true } apache-airflow-providers-pagerduty = { workspace = true } apache-airflow-providers-openfaas = { workspace = true } apache-airflow-providers-opsgenie = { workspace = true } @@ -761,6 +773,8 @@ members = [ "providers/alibaba", "providers/apache/beam", "providers/apache/cassandra", + "providers/apache/drill", + "providers/apache/druid", "providers/apache/iceberg", 
"providers/apache/kafka", "providers/apache/kylin", @@ -777,12 +791,15 @@ members = [ "providers/common/io", "providers/common/sql", "providers/datadog", + "providers/dbt/cloud", "providers/discord", "providers/docker", "providers/edge", + "providers/elasticsearch", "providers/exasol", "providers/facebook", "providers/ftp", + "providers/github", "providers/hashicorp", "providers/http", "providers/imap", @@ -795,6 +812,7 @@ members = [ "providers/openai", "providers/openfaas", "providers/openlineage", + "providers/opensearch", "providers/opsgenie", "providers/pagerduty", "providers/papermill", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index 3e72fa832f890..65d18df71275d 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -36,6 +36,8 @@ services: - ../../../empty:/opt/airflow/providers/alibaba/src - ../../../empty:/opt/airflow/providers/apache/beam/src - ../../../empty:/opt/airflow/providers/apache/cassandra/src + - ../../../empty:/opt/airflow/providers/apache/drill/src + - ../../../empty:/opt/airflow/providers/apache/druid/src - ../../../empty:/opt/airflow/providers/apache/iceberg/src - ../../../empty:/opt/airflow/providers/apache/kafka/src - ../../../empty:/opt/airflow/providers/apache/kylin/src @@ -52,12 +54,15 @@ services: - ../../../empty:/opt/airflow/providers/common/io/src - ../../../empty:/opt/airflow/providers/common/sql/src - ../../../empty:/opt/airflow/providers/datadog/src + - ../../../empty:/opt/airflow/providers/dbt/cloud/src - ../../../empty:/opt/airflow/providers/discord/src - ../../../empty:/opt/airflow/providers/docker/src - ../../../empty:/opt/airflow/providers/edge/src + - ../../../empty:/opt/airflow/providers/elasticsearch/src - ../../../empty:/opt/airflow/providers/exasol/src - ../../../empty:/opt/airflow/providers/facebook/src - ../../../empty:/opt/airflow/providers/ftp/src + - ../../../empty:/opt/airflow/providers/github/src - ../../../empty:/opt/airflow/providers/hashicorp/src - ../../../empty:/opt/airflow/providers/http/src - ../../../empty:/opt/airflow/providers/imap/src @@ -70,6 +75,7 @@ services: - ../../../empty:/opt/airflow/providers/openai/src - ../../../empty:/opt/airflow/providers/openfaas/src - ../../../empty:/opt/airflow/providers/openlineage/src + - ../../../empty:/opt/airflow/providers/opensearch/src - ../../../empty:/opt/airflow/providers/opsgenie/src - ../../../empty:/opt/airflow/providers/pagerduty/src - ../../../empty:/opt/airflow/providers/papermill/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index 4864c3f5b51e8..2941b44c37fb6 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -43,6 +43,8 @@ services: - ../../../providers/alibaba/tests:/opt/airflow/providers/alibaba/tests - ../../../providers/apache/beam/tests:/opt/airflow/providers/apache/beam/tests - ../../../providers/apache/cassandra/tests:/opt/airflow/providers/apache/cassandra/tests + - ../../../providers/apache/drill/tests:/opt/airflow/providers/apache/drill/tests + - ../../../providers/apache/druid/tests:/opt/airflow/providers/apache/druid/tests - ../../../providers/apache/iceberg/tests:/opt/airflow/providers/apache/iceberg/tests - ../../../providers/apache/kafka/tests:/opt/airflow/providers/apache/kafka/tests - ../../../providers/apache/kylin/tests:/opt/airflow/providers/apache/kylin/tests @@ -59,12 +61,15 @@ services: - 
../../../providers/common/io/tests:/opt/airflow/providers/common/io/tests - ../../../providers/common/sql/tests:/opt/airflow/providers/common/sql/tests - ../../../providers/datadog/tests:/opt/airflow/providers/datadog/tests + - ../../../providers/dbt/cloud/tests:/opt/airflow/providers/dbt/cloud/tests - ../../../providers/discord/tests:/opt/airflow/providers/discord/tests - ../../../providers/docker/tests:/opt/airflow/providers/docker/tests - ../../../providers/edge/tests:/opt/airflow/providers/edge/tests + - ../../../providers/elasticsearch/tests:/opt/airflow/providers/elasticsearch/tests - ../../../providers/exasol/tests:/opt/airflow/providers/exasol/tests - ../../../providers/facebook/tests:/opt/airflow/providers/facebook/tests - ../../../providers/ftp/tests:/opt/airflow/providers/ftp/tests + - ../../../providers/github/tests:/opt/airflow/providers/github/tests - ../../../providers/hashicorp/tests:/opt/airflow/providers/hashicorp/tests - ../../../providers/http/tests:/opt/airflow/providers/http/tests - ../../../providers/imap/tests:/opt/airflow/providers/imap/tests @@ -77,6 +82,7 @@ services: - ../../../providers/openai/tests:/opt/airflow/providers/openai/tests - ../../../providers/openfaas/tests:/opt/airflow/providers/openfaas/tests - ../../../providers/openlineage/tests:/opt/airflow/providers/openlineage/tests + - ../../../providers/opensearch/tests:/opt/airflow/providers/opensearch/tests - ../../../providers/opsgenie/tests:/opt/airflow/providers/opsgenie/tests - ../../../providers/pagerduty/tests:/opt/airflow/providers/pagerduty/tests - ../../../providers/papermill/tests:/opt/airflow/providers/papermill/tests diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh index df676df0dde41..826e05e045378 100755 --- a/scripts/docker/entrypoint_ci.sh +++ b/scripts/docker/entrypoint_ci.sh @@ -255,17 +255,10 @@ function check_boto_upgrade() { # We need to include few dependencies to pass pip check with other dependencies: # * oss2 as dependency as otherwise jmespath will be bumped (sync with alibaba provider) # * cryptography is kept for snowflake-connector-python limitation (sync with snowflake provider) - # * requests needs to be limited to be compatible with apache beam (sync with apache-beam provider) - # * yandexcloud requirements for requests does not match those of apache.beam and latest botocore - # Both requests and yandexcloud exclusion above might be removed after - # https://github.com/apache/beam/issues/32080 is addressed - # This is already addressed and planned for 2.59.0 release. - # When you remove yandexcloud and opensearch from the above list, you can also remove the - # optional providers_dependencies exclusions from "test_example_dags.py" in "tests/always". 
set -x # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade boto3 botocore \ - "oss2>=2.14.0" "cryptography<43.0.0" "requests!=2.32.*,<3.0.0,>=2.24.0" + "oss2>=2.14.0" "cryptography<43.0.0" "opensearch-py" set +x pip check } diff --git a/task_sdk/src/airflow/sdk/__init__.py b/task_sdk/src/airflow/sdk/__init__.py index b8d6b6609dba7..d8968fd416f11 100644 --- a/task_sdk/src/airflow/sdk/__init__.py +++ b/task_sdk/src/airflow/sdk/__init__.py @@ -48,6 +48,8 @@ __lazy_imports: dict[str, str] = { "BaseOperator": ".definitions.baseoperator", "Connection": ".definitions.connection", + "Param": ".definitions.param", + "ParamsDict": ".definitions.param", "DAG": ".definitions.dag", "EdgeModifier": ".definitions.edges", "Label": ".definitions.edges", diff --git a/task_sdk/src/airflow/sdk/definitions/asset/decorators.py b/task_sdk/src/airflow/sdk/definitions/asset/decorators.py index 1f1d90883240b..579cc94b3ce34 100644 --- a/task_sdk/src/airflow/sdk/definitions/asset/decorators.py +++ b/task_sdk/src/airflow/sdk/definitions/asset/decorators.py @@ -31,9 +31,9 @@ from sqlalchemy.orm import Session from airflow.io.path import ObjectStoragePath - from airflow.models.param import ParamsDict from airflow.sdk.definitions.asset import AssetAlias, AssetUniqueKey from airflow.sdk.definitions.dag import DAG, DagStateChangeCallback, ScheduleArg + from airflow.sdk.definitions.param import ParamsDict from airflow.serialization.dag_dependency import DagDependency from airflow.triggers.base import BaseTrigger from airflow.typing_compat import Self diff --git a/task_sdk/src/airflow/sdk/definitions/baseoperator.py b/task_sdk/src/airflow/sdk/definitions/baseoperator.py index e7ecec69411ba..14d67656008e5 100644 --- a/task_sdk/src/airflow/sdk/definitions/baseoperator.py +++ b/task_sdk/src/airflow/sdk/definitions/baseoperator.py @@ -33,7 +33,6 @@ import attrs -from airflow.models.param import ParamsDict from airflow.sdk.definitions._internal.abstractoperator import ( DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST, DEFAULT_OWNER, @@ -54,6 +53,7 @@ from airflow.sdk.definitions._internal.node import validate_key from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet, validate_instance_args from airflow.sdk.definitions.mappedoperator import OperatorPartial, validate_mapping_kwargs +from airflow.sdk.definitions.param import ParamsDict from airflow.task.priority_strategy import ( PriorityWeightStrategy, airflow_priority_weight_strategies, diff --git a/task_sdk/src/airflow/sdk/definitions/dag.py b/task_sdk/src/airflow/sdk/definitions/dag.py index cd5217c8111d0..5662d542859f7 100644 --- a/task_sdk/src/airflow/sdk/definitions/dag.py +++ b/task_sdk/src/airflow/sdk/definitions/dag.py @@ -51,12 +51,12 @@ ParamValidationError, TaskNotFound, ) -from airflow.models.param import DagParam, ParamsDict from airflow.sdk.definitions._internal.abstractoperator import AbstractOperator from airflow.sdk.definitions._internal.types import NOTSET from airflow.sdk.definitions.asset import AssetAll, BaseAsset from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.context import Context +from airflow.sdk.definitions.param import DagParam, ParamsDict from airflow.timetables.base import Timetable from airflow.timetables.simple import ( AssetTriggeredTimetable, @@ -426,6 +426,7 @@ class DAG: ) fileloc: str = attrs.field(init=False, factory=_default_fileloc) + relative_fileloc: str | None = attrs.field(init=False, default=None) partial: bool = attrs.field(init=False, default=False) 
edge_info: dict[str, dict[str, EdgeInfoType]] = attrs.field(init=False, factory=dict) diff --git a/task_sdk/src/airflow/sdk/definitions/mappedoperator.py b/task_sdk/src/airflow/sdk/definitions/mappedoperator.py index 0fc0a7fa1896a..136400534243f 100644 --- a/task_sdk/src/airflow/sdk/definitions/mappedoperator.py +++ b/task_sdk/src/airflow/sdk/definitions/mappedoperator.py @@ -72,10 +72,10 @@ OperatorExpandArgument, OperatorExpandKwargsArgument, ) - from airflow.models.param import ParamsDict from airflow.models.xcom_arg import XComArg from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG + from airflow.sdk.definitions.param import ParamsDict from airflow.sdk.types import Operator from airflow.ti_deps.deps.base_ti_dep import BaseTIDep from airflow.utils.context import Context diff --git a/task_sdk/src/airflow/sdk/definitions/param.py b/task_sdk/src/airflow/sdk/definitions/param.py new file mode 100644 index 0000000000000..cd3ccec26a48a --- /dev/null +++ b/task_sdk/src/airflow/sdk/definitions/param.py @@ -0,0 +1,353 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import contextlib +import copy +import json +import logging +from collections.abc import ItemsView, Iterable, MutableMapping, ValuesView +from typing import TYPE_CHECKING, Any, ClassVar + +from airflow.exceptions import AirflowException, ParamValidationError +from airflow.sdk.definitions._internal.mixins import ResolveMixin +from airflow.utils.types import NOTSET, ArgNotSet + +if TYPE_CHECKING: + from airflow.sdk.definitions.context import Context + from airflow.sdk.definitions.dag import DAG + from airflow.sdk.types import Operator + +logger = logging.getLogger(__name__) + + +class Param: + """ + Class to hold the default value of a Param and rule set to do the validations. + + Without the rule set it always validates and returns the default value. + + :param default: The value this Param object holds + :param description: Optional help text for the Param + :param schema: The validation schema of the Param, if not given then all kwargs except + default & description will form the schema + """ + + __version__: ClassVar[int] = 1 + + CLASS_IDENTIFIER = "__class" + + def __init__(self, default: Any = NOTSET, description: str | None = None, **kwargs): + if default is not NOTSET: + self._check_json(default) + self.value = default + self.description = description + self.schema = kwargs.pop("schema") if "schema" in kwargs else kwargs + + def __copy__(self) -> Param: + return Param(self.value, self.description, schema=self.schema) + + @staticmethod + def _check_json(value): + try: + json.dumps(value) + except Exception: + raise ParamValidationError( + "All provided parameters must be json-serializable. 
" + f"The value '{value}' is not serializable." + ) + + def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: + """ + Run the validations and returns the Param's final value. + + May raise ValueError on failed validations, or TypeError + if no value is passed and no value already exists. + We first check that value is json-serializable; if not, warn. + In future release we will require the value to be json-serializable. + + :param value: The value to be updated for the Param + :param suppress_exception: To raise an exception or not when the validations fails. + If true and validations fails, the return value would be None. + """ + import jsonschema + from jsonschema import FormatChecker + from jsonschema.exceptions import ValidationError + + if value is not NOTSET: + self._check_json(value) + final_val = self.value if value is NOTSET else value + if isinstance(final_val, ArgNotSet): + if suppress_exception: + return None + raise ParamValidationError("No value passed and Param has no default value") + try: + jsonschema.validate(final_val, self.schema, format_checker=FormatChecker()) + except ValidationError as err: + if suppress_exception: + return None + raise ParamValidationError(err) from None + self.value = final_val + return final_val + + def dump(self) -> dict: + """Dump the Param as a dictionary.""" + out_dict: dict[str, str | None] = { + self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}" + } + out_dict.update(self.__dict__) + # Ensure that not set is translated to None + if self.value is NOTSET: + out_dict["value"] = None + return out_dict + + @property + def has_value(self) -> bool: + return self.value is not NOTSET and self.value is not None + + def serialize(self) -> dict: + return {"value": self.value, "description": self.description, "schema": self.schema} + + @staticmethod + def deserialize(data: dict[str, Any], version: int) -> Param: + if version > Param.__version__: + raise TypeError("serialized version > class version") + + return Param(default=data["value"], description=data["description"], schema=data["schema"]) + + +class ParamsDict(MutableMapping[str, Any]): + """ + Class to hold all params for dags or tasks. + + All the keys are strictly string and values are converted into Param's object + if they are not already. This class is to replace param's dictionary implicitly + and ideally not needed to be used directly. 
+ + + :param dict_obj: A dict or dict like object to init ParamsDict + :param suppress_exception: Flag to suppress value exceptions while initializing the ParamsDict + """ + + __version__: ClassVar[int] = 1 + __slots__ = ["__dict", "suppress_exception"] + + def __init__(self, dict_obj: MutableMapping | None = None, suppress_exception: bool = False): + params_dict: dict[str, Param] = {} + dict_obj = dict_obj or {} + for k, v in dict_obj.items(): + if not isinstance(v, Param): + params_dict[k] = Param(v) + else: + params_dict[k] = v + self.__dict = params_dict + self.suppress_exception = suppress_exception + + def __bool__(self) -> bool: + return bool(self.__dict) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, ParamsDict): + return self.dump() == other.dump() + if isinstance(other, dict): + return self.dump() == other + return NotImplemented + + def __copy__(self) -> ParamsDict: + return ParamsDict(self.__dict, self.suppress_exception) + + def __deepcopy__(self, memo: dict[int, Any] | None) -> ParamsDict: + return ParamsDict(copy.deepcopy(self.__dict, memo), self.suppress_exception) + + def __contains__(self, o: object) -> bool: + return o in self.__dict + + def __len__(self) -> int: + return len(self.__dict) + + def __delitem__(self, v: str) -> None: + del self.__dict[v] + + def __iter__(self): + return iter(self.__dict) + + def __repr__(self): + return repr(self.dump()) + + def __setitem__(self, key: str, value: Any) -> None: + """ + Override for dictionary's ``setitem`` method to ensure all values are of Param's type only. + + :param key: A key which needs to be inserted or updated in the dict + :param value: A value which needs to be set against the key. It could be of any + type but will be converted and stored as a Param object eventually. + """ + if isinstance(value, Param): + param = value + elif key in self.__dict: + param = self.__dict[key] + try: + param.resolve(value=value, suppress_exception=self.suppress_exception) + except ParamValidationError as ve: + raise ParamValidationError(f"Invalid input for param {key}: {ve}") from None + else: + # if the key isn't there already and if the value isn't of Param type create a new Param object + param = Param(value) + + self.__dict[key] = param + + def __getitem__(self, key: str) -> Any: + """ + Override for dictionary's ``getitem`` method to call the resolve method after fetching the key. 
+ + :param key: The key to fetch + """ + param = self.__dict[key] + return param.resolve(suppress_exception=self.suppress_exception) + + def get_param(self, key: str) -> Param: + """Get the internal :class:`.Param` object for this key.""" + return self.__dict[key] + + def items(self): + return ItemsView(self.__dict) + + def values(self): + return ValuesView(self.__dict) + + def update(self, *args, **kwargs) -> None: + if len(args) == 1 and not kwargs and isinstance(args[0], ParamsDict): + return super().update(args[0].__dict) + super().update(*args, **kwargs) + + def dump(self) -> dict[str, Any]: + """Dump the ParamsDict object as a dictionary, while suppressing exceptions.""" + return {k: v.resolve(suppress_exception=True) for k, v in self.items()} + + def validate(self) -> dict[str, Any]: + """Validate & returns all the Params object stored in the dictionary.""" + resolved_dict = {} + try: + for k, v in self.items(): + resolved_dict[k] = v.resolve(suppress_exception=self.suppress_exception) + except ParamValidationError as ve: + raise ParamValidationError(f"Invalid input for param {k}: {ve}") from None + + return resolved_dict + + def serialize(self) -> dict[str, Any]: + return self.dump() + + @staticmethod + def deserialize(data: dict, version: int) -> ParamsDict: + if version > ParamsDict.__version__: + raise TypeError("serialized version > class version") + + return ParamsDict(data) + + +class DagParam(ResolveMixin): + """ + DAG run parameter reference. + + This binds a simple Param object to a name within a DAG instance, so that it + can be resolved during the runtime via the ``{{ context }}`` dictionary. The + ideal use case of this class is to implicitly convert args passed to a + method decorated by ``@dag``. + + It can be used to parameterize a DAG. You can overwrite its value by setting + it on conf when you trigger your DagRun. + + This can also be used in templates by accessing ``{{ context.params }}``. + + **Example**: + + with DAG(...) as dag: + EmailOperator(subject=dag.param('subject', 'Hi from Airflow!')) + + :param current_dag: Dag being used for parameter. + :param name: key value which is used to set the parameter + :param default: Default value used if no parameter was set. + """ + + def __init__(self, current_dag: DAG, name: str, default: Any = NOTSET): + if default is not NOTSET: + current_dag.params[name] = default + self._name = name + self._default = default + self.current_dag = current_dag + + def iter_references(self) -> Iterable[tuple[Operator, str]]: + return () + + def resolve(self, context: Context, *, include_xcom: bool = True) -> Any: + """Pull DagParam value from DagRun context. This method is run during ``op.execute()``.""" + with contextlib.suppress(KeyError): + if context["dag_run"].conf: + return context["dag_run"].conf[self._name] + if self._default is not NOTSET: + return self._default + with contextlib.suppress(KeyError): + return context["params"][self._name] + raise AirflowException(f"No value could be resolved for parameter {self._name}") + + def serialize(self) -> dict: + """Serialize the DagParam object into a dictionary.""" + return { + "dag_id": self.current_dag.dag_id, + "name": self._name, + "default": self._default, + } + + @classmethod + def deserialize(cls, data: dict, dags: dict) -> DagParam: + """ + Deserializes the dictionary back into a DagParam object. + + :param data: The serialized representation of the DagParam. + :param dags: A dictionary of available DAGs to look up the DAG. 
+ """ + dag_id = data["dag_id"] + # Retrieve the current DAG from the provided DAGs dictionary + current_dag = dags.get(dag_id) + if not current_dag: + raise ValueError(f"DAG with id {dag_id} not found.") + + return cls(current_dag=current_dag, name=data["name"], default=data["default"]) + + +def process_params( + dag: DAG, + task: Operator, + dagrun_conf: dict[str, Any] | None, + *, + suppress_exception: bool, +) -> dict[str, Any]: + """Merge, validate params, and convert them into a simple dict.""" + from airflow.configuration import conf + + dagrun_conf = dagrun_conf or {} + + params = ParamsDict(suppress_exception=suppress_exception) + with contextlib.suppress(AttributeError): + params.update(dag.params) + if task.params: + params.update(task.params) + if conf.getboolean("core", "dag_run_conf_overrides_params") and dagrun_conf: + logger.debug("Updating task params (%s) with DagRun.conf (%s)", params, dagrun_conf) + params.update(dagrun_conf) + return params.validate() diff --git a/task_sdk/src/airflow/sdk/execution_time/task_runner.py b/task_sdk/src/airflow/sdk/execution_time/task_runner.py index 715dbf75dd77d..8b21fcfaf486a 100644 --- a/task_sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task_sdk/src/airflow/sdk/execution_time/task_runner.py @@ -37,6 +37,7 @@ from airflow.sdk.definitions._internal.dag_parsing_context import _airflow_parsing_context_manager from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetNameRef, AssetUriRef from airflow.sdk.definitions.baseoperator import BaseOperator +from airflow.sdk.definitions.param import process_params from airflow.sdk.execution_time.comms import ( DeferTask, GetXCom, @@ -86,6 +87,16 @@ def get_template_context(self) -> Context: # TODO: Move this to `airflow.sdk.execution_time.context` # once we port the entire context logic from airflow/utils/context.py ? + dag_run_conf = None + if ( + self._ti_context_from_server + and self._ti_context_from_server.dag_run + and self._ti_context_from_server.dag_run.conf + ): + dag_run_conf = self._ti_context_from_server.dag_run.conf + + validated_params = process_params(self.task.dag, self.task, dag_run_conf, suppress_exception=False) + # TODO: Assess if we need to it through airflow.utils.timezone.coerce_datetime() context: Context = { # From the Task Execution interface @@ -102,7 +113,7 @@ def get_template_context(self) -> Context: "outlet_events": OutletEventAccessors(), # "inlet_events": InletEventsAccessors(task.inlets, session=session), "macros": MacrosAccessor(), - # "params": validated_params, + "params": validated_params, # TODO: Make this go through Public API longer term. 
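# Sketch of the precedence that process_params (wired in above) applies when
# building the "params" context entry: DAG-level params are applied first, then
# task-level params, then the DagRun conf when core.dag_run_conf_overrides_params
# is enabled, and the merged result is validated. Names below are illustrative
# and not part of this diff; BaseOperator is instantiated directly only for brevity.
from airflow.sdk import DAG, Param
from airflow.sdk.definitions.baseoperator import BaseOperator
from airflow.sdk.definitions.param import process_params

with DAG(dag_id="param_precedence", schedule=None, params={"x": Param(1, type="integer")}) as dag:
    task = BaseOperator(task_id="t", params={"y": 2})

merged = process_params(dag, task, {"x": 10}, suppress_exception=False)
# With conf overrides enabled: {"x": 10, "y": 2}; otherwise: {"x": 1, "y": 2}.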
# "test_mode": task_instance.test_mode, # "triggering_asset_events": lazy_object_proxy.Proxy(get_triggering_events), @@ -322,6 +333,7 @@ def parse(what: StartupDetails) -> RuntimeTaskInstance: name=bundle_info.name, version=bundle_info.version, ) + bundle_instance.initialize() dag_absolute_path = os.fspath(Path(bundle_instance.path, what.dag_rel_path)) bag = DagBag( diff --git a/task_sdk/tests/conftest.py b/task_sdk/tests/conftest.py index e24f6e397d3e5..cc4bc4f96148a 100644 --- a/task_sdk/tests/conftest.py +++ b/task_sdk/tests/conftest.py @@ -184,6 +184,7 @@ def _make_context( data_interval_end: str | datetime = "2024-12-01T01:00:00Z", start_date: str | datetime = "2024-12-01T01:00:00Z", run_type: str = "manual", + conf=None, ) -> TIRunContext: return TIRunContext( dag_run=DagRun( @@ -194,6 +195,7 @@ def _make_context( data_interval_end=data_interval_end, # type: ignore start_date=start_date, # type: ignore run_type=run_type, # type: ignore + conf=conf, ), max_tries=0, ) diff --git a/task_sdk/tests/definitions/test_dag.py b/task_sdk/tests/definitions/test_dag.py index f0e634f19b667..e6baeabe98dee 100644 --- a/task_sdk/tests/definitions/test_dag.py +++ b/task_sdk/tests/definitions/test_dag.py @@ -23,9 +23,9 @@ import pytest from airflow.exceptions import DuplicateTaskIdFound -from airflow.models.param import Param, ParamsDict from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG, dag as dag_decorator +from airflow.sdk.definitions.param import Param, ParamsDict DEFAULT_DATE = datetime(2016, 1, 1, tzinfo=timezone.utc) diff --git a/task_sdk/tests/definitions/test_mappedoperator.py b/task_sdk/tests/definitions/test_mappedoperator.py index aba7523b5ad39..eeb79f31b4d47 100644 --- a/task_sdk/tests/definitions/test_mappedoperator.py +++ b/task_sdk/tests/definitions/test_mappedoperator.py @@ -22,10 +22,10 @@ import pendulum import pytest -from airflow.models.param import ParamsDict from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG from airflow.sdk.definitions.mappedoperator import MappedOperator +from airflow.sdk.definitions.param import ParamsDict from airflow.sdk.definitions.xcom_arg import XComArg from airflow.utils.trigger_rule import TriggerRule diff --git a/task_sdk/tests/definitions/test_param.py b/task_sdk/tests/definitions/test_param.py new file mode 100644 index 0000000000000..93e863222ef87 --- /dev/null +++ b/task_sdk/tests/definitions/test_param.py @@ -0,0 +1,308 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from contextlib import nullcontext + +import pytest + +from airflow.exceptions import ParamValidationError +from airflow.sdk.definitions.param import Param, ParamsDict +from airflow.serialization.serialized_objects import BaseSerialization + + +class TestParam: + def test_param_without_schema(self): + p = Param("test") + assert p.resolve() == "test" + + p.value = 10 + assert p.resolve() == 10 + + def test_null_param(self): + p = Param() + with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): + p.resolve() + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + + p = Param(None) + assert p.resolve() is None + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + + p = Param(None, type="null") + assert p.resolve() is None + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + with pytest.raises(ParamValidationError): + p.resolve("test") + + def test_string_param(self): + p = Param("test", type="string") + assert p.resolve() == "test" + + p = Param("test") + assert p.resolve() == "test" + + p = Param("10.0.0.0", type="string", format="ipv4") + assert p.resolve() == "10.0.0.0" + + p = Param(type="string") + with pytest.raises(ParamValidationError): + p.resolve(None) + with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): + p.resolve() + + @pytest.mark.parametrize( + "dt", + [ + pytest.param("2022-01-02T03:04:05.678901Z", id="microseconds-zed-timezone"), + pytest.param("2022-01-02T03:04:05.678Z", id="milliseconds-zed-timezone"), + pytest.param("2022-01-02T03:04:05+00:00", id="seconds-00-00-timezone"), + pytest.param("2022-01-02T03:04:05+04:00", id="seconds-custom-timezone"), + ], + ) + def test_string_rfc3339_datetime_format(self, dt): + """Test valid rfc3339 datetime.""" + assert Param(dt, type="string", format="date-time").resolve() == dt + + @pytest.mark.parametrize( + "dt", + [ + pytest.param("2022-01-02", id="date"), + pytest.param("03:04:05", id="time"), + pytest.param("Thu, 04 Mar 2021 05:06:07 GMT", id="rfc2822-datetime"), + ], + ) + def test_string_datetime_invalid_format(self, dt): + """Test invalid iso8601 and rfc3339 datetime format.""" + with pytest.raises(ParamValidationError, match="is not a 'date-time'"): + Param(dt, type="string", format="date-time").resolve() + + def test_string_time_format(self): + """Test string time format.""" + assert Param("03:04:05", type="string", format="time").resolve() == "03:04:05" + + error_pattern = "is not a 'time'" + with pytest.raises(ParamValidationError, match=error_pattern): + Param("03:04:05.06", type="string", format="time").resolve() + + with pytest.raises(ParamValidationError, match=error_pattern): + Param("03:04", type="string", format="time").resolve() + + with pytest.raises(ParamValidationError, match=error_pattern): + Param("24:00:00", type="string", format="time").resolve() + + @pytest.mark.parametrize( + "date_string", + [ + "2021-01-01", + ], + ) + def test_string_date_format(self, date_string): + """Test string date format.""" + assert Param(date_string, type="string", format="date").resolve() == date_string + + # Note that 20120503 behaved differently in 3.11.3 Official python image. It was validated as a date + # there but it started to fail again in 3.11.4 released on 2023-07-05. 
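# Related caveat for the format-based tests in this class: Param.resolve
# validates with jsonschema's FormatChecker, so whether "date", "time", or
# "date-time" is actually enforced depends on which optional format validator
# packages are available in the environment. A hedged way to inspect that:
from jsonschema import FormatChecker

enforced_formats = sorted(FormatChecker().checkers)
# "date-time" typically appears only when an RFC 3339 validator package is installed.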
+ @pytest.mark.parametrize( + "date_string", + [ + "01/01/2021", + "21 May 1975", + "20120503", + ], + ) + def test_string_date_format_error(self, date_string): + """Test string date format failures.""" + with pytest.raises(ParamValidationError, match="is not a 'date'"): + Param(date_string, type="string", format="date").resolve() + + def test_int_param(self): + p = Param(5) + assert p.resolve() == 5 + + p = Param(type="integer", minimum=0, maximum=10) + assert p.resolve(value=5) == 5 + + with pytest.raises(ParamValidationError): + p.resolve(value=20) + + def test_number_param(self): + p = Param(42, type="number") + assert p.resolve() == 42 + + p = Param(1.2, type="number") + assert p.resolve() == 1.2 + + p = Param("42", type="number") + with pytest.raises(ParamValidationError): + p.resolve() + + def test_list_param(self): + p = Param([1, 2], type="array") + assert p.resolve() == [1, 2] + + def test_dict_param(self): + p = Param({"a": 1, "b": 2}, type="object") + assert p.resolve() == {"a": 1, "b": 2} + + def test_composite_param(self): + p = Param(type=["string", "number"]) + assert p.resolve(value="abc") == "abc" + assert p.resolve(value=5.0) == 5.0 + + def test_param_with_description(self): + p = Param(10, description="Sample description") + assert p.description == "Sample description" + + def test_suppress_exception(self): + p = Param("abc", type="string", minLength=2, maxLength=4) + assert p.resolve() == "abc" + + p.value = "long_string" + assert p.resolve(suppress_exception=True) is None + + def test_explicit_schema(self): + p = Param("abc", schema={type: "string"}) + assert p.resolve() == "abc" + + def test_custom_param(self): + class S3Param(Param): + def __init__(self, path: str): + schema = {"type": "string", "pattern": r"s3:\/\/(.+?)\/(.+)"} + super().__init__(default=path, schema=schema) + + p = S3Param("s3://my_bucket/my_path") + assert p.resolve() == "s3://my_bucket/my_path" + + p = S3Param("file://not_valid/s3_path") + with pytest.raises(ParamValidationError): + p.resolve() + + def test_value_saved(self): + p = Param("hello", type="string") + assert p.resolve("world") == "world" + assert p.resolve() == "world" + + def test_dump(self): + p = Param("hello", description="world", type="string", minLength=2) + dump = p.dump() + assert dump["__class"] == "airflow.sdk.definitions.param.Param" + assert dump["value"] == "hello" + assert dump["description"] == "world" + assert dump["schema"] == {"type": "string", "minLength": 2} + + @pytest.mark.parametrize( + "param", + [ + Param("my value", description="hello", schema={"type": "string"}), + Param("my value", description="hello"), + Param(None, description=None), + Param([True], type="array", items={"type": "boolean"}), + Param(), + ], + ) + def test_param_serialization(self, param: Param): + """ + Test to make sure that native Param objects can be correctly serialized + """ + + serializer = BaseSerialization() + serialized_param = serializer.serialize(param) + restored_param: Param = serializer.deserialize(serialized_param) + + assert restored_param.value == param.value + assert isinstance(restored_param, Param) + assert restored_param.description == param.description + assert restored_param.schema == param.schema + + @pytest.mark.parametrize( + "default, should_raise", + [ + pytest.param({0, 1, 2}, True, id="default-non-JSON-serializable"), + pytest.param(None, False, id="default-None"), # Param init should not warn + pytest.param({"b": 1}, False, id="default-JSON-serializable"), # Param init should not warn + ], + ) + def 
test_param_json_validation(self, default, should_raise): + exception_msg = "All provided parameters must be json-serializable" + cm = pytest.raises(ParamValidationError, match=exception_msg) if should_raise else nullcontext() + with cm: + p = Param(default=default) + if not should_raise: + p.resolve() # when resolved with NOTSET, should not warn. + p.resolve(value={"a": 1}) # when resolved with JSON-serializable, should not warn. + with pytest.raises(ParamValidationError, match=exception_msg): + p.resolve(value={1, 2, 3}) # when resolved with not JSON-serializable, should warn. + + +class TestParamsDict: + def test_params_dict(self): + # Init with a simple dictionary + pd = ParamsDict(dict_obj={"key": "value"}) + assert isinstance(pd.get_param("key"), Param) + assert pd["key"] == "value" + assert pd.suppress_exception is False + + # Init with a dict which contains Param objects + pd2 = ParamsDict({"key": Param("value", type="string")}, suppress_exception=True) + assert isinstance(pd2.get_param("key"), Param) + assert pd2["key"] == "value" + assert pd2.suppress_exception is True + + # Init with another object of another ParamsDict + pd3 = ParamsDict(pd2) + assert isinstance(pd3.get_param("key"), Param) + assert pd3["key"] == "value" + assert pd3.suppress_exception is False # as it's not a deepcopy of pd2 + + # Dump the ParamsDict + assert pd.dump() == {"key": "value"} + assert pd2.dump() == {"key": "value"} + assert pd3.dump() == {"key": "value"} + + # Validate the ParamsDict + plain_dict = pd.validate() + assert isinstance(plain_dict, dict) + pd2.validate() + pd3.validate() + + # Update the ParamsDict + with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): + pd3["key"] = 1 + + # Should not raise an error as suppress_exception is True + pd2["key"] = 1 + pd2.validate() + + def test_update(self): + pd = ParamsDict({"key": Param("value", type="string")}) + + pd.update({"key": "a"}) + internal_value = pd.get_param("key") + assert isinstance(internal_value, Param) + with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): + pd.update({"key": 1}) + + def test_repr(self): + pd = ParamsDict({"key": Param("value", type="string")}) + assert repr(pd) == "{'key': 'value'}" diff --git a/task_sdk/tests/execution_time/conftest.py b/task_sdk/tests/execution_time/conftest.py index 832f2b60ca351..ac0c21246c1ce 100644 --- a/task_sdk/tests/execution_time/conftest.py +++ b/task_sdk/tests/execution_time/conftest.py @@ -71,6 +71,8 @@ def set_dag(what: StartupDetails, dag_id: str, task: BaseOperator) -> RuntimeTas from airflow.utils import timezone dag = DAG(dag_id=dag_id, start_date=timezone.datetime(2024, 12, 3)) + if what.ti_context.dag_run.conf: + dag.params = what.ti_context.dag_run.conf # type: ignore[assignment] task.dag = dag t = dag.task_dict[task.task_id] ti = RuntimeTaskInstance.model_construct( @@ -120,6 +122,7 @@ def _create_task_instance( start_date: str | datetime = "2024-12-01T01:00:00Z", run_type: str = "manual", try_number: int = 1, + conf=None, ti_id=None, ) -> RuntimeTaskInstance: if not ti_id: @@ -133,6 +136,7 @@ def _create_task_instance( data_interval_end=data_interval_end, start_date=start_date, run_type=run_type, + conf=conf, ) startup_details = StartupDetails( diff --git a/task_sdk/tests/execution_time/test_task_runner.py b/task_sdk/tests/execution_time/test_task_runner.py index d9aa675242cd0..fef2f505b2a55 100644 --- a/task_sdk/tests/execution_time/test_task_runner.py +++ 
b/task_sdk/tests/execution_time/test_task_runner.py @@ -711,6 +711,7 @@ def test_get_context_without_ti_context_from_server(self, mocked_parse, make_ti_ # Verify the context keys and values assert context == { + "params": {}, "var": { "json": VariableAccessor(deserialize_json=True), "value": VariableAccessor(deserialize_json=False), @@ -751,6 +752,7 @@ def test_get_context_with_ti_context_from_server(self, create_runtime_ti, mock_s context = runtime_ti.get_template_context() assert context == { + "params": {}, "var": { "json": VariableAccessor(deserialize_json=True), "value": VariableAccessor(deserialize_json=False), @@ -983,6 +985,36 @@ def execute(self, context): ), ) + def test_get_param_from_context( + self, mocked_parse, make_ti_context, mock_supervisor_comms, create_runtime_ti + ): + """Test that a params can be retrieved from context.""" + + class CustomOperator(BaseOperator): + def execute(self, context): + value = context["params"] + print("The dag params are", value) + + task = CustomOperator(task_id="print-params") + runtime_ti = create_runtime_ti( + dag_id="basic_param_dag", + task=task, + conf={ + "x": 3, + "text": "Hello World!", + "flag": False, + "a_simple_list": ["one", "two", "three", "actually one value is made per line"], + }, + ) + run(runtime_ti, log=mock.MagicMock()) + + assert runtime_ti.task.dag.params == { + "x": 3, + "text": "Hello World!", + "flag": False, + "a_simple_list": ["one", "two", "three", "actually one value is made per line"], + } + class TestXComAfterTaskExecution: @pytest.mark.parametrize( @@ -1092,3 +1124,85 @@ def execute(self, context): assert str(exc_info.value) == ( f"Returned dictionary keys must be strings when using multiple_outputs, found 2 ({int}) instead" ) + + +class TestDagParamRuntime: + def test_dag_param_resolves_from_task(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """Test dagparam resolves on operator execution""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG(dag_id="dag_with_dag_params", start_date=timezone.datetime(2024, 12, 3)) + dag.param("value", default="NOTSET") + + class CustomOperator(BaseOperator): + def execute(self, context): + assert dag.params["value"] == "NOTSET" + + task = CustomOperator(task_id="task_with_dag_params") + runtime_ti = create_runtime_ti(task=task, dag_id="dag_with_dag_params") + + run(runtime_ti, log=mock.MagicMock()) + + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, + ) + + def test_dag_param_dag_overwrite(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """Test dag param is overwritten from dagrun config""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG(dag_id="dag_with_dag_params_overwrite", start_date=timezone.datetime(2024, 12, 3)) + dag.param("value", default="NOTSET") + + class CustomOperator(BaseOperator): + def execute(self, context): + # important to use self.dag here + assert self.dag.params["value"] == "new_value" + + # asserting on the default value when not set in dag run + assert dag.params["value"] == "NOTSET" + task = CustomOperator(task_id="task_with_dag_params_overwrite") + + # we reparse the dag here, and if conf passed, added as params + runtime_ti = create_runtime_ti( + task=task, dag_id="dag_with_dag_params_overwrite", conf={"value": "new_value"} + ) + run(runtime_ti, 
log=mock.MagicMock()) + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, + ) + + def test_dag_param_dag_default(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """ "Test dag param is retrieved from default config""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG( + dag_id="dag_with_dag_params_default", + start_date=timezone.datetime(2024, 12, 3), + params={"value": "test"}, + ) + + class CustomOperator(BaseOperator): + def execute(self, context): + assert dag.params["value"] == "test" + + assert dag.params["value"] == "test" + task = CustomOperator(task_id="task_with_dag_params_default") + runtime_ti = create_runtime_ti(task=task, dag_id="dag_with_dag_params_default") + + run(runtime_ti, log=mock.MagicMock()) + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, + ) diff --git a/tests/always/test_example_dags.py b/tests/always/test_example_dags.py index f4b0cc7c75f62..c827de6090687 100644 --- a/tests/always/test_example_dags.py +++ b/tests/always/test_example_dags.py @@ -44,8 +44,7 @@ # The format of the dictionary is as follows: # key: the regexp matching the file to be excluded, # value: a dictionary containing package distributions with an optional version specifier, e.g., >=2.3.4 - ".*example_bedrock_retrieve_and_generate.py": {"opensearch-py": None}, - ".*example_opensearch.py": {"opensearch-py": None}, + # yandexcloud is automatically removed in case botocore is upgraded to latest r".*example_yandexcloud.*\.py": {"yandexcloud": None}, } IGNORE_AIRFLOW_PROVIDER_DEPRECATION_WARNING: tuple[str, ...] 
= ( diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py index 3d609c3048e60..2548193c2fba3 100644 --- a/tests/always/test_project_structure.py +++ b/tests/always/test_project_structure.py @@ -381,7 +381,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.automl.AutoMLTablesListTableSpecsOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTablesUpdateDatasetOperator", "airflow.providers.google.cloud.operators.automl.AutoMLDeployModelOperator", - "airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTrainModelOperator", "airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator", "airflow.providers.google.cloud.operators.automl.AutoMLCreateDatasetOperator", @@ -405,10 +404,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.mlengine.MLEngineStartBatchPredictionJobOperator", "airflow.providers.google.cloud.operators.mlengine.MLEngineStartTrainingJobOperator", "airflow.providers.google.cloud.operators.mlengine.MLEngineTrainingCancelJobOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptLanguageModelOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerateTextEmbeddingsOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptMultimodalModelOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptMultimodalModelWithMediaOperator", "airflow.providers.google.cloud.operators.vertex_ai.generative_model.TextGenerationModelPredictOperator", "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360CreateQueryOperator", "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360RunQueryOperator", diff --git a/tests/api_connexion/endpoints/test_dag_endpoint.py b/tests/api_connexion/endpoints/test_dag_endpoint.py index 93bdb600cb80a..79190a3664709 100644 --- a/tests/api_connexion/endpoints/test_dag_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_endpoint.py @@ -320,7 +320,7 @@ def test_should_respond_200(self, url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -380,7 +380,7 @@ def test_should_respond_200_with_asset_expression(self, url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -533,7 +533,7 @@ def test_should_respond_200_serialized(self, url_safe_serializer, testing_dag_bu "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -591,7 +591,7 @@ def test_should_respond_200_serialized(self, url_safe_serializer, testing_dag_bu "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, diff --git a/tests/api_connexion/endpoints/test_dag_run_endpoint.py b/tests/api_connexion/endpoints/test_dag_run_endpoint.py index 9558dd4fd256a..dc3073a475c3d 100644 --- 
a/tests/api_connexion/endpoints/test_dag_run_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_run_endpoint.py @@ -30,9 +30,9 @@ from airflow.models.asset import AssetEvent, AssetModel from airflow.models.dag import DAG, DagModel from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import create_session, provide_session from airflow.utils.state import DagRunState, State diff --git a/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py b/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py index 79dcde6cbd2a5..68ba4395cfc0a 100644 --- a/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py +++ b/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py @@ -226,6 +226,7 @@ def test_mapped_task_instances(self, one_task_with_mapped_tis, session): "priority_weight": 1, "queue": "default", "queued_when": None, + "scheduled_when": None, "rendered_fields": {}, "rendered_map_index": None, "start_date": "2020-01-01T00:00:00+00:00", diff --git a/tests/api_connexion/endpoints/test_task_endpoint.py b/tests/api_connexion/endpoints/test_task_endpoint.py index 826b912ddc989..874c5d0508547 100644 --- a/tests/api_connexion/endpoints/test_task_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_endpoint.py @@ -124,7 +124,7 @@ def test_should_respond_200(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -207,7 +207,7 @@ def test_unscheduled_task(self): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, @@ -271,7 +271,7 @@ def test_should_respond_200_serialized(self, testing_dag_bundle): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -348,7 +348,7 @@ def test_should_respond_200(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -508,7 +508,7 @@ def test_get_unscheduled_tasks(self): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, diff --git a/tests/api_connexion/endpoints/test_task_instance_endpoint.py b/tests/api_connexion/endpoints/test_task_instance_endpoint.py index b5079c47aa17e..c51116c368ded 100644 --- a/tests/api_connexion/endpoints/test_task_instance_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_instance_endpoint.py @@ -196,6 +196,7 @@ def test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -254,6 +255,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": 
"2020-01-02T00:00:00+00:00", "state": "deferred", "task_id": "print_the_context", @@ -301,6 +303,7 @@ def test_should_respond_200_with_task_state_in_removed(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "removed", "task_id": "print_the_context", @@ -344,6 +347,7 @@ def test_should_respond_200_task_instance_with_rendered(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -396,6 +400,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2305,6 +2310,7 @@ def test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2364,6 +2370,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2586,6 +2593,7 @@ def test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "success", "task_id": "print_the_context", @@ -2621,6 +2629,7 @@ def test_should_respond_200_with_different_try_numbers(self, try_number, session "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "success" if try_number == 1 else None, "task_id": "print_the_context", @@ -2683,6 +2692,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers(self, try_ "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "failed" if try_number == 1 else None, "task_id": "print_the_context", @@ -2745,6 +2755,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "failed", "task_id": "print_the_context", @@ -2780,6 +2791,7 @@ def test_should_respond_200_with_task_state_in_removed(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "removed", "task_id": "print_the_context", diff --git a/tests/api_connexion/schemas/test_dag_schema.py b/tests/api_connexion/schemas/test_dag_schema.py index d6438045249aa..800b512f993bc 100644 --- a/tests/api_connexion/schemas/test_dag_schema.py +++ b/tests/api_connexion/schemas/test_dag_schema.py @@ -167,7 +167,7 @@ def test_serialize_test_dag_detail_schema(url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": 1, "description": None, "schema": {}, @@ -229,7 +229,7 @@ def 
test_serialize_test_dag_with_asset_schedule_detail_schema(url_safe_serialize "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": 1, "description": None, "schema": {}, diff --git a/tests/api_connexion/schemas/test_task_instance_schema.py b/tests/api_connexion/schemas/test_task_instance_schema.py index 3f130517dc3ed..a14cd7dbbd1ec 100644 --- a/tests/api_connexion/schemas/test_task_instance_schema.py +++ b/tests/api_connexion/schemas/test_task_instance_schema.py @@ -87,6 +87,7 @@ def test_task_instance_schema_without_rendered(self, session): "priority_weight": 1, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "TEST_TASK_ID", diff --git a/tests/api_connexion/schemas/test_task_schema.py b/tests/api_connexion/schemas/test_task_schema.py index 5748529b864af..eee51c3aac73a 100644 --- a/tests/api_connexion/schemas/test_task_schema.py +++ b/tests/api_connexion/schemas/test_task_schema.py @@ -86,7 +86,7 @@ def test_serialize(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, diff --git a/tests/api_fastapi/core_api/routes/public/test_backfills.py b/tests/api_fastapi/core_api/routes/public/test_backfills.py index 1836bae3ac7a1..1eeae8b667566 100644 --- a/tests/api_fastapi/core_api/routes/public/test_backfills.py +++ b/tests/api_fastapi/core_api/routes/public/test_backfills.py @@ -425,7 +425,6 @@ def test_create_backfill_dry_run_with_depends_on_past( "max_active_runs": max_active_runs, "run_backwards": run_backwards, "dag_run_conf": {"param1": "val1", "param2": True}, - "dry_run": False, "reprocess_behavior": repro_act, } response = test_client.post( diff --git a/tests/api_fastapi/core_api/routes/public/test_dag_run.py b/tests/api_fastapi/core_api/routes/public/test_dag_run.py index b316b0119dd19..3d70f4dbf29f8 100644 --- a/tests/api_fastapi/core_api/routes/public/test_dag_run.py +++ b/tests/api_fastapi/core_api/routes/public/test_dag_run.py @@ -27,9 +27,9 @@ from airflow.listeners.listener import get_listener_manager from airflow.models import DagModel, DagRun from airflow.models.asset import AssetEvent, AssetModel -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import provide_session from airflow.utils.state import DagRunState, State diff --git a/tests/api_fastapi/core_api/routes/public/test_dags.py b/tests/api_fastapi/core_api/routes/public/test_dags.py index 748baae71a413..8ef4a82613775 100644 --- a/tests/api_fastapi/core_api/routes/public/test_dags.py +++ b/tests/api_fastapi/core_api/routes/public/test_dags.py @@ -377,7 +377,7 @@ def test_dag_details( "owners": ["airflow"], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, diff --git a/tests/api_fastapi/core_api/routes/public/test_task_instances.py b/tests/api_fastapi/core_api/routes/public/test_task_instances.py index 80cf61dc684cf..857ca1ffa65db 100644 --- a/tests/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/tests/api_fastapi/core_api/routes/public/test_task_instances.py @@ -189,6 +189,7 @@ def 
test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -247,6 +248,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "deferred", "task_id": "print_the_context", @@ -294,6 +296,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "removed", "task_id": "print_the_context", @@ -337,6 +340,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -437,6 +441,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -1509,6 +1514,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -1542,6 +1548,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success" if try_number == 1 else None, "task_id": "print_the_context", @@ -1604,6 +1611,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed" if try_number == 1 else None, "task_id": "print_the_context", @@ -1664,6 +1672,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed", "task_id": "print_the_context", @@ -1698,6 +1707,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "removed", "task_id": "print_the_context", @@ -2146,6 +2156,7 @@ def test_should_respond_200_with_dag_run_id(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "rendered_fields": {}, "rendered_map_index": None, "start_date": "2020-01-02T00:00:00Z", @@ -2485,6 +2496,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -2509,6 +2521,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + 
"scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": None, "task_id": "print_the_context", @@ -2554,6 +2567,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -2620,6 +2634,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed", "task_id": "print_the_context", @@ -2644,6 +2659,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": None, "task_id": "print_the_context", @@ -2718,6 +2734,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": self.TASK_ID, @@ -2912,6 +2929,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": "print_the_context", @@ -3009,6 +3027,7 @@ def test_update_mask_set_note_should_respond_200(self, test_client, session, new "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3049,6 +3068,7 @@ def test_set_note_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3103,6 +3123,7 @@ def test_set_note_should_respond_200_mapped_task_instance_with_rtif(self, test_c "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3202,6 +3223,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": self.TASK_ID, @@ -3423,6 +3445,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": "print_the_context", diff --git a/tests/api_fastapi/core_api/routes/public/test_tasks.py b/tests/api_fastapi/core_api/routes/public/test_tasks.py index 2c00a9e96a7b5..b2e7671365690 100644 --- a/tests/api_fastapi/core_api/routes/public/test_tasks.py +++ b/tests/api_fastapi/core_api/routes/public/test_tasks.py @@ -103,7 +103,7 @@ def test_should_respond_200(self, test_client): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -185,7 +185,7 @@ def test_unscheduled_task(self, test_client): "owner": 
"airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, @@ -248,7 +248,7 @@ def test_should_respond_200_serialized(self, test_client, testing_dag_bundle): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -313,7 +313,7 @@ def test_should_respond_200(self, test_client): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -469,7 +469,7 @@ def test_get_unscheduled_tasks(self, test_client): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, diff --git a/tests/api_fastapi/core_api/routes/ui/test_structure.py b/tests/api_fastapi/core_api/routes/ui/test_structure.py index 1f865a140c5be..5fabd396ed955 100644 --- a/tests/api_fastapi/core_api/routes/ui/test_structure.py +++ b/tests/api_fastapi/core_api/routes/ui/test_structure.py @@ -17,11 +17,14 @@ # under the License. from __future__ import annotations +import copy + import pendulum import pytest from deepdiff import DeepDiff from airflow.models import DagBag +from airflow.models.serialized_dag import SerializedDagModel from airflow.operators.empty import EmptyOperator from airflow.providers.standard.operators.trigger_dagrun import TriggerDagRunOperator from airflow.providers.standard.sensors.external_task import ExternalTaskSensor @@ -33,6 +36,53 @@ DAG_ID = "test_dag_id" DAG_ID_EXTERNAL_TRIGGER = "external_trigger" +LATEST_VERSION_DAG_RESPONSE: dict = { + "edges": [], + "nodes": [ + { + "children": None, + "id": "task1", + "is_mapped": None, + "label": "task1", + "tooltip": None, + "setup_teardown_type": None, + "type": "task", + "operator": "EmptyOperator", + "asset_condition_type": None, + }, + { + "children": None, + "id": "task2", + "is_mapped": None, + "label": "task2", + "tooltip": None, + "setup_teardown_type": None, + "type": "task", + "operator": "EmptyOperator", + "asset_condition_type": None, + }, + { + "children": None, + "id": "task3", + "is_mapped": None, + "label": "task3", + "tooltip": None, + "setup_teardown_type": None, + "type": "task", + "operator": "EmptyOperator", + "asset_condition_type": None, + }, + ], + "arrange": "LR", +} +SECOND_VERSION_DAG_RESPONSE: dict = copy.deepcopy(LATEST_VERSION_DAG_RESPONSE) +SECOND_VERSION_DAG_RESPONSE["nodes"] = [ + node for node in SECOND_VERSION_DAG_RESPONSE["nodes"] if node["id"] != "task3" +] +FIRST_VERSION_DAG_RESPONSE: dict = copy.deepcopy(SECOND_VERSION_DAG_RESPONSE) +FIRST_VERSION_DAG_RESPONSE["nodes"] = [ + node for node in FIRST_VERSION_DAG_RESPONSE["nodes"] if node["id"] != "task2" +] @pytest.fixture(autouse=True, scope="module") @@ -81,6 +131,21 @@ def make_dag(dag_maker, session, time_machine): dag_maker.sync_dagbag_to_db() +@pytest.fixture +def make_dag_with_multiple_version(dag_maker): + """ + Create DAG with multiple versions + + Version 1 will have 1 task, version 2 will have 2 tasks, and version 3 will have 3 tasks. 
+ """ + for version_number in range(1, 4): + with dag_maker(DAG_ID) as dag: + for i in range(version_number): + EmptyOperator(task_id=f"task{i+1}") + dag.sync_to_db() + SerializedDagModel.write_dag(dag, bundle_name="dag_maker") + + class TestStructureDataEndpoint: @pytest.mark.parametrize( "params, expected", @@ -407,7 +472,45 @@ def test_should_return_200(self, test_client, params, expected): assert response.status_code == 200 assert not DeepDiff(response.json(), expected, ignore_order=True) + @pytest.mark.parametrize( + "params, expected", + [ + pytest.param( + {"dag_id": DAG_ID}, + LATEST_VERSION_DAG_RESPONSE, + id="get_default_version", + ), + pytest.param( + {"dag_id": DAG_ID, "dag_version": 1}, + FIRST_VERSION_DAG_RESPONSE, + id="get_oldest_version", + ), + pytest.param( + {"dag_id": DAG_ID, "dag_version": 2}, + SECOND_VERSION_DAG_RESPONSE, + id="get_specific_version", + ), + pytest.param( + {"dag_id": DAG_ID, "dag_version": 3}, + LATEST_VERSION_DAG_RESPONSE, + id="get_latest_version", + ), + ], + ) + @pytest.mark.usefixtures("make_dag_with_multiple_version") + def test_should_return_200_with_multiple_versions(self, test_client, params, expected): + response = test_client.get("/ui/structure/structure_data", params=params) + assert response.status_code == 200 + assert response.json() == expected + def test_should_return_404(self, test_client): response = test_client.get("/ui/structure/structure_data", params={"dag_id": "not_existing"}) assert response.status_code == 404 assert response.json()["detail"] == "Dag with id not_existing was not found" + + def test_should_return_404_when_dag_version_not_found(self, test_client): + response = test_client.get( + "/ui/structure/structure_data", params={"dag_id": DAG_ID, "dag_version": 999} + ) + assert response.status_code == 404 + assert response.json()["detail"] == "Dag with id test_dag_id and version 999 was not found" diff --git a/tests/dag_processing/test_collection.py b/tests/dag_processing/test_collection.py index 65f56c96733c4..2a0a40a634afa 100644 --- a/tests/dag_processing/test_collection.py +++ b/tests/dag_processing/test_collection.py @@ -494,13 +494,14 @@ def _sync_perms(): ], ) @pytest.mark.usefixtures("clean_db") - def test_dagmodel_properties(self, attrs, expected, session, time_machine, testing_dag_bundle): + def test_dagmodel_properties(self, attrs, expected, session, time_machine, testing_dag_bundle, dag_maker): """Test that properties on the dag model are correctly set when dealing with a LazySerializedDag""" dt = tz.datetime(2020, 1, 5, 0, 0, 0) time_machine.move_to(dt, tick=False) tasks = attrs.pop("_tasks_", None) - dag = DAG("dag", **attrs) + with dag_maker("dag", **attrs) as dag: + ... if tasks: dag.add_tasks(tasks) @@ -513,7 +514,6 @@ def test_dagmodel_properties(self, attrs, expected, session, time_machine, testi dr1 = DagRun(logical_date=dt, run_id="test_run_id_1", **dr_kwargs, start_date=dt) session.add(dr1) session.commit() - update_dag_parsing_results_in_db("testing", None, [self.dag_to_lazy_serdag(dag)], {}, set(), session) orm_dag = session.get(DagModel, ("dag",)) @@ -526,20 +526,23 @@ def test_dagmodel_properties(self, attrs, expected, session, time_machine, testi assert orm_dag.last_parsed_time == dt - def test_existing_dag_is_paused_upon_creation(self, testing_dag_bundle, session): - dag = DAG("dag_paused", schedule=None) + def test_existing_dag_is_paused_upon_creation(self, testing_dag_bundle, session, dag_maker): + with dag_maker("dag_paused", schedule=None) as dag: + ... 
update_dag_parsing_results_in_db("testing", None, [self.dag_to_lazy_serdag(dag)], {}, set(), session) orm_dag = session.get(DagModel, ("dag_paused",)) assert orm_dag.is_paused is False - dag = DAG("dag_paused", schedule=None, is_paused_upon_creation=True) + with dag_maker("dag_paused", schedule=None, is_paused_upon_creation=True) as dag: + ... update_dag_parsing_results_in_db("testing", None, [self.dag_to_lazy_serdag(dag)], {}, set(), session) # Since the dag existed before, it should not follow the pause flag upon creation orm_dag = session.get(DagModel, ("dag_paused",)) assert orm_dag.is_paused is False - def test_bundle_name_and_version_are_stored(self, testing_dag_bundle, session): - dag = DAG("mydag", schedule=None) + def test_bundle_name_and_version_are_stored(self, testing_dag_bundle, session, dag_maker): + with dag_maker("mydag", schedule=None) as dag: + ... update_dag_parsing_results_in_db("testing", "1.0", [self.dag_to_lazy_serdag(dag)], {}, set(), session) orm_dag = session.get(DagModel, "mydag") assert orm_dag.bundle_name == "testing" diff --git a/tests/dag_processing/test_manager.py b/tests/dag_processing/test_manager.py index c0b9813973ceb..a68749f831ee0 100644 --- a/tests/dag_processing/test_manager.py +++ b/tests/dag_processing/test_manager.py @@ -21,7 +21,6 @@ import json import logging import os -import pathlib import random import signal import textwrap @@ -29,6 +28,7 @@ from collections import deque from datetime import datetime, timedelta from logging.config import dictConfig +from pathlib import Path from unittest import mock from unittest.mock import MagicMock @@ -71,12 +71,12 @@ pytestmark = pytest.mark.db_test logger = logging.getLogger(__name__) -TEST_DAG_FOLDER = pathlib.Path(__file__).parents[1].resolve() / "dags" +TEST_DAG_FOLDER = Path(__file__).parents[1].resolve() / "dags" DEFAULT_DATE = timezone.datetime(2016, 1, 1) -def _get_dag_file_paths(files: list[str]) -> list[DagFileInfo]: - return [DagFileInfo(bundle_name="testing", path=f) for f in files] +def _get_dag_file_paths(files: list[str | Path]) -> list[DagFileInfo]: + return [DagFileInfo(bundle_name="testing", bundle_path=TEST_DAGS_FOLDER, rel_path=Path(f)) for f in files] class TestDagFileProcessorManager: @@ -164,9 +164,9 @@ def test_start_new_processes_with_same_filepath(self): """ manager = DagFileProcessorManager(max_runs=1) - file_1 = DagFileInfo(bundle_name="testing", path="file_1.py") - file_2 = DagFileInfo(bundle_name="testing", path="file_2.py") - file_3 = DagFileInfo(bundle_name="testing", path="file_3.py") + file_1 = DagFileInfo(bundle_name="testing", rel_path=Path("file_1.py"), bundle_path=TEST_DAGS_FOLDER) + file_2 = DagFileInfo(bundle_name="testing", rel_path=Path("file_2.py"), bundle_path=TEST_DAGS_FOLDER) + file_3 = DagFileInfo(bundle_name="testing", rel_path=Path("file_3.py"), bundle_path=TEST_DAGS_FOLDER) manager._file_path_queue = deque([file_1, file_2, file_3]) # Mock that only one processor exists. 
This processor runs with 'file_1' @@ -188,7 +188,9 @@ def test_start_new_processes_with_same_filepath(self): def test_set_file_paths_when_processor_file_path_not_in_new_file_paths(self): """Ensure processors and file stats are removed when the file path is not in the new file paths""" manager = DagFileProcessorManager(max_runs=1) - file = DagFileInfo(bundle_name="testing", path="missing_file.txt") + file = DagFileInfo( + bundle_name="testing", rel_path=Path("missing_file.txt"), bundle_path=TEST_DAGS_FOLDER + ) manager._processors[file] = MagicMock() manager._file_stats[file] = DagFileStat() @@ -199,7 +201,7 @@ def test_set_file_paths_when_processor_file_path_not_in_new_file_paths(self): def test_set_file_paths_when_processor_file_path_is_in_new_file_paths(self): manager = DagFileProcessorManager(max_runs=1) - file = DagFileInfo(bundle_name="testing", path="abc.txt") + file = DagFileInfo(bundle_name="testing", rel_path=Path("abc.txt"), bundle_path=TEST_DAGS_FOLDER) mock_processor = MagicMock() manager._processors[file] = mock_processor @@ -286,7 +288,12 @@ def test_add_new_file_to_parsing_queue(self, mock_getmtime): ordered_files = _get_dag_file_paths(["file_3.py", "file_2.py", "file_1.py"]) assert manager._file_path_queue == deque(ordered_files) - manager.set_file_paths([*dag_files, DagFileInfo(bundle_name="testing", path="file_4.py")]) + manager.set_file_paths( + [ + *dag_files, + DagFileInfo(bundle_name="testing", rel_path=Path("file_4.py"), bundle_path=TEST_DAGS_FOLDER), + ] + ) manager.add_new_file_path_to_queue() ordered_files = _get_dag_file_paths(["file_4.py", "file_3.py", "file_2.py", "file_1.py"]) assert manager._file_path_queue == deque(ordered_files) @@ -299,7 +306,9 @@ def test_recently_modified_file_is_parsed_with_mtime_mode(self, mock_getmtime): """ freezed_base_time = timezone.datetime(2020, 1, 5, 0, 0, 0) initial_file_1_mtime = (freezed_base_time - timedelta(minutes=5)).timestamp() - dag_file = DagFileInfo(bundle_name="testing", path="file_1.py") + dag_file = DagFileInfo( + bundle_name="testing", rel_path=Path("file_1.py"), bundle_path=TEST_DAGS_FOLDER + ) dag_files = [dag_file] mock_getmtime.side_effect = [initial_file_1_mtime] @@ -369,9 +378,15 @@ def test_scan_stale_dags(self, testing_dag_bundle): test_dag_path = DagFileInfo( bundle_name="testing", - path=str(TEST_DAG_FOLDER / "test_example_bash_operator.py"), + rel_path=Path("test_example_bash_operator.py"), + bundle_path=TEST_DAGS_FOLDER, + ) + dagbag = DagBag( + test_dag_path.absolute_path, + read_dags_from_db=False, + include_examples=False, + bundle_path=test_dag_path.bundle_path, ) - dagbag = DagBag(test_dag_path.path, read_dags_from_db=False, include_examples=False) with create_session() as session: # Add stale DAG to the DB @@ -394,7 +409,11 @@ def test_scan_stale_dags(self, testing_dag_bundle): active_dag_count = ( session.query(func.count(DagModel.dag_id)) - .filter(DagModel.is_active, DagModel.fileloc == test_dag_path.path) + .filter( + DagModel.is_active, + DagModel.relative_fileloc == str(test_dag_path.rel_path), + DagModel.bundle_name == test_dag_path.bundle_name, + ) .scalar() ) assert active_dag_count == 1 @@ -403,7 +422,11 @@ def test_scan_stale_dags(self, testing_dag_bundle): active_dag_count = ( session.query(func.count(DagModel.dag_id)) - .filter(DagModel.is_active, DagModel.fileloc == test_dag_path.path) + .filter( + DagModel.is_active, + DagModel.relative_fileloc == str(test_dag_path.rel_path), + DagModel.bundle_name == test_dag_path.bundle_name, + ) .scalar() ) assert active_dag_count == 0 @@ 
-422,7 +445,11 @@ def test_kill_timed_out_processors_kill(self): processor = self.mock_processor() processor._process.create_time.return_value = timezone.make_aware(datetime.min).timestamp() - manager._processors = {DagFileInfo(bundle_name="testing", path="abc.txt"): processor} + manager._processors = { + DagFileInfo( + bundle_name="testing", rel_path=Path("abc.txt"), bundle_path=TEST_DAGS_FOLDER + ): processor + } with mock.patch.object(type(processor), "kill") as mock_kill: manager._kill_timed_out_processors() mock_kill.assert_called_once_with(signal.SIGKILL) @@ -436,7 +463,11 @@ def test_kill_timed_out_processors_no_kill(self): processor = self.mock_processor() processor._process.create_time.return_value = timezone.make_aware(datetime.max).timestamp() - manager._processors = {DagFileInfo(bundle_name="testing", path="abc.txt"): processor} + manager._processors = { + DagFileInfo( + bundle_name="testing", rel_path=Path("abc.txt"), bundle_path=TEST_DAGS_FOLDER + ): processor + } with mock.patch.object(type(processor), "kill") as mock_kill: manager._kill_timed_out_processors() mock_kill.assert_not_called() @@ -447,8 +478,13 @@ def test_kill_timed_out_processors_no_kill(self): pytest.param( [], "/opt/airflow/dags/test_dag.py", - b'{"file":"/opt/airflow/dags/test_dag.py","requests_fd":123,"callback_requests":[],' - b'"type":"DagFileParseRequest"}\n', + b"{" + b'"file":"/opt/airflow/dags/test_dag.py",' + b'"bundle_path":"/opt/airflow/dags",' + b'"requests_fd":123,' + b'"callback_requests":[],' + b'"type":"DagFileParseRequest"' + b"}\n", ), pytest.param( [ @@ -460,16 +496,28 @@ def test_kill_timed_out_processors_no_kill(self): ) ], "/opt/airflow/dags/dag_callback_dag.py", - b'{"file":"/opt/airflow/dags/dag_callback_dag.py","requests_fd":123,"callback_requests":' - b'[{"full_filepath":"/opt/airflow/dags/dag_callback_dag.py","msg":null,"dag_id":"dag_id",' - b'"run_id":"run_id","is_failure_callback":false,"type":"DagCallbackRequest"}],' - b'"type":"DagFileParseRequest"}\n', + b"{" + b'"file":"/opt/airflow/dags/dag_callback_dag.py",' + b'"bundle_path":"/opt/airflow/dags",' + b'"requests_fd":123,"callback_requests":' + b"[" + b"{" + b'"full_filepath":"/opt/airflow/dags/dag_callback_dag.py",' + b'"msg":null,' + b'"dag_id":"dag_id",' + b'"run_id":"run_id",' + b'"is_failure_callback":false,' + b'"type":"DagCallbackRequest"' + b"}" + b"]," + b'"type":"DagFileParseRequest"' + b"}\n", ), ], ) def test_serialize_callback_requests(self, callbacks, path, expected_buffer): processor = self.mock_processor() - processor._on_child_started(callbacks, path) + processor._on_child_started(callbacks, path, bundle_path=Path("/opt/airflow/dags")) # Verify the response was added to the buffer val = processor.stdin.getvalue() @@ -636,31 +684,40 @@ def test_fetch_callbacks_from_database_max_per_loop(self, tmp_path, configure_te @pytest.mark.skip("AIP-66: callbacks are not implemented yet") def test_callback_queue(self, tmp_path): + """ + This test has gotten a bit out of sync with the codebase. 
+ + I am just updating it to be consistent with the changes in DagFileInfo + """ # given manager = DagFileProcessorManager( max_runs=1, processor_timeout=365 * 86_400, ) - dag1_path = DagFileInfo(bundle_name="testing", path="/green_eggs/ham/file1.py") + dag1_path = DagFileInfo( + bundle_name="testing", rel_path=Path("green_eggs/ham/file1.py"), bundle_path=TEST_DAGS_FOLDER + ) dag1_req1 = DagCallbackRequest( - full_filepath="/green_eggs/ham/file1.py", + full_filepath=TEST_DAGS_FOLDER / "green_eggs/ham/file1.py", dag_id="dag1", run_id="run1", is_failure_callback=False, msg=None, ) dag1_req2 = DagCallbackRequest( - full_filepath="/green_eggs/ham/file1.py", + full_filepath=TEST_DAGS_FOLDER / "green_eggs/ham/file1.py", dag_id="dag1", run_id="run1", is_failure_callback=False, msg=None, ) - dag2_path = DagFileInfo(bundle_name="testing", path="/green_eggs/ham/file2.py") + dag2_path = DagFileInfo( + bundle_name="testing", rel_path=Path("green_eggs/ham/file2.py"), bundle_path=TEST_DAGS_FOLDER + ) dag2_req1 = DagCallbackRequest( - full_filepath="/green_eggs/ham/file2.py", + full_filepath=TEST_DAGS_FOLDER / "green_eggs/ham/file2.py", dag_id="dag2", run_id="run1", is_failure_callback=False, @@ -743,10 +800,12 @@ def test_bundles_are_refreshed(self): bundleone = MagicMock() bundleone.name = "bundleone" + bundleone.path = "/dev/null" bundleone.refresh_interval = 0 bundleone.get_current_version.return_value = None bundletwo = MagicMock() bundletwo.name = "bundletwo" + bundletwo.path = "/dev/null" bundletwo.refresh_interval = 300 bundletwo.get_current_version.return_value = None @@ -788,7 +847,7 @@ def _update_bundletwo_version(): manager.run() assert bundletwo.refresh.call_count == 2 - def test_bundles_versions_are_stored(self): + def test_bundles_versions_are_stored(self, session): config = [ { "name": "mybundle", @@ -798,7 +857,8 @@ def test_bundles_versions_are_stored(self): ] mybundle = MagicMock() - mybundle.name = "bundleone" + mybundle.name = "mybundle" + mybundle.path = "/dev/null" mybundle.refresh_interval = 0 mybundle.supports_versioning = True mybundle.get_current_version.return_value = "123" @@ -808,11 +868,11 @@ def test_bundles_versions_are_stored(self): with mock.patch( "airflow.dag_processing.bundles.manager.DagBundlesManager" ) as mock_bundle_manager: - mock_bundle_manager.return_value._bundle_config = {"bundleone": None} + mock_bundle_manager.return_value._bundle_config = {"mybundle": None} mock_bundle_manager.return_value.get_all_dag_bundles.return_value = [mybundle] manager = DagFileProcessorManager(max_runs=1) manager.run() with create_session() as session: - model = session.get(DagBundleModel, "bundleone") + model = session.get(DagBundleModel, "mybundle") assert model.version == "123" diff --git a/tests/dag_processing/test_processor.py b/tests/dag_processing/test_processor.py index a70890684f3b5..2ba95f15049ed 100644 --- a/tests/dag_processing/test_processor.py +++ b/tests/dag_processing/test_processor.py @@ -77,7 +77,12 @@ def _process_file( self, file_path, callback_requests: list[CallbackRequest] | None = None ) -> DagFileParsingResult: return _parse_file( - DagFileParseRequest(file=file_path, requests_fd=1, callback_requests=callback_requests or []), + DagFileParseRequest( + file=file_path, + bundle_path=TEST_DAG_FOLDER, + requests_fd=1, + callback_requests=callback_requests or [], + ), log=structlog.get_logger(), ) @@ -185,7 +190,7 @@ def test_parse_file_entrypoint_parses_dag_callbacks(spy_agency): _, w2 = socketpair() w.makefile("wb").write( - 
b'{"file":"/files/dags/wait.py","requests_fd":' + b'{"file":"/files/dags/wait.py","bundle_path":"/files/dags","requests_fd":' + str(w2.fileno()).encode("ascii") + b',"callback_requests": [{"full_filepath": "/files/dags/wait.py", ' b'"msg": "task_failure", "dag_id": "wait_to_fail", "run_id": ' @@ -237,7 +242,8 @@ def fake_collect_dags(self, *args, **kwargs): ) ] _parse_file( - DagFileParseRequest(file="A", requests_fd=1, callback_requests=requests), log=structlog.get_logger() + DagFileParseRequest(file="A", bundle_path="no matter", requests_fd=1, callback_requests=requests), + log=structlog.get_logger(), ) assert called is True diff --git a/tests/dags/test_invalid_param.py b/tests/dags/test_invalid_param.py index fb0d3c854d12d..547fc7c11253d 100644 --- a/tests/dags/test_invalid_param.py +++ b/tests/dags/test_invalid_param.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param", diff --git a/tests/dags/test_invalid_param2.py b/tests/dags/test_invalid_param2.py index 69ffda442301d..5678f46090c89 100644 --- a/tests/dags/test_invalid_param2.py +++ b/tests/dags/test_invalid_param2.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param2", diff --git a/tests/dags/test_invalid_param3.py b/tests/dags/test_invalid_param3.py index a8017a3402b66..ea3bfa202a319 100644 --- a/tests/dags/test_invalid_param3.py +++ b/tests/dags/test_invalid_param3.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param3", diff --git a/tests/dags/test_invalid_param4.py b/tests/dags/test_invalid_param4.py index bbfc7e970c51c..0156072ba11cf 100644 --- a/tests/dags/test_invalid_param4.py +++ b/tests/dags/test_invalid_param4.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param4", diff --git a/tests/dags/test_valid_param.py b/tests/dags/test_valid_param.py index afa0f98ce21d5..ddb858a9acc1e 100644 --- a/tests/dags/test_valid_param.py +++ b/tests/dags/test_valid_param.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_valid_param", diff --git a/tests/dags/test_valid_param2.py b/tests/dags/test_valid_param2.py index d59d6278c3a71..ee6920bd92ee7 100644 --- a/tests/dags/test_valid_param2.py +++ b/tests/dags/test_valid_param2.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_valid_param2", diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index bafddd532738f..78fffd45fd098 100644 --- 
a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -43,6 +43,7 @@ ParamValidationError, UnknownExecutorException, ) +from airflow.models import DagBag from airflow.models.asset import ( AssetActive, AssetAliasModel, @@ -63,7 +64,6 @@ ) from airflow.models.dag_version import DagVersion from airflow.models.dagrun import DagRun -from airflow.models.param import DagParam, Param from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import TaskInstance as TI from airflow.operators.empty import EmptyOperator @@ -73,6 +73,7 @@ from airflow.sdk.definitions._internal.contextmanager import TaskGroupContext from airflow.sdk.definitions._internal.templater import NativeEnvironment, SandboxedEnvironment from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetAll, AssetAny +from airflow.sdk.definitions.param import DagParam, Param from airflow.security import permissions from airflow.timetables.base import DagRunInfo, DataInterval, TimeRestriction, Timetable from airflow.timetables.simple import ( @@ -96,7 +97,6 @@ TestPriorityWeightStrategyPlugin, ) from tests_common.test_utils.asserts import assert_queries_count -from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import ( clear_db_assets, clear_db_dags, @@ -138,6 +138,15 @@ def clear_assets(): clear_db_assets() +TEST_DAGS_FOLDER = Path(__file__).parent.parent / "dags" + + +@pytest.fixture +def test_dags_bundle(configure_testing_dag_bundle): + with configure_testing_dag_bundle(TEST_DAGS_FOLDER): + yield + + def _create_dagrun( dag: DAG, *, @@ -150,7 +159,7 @@ def _create_dagrun( triggered_by_kwargs: dict = {"triggered_by": DagRunTriggeredByType.TEST} if AIRFLOW_V_3_0_PLUS else {} run_id = dag.timetable.generate_run_id( run_type=run_type, - logical_date=logical_date, + logical_date=logical_date, # type: ignore data_interval=data_interval, ) return dag.create_dagrun( @@ -1072,18 +1081,16 @@ def add_failed_dag_run(dag, id, logical_date): dag.clear() self._clean_up(dag_id) - def test_dag_is_deactivated_upon_dagfile_deletion(self): + def test_dag_is_deactivated_upon_dagfile_deletion(self, dag_maker): dag_id = "old_existing_dag" - dag_fileloc = "/usr/local/airflow/dags/non_existing_path.py" - dag = DAG(dag_id, schedule=None, is_paused_upon_creation=True) - dag.fileloc = dag_fileloc + with dag_maker(dag_id, schedule=None, is_paused_upon_creation=True) as dag: + ... 
session = settings.Session() dag.sync_to_db(session=session) orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one() assert orm_dag.is_active - assert orm_dag.fileloc == dag_fileloc DagModel.deactivate_deleted_dags(list_py_file_paths(settings.DAGS_FOLDER)) @@ -2355,50 +2362,27 @@ def test_dags_needing_dagruns_doesnot_send_dagmodel_with_import_errors(self, ses needed = query.all() assert needed == [] - @pytest.mark.parametrize( - ("fileloc", "expected_relative"), - [ - (os.path.join(settings.DAGS_FOLDER, "a.py"), Path("a.py")), - ("/tmp/foo.py", Path("/tmp/foo.py")), - ], - ) - def test_relative_fileloc(self, fileloc, expected_relative): - dag = DAG(dag_id="test", schedule=None) - dag.fileloc = fileloc + def test_relative_fileloc(self, session): + rel_path = "test_assets.py" + bundle_path = TEST_DAGS_FOLDER + file_path = bundle_path / rel_path + bag = DagBag(dag_folder=file_path, bundle_path=bundle_path) - assert dag.relative_fileloc == expected_relative + dag = bag.get_dag("dag_with_skip_task") + dag.sync_to_db(session=session) - @pytest.mark.parametrize( - "reader_dags_folder", [settings.DAGS_FOLDER, str(repo_root / "airflow/example_dags")] - ) - @pytest.mark.parametrize( - ("fileloc", "expected_relative"), - [ - (str(Path(settings.DAGS_FOLDER, "a.py")), Path("a.py")), - ("/tmp/foo.py", Path("/tmp/foo.py")), - ], - ) - def test_relative_fileloc_serialized( - self, fileloc, expected_relative, session, clear_dags, reader_dags_folder, testing_dag_bundle - ): - """ - The serialized dag model includes the dags folder as configured on the thing serializing - the dag. On the thing deserializing the dag, when determining relative fileloc, - we should use the dags folder of the processor. So even if the dags folder of - the deserializer is different (meaning that the full path is no longer relative to - the dags folder) then we should still get the relative fileloc as it existed on the - serializer process. When the full path is not relative to the configured dags folder, - then relative fileloc should just be the full path. 
- """ - dag = DAG(dag_id="test", schedule=None) - dag.fileloc = fileloc - dag.sync_to_db() - SerializedDagModel.write_dag(dag, bundle_name="dag_maker") + assert dag.fileloc == str(file_path) + assert dag.relative_fileloc == str(rel_path) + + SerializedDagModel.write_dag(dag, bundle_name="dag_maker", session=session) + session.commit() session.expunge_all() - sdm = SerializedDagModel.get(dag.dag_id, session) - dag = sdm.dag - with conf_vars({("core", "dags_folder"): reader_dags_folder}): - assert dag.relative_fileloc == expected_relative + dm = session.get(DagModel, dag.dag_id) + assert dm.fileloc == str(file_path) + assert dm.relative_fileloc == str(rel_path) + sdm = session.scalar(select(SerializedDagModel).where(SerializedDagModel.dag_id == dag.dag_id)) + assert sdm.dag.fileloc == str(file_path) + assert sdm.dag.relative_fileloc == str(rel_path) def test__processor_dags_folder(self, session, testing_dag_bundle): """Only populated after deserializtion""" diff --git a/tests/models/test_param.py b/tests/models/test_param.py index 152419db2fa4d..77cf96eda2226 100644 --- a/tests/models/test_param.py +++ b/tests/models/test_param.py @@ -22,278 +22,13 @@ from airflow.decorators import task from airflow.exceptions import ParamValidationError -from airflow.models.param import Param, ParamsDict -from airflow.serialization.serialized_objects import BaseSerialization +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.types import DagRunType from tests_common.test_utils.db import clear_db_dags, clear_db_runs, clear_db_xcom -class TestParam: - def test_param_without_schema(self): - p = Param("test") - assert p.resolve() == "test" - - p.value = 10 - assert p.resolve() == 10 - - def test_null_param(self): - p = Param() - with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): - p.resolve() - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - - p = Param(None) - assert p.resolve() is None - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - - p = Param(None, type="null") - assert p.resolve() is None - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - with pytest.raises(ParamValidationError): - p.resolve("test") - - def test_string_param(self): - p = Param("test", type="string") - assert p.resolve() == "test" - - p = Param("test") - assert p.resolve() == "test" - - p = Param("10.0.0.0", type="string", format="ipv4") - assert p.resolve() == "10.0.0.0" - - p = Param(type="string") - with pytest.raises(ParamValidationError): - p.resolve(None) - with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): - p.resolve() - - @pytest.mark.parametrize( - "dt", - [ - pytest.param("2022-01-02T03:04:05.678901Z", id="microseconds-zed-timezone"), - pytest.param("2022-01-02T03:04:05.678Z", id="milliseconds-zed-timezone"), - pytest.param("2022-01-02T03:04:05+00:00", id="seconds-00-00-timezone"), - pytest.param("2022-01-02T03:04:05+04:00", id="seconds-custom-timezone"), - ], - ) - def test_string_rfc3339_datetime_format(self, dt): - """Test valid rfc3339 datetime.""" - assert Param(dt, type="string", format="date-time").resolve() == dt - - @pytest.mark.parametrize( - "dt", - [ - pytest.param("2022-01-02", id="date"), - pytest.param("03:04:05", id="time"), - pytest.param("Thu, 04 Mar 2021 05:06:07 GMT", id="rfc2822-datetime"), - ], - ) - def 
test_string_datetime_invalid_format(self, dt): - """Test invalid iso8601 and rfc3339 datetime format.""" - with pytest.raises(ParamValidationError, match="is not a 'date-time'"): - Param(dt, type="string", format="date-time").resolve() - - def test_string_time_format(self): - """Test string time format.""" - assert Param("03:04:05", type="string", format="time").resolve() == "03:04:05" - - error_pattern = "is not a 'time'" - with pytest.raises(ParamValidationError, match=error_pattern): - Param("03:04:05.06", type="string", format="time").resolve() - - with pytest.raises(ParamValidationError, match=error_pattern): - Param("03:04", type="string", format="time").resolve() - - with pytest.raises(ParamValidationError, match=error_pattern): - Param("24:00:00", type="string", format="time").resolve() - - @pytest.mark.parametrize( - "date_string", - [ - "2021-01-01", - ], - ) - def test_string_date_format(self, date_string): - """Test string date format.""" - assert Param(date_string, type="string", format="date").resolve() == date_string - - # Note that 20120503 behaved differently in 3.11.3 Official python image. It was validated as a date - # there but it started to fail again in 3.11.4 released on 2023-07-05. - @pytest.mark.parametrize( - "date_string", - [ - "01/01/2021", - "21 May 1975", - "20120503", - ], - ) - def test_string_date_format_error(self, date_string): - """Test string date format failures.""" - with pytest.raises(ParamValidationError, match="is not a 'date'"): - Param(date_string, type="string", format="date").resolve() - - def test_int_param(self): - p = Param(5) - assert p.resolve() == 5 - - p = Param(type="integer", minimum=0, maximum=10) - assert p.resolve(value=5) == 5 - - with pytest.raises(ParamValidationError): - p.resolve(value=20) - - def test_number_param(self): - p = Param(42, type="number") - assert p.resolve() == 42 - - p = Param(1.2, type="number") - assert p.resolve() == 1.2 - - p = Param("42", type="number") - with pytest.raises(ParamValidationError): - p.resolve() - - def test_list_param(self): - p = Param([1, 2], type="array") - assert p.resolve() == [1, 2] - - def test_dict_param(self): - p = Param({"a": 1, "b": 2}, type="object") - assert p.resolve() == {"a": 1, "b": 2} - - def test_composite_param(self): - p = Param(type=["string", "number"]) - assert p.resolve(value="abc") == "abc" - assert p.resolve(value=5.0) == 5.0 - - def test_param_with_description(self): - p = Param(10, description="Sample description") - assert p.description == "Sample description" - - def test_suppress_exception(self): - p = Param("abc", type="string", minLength=2, maxLength=4) - assert p.resolve() == "abc" - - p.value = "long_string" - assert p.resolve(suppress_exception=True) is None - - def test_explicit_schema(self): - p = Param("abc", schema={type: "string"}) - assert p.resolve() == "abc" - - def test_custom_param(self): - class S3Param(Param): - def __init__(self, path: str): - schema = {"type": "string", "pattern": r"s3:\/\/(.+?)\/(.+)"} - super().__init__(default=path, schema=schema) - - p = S3Param("s3://my_bucket/my_path") - assert p.resolve() == "s3://my_bucket/my_path" - - p = S3Param("file://not_valid/s3_path") - with pytest.raises(ParamValidationError): - p.resolve() - - def test_value_saved(self): - p = Param("hello", type="string") - assert p.resolve("world") == "world" - assert p.resolve() == "world" - - def test_dump(self): - p = Param("hello", description="world", type="string", minLength=2) - dump = p.dump() - assert dump["__class"] == 
"airflow.models.param.Param" - assert dump["value"] == "hello" - assert dump["description"] == "world" - assert dump["schema"] == {"type": "string", "minLength": 2} - - @pytest.mark.parametrize( - "param", - [ - Param("my value", description="hello", schema={"type": "string"}), - Param("my value", description="hello"), - Param(None, description=None), - Param([True], type="array", items={"type": "boolean"}), - Param(), - ], - ) - def test_param_serialization(self, param: Param): - """ - Test to make sure that native Param objects can be correctly serialized - """ - - serializer = BaseSerialization() - serialized_param = serializer.serialize(param) - restored_param: Param = serializer.deserialize(serialized_param) - - assert restored_param.value == param.value - assert isinstance(restored_param, Param) - assert restored_param.description == param.description - assert restored_param.schema == param.schema - - -class TestParamsDict: - def test_params_dict(self): - # Init with a simple dictionary - pd = ParamsDict(dict_obj={"key": "value"}) - assert isinstance(pd.get_param("key"), Param) - assert pd["key"] == "value" - assert pd.suppress_exception is False - - # Init with a dict which contains Param objects - pd2 = ParamsDict({"key": Param("value", type="string")}, suppress_exception=True) - assert isinstance(pd2.get_param("key"), Param) - assert pd2["key"] == "value" - assert pd2.suppress_exception is True - - # Init with another object of another ParamsDict - pd3 = ParamsDict(pd2) - assert isinstance(pd3.get_param("key"), Param) - assert pd3["key"] == "value" - assert pd3.suppress_exception is False # as it's not a deepcopy of pd2 - - # Dump the ParamsDict - assert pd.dump() == {"key": "value"} - assert pd2.dump() == {"key": "value"} - assert pd3.dump() == {"key": "value"} - - # Validate the ParamsDict - plain_dict = pd.validate() - assert isinstance(plain_dict, dict) - pd2.validate() - pd3.validate() - - # Update the ParamsDict - with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): - pd3["key"] = 1 - - # Should not raise an error as suppress_exception is True - pd2["key"] = 1 - pd2.validate() - - def test_update(self): - pd = ParamsDict({"key": Param("value", type="string")}) - - pd.update({"key": "a"}) - internal_value = pd.get_param("key") - assert isinstance(internal_value, Param) - with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): - pd.update({"key": 1}) - - def test_repr(self): - pd = ParamsDict({"key": Param("value", type="string")}) - assert repr(pd) == "{'key': 'value'}" - - class TestDagParamRuntime: VALUE = 42 DEFAULT_DATE = timezone.datetime(2016, 1, 1) diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index e01daac205ad4..628945ebf2aaa 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -60,7 +60,6 @@ from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun from airflow.models.expandinput import EXPAND_INPUT_EMPTY, NotFullyPopulated -from airflow.models.param import process_params from airflow.models.pool import Pool from airflow.models.renderedtifields import RenderedTaskInstanceFields from airflow.models.serialized_dag import SerializedDagModel @@ -81,6 +80,7 @@ from airflow.providers.standard.operators.python import PythonOperator from airflow.providers.standard.sensors.python import PythonSensor from airflow.sdk.definitions.asset import Asset, AssetAlias +from airflow.sdk.definitions.param import 
process_params from airflow.sensors.base import BaseSensorOperator from airflow.serialization.serialized_objects import SerializedBaseOperator, SerializedDAG from airflow.stats import Stats @@ -3906,9 +3906,11 @@ def test_task_stats(self, stats_mock, create_task_instance): assert call("ti.start", tags={"dag_id": ti.dag_id, "task_id": ti.task_id}) in stats_mock.mock_calls assert stats_mock.call_count == (2 * len(State.task_states)) + 7 - def test_command_as_list(self, create_task_instance): - ti = create_task_instance() - ti.task.dag.fileloc = os.path.join(TEST_DAGS_FOLDER, "x.py") + def test_command_as_list(self, dag_maker): + with dag_maker(): + PythonOperator(python_callable=print, task_id="hi") + dr = dag_maker.create_dagrun() + ti = dr.task_instances[0] assert ti.command_as_list() == [ "airflow", "tasks", @@ -3917,7 +3919,7 @@ def test_command_as_list(self, create_task_instance): ti.task_id, ti.run_id, "--subdir", - "DAGS_FOLDER/x.py", + "DAGS_FOLDER/test_taskinstance.py", ] def test_generate_command_default_param(self): @@ -4005,6 +4007,7 @@ def test_refresh_from_db(self, create_task_instance): "operator": "some_custom_operator", "custom_operator_name": "some_custom_operator", "queued_dttm": run_date + datetime.timedelta(hours=1), + "scheduled_dttm": run_date + datetime.timedelta(hours=1), "rendered_map_index": None, "queued_by_job_id": 321, "pid": 123, diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index 5936a95b23d6d..f3afdbbf769cc 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -31,7 +31,7 @@ from pendulum import DateTime from pendulum.tz.timezone import FixedTimezone, Timezone -from airflow.models.param import Param, ParamsDict +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.serialization.serde import DATA, deserialize, serialize PENDULUM3 = version.parse(metadata.version("pendulum")).major == 3 diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 84a63674e5119..2cd1ce14a5073 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -63,13 +63,13 @@ from airflow.models.dagbag import DagBag from airflow.models.expandinput import EXPAND_INPUT_EMPTY from airflow.models.mappedoperator import MappedOperator -from airflow.models.param import Param, ParamsDict from airflow.models.xcom import XCom from airflow.operators.empty import EmptyOperator from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator from airflow.providers.standard.operators.bash import BashOperator from airflow.providers.standard.sensors.bash import BashSensor from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.security import permissions from airflow.serialization.enums import Encoding from airflow.serialization.json_schema import load_dag_schema_dict @@ -628,7 +628,7 @@ def validate_deserialized_dag(self, serialized_dag: DAG, dag: DAG): fields_to_check = dag.get_serialized_fields() - exclusion_list for field in fields_to_check: actual = getattr(serialized_dag, field) - expected = getattr(dag, field) + expected = getattr(dag, field, None) assert actual == expected, f"{dag.dag_id}.{field} does not match" # _processor_dags_folder is only populated at serialization time # it's only used when relying on serialized dag to determine a dag's relative 
path diff --git a/tests/serialization/test_serialized_objects.py b/tests/serialization/test_serialized_objects.py index 06bb477becdf4..ca6cb78a62794 100644 --- a/tests/serialization/test_serialized_objects.py +++ b/tests/serialization/test_serialized_objects.py @@ -37,12 +37,12 @@ from airflow.models.connection import Connection from airflow.models.dag import DAG from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance from airflow.models.xcom_arg import XComArg from airflow.operators.empty import EmptyOperator from airflow.providers.standard.operators.python import PythonOperator from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetAliasEvent, AssetUniqueKey +from airflow.sdk.definitions.param import Param from airflow.sdk.execution_time.context import OutletEventAccessor, OutletEventAccessors from airflow.serialization.enums import DagAttributeTypes as DAT, Encoding from airflow.serialization.serialized_objects import BaseSerialization diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index 44c4316058171..3efb0a13189cc 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -1094,6 +1094,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1131,6 +1132,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1168,6 +1170,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1205,6 +1208,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1242,6 +1246,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1279,6 +1284,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1316,6 +1322,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, diff --git a/tests/www/views/test_views_trigger_dag.py b/tests/www/views/test_views_trigger_dag.py index 17d0b687b8572..c4136520d7f9b 100644 --- a/tests/www/views/test_views_trigger_dag.py +++ b/tests/www/views/test_views_trigger_dag.py @@ -25,8 +25,8 @@ import pytest from airflow.models import DagBag, DagRun -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator +from airflow.sdk.definitions.param import Param from airflow.security import permissions from airflow.utils import timezone from airflow.utils.json import WebEncoder diff --git a/tests_common/pytest_plugin.py b/tests_common/pytest_plugin.py index 4d9a7d407129e..904b05b946a76 100644 --- 
a/tests_common/pytest_plugin.py +++ b/tests_common/pytest_plugin.py @@ -970,6 +970,7 @@ def __call__( serialized=want_serialized, activate_assets=want_activate_assets, fileloc=None, + relative_fileloc=None, bundle_name=None, session=None, **kwargs, @@ -1000,6 +1001,8 @@ def __call__( # other scheduling arguments are set. self.dag = DAG(dag_id, schedule=schedule, **self.kwargs) self.dag.fileloc = fileloc or request.module.__file__ + if AIRFLOW_V_3_0_PLUS: + self.dag.relative_fileloc = relative_fileloc or Path(request.module.__file__).name self.want_serialized = serialized self.want_activate_assets = activate_assets self.bundle_name = bundle_name or "dag_maker"