Lee-W commented on code in PR #42612: URL: https://github.com/apache/airflow/pull/42612#discussion_r1793325941
########## airflow/models/asset.py: ########## @@ -232,6 +232,61 @@ def to_public(self) -> Asset: return Asset(uri=self.uri, extra=self.extra) +class AssetActive(Base): + """ + Collection of active assets. + + An asset is considered active if it is declared by the user in any DAG files. + AssetModel entries that are not active (also called orphaned in some parts + of the code base) are still kept in the database, but have their corresponding + entries in this table removed. This ensures we keep all possible history on + distinct assets (those with non-matching name-URI pairs), but still ensure + *name and URI are each unique* within active assets. + """ + + name = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + uri = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + + __tablename__ = "asset_active" + __table_args__ = ( + PrimaryKeyConstraint(name, uri, name="asset_active_pkey"), + ForeignKeyConstraint( + columns=[name, uri], + refcolumns=["dataset.name", "dataset.uri"], + name="asset_active_asset_name_uri_fkey", + ondelete="CASCADE", + ), + Index("idx_asset_active_name_unique", name, unique=True), + Index("idx_asset_active_uri_unique", uri, unique=True), + ) + + @classmethod + def for_asset(cls, asset: AssetModel) -> AssetActive: Review Comment: What does `for_asset` mean? ########## airflow/models/asset.py: ########## @@ -232,6 +232,61 @@ def to_public(self) -> Asset: return Asset(uri=self.uri, extra=self.extra) +class AssetActive(Base): + """ + Collection of active assets. + + An asset is considered active if it is declared by the user in any DAG files. 
+ AssetModel entries that are not active (also called orphaned in some parts + of the code base) are still kept in the database, but have their corresponding + entries in this table removed. This ensures we keep all possible history on + distinct assets (those with non-matching name-URI pairs), but still ensure + *name and URI are each unique* within active assets. + """ + + name = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + uri = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + + __tablename__ = "asset_active" + __table_args__ = ( + PrimaryKeyConstraint(name, uri, name="asset_active_pkey"), + ForeignKeyConstraint( + columns=[name, uri], + refcolumns=["dataset.name", "dataset.uri"], + name="asset_active_asset_name_uri_fkey", + ondelete="CASCADE", + ), + Index("idx_asset_active_name_unique", name, unique=True), + Index("idx_asset_active_uri_unique", uri, unique=True), + ) + + @classmethod + def for_asset(cls, asset: AssetModel) -> AssetActive: Review Comment: Should this be named `from_asset` instead? ```suggestion def from_asset(cls, asset: AssetModel) -> AssetActive: ``` ########## airflow/jobs/scheduler_job_runner.py: ########## @@ -2066,8 +2066,13 @@ def _orphan_unreferenced_assets(self, session: Session = NEW_SESSION) -> None: ) ) - updated_count = sum(self._set_orphaned(asset) for asset in orphaned_asset_query) - Stats.gauge("asset.orphaned", updated_count) + orphaning_identifiers = list(self._get_orphaning_identifier(asset) for asset in orphaned_asset_query) Review Comment: Why do we want to wrap a generator expression in `list(...)` here instead of using a list comprehension (`[...]`)? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org. For queries about this service, please contact Infrastructure at: users@infra.apache.org