Zkplo commented on code in PR #11568:
URL: https://github.com/apache/inlong/pull/11568#discussion_r1865161288


##########
inlong-manager/manager-schedule/src/test/resources/airflow/dag_cleaner.py:
##########
@@ -29,33 +29,46 @@
 from airflow import configuration
 
 DAG_PATH = configuration.get('core', 'dags_folder') + "/"
-
+DAG_PREFIX = 'inlong_offline_task_'
 
 def clean_expired_dags(**context):
+
     original_time = context.get('execution_date')
     target_timezone = pytz.timezone("Asia/Shanghai")
     utc_time = original_time.astimezone(target_timezone)
-    current_time = utc_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
-    logging.info(f"Current time: {current_time}")
-    for dag_file in os.listdir(DAG_PATH):
-        if dag_file.endswith(".py") and dag_file.startswith("inlong_offline_task_"):
-            with open(DAG_PATH + dag_file, "r") as file:
-                line = file.readline()
-                while line and "end_offset_datetime_str" not in line:
+    current_time = utc_time.strftime("%Y-%m-%d %H:%M:%S.%f")
+    logging.info(f"The execution time of this cleaning task is: {current_time}")

Review Comment:
   done.



##########
inlong-manager/manager-schedule/src/test/resources/airflow/dag_cleaner.py:
##########
@@ -29,33 +29,46 @@
 from airflow import configuration
 
 DAG_PATH = configuration.get('core', 'dags_folder') + "/"
-
+DAG_PREFIX = 'inlong_offline_task_'
 
 def clean_expired_dags(**context):
+
     original_time = context.get('execution_date')
     target_timezone = pytz.timezone("Asia/Shanghai")
     utc_time = original_time.astimezone(target_timezone)
-    current_time = utc_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
-    logging.info(f"Current time: {current_time}")
-    for dag_file in os.listdir(DAG_PATH):
-        if dag_file.endswith(".py") and dag_file.startswith("inlong_offline_task_"):
-            with open(DAG_PATH + dag_file, "r") as file:
-                line = file.readline()
-                while line and "end_offset_datetime_str" not in line:
+    current_time = utc_time.strftime("%Y-%m-%d %H:%M:%S.%f")
+    logging.info(f"The execution time of this cleaning task is: {current_time}")
+
+    conf = context.get('dag_run').conf
+    logging.info(f"Execution parameters for this cleaning task: {conf}")
+    groupId = conf.get('inlong_group_id')
+
+    if groupId is None or len(groupId) == 0:
+        for dag_file in os.listdir(DAG_PATH):
+            if dag_file.endswith(".py") and dag_file.startswith(DAG_PREFIX):
+                dag_file_path = os.path.join(DAG_PATH, dag_file)
+                with open(dag_file_path, "r") as file:
                     line = file.readline()
-                end_date_str = None
-                if len(line.split("=")) > 1:
-                    end_date_str = line.split("=")[1].strip().strip("\"")
-                logging.info(f"DAG end time: {end_date_str}")
-                if end_date_str:
+                    while line and "end_offset_datetime_str" not in line:
+                        line = file.readline()
+                    end_date_str = None
+                    row = line.split("=")
+                    if len(row) > 1:
+                        end_date_str = datetime.fromtimestamp(int(row[1].strip().strip("\"")) / 1000, tz=target_timezone)
+                    logging.info(f"The end time of '{dag_file}' is: {end_date_str}")

Review Comment:
   done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@inlong.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to