diff --git a/backend/dataspace/cdse_connector.py b/backend/dataspace/cdse_connector.py index 45c851c..39b7148 100644 --- a/backend/dataspace/cdse_connector.py +++ b/backend/dataspace/cdse_connector.py @@ -1,4 +1,5 @@ import logging +import os import re import httpx @@ -66,14 +67,18 @@ def get_available_files(self) -> list[tuple[str, str]]: def download_selected_files(self, files_to_download: list[tuple[str, str]]) -> list[str]: downloaded_files = [] + total_size = 0 for file_to_download in files_to_download: downloaded_file_path = self._cdse_s3_client.download_file( bucket_key=file_to_download[1], root_output_directory=self._workdir ) + file_size = os.path.getsize(downloaded_file_path) + total_size += file_size downloaded_files.append(str(downloaded_file_path)) + self._logger.error(f"BENCHMARK: {total_size / (1024 * 1024):.2f} MB FILES DOWNLOADED") return downloaded_files def get_polygon(self) -> list[list[float]]: diff --git a/backend/fastapi_server/routes/request_processing.py b/backend/fastapi_server/routes/request_processing.py index 20c4c80..31e9cbb 100644 --- a/backend/fastapi_server/routes/request_processing.py +++ b/backend/fastapi_server/routes/request_processing.py @@ -1,3 +1,5 @@ +import time + from fastapi import APIRouter, HTTPException from fastapi_server import fastapi_shared @@ -21,6 +23,7 @@ async def request_processing( processed_feature_model: ProcessedFeatureModel = ProcessedFeatureModel() ): logger.debug(f"[{__name__}]: request_feature_model: {processed_feature_model}") + logger.error(f"BENCHMARK: {time.time()} REQUEST RECEIVED") request_hash = processed_feature_model.hash_myself() logger.debug(f"[{__name__}]: request_hash: {request_hash}") diff --git a/backend/feature/processing/processed_feature.py b/backend/feature/processing/processed_feature.py index 76a3ec6..5ca3d6d 100644 --- a/backend/feature/processing/processed_feature.py +++ b/backend/feature/processing/processed_feature.py @@ -2,6 +2,7 @@ import logging import re import shutil +import time import docker @@ -207,11 +208,13 @@ def process_feature(self): print( f"[{__name__}]: Downloading feature {self._feature_id} started at {time_download_start}" ) + self._logger.error(f"BENCHMARK: {time.time()} DOWNLOADING FEATURE") downloaded_feature_files_paths = self._download_feature() time_download_finish = datetime.now(tz=timezone.utc) time_download_elapsed = time_download_finish - time_download_start + self._logger.error(f"BENCHMARK: {time.time()} FEATURE DOWNLOADED") print( f"[{__name__}]: Downloading feature {self._feature_id} finished at {time_download_finish}," f" elapsed {time_download_elapsed}" @@ -293,7 +296,9 @@ def _run_gjtiff_docker( gjtiff_container = docker.from_env().containers.get("oculus_gjtiff") + self._logger.error(f"BENCHMARK: {time.time()} RUN GJTIFF") stdout, stderr = gjtiff_container.exec_run(command, stdout=True, stderr=True, tty=False, demux=True).output + self._logger.error(f"BENCHMARK: {time.time()} FINISHED GJTIFF") if stderr: self._logger.error(f"[{__name__}]: gjtiff stderr: {stderr.decode('utf-8')}") diff --git a/backend/tasks/data_tasks.py b/backend/tasks/data_tasks.py index 39c3668..a82cbff 100644 --- a/backend/tasks/data_tasks.py +++ b/backend/tasks/data_tasks.py @@ -1,3 +1,5 @@ +import time + from celery_app import celery_app from database.mongo_database_connector import MongoDatabaseConnector from celery.utils.log import get_task_logger @@ -14,6 +16,7 @@ def init_db(): @celery_app.task(ignore_result=True) def process_feature_task(hash: str): + logger.error(f"BENCHMARK: {time.time()} TASK STARTED") init_db() # will have more complex payload once we implement additional bands processing for existing files logger.info(f"Task {hash}")