def find_semgrep_results(
    context: CodemodExecutionContext,
    codemods: Sequence[BaseCodemod],
    files_to_analyze: list[Path] | None = None,
) -> ResultSet:
    """Run semgrep a single time over the rule files of every given codemod.

    Only codemods whose detector is a ``SemgrepRuleDetector`` contribute
    rule files; each such detector is asked for its YAML files using the
    codemod's internal name.

    :param context: execution context forwarded to the semgrep runner.
    :param codemods: codemods whose semgrep rule files should be gathered.
    :param files_to_analyze: optional list of paths forwarded to the semgrep
        runner unchanged.
    :return: the ``ResultSet`` produced by the semgrep run, or an empty
        ``ResultSet`` when no codemod supplies any rule file (semgrep is not
        invoked in that case).
    """
    # One entry per semgrep-backed codemod: whatever its detector reports.
    rule_file_groups = []
    for codemod in codemods:
        detector = codemod.detector
        if detector and isinstance(detector, SemgrepRuleDetector):
            rule_file_groups.append(detector.get_yaml_files(codemod._internal_name))

    yaml_files = list(itertools.chain.from_iterable(rule_file_groups))
    if not yaml_files:
        # Nothing to scan with, so skip the semgrep invocation entirely.
        return ResultSet()

    return run_semgrep(context, yaml_files, files_to_analyze)
def log_report(context, output, elapsed_ms, files_to_analyze, token_usage):
    """Write the end-of-run summary to the log.

    Logs, in order: the number of scanned files, total and unique counts of
    failed and changed files as reported by ``context``, the report file
    location, the aggregate token usage, the total elapsed time, and the
    time recorded by ``context.timer`` for each processing stage.

    :param context: source of the failed/changed file lists and stage timer.
    :param output: report file location, logged as-is.
    :param elapsed_ms: total elapsed time in milliseconds, logged as-is.
    :param files_to_analyze: sized collection of the files that were scanned.
    :param token_usage: usage value passed on to ``log_token_usage``.
    """
    log_section("report")
    scanned_count = len(files_to_analyze)
    logger.info("scanned: %s files", scanned_count)

    failed_files = context.get_failed_files()
    unique_failed = set(failed_files)
    logger.info("failed: %s files (%s unique)", len(failed_files), len(unique_failed))

    changed_files = context.get_changed_files()
    unique_changed = set(changed_files)
    logger.info(
        "changed: %s files (%s unique)", len(changed_files), len(unique_changed)
    )

    logger.info("report file: %s", output)
    log_token_usage("All", token_usage)
    logger.info("total elapsed: %s ms", elapsed_ms)

    # (log message, timer key) for each stage, in the order it is reported.
    stage_lines = (
        (" semgrep: %s ms", "semgrep"),
        (" parse: %s ms", "parse"),
        (" transform: %s ms", "transform"),
        (" write: %s ms", "write"),
    )
    for message, stage in stage_lines:
        logger.info(message, context.timer.get_time_ms(stage))