diff --git a/.changeset/empty-years-press.md b/.changeset/empty-years-press.md new file mode 100644 index 000000000..452dfc17a --- /dev/null +++ b/.changeset/empty-years-press.md @@ -0,0 +1,5 @@ +--- +"@codemod.com/jssg-types": minor +--- + +feat: new jssgTransform type and SgRoot.rename() API diff --git a/Cargo.lock b/Cargo.lock index b8b708408..7ad742ea5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -244,6 +244,21 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "ast-grep-dynamic" +version = "0.39.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f9f4ca501155a8f22b8b6d0a59381ce1f57e0228ce6d23d1719611d2c368450" +dependencies = [ + "ast-grep-core", + "ignore", + "libloading", + "serde", + "target-triple", + "thiserror 2.0.16", + "tree-sitter", +] + [[package]] name = "ast-grep-language" version = "0.39.7" @@ -717,7 +732,7 @@ dependencies = [ [[package]] name = "butterflow-core" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "async-trait", @@ -756,7 +771,7 @@ dependencies = [ [[package]] name = "butterflow-models" -version = "1.3.14" +version = "1.4.0" dependencies = [ "chrono", "codemod-llrt-capabilities", @@ -774,7 +789,7 @@ dependencies = [ [[package]] name = "butterflow-runners" -version = "1.3.14" +version = "1.4.0" dependencies = [ "async-trait", "butterflow-models", @@ -787,7 +802,7 @@ dependencies = [ [[package]] name = "butterflow-scheduler" -version = "1.3.14" +version = "1.4.0" dependencies = [ "butterflow-models", "chrono", @@ -804,7 +819,7 @@ dependencies = [ [[package]] name = "butterflow-state" -version = "1.3.14" +version = "1.4.0" dependencies = [ "async-trait", "butterflow-models", @@ -1037,7 +1052,7 @@ dependencies = [ [[package]] name = "codemod" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "ast-grep-config", @@ -1100,7 +1115,7 @@ dependencies = [ [[package]] name = "codemod-ai" -version = "1.3.14" +version = "1.4.0" dependencies = [ "async-trait", "coro-core", @@ 
-1126,7 +1141,7 @@ dependencies = [ [[package]] name = "codemod-llrt-capabilities" -version = "1.3.14" +version = "1.4.0" dependencies = [ "llrt_modules", "serde", @@ -1135,7 +1150,7 @@ dependencies = [ [[package]] name = "codemod-mcp" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "ast-grep-core", @@ -1153,10 +1168,11 @@ dependencies = [ [[package]] name = "codemod-sandbox" -version = "1.3.14" +version = "1.4.0" dependencies = [ "ast-grep-config", "ast-grep-core", + "ast-grep-dynamic", "ast-grep-language", "bytes", "codemod-llrt-capabilities", @@ -1174,11 +1190,13 @@ dependencies = [ "serde-wasm-bindgen", "serde_json", "serde_yaml", + "sha2 0.10.9", "swc_core", "swc_ts_fast_strip", "tempfile", "thiserror 2.0.16", "tokio", + "tree-sitter-loader", "wasm-bindgen", "wasm-bindgen-futures", "web-tree-sitter-sg", @@ -1200,7 +1218,7 @@ dependencies = [ [[package]] name = "codemod-telemetry" -version = "1.3.14" +version = "1.4.0" dependencies = [ "async-trait", "chrono", @@ -3457,7 +3475,7 @@ dependencies = [ [[package]] name = "language-core" -version = "1.3.14" +version = "1.4.0" dependencies = [ "serde", "tempfile", @@ -3467,7 +3485,7 @@ dependencies = [ [[package]] name = "language-javascript" -version = "1.3.14" +version = "1.4.0" dependencies = [ "ignore", "language-core", @@ -3484,7 +3502,7 @@ dependencies = [ [[package]] name = "language-python" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "language-core", @@ -6290,6 +6308,7 @@ dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2 0.4.12", @@ -7500,7 +7519,7 @@ checksum = "16c2f82143577edb4921b71ede051dac62ca3c16084e918bf7b40c96ae10eb33" [[package]] name = "semantic-factory" -version = "1.3.14" +version = "1.4.0" dependencies = [ "language-core", "language-javascript", @@ -9078,6 +9097,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-triple" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790" + [[package]] name = "tempfile" version = "3.22.0" @@ -9135,7 +9160,7 @@ dependencies = [ [[package]] name = "testing-utils" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "codemod-llrt-capabilities", @@ -9718,6 +9743,17 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" +[[package]] +name = "tree-sitter-loader" +version = "1.4.0" +dependencies = [ + "ast-grep-dynamic", + "dirs", + "log", + "reqwest 0.12.23", + "thiserror 2.0.16", +] + [[package]] name = "tree-sitter-lua" version = "0.2.0" @@ -11170,7 +11206,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.3.14" +version = "1.4.0" dependencies = [ "anyhow", "butterflow-models", diff --git a/Cargo.toml b/Cargo.toml index 7eff4721c..b5e917293 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "crates/language-javascript", "crates/language-python", "crates/semantic-factory", + "crates/tree-sitter-loader", ] resolver = "2" @@ -31,7 +32,7 @@ repository = "https://github.com/codemod/codemod" homepage = "https://github.com/codemod/codemod" license = "Apache-2.0" rust-version = "1.76" -version = "1.3.14" +version = "1.4.0" edition = "2021" [workspace.dependencies] @@ -50,6 +51,7 @@ language-core = { path = "crates/language-core" } language-javascript = { path = "crates/language-javascript" } language-python = { path = "crates/language-python" } semantic-factory = { path = "crates/semantic-factory" } +tree-sitter-loader = { path = "crates/tree-sitter-loader" } # Ruff Python semantic analysis and ty_ide ruff_python_semantic = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.0" } @@ -65,6 +67,7 @@ ty_python_semantic = { git = "https://github.com/astral-sh/ruff.git", tag = "0.1 anyhow = "1.0" ast-grep-core = "0.39.7" 
ast-grep-config = "0.39.7" +ast-grep-dynamic = "0.39.7" ast-grep-language = "0.39.7" async-trait = "0.1" chrono = { version = "0.4", features = ["serde"] } diff --git a/crates/cli/src/commands/jssg/config.rs b/crates/cli/src/commands/jssg/config.rs index 96b41e39b..5fd3bc73b 100644 --- a/crates/cli/src/commands/jssg/config.rs +++ b/crates/cli/src/commands/jssg/config.rs @@ -23,6 +23,10 @@ pub struct TestConfig { /// Parameters to pass to the codemod pub params: Option>, + + /// Expected file extension after rename (e.g., ".mjs", ".css"). + /// If set, tests verify the codemod's rename() target ends with this extension. + pub expected_extension: Option, } /// Merged configuration from CLI args and config files @@ -35,6 +39,7 @@ pub struct ResolvedTestConfig { pub ignore_whitespace: bool, pub expect_errors: Vec, pub params: Option>, + pub expected_extension: Option, // Global-only options (CLI args only) pub filter: Option, @@ -150,6 +155,9 @@ impl TestConfig { if other.params.is_some() { self.params = other.params; } + if other.expected_extension.is_some() { + self.expected_extension = other.expected_extension; + } } } @@ -202,6 +210,7 @@ impl ResolvedTestConfig { merged_config.expect_errors.clone().unwrap_or_default() }; let params = merged_config.params.clone(); + let expected_extension = merged_config.expected_extension.clone(); Ok(Self { language, @@ -218,6 +227,7 @@ impl ResolvedTestConfig { reporter, context_lines, params, + expected_extension, }) } } diff --git a/crates/cli/src/commands/jssg/list_applicable.rs b/crates/cli/src/commands/jssg/list_applicable.rs index a1b6b8616..8d9818826 100644 --- a/crates/cli/src/commands/jssg/list_applicable.rs +++ b/crates/cli/src/commands/jssg/list_applicable.rs @@ -1,12 +1,12 @@ use anyhow::Result; use ast_grep_config::CombinedScan; -use ast_grep_language::SupportLang; use butterflow_core::execution::CodemodExecutionConfig; use clap::Args; use codemod_sandbox::sandbox::engine::{extract_selector_with_quickjs, 
SelectorEngineOptions}; use codemod_sandbox::sandbox::resolvers::OxcResolver; use codemod_sandbox::scan_file_with_combined_scan; use codemod_sandbox::utils::project_discovery::find_tsconfig; +use codemod_sandbox::CodemodLang; use std::sync::Arc; use std::{ path::{Path, PathBuf}, @@ -100,7 +100,7 @@ pub async fn handler(args: &Command) -> Result<()> { capabilities: config.capabilities.clone(), }) .await?; - let combined_scan: Option>> = selector_config + let combined_scan: Option>> = selector_config .as_ref() .map(|c| Arc::new(CombinedScan::new(vec![c]))); diff --git a/crates/cli/src/commands/jssg/run.rs b/crates/cli/src/commands/jssg/run.rs index 43d33b6bf..c76a08627 100644 --- a/crates/cli/src/commands/jssg/run.rs +++ b/crates/cli/src/commands/jssg/run.rs @@ -10,8 +10,7 @@ use butterflow_core::utils::generate_execution_id; use butterflow_core::utils::parse_params; use butterflow_core::{execution::CodemodExecutionConfig, execution::PreRunCallback}; use clap::Args; -use codemod_sandbox::sandbox::engine::ExecutionResult; -use codemod_sandbox::sandbox::engine::JssgExecutionOptions; +use codemod_sandbox::sandbox::engine::{CodemodOutput, ExecutionResult, JssgExecutionOptions}; use codemod_sandbox::sandbox::{ engine::execute_codemod_with_quickjs, filesystem::RealFileSystem, resolvers::OxcResolver, }; @@ -180,7 +179,7 @@ pub async fn handler(args: &Command, telemetry: TelemetrySenderMutex) -> Result< } let capabilities_for_closure = config.capabilities.clone(); - let language: ast_grep_language::SupportLang = args + let language: codemod_sandbox::CodemodLang = args .language .clone() .parse() @@ -220,43 +219,98 @@ pub async fn handler(args: &Command, telemetry: TelemetrySenderMutex) -> Result< capabilities: capabilities_for_closure.clone(), semantic_provider: semantic_provider.clone(), metrics_context: Some(metrics_context_clone.clone()), + test_mode: false, + target_directory: Some(&target_directory), }; // Execute the codemod on this file match 
execute_codemod_with_quickjs(options).await { - Ok(execution_output) => { - // Handle the execution output (write back if modified and not dry run) - if let ExecutionResult::Modified(ref new_content) = execution_output { - if !config.dry_run { - if let Err(e) = tokio::fs::write(&file_path, new_content).await { - error!( - "Failed to write modified file {}: {}", - file_path.display(), - e - ); + Ok(CodemodOutput { primary, secondary }) => { + // Collect all file changes: primary + secondary from jssgTransform + let mut all_changes: Vec<( + std::path::PathBuf, + &codemod_sandbox::sandbox::engine::ExecutionResult, + )> = Vec::new(); + if let ExecutionResult::Modified(_) = &primary { + all_changes.push((file_path.to_path_buf(), &primary)); + } + for change in &secondary { + if let ExecutionResult::Modified(_) = &change.result { + all_changes.push((change.path.clone(), &change.result)); + } + } + + for (change_path, change_result) in &all_changes { + if let ExecutionResult::Modified(ref modified) = change_result { + let write_path = modified.rename_to.as_deref().unwrap_or(change_path); + if !config.dry_run { + if let Err(e) = + tokio::fs::write(write_path, &modified.content).await + { + error!( + "Failed to write modified file {}: {}", + write_path.display(), + e + ); + } else { + // If renamed, delete the original file + if modified.rename_to.is_some() + && write_path != change_path.as_path() + { + if let Err(e) = tokio::fs::remove_file(change_path).await { + error!( + "Failed to remove original file {}: {}", + change_path.display(), + e + ); + } else { + debug!( + "Renamed file: {} -> {}", + change_path.display(), + write_path.display() + ); + } + } else { + debug!("Modified file: {}", change_path.display()); + } + // Notify semantic provider of the change + if let Some(ref provider) = semantic_provider { + let _ = provider + .notify_file_processed(write_path, &modified.content); + } + } } else { - debug!("Modified file: {}", file_path.display()); - // Notify semantic 
provider of the change - if let Some(ref provider) = semantic_provider { - let _ = provider.notify_file_processed(file_path, new_content); + // Dry-run mode: print diff + if modified.rename_to.is_some() { + println!( + "Rename: {} -> {}", + change_path.display(), + write_path.display() + ); } + // For secondary changes, read original content from disk + let original = if change_path == file_path { + content.clone() + } else { + tokio::fs::read_to_string(change_path) + .await + .unwrap_or_default() + }; + let diff = generate_unified_diff( + change_path, + &original, + &modified.content, + &diff_config, + ); + diff.print(); + debug!("Would modify file (dry run): {}", change_path.display()); } - } else { - // Dry-run mode: print diff - let diff = generate_unified_diff( - file_path, - &content, - new_content, - &diff_config, - ); - diff.print(); - debug!("Would modify file (dry run): {}", file_path.display()); } } } Err(e) => { error!( - "Failed to execute codemod on {}:\n{:?}", + "Failed to execute codemod on {}:\n{}", file_path.display(), e ); diff --git a/crates/cli/src/commands/jssg/test.rs b/crates/cli/src/commands/jssg/test.rs index 87ede80da..302de693f 100644 --- a/crates/cli/src/commands/jssg/test.rs +++ b/crates/cli/src/commands/jssg/test.rs @@ -1,6 +1,6 @@ use anyhow::Result; use clap::Args; -use codemod_sandbox::sandbox::engine::{ExecutionResult, JssgExecutionOptions}; +use codemod_sandbox::sandbox::engine::{CodemodOutput, ExecutionResult, JssgExecutionOptions}; use language_core::SemanticProvider; use semantic_factory::LazySemanticProvider; use std::collections::HashSet; @@ -8,8 +8,8 @@ use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; -use ast_grep_language::SupportLang; use codemod_llrt_capabilities::types::LlrtSupportedModules; +use codemod_sandbox::CodemodLang; use codemod_sandbox::{ sandbox::{ engine::{execute_codemod_with_quickjs, language_data::get_extensions_for_language}, @@ -17,7 +17,7 @@ use codemod_sandbox::{ }, 
utils::project_discovery::find_tsconfig, }; -use testing_utils::{TestOptions, TestRunner, TestSource, TransformationResult}; +use testing_utils::{TestOptions, TestRunner, TestSource, TransformOutput, TransformationResult}; use crate::utils::resolve_capabilities::{resolve_capabilities, ResolveCapabilitiesArgs}; @@ -128,7 +128,9 @@ pub async fn handler(args: &Command) -> Result<()> { ) })?; - let default_language_enum: SupportLang = default_language_str.parse()?; + let default_language_enum: CodemodLang = default_language_str + .parse() + .map_err(|e: String| anyhow::anyhow!("{}", e))?; let strictness: testing_utils::Strictness = args .strictness @@ -150,6 +152,7 @@ pub async fn handler(args: &Command) -> Result<()> { expect_errors: global_config.expect_errors, strictness, language: global_config.language.clone(), + expected_extension: global_config.expected_extension.clone(), }; let script_base_dir = codemod_path @@ -208,7 +211,9 @@ pub async fn handler(args: &Command) -> Result<()> { .language .as_ref() .ok_or_else(|| anyhow::anyhow!("Language must be specified for test case"))?; - let language_enum: SupportLang = language_str.parse()?; + let language_enum: CodemodLang = language_str + .parse() + .map_err(|e: String| anyhow::anyhow!("{}", e))?; let options = JssgExecutionOptions { script_path: &codemod_path, @@ -222,15 +227,23 @@ pub async fn handler(args: &Command) -> Result<()> { capabilities, semantic_provider, metrics_context: None, + test_mode: true, + target_directory: None, }; - let execution_output = execute_codemod_with_quickjs(options).await?; - - match execution_output { - ExecutionResult::Modified(content) => { - Ok(TransformationResult::Success(content)) + let CodemodOutput { primary, .. 
} = execute_codemod_with_quickjs(options).await?; + + match primary { + ExecutionResult::Modified(modified) => { + Ok(TransformationResult::Success(TransformOutput { + content: modified.content, + rename_to: modified.rename_to, + })) } ExecutionResult::Unmodified | ExecutionResult::Skipped => { - Ok(TransformationResult::Success(input_code)) + Ok(TransformationResult::Success(TransformOutput { + content: input_code, + rename_to: None, + })) } } }) diff --git a/crates/codemod-sandbox/Cargo.toml b/crates/codemod-sandbox/Cargo.toml index 285063e99..7bbe22c52 100644 --- a/crates/codemod-sandbox/Cargo.toml +++ b/crates/codemod-sandbox/Cargo.toml @@ -16,6 +16,7 @@ tokio = { version = "1.0", features = [ ], optional = true } bytes = "1.0" dashmap = "6" +sha2 = "0.10" oxc_resolver = "11.13.1" oxc = { version = "0.99.0", features = ["transformer", "codegen", "semantic"] } swc_core = { version = "33.0", features = [ @@ -39,8 +40,8 @@ wasm-bindgen-futures = { workspace = true, optional = true } js-sys = { workspace = true, optional = true } web-tree-sitter-sg = { git = "https://github.com/mohebifar/tree-sitter-wasm.git", branch = "upgrade/0.25.4", optional = true } serde-wasm-bindgen = { version = "0.6", optional = true } -ast-grep-core = { workspace = true, default-features = false, optional = true } -ast-grep-config = { workspace = true, default-features = false, optional = true } +ast-grep-core = { workspace = true, optional = true } +ast-grep-config = { workspace = true, optional = true } ast-grep-language = { workspace = true, default-features = true, optional = true } ignore = { workspace = true, optional = true } serde_yaml = { workspace = true, optional = true } @@ -48,6 +49,8 @@ codemod-llrt-capabilities = { workspace = true, optional = true, features = ["na language-core = { workspace = true, optional = true } language-javascript = { workspace = true, optional = true } language-python = { workspace = true, optional = true } +tree-sitter-loader = { workspace = true, 
optional = true } +ast-grep-dynamic = { workspace = true, optional = true } [dev-dependencies] tempfile = { workspace = true } @@ -81,6 +84,8 @@ native = [ "language-core", "language-javascript", "language-python", + "tree-sitter-loader", + "ast-grep-dynamic", ] jssg-in-memory = ["native"] real-fs = ["tokio", "ignore"] diff --git a/crates/codemod-sandbox/src/ast_grep/mod.rs b/crates/codemod-sandbox/src/ast_grep/mod.rs index eaf16fde8..e045e119c 100644 --- a/crates/codemod-sandbox/src/ast_grep/mod.rs +++ b/crates/codemod-sandbox/src/ast_grep/mod.rs @@ -11,14 +11,32 @@ pub mod wasm_utils; #[cfg(feature = "native")] pub mod native; -#[cfg(not(feature = "wasm"))] +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] use ast_grep_language::{LanguageExt, SupportLang}; +#[cfg(feature = "native")] +use crate::sandbox::engine::codemod_lang::CodemodLang; +#[cfg(feature = "native")] +use ast_grep_core::tree_sitter::LanguageExt; + #[cfg(feature = "wasm")] use ast_grep_core::language::Language; use rquickjs::module::{Declarations, Exports, ModuleDef}; use rquickjs::{prelude::Func, Class, Ctx, Exception, Object, Result}; +#[cfg(feature = "native")] +use rquickjs::{Function, Value}; + +use crate::sandbox::engine::execution_engine::{ + validate_path_within_target, FileChange, JssgExecutionContext, JssgFileChanges, +}; +use crate::sandbox::engine::transform_helpers::{ + build_transform_options, process_transform_result, ModificationCheck, +}; +use crate::sandbox::engine::ExecutionModeFlag; +use crate::utils::quickjs_utils::maybe_promise; +use std::str::FromStr; +use std::sync::Arc; use sg_node::{SgNodeRjs, SgRootRjs}; @@ -41,6 +59,8 @@ impl ModuleDef for AstGrepModule { declare.declare("default")?; #[cfg(feature = "native")] declare.declare("parseFile")?; + #[cfg(feature = "native")] + declare.declare("jssgTransform")?; Ok(()) } @@ -55,6 +75,8 @@ impl ModuleDef for AstGrepModule { { default.set("parseFile", Func::from(parse_file_rjs))?; exports.export("parseFile", 
Func::from(parse_file_rjs))?; + default.set("jssgTransform", Func::from(jssg_transform_rjs))?; + exports.export("jssgTransform", Func::from(jssg_transform_rjs))?; } exports.export("default", default)?; exports.export("parse", Func::from(parse_rjs))?; @@ -104,7 +126,7 @@ fn kind_rjs(ctx: Ctx<'_>, lang: String, kind_name: String) -> Result { Ok(kind) } -#[cfg(not(feature = "wasm"))] +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] fn kind_rjs(ctx: Ctx<'_>, lang: String, kind_name: String) -> Result { use std::str::FromStr; @@ -117,3 +139,128 @@ fn kind_rjs(ctx: Ctx<'_>, lang: String, kind_name: String) -> Result { Ok(kind) } + +#[cfg(feature = "native")] +fn kind_rjs(ctx: Ctx<'_>, lang: String, kind_name: String) -> Result { + use std::str::FromStr; + + let lang = CodemodLang::from_str(&lang) + .map_err(|e| Exception::throw_message(&ctx, &format!("Language error: {e}")))?; + + let kind = lang + .get_ts_language() + .id_for_node_kind(&kind_name, /* named */ true); + + Ok(kind) +} + +/// Execute a transform function on a file, writing back the result. +/// +/// `jssgTransform(transformFn, pathToFile, language)` reads the file, +/// parses it, calls the transform, and writes back content + handles rename. +/// +/// Returns a promise that resolves when the transform is complete. 
+#[cfg(feature = "native")] +fn jssg_transform_rjs<'js>( + ctx: Ctx<'js>, + transform_fn: Function<'js>, + path_to_file: String, + language: String, +) -> Result> { + let should_noop = ctx + .userdata::() + .map(|f| f.test_mode) + .unwrap_or(true); // No flag = in-memory engine → no-op + if should_noop { + let ctx2 = ctx.clone(); + let promise = rquickjs::Promise::wrap_future(&ctx, async move { + Ok::<_, rquickjs::Error>(Value::new_null(ctx2)) + })?; + return Ok(promise.into_value()); + } + + let file_changes = ctx + .userdata::() + .map(|guard| guard.clone()) + .ok_or_else(|| Exception::throw_message(&ctx, "JssgFileChanges not found in userdata"))?; + + let exec_ctx = ctx.userdata::(); + let params = exec_ctx + .as_ref() + .map(|c| c.params.clone()) + .unwrap_or_default(); + let matrix_values = exec_ctx.as_ref().and_then(|c| c.matrix_values.clone()); + + let file_path = std::path::Path::new(&path_to_file); + + // Validate: file path must resolve within the target directory + validate_path_within_target(&ctx, file_path, "jssgTransform()")?; + + // Read the file + let content = std::fs::read_to_string(file_path).map_err(|e| { + Exception::throw_message( + &ctx, + &format!("Failed to read file '{}': {}", path_to_file, e), + ) + })?; + + // Parse with language and filename + let sg_root = SgRootRjs::try_new(language, content.clone(), Some(path_to_file.clone())) + .map_err(|e| Exception::throw_message(&ctx, &format!("Failed to parse: {e}")))?; + + let sg_root_inner = Arc::clone(&sg_root.inner); + + let lang_str = CodemodLang::from_str(sg_root.inner.grep.lang().to_string().as_str()) + .map(|l| l.to_string()) + .unwrap_or_default(); + + let run_options = build_transform_options(&ctx, params, &lang_str, matrix_values, None) + .map_err(|e| Exception::throw_message(&ctx, &format!("Failed to build options: {e}")))?; + + // Call the transform function + let result_val: Value<'js> = transform_fn.call((sg_root, run_options))?; + + // Create a promise to handle async transforms 
+ let ctx2 = ctx.clone(); + let promise = rquickjs::Promise::wrap_future(&ctx, async move { + let result = maybe_promise(result_val) + .await + .map_err(|e| Exception::throw_message(&ctx2, &format!("Transform failed: {e}")))?; + + let exec_result = process_transform_result( + &result, + &sg_root_inner, + ModificationCheck::StringEquality { + original_content: &content, + }, + ) + .map_err(|e| Exception::throw_message(&ctx2, &format!("Transform result error: {e}")))?; + + // Extract content before pushing to accumulator + let return_content = match &exec_result { + crate::sandbox::engine::ExecutionResult::Modified(modified) => { + Some(modified.content.clone()) + } + _ => None, + }; + + // Push the file change to the shared accumulator instead of writing to disk + let mut changes = file_changes.changes.lock().map_err(|e| { + Exception::throw_message(&ctx2, &format!("Failed to lock file_changes mutex: {e}")) + })?; + changes.push(FileChange { + path: std::path::PathBuf::from(&path_to_file), + result: exec_result, + }); + + // Return the transformed content string, or null if unmodified + match return_content { + Some(content) => { + Ok::<_, rquickjs::Error>(rquickjs::String::from_str(ctx2, &content)?.into_value()) + } + None => Ok::<_, rquickjs::Error>(Value::new_null(ctx2)), + } + })?; + + Ok(promise.into_value()) +} diff --git a/crates/codemod-sandbox/src/ast_grep/native.rs b/crates/codemod-sandbox/src/ast_grep/native.rs index 7807cb8c1..41de3b598 100644 --- a/crates/codemod-sandbox/src/ast_grep/native.rs +++ b/crates/codemod-sandbox/src/ast_grep/native.rs @@ -6,7 +6,9 @@ use std::{fs, panic}; use ast_grep_config::{from_yaml_string, CombinedScan, RuleConfig}; use ast_grep_core::tree_sitter::StrDoc; use ast_grep_core::AstGrep; -use ast_grep_language::SupportLang; + +use crate::sandbox::engine::codemod_lang::CodemodLang; +type SupportLang = CodemodLang; use crate::ast_grep::scanner::scan_content; use crate::ast_grep::types::{AstGrepError, AstGrepMatch}; diff --git 
a/crates/codemod-sandbox/src/ast_grep/sg_node.rs b/crates/codemod-sandbox/src/ast_grep/sg_node.rs index 9a480d8f2..d4eaf34e0 100644 --- a/crates/codemod-sandbox/src/ast_grep/sg_node.rs +++ b/crates/codemod-sandbox/src/ast_grep/sg_node.rs @@ -4,7 +4,7 @@ use crate::ast_grep::wasm_lang::WasmDoc; use ast_grep_core::tree_sitter::StrDoc as TSStrDoc; use ast_grep_core::{AstGrep, Node, NodeMatch}; -#[cfg(not(feature = "wasm"))] +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] use ast_grep_language::SupportLang; #[cfg(feature = "native")] @@ -15,23 +15,32 @@ use rquickjs::{ }; use std::marker::PhantomData; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use crate::ast_grep::types::JsEdit; use crate::ast_grep::types::JsNodeRange; use crate::ast_grep::utils::convert_matcher; -#[cfg(not(feature = "wasm"))] +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] use ast_grep_language::SupportLang as Lang; -#[cfg(not(feature = "wasm"))] +#[cfg(feature = "native")] +use crate::sandbox::engine::codemod_lang::CodemodLang; +#[cfg(feature = "native")] +use CodemodLang as Lang; + +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] type TSDoc = TSStrDoc; +#[cfg(feature = "native")] +type TSDoc = TSStrDoc; #[cfg(feature = "wasm")] type TSDoc = WasmDoc; pub(crate) struct SgRootInner { - grep: AstGrep, + pub(crate) grep: AstGrep, filename: Option, + /// Optional rename target path set by root.rename() + pub(crate) rename_to: Mutex>, /// Optional semantic provider for symbol indexing (native only) #[cfg(feature = "native")] pub(crate) semantic_provider: Option>, @@ -90,6 +99,53 @@ impl<'js> SgRootRjs<'js> { /// modified content from the `transform()` function instead. /// /// After writing, the semantic provider's cache is updated with the new content. + /// Rename the current file to a new path. + /// + /// If the path is relative, it is resolved against the current file's parent directory. + /// If absolute, it is used as-is. 
+ pub fn rename(&self, new_path: String, ctx: Ctx<'js>) -> Result<()> { + if new_path.is_empty() { + return Err(Exception::throw_message( + &ctx, + "rename() requires a non-empty path", + )); + } + + let resolved_path = if std::path::Path::new(&new_path).is_absolute() { + new_path + } else { + // Resolve relative to current file's parent directory + match &self.inner.filename { + Some(filename) => { + let parent = std::path::Path::new(filename) + .parent() + .unwrap_or(std::path::Path::new(".")); + parent.join(&new_path).to_string_lossy().to_string() + } + None => new_path, + } + }; + + // Validate: resolved path must stay within the target directory + #[cfg(feature = "native")] + { + use crate::sandbox::engine::execution_engine::validate_path_within_target; + validate_path_within_target(&ctx, std::path::Path::new(&resolved_path), "rename()")?; + } + + let mut rename_to = self.inner.rename_to.lock().map_err(|e| { + Exception::throw_message(&ctx, &format!("Failed to lock rename_to mutex: {e}")) + })?; + if rename_to.is_some() { + return Err(Exception::throw_message( + &ctx, + "rename() has already been called for this file. It can only be called once.", + )); + } + *rename_to = Some(resolved_path); + Ok(()) + } + #[cfg(feature = "native")] pub fn write(&self, content: String, ctx: Ctx<'js>) -> Result<()> { let file_path = match &self.inner.filename { @@ -135,6 +191,15 @@ impl<'js> SgRootRjs<'js> { } impl<'js> SgRootRjs<'js> { + /// Get the rename target path, if rename() was called. 
+ pub fn get_rename_to(&self) -> Option { + self.inner + .rename_to + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clone() + } + pub fn try_new( lang_str: String, src: String, @@ -158,12 +223,13 @@ impl<'js> SgRootRjs<'js> { inner: Arc::new(SgRootInner { grep: unsafe { std::mem::transmute(doc) }, filename, + rename_to: Mutex::new(None), }), _phantom: PhantomData, }) } - #[cfg(not(feature = "wasm"))] + #[cfg(all(not(feature = "wasm"), not(feature = "native")))] { let lang = SupportLang::from_str(&lang_str) .map_err(|e| format!("Unsupported language: {lang_str}. Error: {e}"))?; @@ -172,6 +238,26 @@ impl<'js> SgRootRjs<'js> { inner: Arc::new(SgRootInner { grep, filename, + rename_to: Mutex::new(None), + #[cfg(feature = "native")] + semantic_provider: None, + #[cfg(feature = "native")] + current_file_path: None, + }), + _phantom: PhantomData, + }) + } + + #[cfg(feature = "native")] + { + let lang = CodemodLang::from_str(&lang_str) + .map_err(|e| format!("Unsupported language: {lang_str}. 
Error: {e}"))?; + let grep = AstGrep::new(src, lang); + Ok(SgRootRjs { + inner: Arc::new(SgRootInner { + grep, + filename, + rename_to: Mutex::new(None), #[cfg(feature = "native")] semantic_provider: None, #[cfg(feature = "native")] @@ -190,6 +276,7 @@ impl<'js> SgRootRjs<'js> { inner: Arc::new(SgRootInner { grep, filename, + rename_to: Mutex::new(None), #[cfg(feature = "native")] semantic_provider: None, #[cfg(feature = "native")] @@ -211,6 +298,7 @@ impl<'js> SgRootRjs<'js> { inner: Arc::new(SgRootInner { grep, filename, + rename_to: Mutex::new(None), semantic_provider, current_file_path, }), @@ -649,6 +737,51 @@ impl<'js> SgNodeRjs<'js> { Ok(new_content) } + pub fn debug(&self) -> Result { + fn format_node( + node: &ast_grep_core::Node, + indent: usize, + ) -> String { + let indent_str = " ".repeat(indent); + let kind = node.kind(); + let is_named = node.is_named(); + + let mut result = String::new(); + + if is_named { + let text = node.text(); + let has_children = node.children().next().is_some(); + + if has_children { + result.push_str(&format!("{indent_str}{kind}:\n")); + for child in node.children() { + result.push_str(&format_node(&child, indent + 1)); + } + } else { + // Leaf named node — show text inline + let display_text = if text.len() > 40 { + match text.char_indices().nth(40) { + Some((idx, _)) => format!("{}...", &text[..idx]), + None => text.to_string(), + } + } else { + text.to_string() + }; + result.push_str(&format!("{indent_str}{kind}: {display_text:?}\n")); + } + } else { + // Anonymous node (punctuation, keywords) + let text = node.text(); + result.push_str(&format!("{indent_str}[{text:?}]\n")); + } + + result + } + + let node: ast_grep_core::Node = self.inner_node.clone().into(); + Ok(format_node(&node, 0)) + } + #[qjs(rename = "getRoot")] pub fn get_root(&self, _ctx: Ctx<'js>) -> Result> { Ok(SgRootRjs { diff --git a/crates/codemod-sandbox/src/ast_grep/utils.rs b/crates/codemod-sandbox/src/ast_grep/utils.rs index be8992570..5346c021d 100644 
--- a/crates/codemod-sandbox/src/ast_grep/utils.rs +++ b/crates/codemod-sandbox/src/ast_grep/utils.rs @@ -1,13 +1,15 @@ use crate::ast_grep::types::AstGrepError; #[cfg(feature = "wasm")] use crate::ast_grep::wasm_lang::WasmLang as SupportLang; +#[cfg(feature = "native")] +use crate::sandbox::engine::codemod_lang::CodemodLang as SupportLang; use ast_grep_config::{DeserializeEnv, RuleCore, SerializableRuleCore}; use ast_grep_core::{ matcher::{KindMatcher, Matcher}, meta_var::MetaVarEnv, Doc, Node, Pattern, }; -#[cfg(not(feature = "wasm"))] +#[cfg(all(not(feature = "wasm"), not(feature = "native")))] use ast_grep_language::SupportLang; use rquickjs::{Ctx, Exception, FromJs, Result as QResult, Value}; use std::borrow::Cow; diff --git a/crates/codemod-sandbox/src/lib.rs b/crates/codemod-sandbox/src/lib.rs index 98cfe86e1..27949d94a 100644 --- a/crates/codemod-sandbox/src/lib.rs +++ b/crates/codemod-sandbox/src/lib.rs @@ -10,7 +10,11 @@ pub mod workflow_global; #[cfg(feature = "native")] pub use ast_grep::{scan_file_with_combined_scan, with_combined_scan}; pub use metrics::{MetricsContext, MetricsData}; +#[cfg(feature = "native")] +pub use sandbox::engine::codemod_lang::CodemodLang; #[cfg(feature = "jssg-in-memory")] -pub use sandbox::engine::{execute_codemod_sync, ExecutionResult, InMemoryExecutionOptions}; +pub use sandbox::engine::{ + execute_codemod_sync, CodemodOutput, ExecutionResult, InMemoryExecutionOptions, +}; #[cfg(feature = "jssg-in-memory")] pub use sandbox::resolvers::{InMemoryLoader, InMemoryResolver}; diff --git a/crates/codemod-sandbox/src/sandbox/engine/codemod_lang.rs b/crates/codemod-sandbox/src/sandbox/engine/codemod_lang.rs new file mode 100644 index 000000000..9e74322a8 --- /dev/null +++ b/crates/codemod-sandbox/src/sandbox/engine/codemod_lang.rs @@ -0,0 +1,182 @@ +use ast_grep_core::matcher::{Pattern, PatternBuilder, PatternError}; +use ast_grep_core::tree_sitter::{LanguageExt, TSLanguage}; +use ast_grep_core::Language; +use 
ast_grep_dynamic::DynamicLang; +use ast_grep_language::SupportLang; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::str::FromStr; + +/// A language type that wraps both statically-linked `SupportLang` (from ast-grep) +/// and dynamically-loaded `DynamicLang` (from tree-sitter-loader). +/// +/// This allows the engine to support languages beyond the 26 built into ast-grep +/// by downloading and loading tree-sitter parsers at runtime. +#[derive(Clone, Copy)] +pub enum CodemodLang { + Static(SupportLang), + Dynamic(DynamicLang), +} + +impl PartialEq for CodemodLang { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (CodemodLang::Static(a), CodemodLang::Static(b)) => a == b, + (CodemodLang::Dynamic(a), CodemodLang::Dynamic(b)) => a == b, + _ => false, + } + } +} + +impl Eq for CodemodLang {} + +impl Hash for CodemodLang { + fn hash(&self, state: &mut H) { + match self { + CodemodLang::Static(lang) => { + 0u8.hash(state); + lang.hash(state); + } + CodemodLang::Dynamic(lang) => { + 1u8.hash(state); + lang.hash(state); + } + } + } +} + +impl fmt::Display for CodemodLang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CodemodLang::Static(lang) => write!(f, "{}", lang), + CodemodLang::Dynamic(lang) => write!(f, "{}", lang.name()), + } + } +} + +impl fmt::Debug for CodemodLang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CodemodLang::Static(lang) => write!(f, "CodemodLang::Static({:?})", lang), + CodemodLang::Dynamic(lang) => write!(f, "CodemodLang::Dynamic({})", lang.name()), + } + } +} + +impl FromStr for CodemodLang { + type Err = String; + + fn from_str(s: &str) -> Result { + // Try static languages first + if let Ok(lang) = SupportLang::from_str(s) { + return Ok(CodemodLang::Static(lang)); + } + + // Initialize dynamic parsers and try dynamic languages + if let Err(e) = tree_sitter_loader::init() { + 
eprintln!("Warning: failed to initialize dynamic tree-sitter parsers: {e}"); + } + + if let Ok(lang) = DynamicLang::from_str(s) { + return Ok(CodemodLang::Dynamic(lang)); + } + + Err(format!("Unsupported language: {s}")) + } +} + +impl From for CodemodLang { + fn from(lang: SupportLang) -> Self { + CodemodLang::Static(lang) + } +} + +impl From for CodemodLang { + fn from(lang: DynamicLang) -> Self { + CodemodLang::Dynamic(lang) + } +} + +impl Serialize for CodemodLang { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +impl<'de> Deserialize<'de> for CodemodLang { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let name = String::deserialize(deserializer)?; + CodemodLang::from_str(&name).map_err(serde::de::Error::custom) + } +} + +impl Language for CodemodLang { + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + match self { + CodemodLang::Static(lang) => lang.pre_process_pattern(query), + CodemodLang::Dynamic(lang) => lang.pre_process_pattern(query), + } + } + + fn meta_var_char(&self) -> char { + match self { + CodemodLang::Static(lang) => lang.meta_var_char(), + CodemodLang::Dynamic(lang) => lang.meta_var_char(), + } + } + + fn expando_char(&self) -> char { + match self { + CodemodLang::Static(lang) => lang.expando_char(), + CodemodLang::Dynamic(lang) => lang.expando_char(), + } + } + + fn kind_to_id(&self, kind: &str) -> u16 { + match self { + CodemodLang::Static(lang) => lang.kind_to_id(kind), + CodemodLang::Dynamic(lang) => lang.kind_to_id(kind), + } + } + + fn field_to_id(&self, field: &str) -> Option { + match self { + CodemodLang::Static(lang) => lang.field_to_id(field), + CodemodLang::Dynamic(lang) => lang.field_to_id(field), + } + } + + fn from_path>(path: P) -> Option { + if let Some(lang) = SupportLang::from_path(path.as_ref()) { + return Some(CodemodLang::Static(lang)); + } + if let Some(lang) = 
DynamicLang::from_path(path.as_ref()) { + return Some(CodemodLang::Dynamic(lang)); + } + None + } + + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + match self { + CodemodLang::Static(lang) => lang.build_pattern(builder), + CodemodLang::Dynamic(lang) => lang.build_pattern(builder), + } + } +} + +impl LanguageExt for CodemodLang { + fn get_ts_language(&self) -> TSLanguage { + match self { + CodemodLang::Static(lang) => lang.get_ts_language(), + CodemodLang::Dynamic(lang) => lang.get_ts_language(), + } + } +} diff --git a/crates/codemod-sandbox/src/sandbox/engine/execution_engine.rs b/crates/codemod-sandbox/src/sandbox/engine/execution_engine.rs index 22bf8c004..bcfdcc108 100644 --- a/crates/codemod-sandbox/src/sandbox/engine/execution_engine.rs +++ b/crates/codemod-sandbox/src/sandbox/engine/execution_engine.rs @@ -1,5 +1,8 @@ +use super::codemod_lang::CodemodLang; use super::quickjs_adapters::{QuickJSLoader, QuickJSResolver}; -use crate::ast_grep::serde::JsValue; +use super::transform_helpers::{ + build_transform_options, process_transform_result, ModificationCheck, +}; use crate::ast_grep::sg_node::{SgNodeRjs, SgRootRjs}; use crate::ast_grep::AstGrepModule; use crate::metrics::{MetricsContext, MetricsModule}; @@ -10,34 +13,139 @@ use crate::workflow_global::WorkflowGlobalModule; use ast_grep_config::RuleConfig; use ast_grep_core::matcher::MatcherExt; use ast_grep_core::AstGrep; -use ast_grep_language::SupportLang; use codemod_llrt_capabilities::module_builder::LlrtModuleBuilder; use codemod_llrt_capabilities::types::LlrtSupportedModules; use language_core::SemanticProvider; use rquickjs::{async_with, AsyncContext, AsyncRuntime}; use rquickjs::{CatchResultExt, Function, Module}; -use rquickjs::{IntoJs, Object}; use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; -use std::path::Path; -use std::sync::Arc; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; + +/// Flag indicating whether execution is in test mode. 
+/// When in test mode, `jssgTransform` becomes a no-op. +#[derive(Debug, Clone)] +pub struct ExecutionModeFlag { + pub test_mode: bool, +} + +unsafe impl<'js> rquickjs::JsLifetime<'js> for ExecutionModeFlag { + type Changed<'to> = ExecutionModeFlag; +} + +/// Execution context passed to `jssgTransform` via QuickJS userdata. +/// Contains the params and matrixValues from the parent codemod execution. +#[derive(Debug, Clone)] +pub struct JssgExecutionContext { + pub params: HashMap, + pub matrix_values: Option>, +} + +unsafe impl<'js> rquickjs::JsLifetime<'js> for JssgExecutionContext { + type Changed<'to> = JssgExecutionContext; +} + +/// Details of a modified file +#[derive(Debug, Clone)] +pub struct ModifiedResult { + pub content: String, + pub rename_to: Option, +} /// Result of executing a codemod on a single file #[derive(Debug, Clone)] pub enum ExecutionResult { - Modified(String), + Modified(ModifiedResult), Unmodified, Skipped, } +/// A file change produced by `jssgTransform` (secondary output) +#[derive(Debug, Clone)] +pub struct FileChange { + pub path: PathBuf, + pub result: ExecutionResult, +} + +/// Output of a codemod execution including both the primary result +/// and any secondary file changes produced by `jssgTransform`. +#[derive(Debug, Clone)] +pub struct CodemodOutput { + pub primary: ExecutionResult, + pub secondary: Vec, +} + +/// Shared accumulator for file changes produced by `jssgTransform`. +/// Stored as QuickJS userdata so the JS-facing function can push changes +/// without touching the filesystem. +#[derive(Debug, Clone, Default)] +pub struct JssgFileChanges { + pub changes: Arc>>, +} + +unsafe impl<'js> rquickjs::JsLifetime<'js> for JssgFileChanges { + type Changed<'to> = JssgFileChanges; +} + +/// The target directory that the codemod is running against. +/// Stored as QuickJS userdata so `jssgTransform` and `rename()` can +/// validate that file paths stay within this directory. 
+#[derive(Debug, Clone)] +pub struct TargetDirectory(pub PathBuf); + +unsafe impl<'js> rquickjs::JsLifetime<'js> for TargetDirectory { + type Changed<'to> = TargetDirectory; +} + +/// Validate that `path` resolves within the target directory stored in QuickJS userdata. +/// `caller` is used in error messages (e.g. "jssgTransform()" or "rename()"). +/// If the file doesn't exist yet (e.g. rename target), the parent directory is canonicalized instead. +/// Returns `Ok(())` if no `TargetDirectory` userdata is set (e.g. test / in-memory contexts). +pub fn validate_path_within_target<'js>( + ctx: &rquickjs::Ctx<'js>, + path: &Path, + caller: &str, +) -> rquickjs::Result<()> { + if let Some(target_dir) = ctx.userdata::() { + let canonical_target = target_dir + .0 + .canonicalize() + .unwrap_or_else(|_| target_dir.0.clone()); + let canonical_path = path.canonicalize().unwrap_or_else(|_| { + // File may not exist yet (e.g. rename target); canonicalize the parent instead + if let Some(parent) = path.parent() { + let canonical_parent = parent + .canonicalize() + .unwrap_or_else(|_| parent.to_path_buf()); + canonical_parent.join(path.file_name().unwrap_or_default()) + } else { + path.to_path_buf() + } + }); + if !canonical_path.starts_with(&canonical_target) { + return Err(rquickjs::Exception::throw_message( + ctx, + &format!( + "{} path '{}' is outside the target directory '{}'", + caller, + path.display(), + target_dir.0.display() + ), + )); + } + } + Ok(()) +} + /// Options for executing a codemod on a single file pub struct JssgExecutionOptions<'a, R> { pub script_path: &'a Path, pub resolver: Arc, - pub language: SupportLang, + pub language: CodemodLang, pub file_path: &'a Path, pub content: &'a str, - pub selector_config: Option>>>, + pub selector_config: Option>>>, pub params: Option>, pub matrix_values: Option>, pub capabilities: Option>, @@ -45,13 +153,18 @@ pub struct JssgExecutionOptions<'a, R> { pub semantic_provider: Option>, /// Optional metrics context for 
tracking metrics across execution pub metrics_context: Option, + /// Whether this is a test execution (jssgTransform becomes a no-op) + pub test_mode: bool, + /// The target directory the codemod is running against. + /// Used to validate that `jssgTransform` and `rename()` only access files within this directory. + pub target_directory: Option<&'a Path>, } /// Execute a codemod on string content using QuickJS /// This is the core execution logic that doesn't touch the filesystem pub async fn execute_codemod_with_quickjs<'a, R>( options: JssgExecutionOptions<'a, R>, -) -> Result +) -> Result where R: ModuleResolver + 'static, { @@ -131,9 +244,44 @@ where // Capture metrics context for use inside async block let metrics_context = options.metrics_context.clone(); + let test_mode = options.test_mode; // Execute JavaScript code async_with!(context => |ctx| { + // Store execution mode flag in runtime userdata + ctx.store_userdata(ExecutionModeFlag { test_mode }).map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: format!("Failed to store ExecutionModeFlag: {:?}", e), + }, + })?; + + // Store shared accumulator for jssgTransform file changes + let jssg_file_changes = JssgFileChanges::default(); + ctx.store_userdata(jssg_file_changes.clone()).map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: format!("Failed to store JssgFileChanges: {:?}", e), + }, + })?; + + // Store jssg execution context so jssgTransform can access params/matrixValues + ctx.store_userdata(JssgExecutionContext { + params: params.clone(), + matrix_values: options.matrix_values.clone(), + }).map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: format!("Failed to store JssgExecutionContext: {:?}", e), + }, + })?; + + // Store target directory in runtime userdata if provided + if let Some(target_dir) = 
options.target_directory { + ctx.store_userdata(TargetDirectory(target_dir.to_path_buf())).map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: format!("Failed to store TargetDirectory: {:?}", e), + }, + })?; + } + // Store metrics context in runtime userdata if provided (must be done inside async_with) if let Some(ref metrics_ctx) = metrics_context { ctx.store_userdata(metrics_ctx.clone()).map_err(|e| ExecutionError::Runtime { @@ -193,6 +341,9 @@ where }, })?; + // Keep a reference to read rename_to after JS execution + let sg_root_inner = Arc::clone(&parsed_content.inner); + // Calculate matches inside the JS context let matches: Option>> = if let Some(selector_config) = &options.selector_config { let root_node = parsed_content.root(ctx.clone()).map_err(|e| ExecutionError::Runtime { @@ -205,7 +356,7 @@ where .collect(); if ast_matches.is_empty() { - return Ok(ExecutionResult::Skipped); + return Ok(CodemodOutput { primary: ExecutionResult::Skipped, secondary: vec![] }); } Some(ast_matches.into_iter().map(|node_match| SgNodeRjs { @@ -219,44 +370,13 @@ where let language_str = options.language.to_string(); - let run_options = Object::new(ctx.clone()).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - - let params_js = params.into_iter() - .map(|(k, v)| (k, JsValue(v))) - .collect::>(); - run_options.set("params", params_js).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - - run_options.set("language", &language_str).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - run_options.set("matches", matches).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - 
message: e.to_string(), - }, - })?; - - let matrix_values_js = options.matrix_values - .map(|input| input.into_iter() - .map(|(k, v)| (k, JsValue(v))) - .collect::>()); - - run_options.set("matrixValues", matrix_values_js).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - - let run_options_qjs = run_options.into_js(&ctx); + let run_options_qjs = build_transform_options( + &ctx, + params, + &language_str, + options.matrix_values, + matches, + )?; let func = namespace .get::<_, Function>("executeCodemod") @@ -284,22 +404,17 @@ where }, })?; - if result_obj.is_string() { - let new_content = result_obj.get::().unwrap(); - if new_content == options.content { - Ok(ExecutionResult::Unmodified) - } else { - Ok(ExecutionResult::Modified(new_content)) - } - } else if result_obj.is_null() || result_obj.is_undefined() { - Ok(ExecutionResult::Unmodified) - } else { - Err(ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::ExecutionFailed { - message: "Invalid result type".to_string(), - }, - }) - } + let primary = process_transform_result( + &result_obj, + &sg_root_inner, + ModificationCheck::StringEquality { original_content: options.content }, + )?; + + let secondary = jssg_file_changes.changes.lock() + .map(|guard| guard.clone()) + .unwrap_or_default(); + + Ok(CodemodOutput { primary, secondary }) }; execution.await }) @@ -438,6 +553,10 @@ mod tests { use std::sync::Arc; use tempfile::TempDir; + fn js_lang() -> CodemodLang { + CodemodLang::Static(SupportLang::JavaScript) + } + /// Helper to create a temporary codemod file and test directory fn setup_test_codemod(codemod_content: &str) -> (TempDir, std::path::PathBuf) { let temp_dir = TempDir::new().expect("Failed to create temp directory"); @@ -493,7 +612,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: 
js_lang(), file_path, content, selector_config: None, @@ -502,18 +621,23 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; match result { - Ok(ExecutionResult::Modified(new_content)) => { - assert!(new_content.contains("logger.log(\"Hello, world!\")")); - assert!(new_content.contains("logger.log(\"Debug message\")")); - // console.info should remain unchanged - assert!(new_content.contains("console.info(\"Info message\")")); - } - Ok(other) => panic!("Expected modified result, got: {:?}", other), + Ok(output) => match output.primary { + ExecutionResult::Modified(modified) => { + assert!(modified.content.contains("logger.log(\"Hello, world!\")")); + assert!(modified.content.contains("logger.log(\"Debug message\")")); + // console.info should remain unchanged + assert!(modified.content.contains("console.info(\"Info message\")")); + assert!(modified.rename_to.is_none()); + } + other => panic!("Expected modified result, got: {:?}", other), + }, Err(e) => panic!("Expected success, got error: {:?}", e), } } @@ -536,7 +660,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -545,15 +669,19 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; match result { - Ok(ExecutionResult::Unmodified) => { - // Expected behavior - no console.log or console.debug found - } - Ok(other) => panic!("Expected unmodified result, got: {:?}", other), + Ok(output) => match output.primary { + ExecutionResult::Unmodified => { + // Expected behavior - no console.log or console.debug found + } + other => panic!("Expected unmodified result, got: {:?}", 
other), + }, Err(e) => panic!("Expected success, got error: {:?}", e), } } @@ -580,7 +708,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -589,15 +717,19 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; match result { - Ok(ExecutionResult::Unmodified) => { - // Expected behavior - codemod returned null - } - Ok(other) => panic!("Expected unmodified result, got: {:?}", other), + Ok(output) => match output.primary { + ExecutionResult::Unmodified => { + // Expected behavior - codemod returned null + } + other => panic!("Expected unmodified result, got: {:?}", other), + }, Err(e) => panic!("Expected success, got error: {:?}", e), } } @@ -624,7 +756,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -633,6 +765,8 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; @@ -662,7 +796,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -671,17 +805,21 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; match result { Err(ExecutionError::Runtime { source }) => { - assert!(source.to_string().contains("Invalid result type")); + assert!(source + .to_string() + .contains("must return 
either a string or null/undefined")); } - Ok(other) => panic!( + Ok(output) => panic!( "Expected runtime error for invalid return type, got: {:?}", - other + output.primary ), Err(e) => panic!("Expected specific runtime error, got: {:?}", e), } @@ -703,7 +841,7 @@ function example() { let options = JssgExecutionOptions { script_path: nonexistent_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -712,6 +850,8 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; @@ -722,12 +862,15 @@ function example() { #[tokio::test] async fn test_execution_result_debug_clone() { - let result1 = ExecutionResult::Modified("test".to_string()); + let result1 = ExecutionResult::Modified(ModifiedResult { + content: "test".to_string(), + rename_to: None, + }); let result2 = result1.clone(); match (result1, result2) { - (ExecutionResult::Modified(content1), ExecutionResult::Modified(content2)) => { - assert_eq!(content1, content2); + (ExecutionResult::Modified(m1), ExecutionResult::Modified(m2)) => { + assert_eq!(m1.content, m2.content); } _ => panic!("Clone should preserve the variant and content"), } @@ -804,7 +947,7 @@ function example() { let options = JssgExecutionOptions { script_path: &codemod_path, resolver, - language: SupportLang::JavaScript, + language: js_lang(), file_path, content, selector_config: None, @@ -813,16 +956,20 @@ function example() { capabilities: None, semantic_provider: None, metrics_context: Some(metrics_ctx.clone()), + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; // Should return Unmodified since we return null match result { - Ok(ExecutionResult::Unmodified) => { - // Expected - } - Ok(other) => panic!("Expected unmodified result, got: {:?}", other), + Ok(output) => match 
output.primary { + ExecutionResult::Unmodified => { + // Expected + } + other => panic!("Expected unmodified result, got: {:?}", other), + }, Err(e) => panic!("Expected success, got error: {:?}", e), } diff --git a/crates/codemod-sandbox/src/sandbox/engine/in_memory_engine.rs b/crates/codemod-sandbox/src/sandbox/engine/in_memory_engine.rs index 0042ed448..79fd65085 100644 --- a/crates/codemod-sandbox/src/sandbox/engine/in_memory_engine.rs +++ b/crates/codemod-sandbox/src/sandbox/engine/in_memory_engine.rs @@ -1,6 +1,9 @@ -use super::execution_engine::ExecutionResult; +use super::codemod_lang::CodemodLang; +use super::execution_engine::{CodemodOutput, ExecutionResult}; use super::quickjs_adapters::QuickJSResolver; -use crate::ast_grep::serde::JsValue; +use super::transform_helpers::{ + build_transform_options, process_transform_result, ModificationCheck, +}; use crate::ast_grep::sg_node::{SgNodeRjs, SgRootRjs}; use crate::ast_grep::AstGrepModule; use crate::metrics::{MetricsContext, MetricsModule}; @@ -9,13 +12,11 @@ use crate::sandbox::resolvers::{InMemoryLoader, InMemoryResolver, ModuleResolver use crate::utils::quickjs_utils::maybe_promise; use ast_grep_config::RuleConfig; use ast_grep_core::matcher::MatcherExt; +use ast_grep_core::tree_sitter::StrDoc; use ast_grep_core::AstGrep; -use ast_grep_language::SupportLang; use codemod_llrt_capabilities::module_builder::LlrtModuleBuilder; use language_core::SemanticProvider; -use rquickjs::{ - async_with, AsyncContext, AsyncRuntime, CatchResultExt, Function, IntoJs, Module, Object, -}; +use rquickjs::{async_with, AsyncContext, AsyncRuntime, CatchResultExt, Function, Module}; use std::collections::HashMap; use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, Ordering}; @@ -31,18 +32,24 @@ const DEFAULT_MEMORY_LIMIT: usize = 512 * 1024 * 1024; /// Default max stack size in bytes (4 MB) const DEFAULT_MAX_STACK_SIZE: usize = 4 * 1024 * 1024; -/// In-memory execution options for executing a codemod on a string 
+/// SHA256 hash type (32 bytes) +pub type Sha256Hash = [u8; 32]; + +/// In-memory execution options for executing a codemod on a pre-parsed AST pub struct InMemoryExecutionOptions<'a, R> { /// The JavaScript codemod source code (not a file path) pub codemod_source: &'a str, /// The programming language of the source code to transform - pub language: SupportLang, - /// The source code to transform - pub content: &'a str, + pub language: CodemodLang, + /// The pre-parsed AST (allows leveraging AST caching) + pub ast: AstGrep>, + /// SHA256 hash of the original content (used for modification detection) + /// If None, any non-null result is considered modified + pub original_sha256: Option, /// Optional module resolver (if None, a no-op resolver is used) pub resolver: Option>, /// Optional selector config for pre-filtering - pub selector_config: Option>>>, + pub selector_config: Option>>>, /// Optional parameters passed to the codemod pub params: Option>, /// Optional matrix values for parameterized codemods @@ -65,7 +72,7 @@ pub struct InMemoryExecutionOptions<'a, R> { /// suitable for use in synchronous contexts like PostgreSQL extensions. pub fn execute_codemod_sync( options: InMemoryExecutionOptions, -) -> Result +) -> Result where R: ModuleResolver + 'static, { @@ -86,7 +93,7 @@ where /// This function executes the codemod entirely in memory without filesystem access. 
pub async fn execute_codemod_in_memory( options: InMemoryExecutionOptions<'_, R>, -) -> Result +) -> Result where R: ModuleResolver + 'static, { @@ -129,7 +136,8 @@ where }))) .await; - let ast_grep = AstGrep::new(options.content, options.language); + // Use the pre-parsed AST from options (allows AST caching) + let ast_grep = options.ast; let module_builder = LlrtModuleBuilder::build(); let (mut built_in_resolver, mut built_in_loader, global_attachment) = @@ -216,6 +224,9 @@ where }, })?; + // Keep a reference to read rename_to after JS execution + let sg_root_inner = Arc::clone(&parsed_content.inner); + let matches: Option>> = if let Some(selector_config) = &options.selector_config { let root_node = parsed_content.root(ctx.clone()).map_err(|e| ExecutionError::Runtime { source: crate::sandbox::errors::RuntimeError::InitializationFailed { @@ -241,39 +252,18 @@ where let language_str = options.language.to_string(); - let run_options = Object::new(ctx.clone()).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - run_options.set("params", params).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - run_options.set("language", &language_str).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - run_options.set("matches", matches).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: e.to_string(), - }, - })?; - - let matrix_values_js = options.matrix_values - .map(|input| input.into_iter() - .map(|(k, v)| (k, JsValue(v))) - .collect::>()); - - run_options.set("matrixValues", matrix_values_js).map_err(|e| ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::InitializationFailed { - message: 
e.to_string(), - }, - })?; + // Convert String params to serde_json::Value for the shared helper + let params_json = params.into_iter() + .map(|(k, v)| (k, serde_json::Value::String(v))) + .collect::>(); - let run_options_qjs = run_options.into_js(&ctx); + let run_options_qjs = build_transform_options( + &ctx, + params_json, + &language_str, + options.matrix_values, + matches, + )?; let func = namespace .get::<_, Function>("executeCodemod") @@ -300,22 +290,11 @@ where }, })?; - if result_obj.is_string() { - let new_content = result_obj.get::().unwrap(); - if new_content == options.content { - Ok(ExecutionResult::Unmodified) - } else { - Ok(ExecutionResult::Modified(new_content)) - } - } else if result_obj.is_null() || result_obj.is_undefined() { - Ok(ExecutionResult::Unmodified) - } else { - Err(ExecutionError::Runtime { - source: crate::sandbox::errors::RuntimeError::ExecutionFailed { - message: "Invalid result type".to_string(), - }, - }) - } + process_transform_result( + &result_obj, + &sg_root_inner, + ModificationCheck::Sha256(options.original_sha256), + ) }; execution.await }) @@ -327,7 +306,10 @@ where }); } - result + result.map(|primary| CodemodOutput { + primary, + secondary: vec![], + }) } #[cfg(test)] @@ -335,10 +317,22 @@ mod tests { use super::*; use crate::sandbox::errors::RuntimeError; use crate::sandbox::resolvers::oxc_resolver::OxcResolver; + use ast_grep_language::SupportLang; + use sha2::{Digest, Sha256}; use std::fs; use std::sync::Arc; use tempfile::TempDir; + fn js_lang() -> CodemodLang { + CodemodLang::Static(SupportLang::JavaScript) + } + + fn compute_sha256(content: &str) -> Sha256Hash { + let mut hasher = Sha256::new(); + hasher.update(content.as_bytes()); + hasher.finalize().into() + } + #[test] fn test_execute_codemod_sync_timeout() { let temp_dir = TempDir::new().expect("Failed to create temp directory"); @@ -359,11 +353,13 @@ export default function transform(root) { let resolver = 
Arc::new(OxcResolver::new(temp_dir.path().to_path_buf(), None).unwrap()); let content = "const x = 1;"; + let ast = AstGrep::new(content, js_lang()); let result = execute_codemod_sync(InMemoryExecutionOptions { codemod_source: codemod_content, - language: SupportLang::JavaScript, - content, + language: js_lang(), + ast, + original_sha256: Some(compute_sha256(content)), resolver: Some(resolver), selector_config: None, params: None, @@ -381,7 +377,10 @@ export default function transform(root) { }) => { assert_eq!(timeout_ms, 50); } - Ok(_) => panic!("Expected timeout error, but got success"), + Ok(output) => panic!( + "Expected timeout error, but got success: {:?}", + output.primary + ), Err(e) => panic!("Expected timeout error, got different error: {:?}", e), } } @@ -410,11 +409,13 @@ export default function transform(root) { let resolver = Arc::new(OxcResolver::new(temp_dir.path().to_path_buf(), None).unwrap()); let content = "console.log('Hello, world!');"; + let ast = AstGrep::new(content, js_lang()); let result = execute_codemod_sync(InMemoryExecutionOptions { codemod_source: codemod_content, - language: SupportLang::JavaScript, - content, + language: js_lang(), + ast, + original_sha256: Some(compute_sha256(content)), resolver: Some(resolver), selector_config: None, params: None, @@ -427,10 +428,13 @@ export default function transform(root) { }); match result { - Ok(ExecutionResult::Modified(new_content)) => { - assert!(new_content.contains("logger.log('Hello, world!')")); - } - Ok(other) => panic!("Expected modified result, got: {:?}", other), + Ok(output) => match output.primary { + ExecutionResult::Modified(modified) => { + assert!(modified.content.contains("logger.log('Hello, world!')")); + assert!(modified.rename_to.is_none()); + } + other => panic!("Expected modified result, got: {:?}", other), + }, Err(e) => panic!("Expected success, got error: {:?}", e), } } diff --git a/crates/codemod-sandbox/src/sandbox/engine/language_data.rs 
b/crates/codemod-sandbox/src/sandbox/engine/language_data.rs index 9704154a7..2a7640a82 100644 --- a/crates/codemod-sandbox/src/sandbox/engine/language_data.rs +++ b/crates/codemod-sandbox/src/sandbox/engine/language_data.rs @@ -1,66 +1,78 @@ use std::collections::HashMap; #[cfg(feature = "native")] -use ast_grep_language::SupportLang; +use super::codemod_lang::CodemodLang; -/// Creates a map from SupportLang to their associated file extensions -pub fn create_language_extension_map() -> HashMap> { +/// Creates a map from CodemodLang to their associated file extensions +pub fn create_language_extension_map() -> HashMap> { let mut map = HashMap::new(); #[cfg(feature = "native")] { use ast_grep_language::SupportLang::*; - map.insert(JavaScript, vec![".js", ".mjs", ".cjs", ".jsx"]); map.insert( - TypeScript, + CodemodLang::Static(JavaScript), + vec![".js", ".mjs", ".cjs", ".jsx"], + ); + map.insert( + CodemodLang::Static(TypeScript), vec![".ts", ".mts", ".cts", ".js", ".mjs", ".cjs"], ); map.insert( - Tsx, + CodemodLang::Static(Tsx), vec![".tsx", ".jsx", ".ts", ".js", ".mjs", ".cjs", ".mts", ".cts"], ); - map.insert(Bash, vec![".sh", ".bash", ".zsh", ".fish"]); - map.insert(C, vec![".c", ".h"]); - map.insert(CSharp, vec![".cs"]); - map.insert(Css, vec![".css"]); map.insert( - Cpp, + CodemodLang::Static(Bash), + vec![".sh", ".bash", ".zsh", ".fish"], + ); + map.insert(CodemodLang::Static(C), vec![".c", ".h"]); + map.insert(CodemodLang::Static(CSharp), vec![".cs"]); + map.insert(CodemodLang::Static(Css), vec![".css"]); + map.insert( + CodemodLang::Static(Cpp), vec![".cpp", ".cxx", ".cc", ".c++", ".hpp", ".hxx", ".hh", ".h++"], ); - map.insert(Elixir, vec![".ex", ".exs"]); - map.insert(Go, vec![".go"]); - map.insert(Haskell, vec![".hs", ".lhs"]); - map.insert(Html, vec![".html", ".htm"]); - map.insert(Java, vec![".java"]); - map.insert(Json, vec![".json", ".jsonc"]); - map.insert(Kotlin, vec![".kt", ".kts"]); - map.insert(Lua, vec![".lua"]); + 
map.insert(CodemodLang::Static(Elixir), vec![".ex", ".exs"]); + map.insert(CodemodLang::Static(Go), vec![".go"]); + map.insert(CodemodLang::Static(Haskell), vec![".hs", ".lhs"]); + map.insert(CodemodLang::Static(Html), vec![".html", ".htm"]); + map.insert(CodemodLang::Static(Java), vec![".java"]); + map.insert(CodemodLang::Static(Json), vec![".json", ".jsonc"]); + map.insert(CodemodLang::Static(Kotlin), vec![".kt", ".kts"]); + map.insert(CodemodLang::Static(Lua), vec![".lua"]); map.insert( - Php, + CodemodLang::Static(Php), vec![ ".php", ".phtml", ".php3", ".php4", ".php5", ".php7", ".phps", ".php-s", ], ); - map.insert(Python, vec![".py", ".pyw", ".pyi"]); - map.insert(Ruby, vec![".rb", ".rbw"]); - map.insert(Rust, vec![".rs"]); - map.insert(Scala, vec![".scala", ".sc"]); - map.insert(Swift, vec![".swift"]); - map.insert(Yaml, vec![".yaml", ".yml"]); + map.insert(CodemodLang::Static(Python), vec![".py", ".pyw", ".pyi"]); + map.insert(CodemodLang::Static(Ruby), vec![".rb", ".rbw"]); + map.insert(CodemodLang::Static(Rust), vec![".rs"]); + map.insert(CodemodLang::Static(Scala), vec![".scala", ".sc"]); + map.insert(CodemodLang::Static(Swift), vec![".swift"]); + map.insert(CodemodLang::Static(Yaml), vec![".yaml", ".yml"]); + + // Dynamic languages (registered via tree-sitter-loader) + if let Ok(lang) = std::str::FromStr::from_str("less") { + let lang: CodemodLang = lang; + map.insert(lang, vec![".less"]); + } } map } /// Get file extensions for a specific language -pub fn get_extensions_for_language(lang: SupportLang) -> Vec<&'static str> { +pub fn get_extensions_for_language(lang: CodemodLang) -> Vec<&'static str> { let map = create_language_extension_map(); map.get(&lang).cloned().unwrap_or_default() } /// Determine language from file extension -pub fn get_language_from_extension(extension: &str) -> Option { +pub fn get_language_from_extension(extension: &str) -> Option { let map = create_language_extension_map(); for (lang, extensions) in map.iter() { @@ -84,25 
+96,31 @@ pub fn get_all_supported_extensions() -> Vec<&'static str> { #[cfg(all(test, feature = "native"))] mod tests { use super::*; + use ast_grep_language::SupportLang; #[test] fn test_language_extension_mapping() { let map = create_language_extension_map(); assert!(!map.is_empty()); - #[cfg(feature = "native")] - { - use ast_grep_language::SupportLang::*; - assert!(map.get(&JavaScript).unwrap().contains(&".js")); - assert!(map.get(&TypeScript).unwrap().contains(&".ts")); - assert!(map.get(&Rust).unwrap().contains(&".rs")); - } + assert!(map + .get(&CodemodLang::Static(SupportLang::JavaScript)) + .unwrap() + .contains(&".js")); + assert!(map + .get(&CodemodLang::Static(SupportLang::TypeScript)) + .unwrap() + .contains(&".ts")); + assert!(map + .get(&CodemodLang::Static(SupportLang::Rust)) + .unwrap() + .contains(&".rs")); } #[test] fn test_get_extensions_for_language() { - use ast_grep_language::SupportLang::*; - let js_extensions = get_extensions_for_language(JavaScript); + let js_extensions = + get_extensions_for_language(CodemodLang::Static(SupportLang::JavaScript)); assert!(js_extensions.contains(&".js")); assert!(js_extensions.contains(&".mjs")); assert!(js_extensions.contains(&".cjs")); diff --git a/crates/codemod-sandbox/src/sandbox/engine/mod.rs b/crates/codemod-sandbox/src/sandbox/engine/mod.rs index b0719102d..a63c447c8 100644 --- a/crates/codemod-sandbox/src/sandbox/engine/mod.rs +++ b/crates/codemod-sandbox/src/sandbox/engine/mod.rs @@ -2,6 +2,10 @@ pub mod execution_engine; pub mod in_memory_engine; pub mod quickjs_adapters; pub mod selector_engine; +pub(crate) mod transform_helpers; + +#[cfg(feature = "native")] +pub mod codemod_lang; pub use execution_engine::*; pub use in_memory_engine::*; diff --git a/crates/codemod-sandbox/src/sandbox/engine/selector_engine.rs b/crates/codemod-sandbox/src/sandbox/engine/selector_engine.rs index 5a0a1a5f5..d8e04f7c6 100644 --- a/crates/codemod-sandbox/src/sandbox/engine/selector_engine.rs +++ 
b/crates/codemod-sandbox/src/sandbox/engine/selector_engine.rs @@ -1,3 +1,4 @@ +use super::codemod_lang::CodemodLang; use super::quickjs_adapters::{QuickJSLoader, QuickJSResolver}; use crate::ast_grep::AstGrepModule; use crate::metrics::MetricsModule; @@ -5,7 +6,6 @@ use crate::sandbox::errors::ExecutionError; use crate::sandbox::resolvers::ModuleResolver; use crate::utils::quickjs_utils::maybe_promise; use ast_grep_config::{RuleConfig, SerializableRuleConfig}; -use ast_grep_language::SupportLang; use codemod_llrt_capabilities::module_builder::LlrtModuleBuilder; use codemod_llrt_capabilities::types::LlrtSupportedModules; use rquickjs::{async_with, AsyncContext, AsyncRuntime}; @@ -20,7 +20,7 @@ use crate::workflow_global::WorkflowGlobalModule; pub struct SelectorEngineOptions<'a, R> { pub script_path: &'a Path, - pub language: SupportLang, + pub language: CodemodLang, pub resolver: Arc, pub capabilities: Option>, } @@ -29,7 +29,7 @@ pub struct SelectorEngineOptions<'a, R> { /// This executes the getSelector function and converts the result to RuleConfig pub async fn extract_selector_with_quickjs<'a, R>( options: SelectorEngineOptions<'a, R>, -) -> Result>>, ExecutionError> +) -> Result>>, ExecutionError> where R: ModuleResolver + 'static, { @@ -201,7 +201,7 @@ where }, })?; - let serializable_config: SerializableRuleConfig = + let serializable_config: SerializableRuleConfig = serde_json::from_value(js_value.0) .map_err(|e| ExecutionError::Runtime { source: crate::sandbox::errors::RuntimeError::ExecutionFailed { diff --git a/crates/codemod-sandbox/src/sandbox/engine/transform_helpers.rs b/crates/codemod-sandbox/src/sandbox/engine/transform_helpers.rs new file mode 100644 index 000000000..311f8f3a9 --- /dev/null +++ b/crates/codemod-sandbox/src/sandbox/engine/transform_helpers.rs @@ -0,0 +1,149 @@ +use crate::ast_grep::serde::JsValue; +use crate::ast_grep::sg_node::{SgNodeRjs, SgRootInner}; +use crate::sandbox::engine::execution_engine::{ExecutionResult, 
ModifiedResult}; +use crate::sandbox::errors::ExecutionError; +use rquickjs::{Ctx, IntoJs, Object, Value}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; + +/// How to check whether content was modified +pub enum ModificationCheck<'a> { + /// Compare new content string against original content string + StringEquality { original_content: &'a str }, + /// Compare SHA256 hash of new content against original hash + #[cfg(feature = "native")] + Sha256(Option<[u8; 32]>), +} + +/// Build the JS `options` object passed to the transform function. +/// +/// Creates an object with: `{ params, language, matches, matrixValues }` +pub fn build_transform_options<'js>( + ctx: &Ctx<'js>, + params: HashMap, + language: &str, + matrix_values: Option>, + matches: Option>>, +) -> Result, ExecutionError> { + let run_options = Object::new(ctx.clone()).map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: e.to_string(), + }, + })?; + + let params_js = params + .into_iter() + .map(|(k, v)| (k, JsValue(v))) + .collect::>(); + run_options + .set("params", params_js) + .map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: e.to_string(), + }, + })?; + + run_options + .set("language", language) + .map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: e.to_string(), + }, + })?; + + run_options + .set("matches", matches) + .map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: e.to_string(), + }, + })?; + + let matrix_values_js = matrix_values.map(|input| { + input + .into_iter() + .map(|(k, v)| (k, JsValue(v))) + .collect::>() + }); + + run_options + .set("matrixValues", matrix_values_js) + .map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + 
message: e.to_string(), + }, + })?; + + run_options + .into_js(ctx) + .map_err(|e| ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::InitializationFailed { + message: e.to_string(), + }, + }) +} + +/// Process the result value returned by a transform function. +/// +/// Handles string results, null/undefined, and rename_to logic. +/// Uses `ModificationCheck` to determine whether content was actually modified. +pub fn process_transform_result( + result_obj: &Value<'_>, + sg_root_inner: &Arc, + modification_check: ModificationCheck<'_>, +) -> Result { + let rename_to = sg_root_inner + .rename_to + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + .clone() + .map(PathBuf::from); + + if result_obj.is_string() { + let new_content = result_obj + .get::() + .expect("result_obj should be a String after is_string() check"); + let is_modified = match modification_check { + ModificationCheck::StringEquality { original_content } => { + new_content != original_content + } + #[cfg(feature = "native")] + ModificationCheck::Sha256(original_sha256) => match original_sha256 { + Some(original_hash) => { + use sha2::{Digest, Sha256}; + let mut hasher = Sha256::new(); + hasher.update(new_content.as_bytes()); + let new_hash: [u8; 32] = hasher.finalize().into(); + new_hash != original_hash + } + None => true, + }, + }; + if is_modified || rename_to.is_some() { + Ok(ExecutionResult::Modified(ModifiedResult { + content: new_content, + rename_to, + })) + } else { + Ok(ExecutionResult::Unmodified) + } + } else if result_obj.is_null() || result_obj.is_undefined() { + if rename_to.is_some() { + let original_content = sg_root_inner.grep.source().to_string(); + Ok(ExecutionResult::Modified(ModifiedResult { + content: original_content, + rename_to, + })) + } else { + Ok(ExecutionResult::Unmodified) + } + } else { + let type_name = result_obj.type_name(); + Err(ExecutionError::Runtime { + source: crate::sandbox::errors::RuntimeError::ExecutionFailed { + 
message: format!("Codemod transform functions must return either a string or null/undefined. Received {type_name}"), + }, + }) + } +} diff --git a/crates/codemod-sandbox/tests/integration/semantic/common_tests.rs b/crates/codemod-sandbox/tests/integration/semantic/common_tests.rs index 6828a6301..caa530c2c 100644 --- a/crates/codemod-sandbox/tests/integration/semantic/common_tests.rs +++ b/crates/codemod-sandbox/tests/integration/semantic/common_tests.rs @@ -3,10 +3,11 @@ use super::fixtures::jssg_test; use ast_grep_language::SupportLang; +use codemod_sandbox::CodemodLang; jssg_test! { name: test_get_definition_without_provider, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "no_provider_definition.js", fixture_dir: "common/without_provider", target: "input.js", @@ -15,7 +16,7 @@ jssg_test! { jssg_test! { name: test_find_references_without_provider, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "no_provider_references.js", fixture_dir: "common/without_provider", target: "input.js", @@ -24,7 +25,7 @@ jssg_test! { jssg_test! 
{ name: test_type_info_without_provider, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "no_provider_type_info.js", fixture_dir: "common/without_provider", target: "input.js", diff --git a/crates/codemod-sandbox/tests/integration/semantic/fixtures/mod.rs b/crates/codemod-sandbox/tests/integration/semantic/fixtures/mod.rs index 2d4a8cee7..59d1557d4 100644 --- a/crates/codemod-sandbox/tests/integration/semantic/fixtures/mod.rs +++ b/crates/codemod-sandbox/tests/integration/semantic/fixtures/mod.rs @@ -6,9 +6,10 @@ use ast_grep_language::SupportLang; use codemod_sandbox::sandbox::engine::execution_engine::{ - execute_codemod_with_quickjs, ExecutionResult, JssgExecutionOptions, + execute_codemod_with_quickjs, CodemodOutput, ExecutionResult, JssgExecutionOptions, }; use codemod_sandbox::sandbox::resolvers::oxc_resolver::OxcResolver; +use codemod_sandbox::CodemodLang; use language_core::SemanticProvider; use language_javascript::OxcSemanticProvider; use language_python::RuffSemanticProvider; @@ -122,7 +123,7 @@ pub struct TestConfig<'a> { pub codemod_name: &'a str, pub fixture_dir: &'a str, pub target_file: &'a str, - pub language: SupportLang, + pub language: CodemodLang, pub scope: ProviderScope, pub preprocess_files: Vec<&'a str>, pub expected_file: Option<&'a str>, @@ -134,7 +135,7 @@ impl<'a> TestConfig<'a> { codemod_name: &'a str, fixture_dir: &'a str, target_file: &'a str, - language: SupportLang, + language: CodemodLang, ) -> Self { Self { codemod_name, @@ -197,10 +198,14 @@ pub async fn run_test(config: TestConfig<'_>) -> Result, String> None } else { let provider = match config.language { - SupportLang::JavaScript | SupportLang::TypeScript | SupportLang::Tsx => { + CodemodLang::Static(SupportLang::JavaScript) + | CodemodLang::Static(SupportLang::TypeScript) + | CodemodLang::Static(SupportLang::Tsx) => { create_js_provider(config.scope, Some(temp_dir.path())) } - SupportLang::Python => 
create_python_provider(config.scope, Some(temp_dir.path())), + CodemodLang::Static(SupportLang::Python) => { + create_python_provider(config.scope, Some(temp_dir.path())) + } _ => panic!("Unsupported language: {:?}", config.language), }; @@ -232,25 +237,37 @@ pub async fn run_test(config: TestConfig<'_>) -> Result, String> capabilities: None, semantic_provider: provider, metrics_context: None, + test_mode: false, + target_directory: None, }; let result = execute_codemod_with_quickjs(options).await; match result { - Ok(ExecutionResult::Modified(new_content)) => { + Ok(CodemodOutput { + primary: ExecutionResult::Modified(modified), + .. + }) => { // If expected file is specified, verify the output if let Some(expected_file) = config.expected_file { let expected = load_fixture(config.fixture_dir, expected_file); - if new_content.trim() != expected.trim() { + if modified.content.trim() != expected.trim() { return Err(format!( "Output mismatch.\nExpected:\n{}\n\nGot:\n{}", - expected, new_content + expected, modified.content )); } } - Ok(Some(new_content)) + Ok(Some(modified.content)) } - Ok(ExecutionResult::Unmodified) | Ok(ExecutionResult::Skipped) => Ok(None), + Ok(CodemodOutput { + primary: ExecutionResult::Unmodified, + .. + }) + | Ok(CodemodOutput { + primary: ExecutionResult::Skipped, + .. + }) => Ok(None), Err(e) => Err(format!("Execution failed: {:?}", e)), } } diff --git a/crates/codemod-sandbox/tests/integration/semantic/javascript_tests.rs b/crates/codemod-sandbox/tests/integration/semantic/javascript_tests.rs index 3142de1e5..f11b74989 100644 --- a/crates/codemod-sandbox/tests/integration/semantic/javascript_tests.rs +++ b/crates/codemod-sandbox/tests/integration/semantic/javascript_tests.rs @@ -2,10 +2,11 @@ use super::fixtures::jssg_test; use ast_grep_language::SupportLang; +use codemod_sandbox::CodemodLang; jssg_test! 
{ name: test_find_references_same_file_variable, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "js_find_references_variable.js", fixture_dir: "javascript/find_references_variable", target: "input.js", @@ -13,7 +14,7 @@ jssg_test! { jssg_test! { name: test_find_references_transform_rename, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "js_transform_rename.js", fixture_dir: "javascript/transform_rename", target: "input.js", @@ -22,7 +23,7 @@ jssg_test! { jssg_test! { name: test_definition_kind_local, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "js_definition_kind_local.js", fixture_dir: "javascript/definition_kind_local", target: "input.js", @@ -31,7 +32,7 @@ jssg_test! { jssg_test! { name: test_definition_resolve_external_false, - language: SupportLang::JavaScript, + language: CodemodLang::Static(SupportLang::JavaScript), codemod: "js_definition_resolve_external.js", fixture_dir: "javascript/definition_resolve_external", target: "input.js", diff --git a/crates/codemod-sandbox/tests/integration/semantic/python_tests.rs b/crates/codemod-sandbox/tests/integration/semantic/python_tests.rs index ff1220375..d5eef1dfc 100644 --- a/crates/codemod-sandbox/tests/integration/semantic/python_tests.rs +++ b/crates/codemod-sandbox/tests/integration/semantic/python_tests.rs @@ -2,6 +2,7 @@ use super::fixtures::jssg_test; use ast_grep_language::SupportLang; +use codemod_sandbox::CodemodLang; // ============================================================================= // Single-file (File Scope) Tests @@ -9,7 +10,7 @@ use ast_grep_language::SupportLang; jssg_test! 
{ name: test_find_references_variable, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_find_references_variable.js", fixture_dir: "python/find_references_variable", target: "input.py", @@ -17,7 +18,7 @@ jssg_test! { jssg_test! { name: test_find_references_function, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_find_references_function.js", fixture_dir: "python/find_references_function", target: "input.py", @@ -25,7 +26,7 @@ jssg_test! { jssg_test! { name: test_find_references_class, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_find_references_class.js", fixture_dir: "python/find_references_class", target: "input.py", @@ -37,7 +38,7 @@ jssg_test! { jssg_test! { name: test_cross_file_definition_workspace_scope, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_cross_file_definition.js", fixture_dir: "python/cross_file_definition", target: "main.py", @@ -46,7 +47,7 @@ jssg_test! { jssg_test! { name: test_cross_file_references_workspace_scope, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_cross_file_references.js", fixture_dir: "python/cross_file_references", target: "utils.py", @@ -56,7 +57,7 @@ jssg_test! { jssg_test! { name: test_cross_file_references_with_imports, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_cross_file_references_with_imports.js", fixture_dir: "python/cross_file_references_with_imports", target: "models.py", @@ -66,7 +67,7 @@ jssg_test! { jssg_test! 
{ name: test_false_positive_references_with_imports, - language: SupportLang::Python, + language: CodemodLang::Static(SupportLang::Python), codemod: "py_false_positive_references.js", fixture_dir: "python/false_positive_references", target: "app.py", diff --git a/crates/codemod-sandbox/tests/integration/semantic/typescript_tests.rs b/crates/codemod-sandbox/tests/integration/semantic/typescript_tests.rs index e16489ca4..5494cd692 100644 --- a/crates/codemod-sandbox/tests/integration/semantic/typescript_tests.rs +++ b/crates/codemod-sandbox/tests/integration/semantic/typescript_tests.rs @@ -2,10 +2,11 @@ use super::fixtures::jssg_test; use ast_grep_language::SupportLang; +use codemod_sandbox::CodemodLang; jssg_test! { name: test_get_definition_file_scope, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_definition_file_scope.js", fixture_dir: "typescript/definition_file_scope", target: "input.ts", @@ -13,7 +14,7 @@ jssg_test! { jssg_test! { name: test_find_references_file_scope, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_find_references_file_scope.js", fixture_dir: "typescript/find_references_file_scope", target: "input.ts", @@ -21,7 +22,7 @@ jssg_test! { jssg_test! { name: test_find_references_function_same_file, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_find_references_function.js", fixture_dir: "typescript/find_references_function", target: "input.ts", @@ -29,7 +30,7 @@ jssg_test! { jssg_test! { name: test_cross_file_definition_workspace_scope, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_cross_file_definition.js", fixture_dir: "typescript/cross_file_definition", target: "main.ts", @@ -38,7 +39,7 @@ jssg_test! { jssg_test! 
{ name: test_cross_file_references_workspace_scope, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_cross_file_references.js", fixture_dir: "typescript/cross_file_references", target: "utils.ts", @@ -47,7 +48,7 @@ jssg_test! { jssg_test! { name: test_find_references_cross_file_with_cache, - language: SupportLang::TypeScript, + language: CodemodLang::Static(SupportLang::TypeScript), codemod: "ts_cross_file_references_with_cache.js", fixture_dir: "typescript/cross_file_references_with_cache", target: "utils.ts", diff --git a/crates/core/src/engine.rs b/crates/core/src/engine.rs index 7c2554f40..50181cc69 100644 --- a/crates/core/src/engine.rs +++ b/crates/core/src/engine.rs @@ -17,7 +17,8 @@ use crate::file_ops::AsyncFileWriter; use crate::utils::validate_workflow; use chrono::Utc; use codemod_sandbox::sandbox::engine::{ - extract_selector_with_quickjs, ExecutionResult, JssgExecutionOptions, SelectorEngineOptions, + extract_selector_with_quickjs, CodemodOutput, ExecutionResult, JssgExecutionOptions, + SelectorEngineOptions, }; use codemod_sandbox::{scan_file_with_combined_scan, with_combined_scan}; use log::{debug, error, info, warn}; @@ -1887,62 +1888,105 @@ impl Engine { capabilities: config.capabilities.clone(), semantic_provider: semantic_provider.clone(), metrics_context: Some(metrics_context_clone.clone()), + test_mode: false, + target_directory: Some(&target_path), }) .await }); match execution_result { - Ok(execution_output) => { - match execution_output { - ExecutionResult::Modified(ref new_content) => { - if config.dry_run { - self.execution_stats - .files_modified - .fetch_add(1, Ordering::Relaxed); - - // Report the change via callback if provided - if let Some(callback) = - &self.workflow_run_config.dry_run_callback - { - callback(DryRunChange { - file_path: file_path.to_path_buf(), - original_content: content.clone(), - new_content: new_content.clone(), - }); - } + Ok(CodemodOutput { primary, 
secondary }) => { + let apply_change = |change_path: &Path, result: &ExecutionResult| { + match result { + ExecutionResult::Modified(ref modified) => { + let write_path = + modified.rename_to.as_deref().unwrap_or(change_path); + if config.dry_run { + self.execution_stats + .files_modified + .fetch_add(1, Ordering::Relaxed); - debug!("Would modify file (dry run): {}", file_path.display()); - } else { - // Use async file writing to avoid blocking the thread - let write_result = runtime_handle.block_on(async { - file_writer - .write_file( - file_path.to_path_buf(), - new_content.clone(), - ) - .await - }); + // Report the change via callback if provided + if let Some(callback) = + &self.workflow_run_config.dry_run_callback + { + let original = if change_path == file_path { + content.clone() + } else { + std::fs::read_to_string(change_path) + .unwrap_or_default() + }; + callback(DryRunChange { + file_path: change_path.to_path_buf(), + original_content: original, + new_content: modified.content.clone(), + }); + } - if let Err(e) = write_result { - error!( - "Failed to write modified file {}: {}", - file_path.display(), - e + debug!( + "Would modify file (dry run): {}", + change_path.display() ); - self.execution_stats - .files_with_errors - .fetch_add(1, Ordering::Relaxed); } else { - debug!("Modified file: {}", file_path.display()); - if let Some(ref provider) = semantic_provider { - let _ = provider - .notify_file_processed(file_path, new_content); + // Use async file writing to avoid blocking the thread + let write_result = runtime_handle.block_on(async { + file_writer + .write_file( + write_path.to_path_buf(), + modified.content.clone(), + ) + .await + }); + + if let Err(e) = write_result { + error!( + "Failed to write modified file {}: {}", + write_path.display(), + e + ); + self.execution_stats + .files_with_errors + .fetch_add(1, Ordering::Relaxed); + } else { + // If renamed, delete the original file + if modified.rename_to.is_some() + && write_path != 
change_path + { + if let Err(e) = std::fs::remove_file(change_path) { + error!( + "Failed to remove original file {}: {}", + change_path.display(), + e + ); + } else { + debug!( + "Renamed file: {} -> {}", + change_path.display(), + write_path.display() + ); + } + } else { + debug!("Modified file: {}", change_path.display()); + } + if let Some(ref provider) = semantic_provider { + let _ = provider.notify_file_processed( + write_path, + &modified.content, + ); + } + self.execution_stats + .files_modified + .fetch_add(1, Ordering::Relaxed); } - self.execution_stats - .files_modified - .fetch_add(1, Ordering::Relaxed); } } + ExecutionResult::Unmodified | ExecutionResult::Skipped => {} + } + }; + + match &primary { + ExecutionResult::Modified(_) => { + apply_change(file_path, &primary); } ExecutionResult::Unmodified | ExecutionResult::Skipped => { self.execution_stats @@ -1950,6 +1994,10 @@ impl Engine { .fetch_add(1, Ordering::Relaxed); } } + + for change in &secondary { + apply_change(&change.path, &change.result); + } } Err(e) => { error!( diff --git a/crates/core/tests/engine_tests.rs b/crates/core/tests/engine_tests.rs index c1be03e8b..76f9a179b 100644 --- a/crates/core/tests/engine_tests.rs +++ b/crates/core/tests/engine_tests.rs @@ -2396,63 +2396,6 @@ export default function transform(ast) { ); } -#[tokio::test] -async fn test_execute_js_ast_grep_step_invalid_language() { - let temp_dir = TempDir::new().unwrap(); - let temp_path = temp_dir.path(); - - // Create a simple codemod file - create_test_file( - temp_path, - "codemod.js", - r#" -export default function transform(ast) { - return ast; -} -"#, - ); - - // Create test file - create_test_file(temp_path, "test.js", "console.log('test');"); - - // Create engine with correct bundle path - let config = WorkflowRunConfig { - bundle_path: temp_path.to_path_buf(), - ..WorkflowRunConfig::default() - }; - let engine = Engine::with_workflow_run_config(config); - let result = engine - .execute_js_ast_grep_step( - 
"test-node".to_string(), - "test-step".to_string(), - &UseJSAstGrep { - js_file: "codemod.js".to_string(), - base_path: None, - include: None, - exclude: None, - max_threads: None, - dry_run: Some(false), - language: Some("invalid-language".to_string()), // Invalid language - capabilities: None, - semantic_analysis: Some(SemanticAnalysisConfig::Mode(SemanticAnalysisMode::File)), - }, - None, - None, - &CapabilitiesData { - capabilities: None, - capabilities_security_callback: None, - }, - &None, - ) - .await; - - // Currently the implementation doesn't validate language strings, so just test that it doesn't panic - // Note: This test was updated because the current implementation doesn't validate language strings - // If validation is needed, it should be added to the execute_js_ast_grep_step method - // assert!(result.is_err(), "Should fail with invalid language"); - println!("Result with invalid language: {result:?}"); -} - // Helper function to create a workflow with JSAstGrep step fn create_js_ast_grep_workflow() -> Workflow { Workflow { diff --git a/crates/mcp/src/data/prompts/jssg-instructions.md b/crates/mcp/src/data/prompts/jssg-instructions.md index 0521b5652..dff8ddd60 100644 --- a/crates/mcp/src/data/prompts/jssg-instructions.md +++ b/crates/mcp/src/data/prompts/jssg-instructions.md @@ -774,9 +774,34 @@ class SgRoot { filename(): string; /** Write content to this file (only for files from definition()/references()) */ write(content: string): void; + /** Rename the current file. Relative paths resolve against the file's directory. */ + rename(newPath: string): void; } ``` +### File Renaming + +Use `root.rename()` to rename a file alongside content changes. This is useful for codemods that convert between file formats (e.g., `.less` → `.css`, `.js` → `.ts`, `.cjs` → `.mjs`). 
+ +```typescript +// Rename .less → .css +const codemod: Transform = async (root) => { + root.rename(root.filename().replace('.less', '.css')); + return transformedContent; // or null for rename-only +}; +``` + +Behavior: +| `return` value | `root.rename()` called | Result | +|---|---|---| +| `string` | no | Modify content only | +| `null` | no | No changes | +| `string` | yes | Modify content + rename file | +| `null` | yes | Rename file only (content unchanged) | + +- If the path is **relative**, it's resolved against the current file's parent directory. +- If the path is **absolute**, it's used as-is. + ## Semantic Types ```typescript @@ -880,6 +905,95 @@ For large codebases: --- +# Part 9: Metrics (IMPORTANT) + +## Always Add Metrics to Codemods + +**Every codemod should include metrics** to provide insights about the codebase being transformed. Metrics help users understand the scope and impact of a codemod before and during execution. Even if a codemod modifies code, it should also track what it's changing. + +Use `useMetricAtom` from `codemod:metrics` to create metric trackers: + +```typescript +import { useMetricAtom } from "codemod:metrics"; + +const changeCount = useMetricAtom("changes"); +``` + +## Why Metrics Matter + +- **Pre-migration analysis**: Run the codemod in read-only mode first to count how many places will change +- **Impact assessment**: Understand the scope before committing to a migration +- **Progress tracking**: Track migration progress over time by running metrics repeatedly +- **Stakeholder communication**: Share concrete numbers about what needs to change + +## How to Add Metrics + +### 1. 
Count matches before transforming + +```typescript +import type { Transform } from "codemod:ast-grep"; +import type TSX from "codemod:ast-grep/langs/tsx"; +import { useMetricAtom } from "codemod:metrics"; + +const migrationMetric = useMetricAtom("api-migrations"); + +const codemod: Transform = async (root) => { + const rootNode = root.root(); + const edits: Edit[] = []; + + const deprecatedCalls = rootNode.findAll({ + rule: { pattern: "oldApi($$$ARGS)" }, + }); + + // Always track metrics, even when transforming + for (const call of deprecatedCalls) { + migrationMetric.increment({ api: "oldApi" }); + edits.push(call.replace("newApi($$$ARGS)")); + } + + return edits.length > 0 ? rootNode.commitEdits(edits) : null; +}; + +export default codemod; +``` + +### 2. Use cardinality for rich insights + +Track multiple dimensions to enable grouping and filtering: + +```typescript +const componentMetric = useMetricAtom("component-usage"); + +// Track which components use which props +componentMetric.increment({ + component: componentName, + prop: propName, + file: root.filename(), +}); +``` + +### 3. Read-only analysis codemods + +Sometimes you just want to gather data without changing code. Return `null` from your transform and only collect metrics: + +```typescript +const codemod: Transform = async (root) => { + const rootNode = root.root(); + + const patterns = rootNode.findAll({ + rule: { kind: "call_expression", /* ... 
*/ }, + }); + + for (const match of patterns) { + analysisMetric.increment({ pattern: match.text() }); + } + + return null; // No modifications, just collecting data +}; +``` + +--- + # Quality Bar & Anti-Pitfalls * **Package correctness**: Transformations must only apply to the intended library API — verify imports/bindings first diff --git a/crates/mcp/src/handlers/ast_dump.rs b/crates/mcp/src/handlers/ast_dump.rs index cce1434a3..bb1f1c6b0 100644 --- a/crates/mcp/src/handlers/ast_dump.rs +++ b/crates/mcp/src/handlers/ast_dump.rs @@ -1,5 +1,5 @@ use ast_grep_core::AstGrep; -use ast_grep_language::SupportLang; +use codemod_sandbox::CodemodLang; use rmcp::{handler::server::wrapper::Parameters, model::*, schemars, tool, ErrorData as McpError}; #[derive(Debug, serde::Deserialize, schemars::JsonSchema)] @@ -47,7 +47,7 @@ impl AstDumpHandler { fn dump_ast_for_language( &self, source_code: &str, - language: SupportLang, + language: CodemodLang, ) -> Result { let root = AstGrep::new(source_code, language); let result = self.dump_ast_for_ai_context(root.root(), 0); diff --git a/crates/mcp/src/handlers/jssg_test.rs b/crates/mcp/src/handlers/jssg_test.rs index 145d8f4fd..2f0f30afc 100644 --- a/crates/mcp/src/handlers/jssg_test.rs +++ b/crates/mcp/src/handlers/jssg_test.rs @@ -1,4 +1,4 @@ -use codemod_sandbox::sandbox::engine::{ExecutionResult, JssgExecutionOptions}; +use codemod_sandbox::sandbox::engine::{CodemodOutput, ExecutionResult, JssgExecutionOptions}; use rmcp::{handler::server::wrapper::Parameters, model::*, schemars, tool, ErrorData as McpError}; use serde::{Deserialize, Serialize}; use std::collections::HashSet; @@ -7,8 +7,8 @@ use std::pin::Pin; use std::sync::Arc; use std::time::Duration; -use ast_grep_language::SupportLang; use codemod_llrt_capabilities::types::LlrtSupportedModules; +use codemod_sandbox::CodemodLang; use codemod_sandbox::{ sandbox::{ engine::{execute_codemod_with_quickjs, language_data::get_extensions_for_language}, @@ -17,7 +17,8 @@ use 
codemod_sandbox::{ utils::project_discovery::find_tsconfig, }; use testing_utils::{ - ReporterType, TestOptions, TestRunner, TestSource, TransformationResult, TransformationTestCase, + ReporterType, TestOptions, TestRunner, TestSource, TransformOutput, TransformationResult, + TransformationTestCase, }; #[derive(Debug, Deserialize, Serialize, schemars::JsonSchema)] @@ -171,10 +172,7 @@ impl JssgTestHandler { request: RunJssgTestRequest, ) -> Result> { // Parse language - let language: SupportLang = request - .language - .parse() - .map_err(|_| format!("Unsupported language: {}", request.language))?; + let language: CodemodLang = request.language.parse().map_err(|e: String| e)?; // Set up execution components let codemod_path = PathBuf::from(&request.codemod_file); @@ -231,6 +229,7 @@ impl JssgTestHandler { expect_errors: vec![], strictness, language: Some(language_str), + expected_extension: None, }; // Create execution function @@ -256,15 +255,24 @@ impl JssgTestHandler { capabilities: capabilities.clone(), semantic_provider: None, metrics_context: None, + test_mode: true, + target_directory: None, }; - let execution_output = execute_codemod_with_quickjs(options).await?; + let CodemodOutput { primary, .. 
} = + execute_codemod_with_quickjs(options).await?; - match execution_output { - ExecutionResult::Modified(content) => { - Ok(TransformationResult::Success(content)) + match primary { + ExecutionResult::Modified(modified) => { + Ok(TransformationResult::Success(TransformOutput { + content: modified.content, + rename_to: modified.rename_to, + })) } ExecutionResult::Unmodified | ExecutionResult::Skipped => { - Ok(TransformationResult::Success(input_code)) + Ok(TransformationResult::Success(TransformOutput { + content: input_code, + rename_to: None, + })) } } }) diff --git a/crates/testing-utils/src/config.rs b/crates/testing-utils/src/config.rs index 75b3025c2..6be287efd 100644 --- a/crates/testing-utils/src/config.rs +++ b/crates/testing-utils/src/config.rs @@ -60,6 +60,7 @@ pub struct TestOptions { pub expect_errors: Vec, pub strictness: Strictness, pub language: Option, + pub expected_extension: Option, } #[derive(Debug, Clone)] diff --git a/crates/testing-utils/src/fixtures.rs b/crates/testing-utils/src/fixtures.rs index cf3b7a5a0..8d9054b26 100644 --- a/crates/testing-utils/src/fixtures.rs +++ b/crates/testing-utils/src/fixtures.rs @@ -92,10 +92,12 @@ impl TestSource { pub fn to_unified_test_cases( &self, extensions: &[&str], + expected_extension: Option<&str>, ) -> Result, TestError> { match self { TestSource::Directory(dir) => { - let fs_test_cases = FileSystemTestCase::discover_in_directory(dir, extensions)?; + let fs_test_cases = + FileSystemTestCase::discover_in_directory(dir, extensions, expected_extension)?; let mut unified_cases = Vec::new(); for fs_case in fs_test_cases { @@ -111,8 +113,10 @@ impl TestSource { ), None => { // Expected file doesn't exist - create placeholder path for snapshot updates - let expected_path = match build_expected_path(&input_file.path) - { + let expected_path = match build_expected_path( + &input_file.path, + expected_extension, + ) { Ok(path) => Some(path), Err(e) => { eprintln!("error constructing path: {}", e); @@ -163,6 
+167,7 @@ impl FileSystemTestCase { pub fn discover_in_directory( test_dir: &Path, extensions: &[&str], + expected_extension: Option<&str>, ) -> Result, TestError> { let mut test_cases = Vec::new(); @@ -171,7 +176,7 @@ impl FileSystemTestCase { let path = entry.path(); if path.is_dir() { - if let Ok(test_case) = Self::from_directory(&path, extensions) { + if let Ok(test_case) = Self::from_directory(&path, extensions, expected_extension) { test_cases.push(test_case); } } @@ -185,6 +190,7 @@ impl FileSystemTestCase { fn from_directory( test_dir: &Path, extensions: &[&str], + expected_extension: Option<&str>, ) -> Result { let name = test_dir .file_name() @@ -197,7 +203,7 @@ impl FileSystemTestCase { // Check for single file format (input.js + expected.js) if let Ok(input_files) = find_input_files(test_dir, extensions) { - let expected_files = find_expected_files(&input_files)?; + let expected_files = find_expected_files(&input_files, expected_extension)?; let mut input_files_map = HashMap::new(); let mut expected_files_map = HashMap::new(); @@ -213,10 +219,23 @@ impl FileSystemTestCase { for expected_file_path in expected_files { if let Ok(expected_file) = TestFile::from_path(&expected_file_path) { - if let Some(ext) = expected_file_path.extension().and_then(|e| e.to_str()) { - let key = PathBuf::from(format!("input.{}", ext)); - expected_files_map.insert(key, expected_file); - } + // When expected_extension is set, the expected file has a different ext + let key_ext = if let Some(exp_ext) = expected_extension { + input_files_map + .keys() + .next() + .and_then(|k| k.extension().and_then(|e| e.to_str())) + .map(|e| e.to_string()) + .unwrap_or_else(|| exp_ext.trim_start_matches('.').to_string()) + } else { + expected_file_path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_string() + }; + let key = PathBuf::from(format!("input.{}", key_ext)); + expected_files_map.insert(key, expected_file); } } @@ -356,11 +375,14 @@ fn find_input_files(test_dir: 
&Path, extensions: &[&str]) -> Result } /// Find expected files corresponding to input files -fn find_expected_files(input_files: &[PathBuf]) -> Result, TestError> { +fn find_expected_files( + input_files: &[PathBuf], + expected_extension: Option<&str>, +) -> Result, TestError> { let mut expected_files = Vec::new(); for input_file in input_files { - if let Ok(expected_file) = build_expected_path(input_file) { + if let Ok(expected_file) = build_expected_path(input_file, expected_extension) { if expected_file.exists() { expected_files.push(expected_file); } else { @@ -400,7 +422,10 @@ fn collect_files_in_directory( Ok(files) } -fn build_expected_path(input_file_path: &Path) -> Result> { +fn build_expected_path( + input_file_path: &Path, + expected_extension: Option<&str>, +) -> Result> { let file_stem = match input_file_path.file_stem().and_then(|s| s.to_str()) { Some(stem) => stem, None => return Err("Invalid file stem".into()), @@ -413,9 +438,13 @@ fn build_expected_path(input_file_path: &Path) -> Result { - let extension = match input_file_path.extension().and_then(|ext| ext.to_str()) { - Some(ext) => ext, - None => return Err("No file extension".into()), + let extension = if let Some(ext) = expected_extension { + ext.trim_start_matches('.') + } else { + match input_file_path.extension().and_then(|ext| ext.to_str()) { + Some(ext) => ext, + None => return Err("No file extension".into()), + } }; parent_dir.join(format!("expected.{}", extension)) diff --git a/crates/testing-utils/src/runner.rs b/crates/testing-utils/src/runner.rs index cb7557515..210bc0d85 100644 --- a/crates/testing-utils/src/runner.rs +++ b/crates/testing-utils/src/runner.rs @@ -12,10 +12,17 @@ use crate::{ strictness::{ast_compare, cst_compare, detect_language, loose_compare}, }; +/// Output of a successful transformation +#[derive(Debug, Clone)] +pub struct TransformOutput { + pub content: String, + pub rename_to: Option, +} + /// Result of executing a transformation on input code #[derive(Debug, 
Clone)] pub enum TransformationResult { - Success(String), + Success(TransformOutput), Error(String), } @@ -112,7 +119,7 @@ impl TestRunner { ) -> Result { let test_cases = self .test_source - .to_unified_test_cases(extensions) + .to_unified_test_cases(extensions, self.options.expected_extension.as_deref()) .map_err(|e| anyhow::anyhow!("Failed to load test cases: {}", e))?; if test_cases.is_empty() { @@ -227,7 +234,7 @@ impl TestRunner { } let actual_content = match execution_result { - TransformationResult::Success(content) => content, + TransformationResult::Success(output) => output.content, TransformationResult::Error(error) => { return Err(anyhow::anyhow!( "Transformation execution failed:\n{}", diff --git a/crates/tree-sitter-loader/Cargo.toml b/crates/tree-sitter-loader/Cargo.toml new file mode 100644 index 000000000..86fb8cde1 --- /dev/null +++ b/crates/tree-sitter-loader/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "tree-sitter-loader" +version.workspace = true +edition = "2021" +authors.workspace = true +description = "Dynamic tree-sitter parser downloader and loader for codemod" + +[dependencies] +ast-grep-dynamic = { workspace = true } +reqwest = { workspace = true, features = ["blocking"] } +dirs = { workspace = true } +thiserror = { workspace = true } +log = { workspace = true } diff --git a/crates/tree-sitter-loader/src/lib.rs b/crates/tree-sitter-loader/src/lib.rs new file mode 100644 index 000000000..076349c7b --- /dev/null +++ b/crates/tree-sitter-loader/src/lib.rs @@ -0,0 +1,228 @@ +use ast_grep_dynamic::{DynamicLang, Registration}; +use std::path::{Path, PathBuf}; +use std::sync::Once; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum LoaderError { + #[error("Failed to download parser: {0}")] + Download(String), + #[error("Failed to register parser: {0}")] + Register(String), + #[error("No cache directory available")] + NoCacheDir, + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("Unsupported platform: os={os}, 
arch={arch}")] + UnsupportedPlatform { os: String, arch: String }, +} + +struct DynamicLanguageDefinition { + name: &'static str, + symbol: &'static str, + extensions: &'static [&'static str], + expando_char: char, + urls: &'static [(&'static str, &'static str, &'static str)], // (os, arch, url) +} + +fn get_definitions() -> &'static [DynamicLanguageDefinition] { + &[DynamicLanguageDefinition { + name: "less", + symbol: "tree_sitter_less", + extensions: &["less"], + expando_char: '_', + urls: &[ + ( + "macos", + "aarch64", + concat!( + "https://tree-sitter-parsers.s3.us-east-1.amazonaws.com/tree-sitter/parsers/tree-sitter-less/", + "945f52c94250309073a96bbfbc5bcd57ff2bde49/darwin-arm64.dylib" + ), + ), + ( + "macos", + "x86_64", + concat!( + "https://tree-sitter-parsers.s3.us-east-1.amazonaws.com/tree-sitter/parsers/tree-sitter-less/", + "945f52c94250309073a96bbfbc5bcd57ff2bde49/darwin-x64.dylib" + ), + ), + ( + "linux", + "aarch64", + concat!( + "https://tree-sitter-parsers.s3.us-east-1.amazonaws.com/tree-sitter/parsers/tree-sitter-less/", + "945f52c94250309073a96bbfbc5bcd57ff2bde49/linux-arm64.so" + ), + ), + ( + "linux", + "x86_64", + concat!( + "https://tree-sitter-parsers.s3.us-east-1.amazonaws.com/tree-sitter/parsers/tree-sitter-less/", + "945f52c94250309073a96bbfbc5bcd57ff2bde49/linux-x64.so" + ), + ), + ( + "windows", + "x86_64", + concat!( + "https://tree-sitter-parsers.s3.us-east-1.amazonaws.com/tree-sitter/parsers/tree-sitter-less/", + "945f52c94250309073a96bbfbc5bcd57ff2bde49/win32-x64.dll" + ), + ), + ], + }] +} + +fn current_platform() -> Result<(&'static str, &'static str), LoaderError> { + let os = if cfg!(target_os = "macos") { + "macos" + } else if cfg!(target_os = "linux") { + "linux" + } else if cfg!(target_os = "windows") { + "windows" + } else { + return Err(LoaderError::UnsupportedPlatform { + os: std::env::consts::OS.to_string(), + arch: std::env::consts::ARCH.to_string(), + }); + }; + + let arch = if cfg!(target_arch = "aarch64") { + 
"aarch64" + } else if cfg!(target_arch = "x86_64") { + "x86_64" + } else { + return Err(LoaderError::UnsupportedPlatform { + os: std::env::consts::OS.to_string(), + arch: std::env::consts::ARCH.to_string(), + }); + }; + + Ok((os, arch)) +} + +fn get_cache_dir() -> Result { + if let Ok(dir) = std::env::var("CODEMOD_PARSER_CACHE_DIR") { + return Ok(PathBuf::from(dir)); + } + dirs::cache_dir() + .map(|d| d.join("codemod").join("tree-sitter-parsers")) + .ok_or(LoaderError::NoCacheDir) +} + +fn ensure_parser_cached( + def: &DynamicLanguageDefinition, + cache_dir: &Path, +) -> Result { + let (os, arch) = current_platform()?; + + let url = def + .urls + .iter() + .find(|(o, a, _)| *o == os && *a == arch) + .map(|(_, _, u)| *u) + .ok_or_else(|| LoaderError::UnsupportedPlatform { + os: os.to_string(), + arch: arch.to_string(), + })?; + + let ext = if cfg!(target_os = "windows") { + "dll" + } else if cfg!(target_os = "macos") { + "dylib" + } else { + "so" + }; + + let filename = format!("{}.{}", def.name, ext); + let parser_dir = cache_dir.join(def.name); + let cached_path = parser_dir.join(&filename); + + if cached_path.exists() { + log::debug!("Parser {} already cached at {:?}", def.name, cached_path); + return Ok(cached_path); + } + + log::info!("Downloading tree-sitter parser for {} ...", def.name); + std::fs::create_dir_all(&parser_dir)?; + + let response = reqwest::blocking::get(url) + .map_err(|e| LoaderError::Download(format!("HTTP request failed: {e}")))?; + + if !response.status().is_success() { + return Err(LoaderError::Download(format!( + "HTTP {} for {}", + response.status(), + url + ))); + } + + let bytes = response + .bytes() + .map_err(|e| LoaderError::Download(format!("Failed to read response body: {e}")))?; + + std::fs::write(&cached_path, &bytes)?; + log::info!( + "Downloaded {} parser to {:?} ({} bytes)", + def.name, + cached_path, + bytes.len() + ); + + Ok(cached_path) +} + +/// Register all dynamic language parsers, downloading any that are missing. 
+/// +/// This should be called once before using dynamic languages. +/// Returns Ok(()) if all parsers were registered successfully. +pub fn register_all() -> Result<(), LoaderError> { + let cache_dir = get_cache_dir()?; + let definitions = get_definitions(); + + let mut registrations = Vec::new(); + + for def in definitions { + let lib_path = ensure_parser_cached(def, &cache_dir)?; + registrations.push(Registration { + lang_name: def.name.to_string(), + lib_path, + symbol: def.symbol.to_string(), + meta_var_char: None, + expando_char: Some(def.expando_char), + extensions: def.extensions.iter().map(|s| s.to_string()).collect(), + }); + } + + unsafe { + DynamicLang::register(registrations).map_err(|e| LoaderError::Register(format!("{e}")))?; + } + + Ok(()) +} + +static INIT: Once = Once::new(); +static INIT_ERROR: std::sync::OnceLock = std::sync::OnceLock::new(); + +/// Initialize dynamic language parsers (lazy, called at most once). +/// +/// On first call, downloads and registers all dynamic parsers. +/// Subsequent calls are no-ops. If initialization failed, returns the error +/// on every call. +pub fn init() -> Result<(), LoaderError> { + INIT.call_once(|| { + if let Err(e) = register_all() { + log::warn!("Failed to initialize dynamic parsers: {e}"); + let _ = INIT_ERROR.set(e.to_string()); + } + }); + + if let Some(msg) = INIT_ERROR.get() { + Err(LoaderError::Register(msg.clone())) + } else { + Ok(()) + } +} diff --git a/docs/changelog.mdx b/docs/changelog.mdx index 7bd4eb8e0..72c01c734 100644 --- a/docs/changelog.mdx +++ b/docs/changelog.mdx @@ -5,6 +5,27 @@ icon: 'scroll' mode: "center" --- + +- **Multi-repo insights**: Select multiple repositories in the Insights dashboard to analyze patterns across your entire codebase at once. Template variables are now always visible and editable. 
+ +- **JSSG support in Insights**: JavaScript Static Graph queries are now fully integrated into Insights, with improved metrics, inline source editing, and better performance for large-scale analysis. + +- **Registry discovery improvements**: Sort codemods by "most recent" or "most executions," plus enhanced SEO with structured data for better discoverability. + +- **Trusted publishers**: Manage trusted publisher permissions directly from API key settings for better access control. + +- **Reliability improvements**: + - Historical and legacy codemods no longer clutter the codemod picker + - Improved search with debounce and infinite scroll loading + - Table sorting now correctly handles numeric values + - SSR hydration fixes for smoother page loads + - Insights refresh behavior fixed when changing variables + +- **UI polish**: + - Command execution animation for better feedback + - Cleaner footer with updated links + - Removed counter badge from registry header for a cleaner look + - **Semantic analysis for codemods**: JS/TS and Python now support workspace-wide definitions and references, enabling cross-file aware codemods powered by semantic providers. ([Learn more ->](/jssg/semantic-analysis)) diff --git a/docs/oss-quickstart.mdx b/docs/oss-quickstart.mdx index 2a6b0adb3..bcc28e1a3 100644 --- a/docs/oss-quickstart.mdx +++ b/docs/oss-quickstart.mdx @@ -195,7 +195,7 @@ Once you have a codemod package, validation, running, and publishing work the sa - Go to [studio.codemod.com](https://studio.codemod.com) and sign in with your GitHub account. + Go to [Codemod Studio](https://app.codemod.com/studio) and sign in with your GitHub account. 
diff --git a/docs/platform/campaigns.mdx b/docs/platform/campaigns.mdx index 0b271c534..7af54448d 100644 --- a/docs/platform/campaigns.mdx +++ b/docs/platform/campaigns.mdx @@ -2,46 +2,223 @@ title: 'Campaigns' sidebarTitle: 'Campaigns' icon: 'arrow-up-right-dots' +description: 'Run codemods with automated PRs and centralized tracking.' --- import { CodemodWorkflowsDemo } from "/snippets/codemod-workflows-demo.mdx"; -Campaigns let engineering teams centrally automate and orchestrate large code changes like security patches, language or framework upgrades, API updates, i18n, and other major refactors or optimizations across teams and repos. + + + -Codemod Campaigns provide: +A Campaign allows you to orchestrate large-scale code migrations. When you create a Campaign, Codemod executes the underlying [workflow](/workflows/introduction), creates tasks, and opens pull requests with the resulting changes. You can monitor progress, review PRs, re-run failed tasks, and track status from a centralized dashboard. -- **Campaign configuration** - Different teams or repos have different needs. Edit parameters, defined in workflow.yaml, right from the UI. -- **Centralized state management** - Auto-create and track tasks and PRs across the organization with a shared persistent state. -- **Multi-repo orchestration** - Run workflows across multiple repos with ease. +## How Campaigns use workflows +Campaigns are powered by Codemod [workflows](/workflows/introduction), which orchestrate multi-step code migrations at scale. -## Starting a Campaign - - - +### Workflow structure + +A workflow defines what transformations to run and how they're orchestrated: + +- **Nodes** are the primary execution units that represent a stage in your migration. Nodes can depend on other nodes, run automatically or require manual approval, and fan out across matrix dimensions. +- **Steps** are atomic actions within a node.
Each step performs one transformation or command (e.g., run a JSSG codemod, execute a shell script, call an AI agent). +- **Tasks** are runtime instances of nodes. When a Campaign runs, the engine creates tasks for each node. For [matrix strategies](/workflows/reference#matrix-strategy), a single node can spawn multiple parallel tasks (e.g., one per team or shard). + +``` +Workflow Definition Runtime Execution +─────────────────────────────────────── +Workflow Campaign Run + └── Node A └── Task A-1 + └── Step 1 (executes steps 1, 2, 3) + └── Step 2 └── Task A-2 (matrix) + └── Step 3 (executes steps 1, 2, 3) + └── Node B (depends on A) └── Task B-1 (waits for A) + └── Step 1 (executes step 1) +``` + +### Why this matters for large-scale migrations + +The workflow engine handles the complexity of coordinating transformations: + +- **Dependency-aware scheduling**: Nodes execute in the correct order based on their dependencies +- **Parallel execution**: Independent nodes and matrix tasks run concurrently +- **Resumable state**: Progress persists across runs, so interrupted migrations can continue where they left off +- **Mixed transformation types**: Combine deterministic AST codemods with AI-powered fixups and shell scripts in a single workflow + +## Creating a Campaign - - Navigate to **[Campaigns -> Create Campaign](https://app.codemod.com/workflows/create/registry)** in the Codemod app. + + Navigate to **[Campaigns](https://app.codemod.com/workflows/create/registry)** and click **Create Campaign**. Search the [Codemod Registry](https://go.codemod.com/registry) by name or keyword, then select the codemod you want to run. Use the version picker to choose a specific version or default to the latest. + + Pro codemods are available to Pro and Enterprise plan customers. - - Configure: - - **Codemod package** – The Codemod package that you want to run. This can be an existing package, a custom pro codemod (for enterprise customers), or one you create yourself. 
You can build Codemod packages using Codemod MCP or Studio. - - **Target Repository** – The repository that you want to run the Codemod package on. + + + Open the repository selector and choose a repository from your connected GitHub or GitLab accounts. + - Make sure the Codemod GitHub App is installed on repositories you plan to migrate. + Codemod needs access to your repositories via the [GitHub App](/platform/integrations/github) or [GitLab integration](/platform/integrations/gitlab). If no repositories appear, connect your account first. - - **Campaign Name & Description** – Give your Campaign a descriptive name and description. - - Click **Run** to initiate the Campaign. - - Each Campaign defines one or more steps that Codemod will run for the target repository. Codemod opens pull requests, tracks status, and surfaces metrics so you always know what's left. + + + If the codemod accepts configuration, a dynamic form is generated from its schema. Fill in the fields. The form supports text inputs, numbers, booleans, dropdowns, and secrets. + + Parameters make workflows reusable and configurable for different contexts. For example: + - Specify which library version to migrate to + - Configure team-specific behavior or file paths + - Pass API keys or tokens needed by the transformation + + + Use the value reference button to insert organization-level secrets (e.g., `${secrets.apiKey}`) instead of hardcoding sensitive values. + + + The parameters you configure here are accessible at every stage of the workflow: + + - **JSSG transforms**: Access parameters via `options.params`. See [JSSG documentation](/jssg/advanced#accessing-parameters) for details. + - **Shell steps**: Parameters are available as environment variables, each prefixed with `PARAM_`. + - **Nested codemod steps**: Parameters are inherited via environment variable forwarding. + + This ensures your configuration values are available wherever they're needed within your workflow. 
+ + + + Choose a Campaign name and optionally add a description. + Review your configuration summary, then click **Create Campaign**. -## Creating custom workflows + + + Install the Codemod GitHub App to target your repositories. + + + Set up group access tokens for GitLab repositories. + + + +## Managing a Campaign + +Once a Campaign is running, the detail page gives you full visibility into execution progress and lets you manage individual tasks. + +### Campaign dashboard + +The Campaign dashboard provides two primary views for monitoring execution: + +#### 1. Map view (workflow graph) + +An interactive DAG showing workflow **nodes** as connected boxes. The graph visualizes dependencies between nodes and shows the execution order. + +Click any node to expand it and see: +- The **steps** within that node and their individual status +- The **tasks** spawned by that node (one or more for matrix strategies) +- Logs and execution details for each step + +#### 2. Table view + +A detailed breakdown with one row per **task**, showing: +- Task status and the node it belongs to +- Step-by-step execution state within each task +- Assignee, PR links, and available actions +- For matrix nodes, each matrix combination appears as a separate task row + + +Use the table view when you need to see step-level details across all tasks, or to quickly identify which specific step failed within a task. + + +Both views also provide: +- **Pull request links.** Each completed task shows the PRs it created, with state indicators and direct links to GitHub or GitLab. +- **Real-time updates.** Status changes, task creation events, and PR additions stream live without page refresh. + +### Build logs + +Click **View Logs** on any task to open the log panel. Logs stream in real-time and support full-text search, log level filtering, and ANSI color rendering. + + + Toggle debug output on or off to control the level of detail shown in the log panel. 
+ + +### Task statuses + +Tasks move through simple status states like `Todo`, `In Review`, `Done`, or `Won't Do`, which you can update from the status dropdown. + +If a task fails (e.g. a network timeout or build error), you can re-run it individually from the task's dropdown menu without re-running the entire Campaign. Only tasks in a failed or errored state can be re-run. + +To stop an active run, click **Cancel Run**. Completed tasks are not affected, but in-flight tasks will be terminated. + + + Cancelling is immediate. In-flight tasks may leave partial changes. You can re-run individual tasks afterward. + + +## Project management integration + +Link a Campaign to a Jira epic for bidirectional task sync. When task statuses change in Codemod, they update in your project management tool, and vice versa. + +| Campaign status | Jira status | +|-----------------|----------------------| +| Todo | To Do | +| In Review | In Progress | +| Done | Done | +| Won't Do | Won't Do | + + + + Connect your Jira workspace for bidirectional task sync. + + + +## Parameters + +Parameters unlock powerful patterns for managing migrations at scale: + +### Reusable workflows + +Define a workflow once and run it across different contexts by parameterizing variable parts: + +```yaml +params: + schema: + targetVersion: + name: "Target Version" + type: string + default: "v2" + teams: + name: "Teams to migrate" + type: string + description: "Comma-separated list of team directories" +``` + +When creating a Campaign, you configure these parameters in the UI. The same workflow can migrate team A to v2, then team B to v3, without code changes. + +### Dynamic behavior + +Parameters can control conditional execution within workflows: +- Skip certain steps based on configuration +- Adjust file patterns per repository +- Enable or disable AI-powered fixups + +### Secrets management + +For workflows that need API keys, tokens, or other credentials: +1. 
Store secrets at the organization level in Codemod +2. Reference them in parameters using `${secrets.secretName}` +3. Secrets are injected at runtime and never exposed in logs + + + Complete parameter schema options and access patterns. + + +## Next steps -While a Campaign may be as simple as a single automated codemod, most migrations involve a sequence of steps. You can design those steps by building [Codemod workflows](/workflows/introduction). Pro and enterprise customers can request custom workflows. +For multi-step migrations, you can design workflows using `workflow.yaml` to define nodes, dependencies, matrix strategies, and parameters. Pro and Enterprise customers can also request custom workflows built by the Codemod team. - + + + Run your first codemod and create a PR in minutes. + + + Learn how nodes, steps, and matrix strategies work. + + diff --git a/docs/snippets/codemod-workflows-demo.mdx b/docs/snippets/codemod-workflows-demo.mdx index a4499cd10..e86d9e580 100644 --- a/docs/snippets/codemod-workflows-demo.mdx +++ b/docs/snippets/codemod-workflows-demo.mdx @@ -1,9 +1,9 @@ export const CodemodWorkflowsDemo = () => { return ( -
+