From 4c70c2e2f686a89aebccc050c88bcda1bfc79f13 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Tue, 2 Jun 2026 17:15:59 -0400 Subject: [PATCH 1/5] feat(setup): package-manager-agnostic PyPI post-install hook via committed .pth wheel Adds a Python post-install patch hook that works under pip/uv/poetry/pdm/hatch alike, since it rides Python's interpreter-startup .pth mechanism rather than any one installer's hook. - New pure-python `socket-patch-hook` wheel (pypi/socket-patch-hook/): ships a RECORD-tracked .pth + a fail-open run() that, on a cheap dist-info change, re-applies offline via whatever `socket-patch` CLI is on PATH. Version-agnostic (no dependency on the CLI). - `socket-patch setup` Python branch (core/src/pth_hook/ + commands/setup.rs): commits a bare `socket-patch-hook` dependency (PEP 621 / Poetry / requirements, + lockfile refresh). The committed dependency is the single source of truth; no separate marker/audit file. `--remove` reverses it. - Flip setup matrix pip+uv to the `pth` hook family and wire run-case.sh + the harness (build/pass the hook wheel, trigger an interpreter). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 47 ++ Cargo.toml | 1 + crates/socket-patch-cli/src/commands/setup.rs | 677 ++++++++++++------ .../socket-patch-cli/tests/cli_parse_setup.rs | 7 + .../tests/setup_matrix_common/mod.rs | 49 ++ .../tests/setup_matrix_pypi.rs | 13 +- .../tests/setup_pth_invariants.rs | 164 +++++ crates/socket-patch-core/Cargo.toml | 1 + .../src/crawlers/python_crawler.rs | 2 +- crates/socket-patch-core/src/lib.rs | 1 + .../socket-patch-core/src/pth_hook/detect.rs | 182 +++++ crates/socket-patch-core/src/pth_hook/edit.rs | 538 ++++++++++++++ crates/socket-patch-core/src/pth_hook/mod.rs | 25 + pypi/socket-patch-hook/README.md | 51 ++ pypi/socket-patch-hook/pyproject.toml | 37 + pypi/socket-patch-hook/socket_patch_hook.pth | 1 + .../socket_patch_hook/__init__.py | 285 ++++++++ pypi/socket-patch-hook/test_hook.py | 206 ++++++ pypi/socket-patch/pyproject.toml | 8 + pypi/socket-patch/socket_patch/__init__.py | 36 +- scripts/build-pypi-wheels.py | 127 +++- tests/setup_matrix/matrix.json | 8 +- tests/setup_matrix/run-case.sh | 66 +- 23 files changed, 2271 insertions(+), 261 deletions(-) create mode 100644 crates/socket-patch-cli/tests/setup_pth_invariants.rs create mode 100644 crates/socket-patch-core/src/pth_hook/detect.rs create mode 100644 crates/socket-patch-core/src/pth_hook/edit.rs create mode 100644 crates/socket-patch-core/src/pth_hook/mod.rs create mode 100644 pypi/socket-patch-hook/README.md create mode 100644 pypi/socket-patch-hook/pyproject.toml create mode 100644 pypi/socket-patch-hook/socket_patch_hook.pth create mode 100644 pypi/socket-patch-hook/socket_patch_hook/__init__.py create mode 100644 pypi/socket-patch-hook/test_hook.py diff --git a/Cargo.lock b/Cargo.lock index 33f3e66e..4460ce72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2445,6 +2445,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", + "toml_edit", "uuid", "walkdir", ] @@ -2753,6 +2754,43 @@ dependencies = [ "tokio", ] +[[package]] +name = 
"toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" +dependencies = [ + "indexmap 2.13.0", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + [[package]] name = "tonic" version = "0.14.6" @@ -3404,6 +3442,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 704cc791..69d8dc7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ dialoguer = "=0.11.0" indicatif = "=0.17.11" tempfile = "=3.26.0" regex = "=1.12.3" +toml_edit = "=0.25.12" once_cell = "=1.21.3" qbsdiff = "=1.4.4" tar = "=0.4.46" diff --git a/crates/socket-patch-cli/src/commands/setup.rs b/crates/socket-patch-cli/src/commands/setup.rs index 5a42cfdc..39dcb537 100644 --- a/crates/socket-patch-cli/src/commands/setup.rs +++ 
b/crates/socket-patch-cli/src/commands/setup.rs @@ -1,17 +1,22 @@ use clap::Args; +use socket_patch_core::crawlers::python_crawler::is_python_project; use socket_patch_core::package_json::detect::PackageManager; use socket_patch_core::package_json::find::{ detect_package_manager, find_package_json_files, WorkspaceType, }; -use socket_patch_core::package_json::update::{update_package_json, UpdateStatus}; +use socket_patch_core::package_json::update::{update_package_json, UpdateResult, UpdateStatus}; +use socket_patch_core::pth_hook::{ + add_hook_dependency, detect_python_pm, remove_hook_dependency, ManifestKind, PthEditResult, + PthStatus, PythonPackageManager, +}; use socket_patch_core::utils::telemetry::track_patch_setup; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use crate::args::GlobalArgs; use crate::output::stdin_is_tty; -/// Stringify the detected manager for telemetry. +/// Stringify the detected npm-family manager for telemetry. fn manager_name(pm: PackageManager) -> &'static str { match pm { PackageManager::Npm => "npm", @@ -23,286 +28,492 @@ fn manager_name(pm: PackageManager) -> &'static str { pub struct SetupArgs { #[command(flatten)] pub common: GlobalArgs, + + /// Remove the socket-patch install hook instead of adding it. For Python + /// this drops the `socket-patch-hook` dependency from the manifest (then run + /// `pip uninstall socket-patch-hook`). + #[arg(long, env = "SOCKET_SETUP_REMOVE", default_value_t = false)] + pub remove: bool, } -pub async fn run(args: SetupArgs) -> i32 { - if !args.common.json { - println!("Searching for package.json files..."); +/// A Python manifest `setup` will edit, plus the resolved package manager. +struct PythonPlan { + pm: PythonPackageManager, + manifests: Vec<(PathBuf, ManifestKind)>, +} + +/// Decide which Python manifest(s) to edit for the detected package manager. 
+/// +/// pyproject-based managers (uv/poetry/pdm/hatch) edit `pyproject.toml`; pip +/// prefers an existing `requirements.txt`, then a PEP 621 `pyproject.toml`, and +/// otherwise creates `requirements.txt`. +async fn choose_python_manifests( + cwd: &Path, + pm: PythonPackageManager, +) -> Vec<(PathBuf, ManifestKind)> { + let pyproject = cwd.join("pyproject.toml"); + let requirements = cwd.join("requirements.txt"); + let pyproject_exists = tokio::fs::metadata(&pyproject).await.is_ok(); + let requirements_exists = tokio::fs::metadata(&requirements).await.is_ok(); + + match pm { + PythonPackageManager::Uv + | PythonPackageManager::Poetry + | PythonPackageManager::Pdm + | PythonPackageManager::Hatch => { + if pyproject_exists { + vec![(pyproject, ManifestKind::Pyproject)] + } else { + vec![] + } + } + PythonPackageManager::Pip => { + if requirements_exists { + vec![(requirements, ManifestKind::Requirements)] + } else if pyproject_exists { + vec![(pyproject, ManifestKind::Pyproject)] + } else { + // Nothing to edit yet: create requirements.txt so a CI + // `pip install -r requirements.txt` installs the hook. + vec![(requirements, ManifestKind::Requirements)] + } + } + } +} + +async fn plan_python(common: &GlobalArgs) -> Option { + if !is_python_project(&common.cwd).await { + return None; + } + let pm = detect_python_pm(&common.cwd).await; + let manifests = choose_python_manifests(&common.cwd, pm).await; + if manifests.is_empty() { + return None; } + Some(PythonPlan { pm, manifests }) +} - let find_result = find_package_json_files(&args.common.cwd).await; +fn rel(path: &str, base: &Path) -> String { + Path::new(path) + .strip_prefix(base) + .map(|r| r.display().to_string()) + .unwrap_or_else(|_| path.to_string()) +} - // For pnpm monorepos, only update root package.json. - // pnpm runs root postinstall on `pnpm install`, so workspace-level - // postinstall scripts are unnecessary. 
Individual workspaces may not - // have `@socketsecurity/socket-patch` as a dependency, causing - // `npx @socketsecurity/socket-patch apply` to fail due to pnpm's - // strict module isolation. - let package_json_files = match find_result.workspace_type { +fn pth_status_str(s: &PthStatus) -> &'static str { + match s { + PthStatus::Updated => "updated", + PthStatus::AlreadyConfigured => "already_configured", + PthStatus::Error => "error", + } +} + +fn update_status_str(s: &UpdateStatus) -> &'static str { + match s { + UpdateStatus::Updated => "updated", + UpdateStatus::AlreadyConfigured => "already_configured", + UpdateStatus::Error => "error", + } +} + +/// Run the hook-dependency edits for a plan (add or remove) at the given +/// dry-run setting. Returns per-manifest results. +async fn edit_python_manifests( + plan: &PythonPlan, + remove: bool, + dry_run: bool, +) -> Vec { + let mut out = Vec::new(); + for (path, kind) in &plan.manifests { + let res = if remove { + remove_hook_dependency(path, *kind, dry_run).await + } else { + add_hook_dependency(path, *kind, dry_run).await + }; + out.push(res); + } + out +} + +/// After a real (non-dry-run) edit that changed a manifest, refresh the +/// lockfile. Returns any warnings to surface. (There is no separate marker / +/// audit file: the committed dependency line is the source of truth.) +async fn finalize_python(plan: &PythonPlan, edits: &[PthEditResult], cwd: &Path) -> Vec { + let mut warnings = Vec::new(); + let any_changed = edits.iter().any(|e| e.status == PthStatus::Updated); + + // Lockfile refresh (broad auto-edit): only when we changed a manifest and + // the manager uses a lockfile that exists. Best-effort — never fatal. 
+ if any_changed { + if let Some((program, args)) = plan.pm.lock_command() { + let lockfile = match plan.pm { + PythonPackageManager::Uv => Some("uv.lock"), + PythonPackageManager::Poetry => Some("poetry.lock"), + PythonPackageManager::Pdm => Some("pdm.lock"), + _ => None, + }; + let lock_present = match lockfile { + Some(name) => tokio::fs::metadata(cwd.join(name)).await.is_ok(), + None => false, + }; + if lock_present { + match tokio::process::Command::new(program) + .args(args) + .current_dir(cwd) + .output() + .await + { + Ok(o) if o.status.success() => {} + Ok(o) => warnings.push(format!( + "`{program} {}` failed ({}); update the lockfile manually", + args.join(" "), + o.status + )), + Err(e) => warnings.push(format!( + "could not run `{program} {}`: {e}; update the lockfile manually", + args.join(" ") + )), + } + } + } + } + warnings +} + +pub async fn run(args: SetupArgs) -> i32 { + let common = &args.common; + let remove = args.remove; + + if !common.json { + if remove { + println!("Removing socket-patch install hooks..."); + } else { + println!("Configuring socket-patch install hooks..."); + } + } + + // ── discover both ecosystems ──────────────────────────────────────── + let find_result = find_package_json_files(&common.cwd).await; + // pnpm monorepos: only the root package.json — pnpm runs the root postinstall on `pnpm install`, and workspace packages may lack the socket-patch dependency under pnpm's strict module isolation. + let npm_files = match find_result.workspace_type { WorkspaceType::Pnpm => find_result .files .into_iter() .filter(|loc| loc.is_root) - .collect(), + .collect::>(), _ => find_result.files, }; + // `--remove` only reverses the Python hook today; npm postinstall removal + // is left to the user, so we don't touch package.json on remove. 
+ let npm_files = if remove { Vec::new() } else { npm_files }; + let npm_pm = detect_package_manager(&common.cwd).await; + + let py_plan = plan_python(common).await; - if package_json_files.is_empty() { - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": "no_files", - "updated": 0, - "alreadyConfigured": 0, - "errors": 0, - "files": [], - })).unwrap()); + if npm_files.is_empty() && py_plan.is_none() { + if common.json { + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "status": "no_files", + "updated": 0, + "alreadyConfigured": 0, + "errors": 0, + "files": [], + })) + .unwrap() + ); + } else if remove { + println!("No socket-patch install hooks found to remove."); } else { - println!("No package.json files found"); + println!("No package.json or Python project found"); } return 0; } - // Detect package manager from lockfiles in the project root. - let pm = detect_package_manager(&args.common.cwd).await; - - // Setup telemetry: emit once we know a real setup is being attempted - // (past the "no files found" early exit) and the package manager is - // resolved. Carries the detected manager so we can see which install - // hooks are exercised in the wild. + // Telemetry: which install-hook surfaces are being exercised. 
+ let telemetry_manager = match (!npm_files.is_empty(), py_plan.is_some()) { + (true, true) => format!("{}+pypi", manager_name(npm_pm)), + (true, false) => manager_name(npm_pm).to_string(), + (false, true) => "pypi".to_string(), + (false, false) => "none".to_string(), + }; track_patch_setup( - manager_name(pm), - args.common.api_token.as_deref(), - args.common.org.as_deref(), + &telemetry_manager, + common.api_token.as_deref(), + common.org.as_deref(), ) .await; - if !args.common.json { - println!("Found {} package.json file(s)", package_json_files.len()); - if pm == PackageManager::Pnpm { - println!("Detected pnpm project (using pnpm dlx)"); - } + // ── preview (always dry-run first) ────────────────────────────────── + let mut npm_preview = Vec::new(); + for loc in &npm_files { + npm_preview.push(update_package_json(&loc.path, true, npm_pm).await); } + let py_preview = match &py_plan { + Some(plan) => edit_python_manifests(plan, remove, true).await, + None => Vec::new(), + }; - // Preview changes (always preview first) - let mut preview_results = Vec::new(); - for loc in &package_json_files { - let result = update_package_json(&loc.path, true, pm).await; - preview_results.push(result); + if !common.json { + print_preview(&npm_preview, &py_preview, common, remove); } - // Display preview - let to_update: Vec<_> = preview_results + let n_changes = npm_preview .iter() .filter(|r| r.status == UpdateStatus::Updated) - .collect(); - let already_configured: Vec<_> = preview_results - .iter() - .filter(|r| r.status == UpdateStatus::AlreadyConfigured) - .collect(); - let errors: Vec<_> = preview_results + .count() + + py_preview + .iter() + .filter(|r| r.status == PthStatus::Updated) + .count(); + let preview_errors = npm_preview .iter() .filter(|r| r.status == UpdateStatus::Error) - .collect(); + .count() + + py_preview + .iter() + .filter(|r| r.status == PthStatus::Error) + .count(); - if !args.common.json { - println!("\nPackage.json files to be updated:\n"); - - if 
!to_update.is_empty() { - println!("Will update:"); - for result in &to_update { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!(" + {rel_path}"); - if result.old_script.is_empty() { - println!(" postinstall: (no script)"); - } else { - println!(" postinstall: \"{}\"", result.old_script); - } - println!(" -> postinstall: \"{}\"", result.new_script); - if result.old_dependencies_script.is_empty() { - println!(" dependencies: (no script)"); - } else { - println!(" dependencies: \"{}\"", result.old_dependencies_script); - } - println!( - " -> dependencies: \"{}\"", - result.new_dependencies_script - ); - } - println!(); + // Nothing to change: report already-configured (or surface errors). + if n_changes == 0 { + if common.json { + print_envelope( + if preview_errors > 0 { "error" } else { "already_configured" }, + &npm_preview, + &py_preview, + npm_pm, + py_plan.as_ref(), + &[], + common, + ); + } else if preview_errors > 0 { + println!("No hooks were changed; {preview_errors} item(s) could not be processed (see errors above)."); + } else if remove { + println!("No socket-patch install hooks were configured."); + } else { + println!("All install hooks are already configured with socket-patch!"); } + return if preview_errors > 0 { 1 } else { 0 }; + } - if !already_configured.is_empty() { - println!("Already configured (will skip):"); - for result in &already_configured { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!(" = {rel_path}"); - } - println!(); + // Dry-run: report the preview and stop. 
+ if common.dry_run { + if common.json { + print_envelope( + "dry_run", + &npm_preview, + &py_preview, + npm_pm, + py_plan.as_ref(), + &[], + common, + ); + } else { + println!("\nSummary (dry run):"); + println!(" {n_changes} item(s) would be {}", if remove { "removed" } else { "updated" }); } + return if preview_errors > 0 { 1 } else { 0 }; + } - if !errors.is_empty() { - println!("Errors:"); - for result in &errors { - let rel_path = pathdiff(&result.path, &args.common.cwd); - println!( - " ! {}: {}", - rel_path, - result.error.as_deref().unwrap_or("unknown error") - ); + // Confirm once (interactive only). + if !common.yes && !common.json { + if !stdin_is_tty() { + eprintln!("Non-interactive mode detected, proceeding automatically."); + } else { + print!("Proceed with these changes? (y/N): "); + io::stdout().flush().unwrap(); + let mut answer = String::new(); + io::stdin().read_line(&mut answer).unwrap(); + let answer = answer.trim().to_lowercase(); + if answer != "y" && answer != "yes" { + println!("Aborted"); + return 0; } - println!(); } } - if to_update.is_empty() { - // Nothing to update — but that can mean two very different things: - // every file is already configured (a clean exit 0), or some files - // failed to process (e.g. malformed JSON). Errors must surface with - // an honest status and a non-zero exit; otherwise a parse failure is - // silently reported as "already configured" and CI reads it as success. 
- let errs = errors.len(); - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "error" } else { "already_configured" }, - "updated": 0, - "alreadyConfigured": already_configured.len(), - "errors": errs, - "files": preview_results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - UpdateStatus::Error => "error", - }, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else if errs > 0 { - // Individual errors were already listed in the preview above. - println!( - "No files were updated; {errs} file(s) could not be processed (see errors above)." - ); - } else { - println!("All package.json files are already configured with socket-patch!"); - } - return if errs > 0 { 1 } else { 0 }; + if !common.json { + println!("\nApplying changes..."); + } + + // ── apply for real ────────────────────────────────────────────────── + let mut npm_results = Vec::new(); + for loc in &npm_files { + npm_results.push(update_package_json(&loc.path, false, npm_pm).await); + } + let mut py_results = Vec::new(); + let mut warnings = Vec::new(); + if let Some(plan) = &py_plan { + py_results = edit_python_manifests(plan, remove, false).await; + warnings = finalize_python(plan, &py_results, &common.cwd).await; } - // If not dry-run, ask for confirmation - if !args.common.dry_run { - if !args.common.yes && !args.common.json { - if !stdin_is_tty() { - // Non-interactive: default to yes with warning - eprintln!("Non-interactive mode detected, proceeding automatically."); + let errors = npm_results + .iter() + .filter(|r| r.status == UpdateStatus::Error) + .count() + + py_results + .iter() + .filter(|r| r.status == PthStatus::Error) + .count(); + + if common.json { + print_envelope( + if errors > 0 { "partial_failure" } else { "success" }, + &npm_results, + &py_results, + npm_pm, + 
py_plan.as_ref(), + &warnings, + common, + ); + } else { + let updated = npm_results + .iter() + .filter(|r| r.status == UpdateStatus::Updated) + .count() + + py_results + .iter() + .filter(|r| r.status == PthStatus::Updated) + .count(); + println!("\nSummary:"); + println!(" {updated} item(s) {}", if remove { "removed" } else { "updated" }); + if errors > 0 { + println!(" {errors} error(s)"); + } + for w in &warnings { + println!(" warning: {w}"); + } + if let Some(plan) = &py_plan { + if remove { + println!( + "\nAlso run `pip uninstall socket-patch-hook` to remove the installed .pth." + ); } else { - print!("Proceed with these changes? (y/N): "); - io::stdout().flush().unwrap(); - let mut answer = String::new(); - io::stdin().read_line(&mut answer).unwrap(); - let answer = answer.trim().to_lowercase(); - if answer != "y" && answer != "yes" { - println!("Aborted"); - return 0; - } + println!( + "\nCommit the {} dependency change (and your .socket/ patches) so \ + the hook re-applies in CI after install.", + plan.pm.as_str() + ); } } + } + + if errors > 0 { + 1 + } else { + 0 + } +} + +fn print_preview( + npm: &[UpdateResult], + py: &[PthEditResult], + common: &GlobalArgs, + remove: bool, +) { + let verb = if remove { "remove" } else { "update" }; + let npm_changes: Vec<_> = npm.iter().filter(|r| r.status == UpdateStatus::Updated).collect(); + let py_changes: Vec<_> = py.iter().filter(|r| r.status == PthStatus::Updated).collect(); - if !args.common.json { - println!("\nApplying changes..."); + if !npm_changes.is_empty() { + println!("\npackage.json files to {verb}:"); + for r in &npm_changes { + println!(" + {}", rel(&r.path, &common.cwd)); + println!(" postinstall -> \"{}\"", r.new_script); } - let mut results = Vec::new(); - for loc in &package_json_files { - let result = update_package_json(&loc.path, false, pm).await; - results.push(result); + } + if !py_changes.is_empty() { + println!("\nPython manifests to {verb} (socket-patch[hook]):"); + for r in 
&py_changes { + println!(" + {}", rel(&r.path, &common.cwd)); } + } - let updated = results.iter().filter(|r| r.status == UpdateStatus::Updated).count(); - let already = results.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); - let errs = results.iter().filter(|r| r.status == UpdateStatus::Error).count(); - - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": if errs > 0 { "partial_failure" } else { "success" }, - "updated": updated, - "alreadyConfigured": already, - "errors": errs, - "packageManager": match pm { - PackageManager::Npm => "npm", - PackageManager::Pnpm => "pnpm", - }, - "files": results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - UpdateStatus::Error => "error", - }, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else { - println!("\nSummary:"); - println!(" {updated} file(s) updated"); - println!(" {already} file(s) already configured"); - if errs > 0 { - println!(" {errs} error(s)"); - } - } + let npm_already = npm.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); + let py_already = py.iter().filter(|r| r.status == PthStatus::AlreadyConfigured).count(); + if npm_already + py_already > 0 { + println!("\nAlready configured (will skip): {}", npm_already + py_already); + } - if errs > 0 { 1 } else { 0 } - } else { - let updated = preview_results.iter().filter(|r| r.status == UpdateStatus::Updated).count(); - let already = preview_results.iter().filter(|r| r.status == UpdateStatus::AlreadyConfigured).count(); - let errs = preview_results.iter().filter(|r| r.status == UpdateStatus::Error).count(); - - if args.common.json { - println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": "dry_run", - "wouldUpdate": updated, - "alreadyConfigured": already, - "errors": errs, - "dryRun": true, - 
"packageManager": match pm { - PackageManager::Npm => "npm", - PackageManager::Pnpm => "pnpm", - }, - "files": preview_results.iter().map(|r| { - serde_json::json!({ - "path": r.path, - "status": match r.status { - UpdateStatus::Updated => "updated", - UpdateStatus::AlreadyConfigured => "already_configured", - UpdateStatus::Error => "error", - }, - "oldScript": r.old_script, - "newScript": r.new_script, - "oldDependenciesScript": r.old_dependencies_script, - "newDependenciesScript": r.new_dependencies_script, - "error": r.error, - }) - }).collect::>(), - })).unwrap()); - } else { - println!("\nSummary:"); - println!(" {updated} file(s) would be updated"); - println!(" {already} file(s) already configured"); - if errs > 0 { - println!(" {errs} error(s)"); - } + let errs: Vec<&str> = npm + .iter() + .filter(|r| r.status == UpdateStatus::Error) + .filter_map(|r| r.error.as_deref()) + .chain( + py.iter() + .filter(|r| r.status == PthStatus::Error) + .filter_map(|r| r.error.as_deref()), + ) + .collect(); + if !errs.is_empty() { + println!("\nErrors:"); + for e in errs { + println!(" ! {e}"); } - // Mirror the non-dry-run path: an unprocessable package.json is a - // failure regardless of dry-run, so it must yield a non-zero exit. 
- if errs > 0 { 1 } else { 0 } } } -fn pathdiff(path: &str, base: &Path) -> String { - let p = Path::new(path); - p.strip_prefix(base) - .map(|r| r.display().to_string()) - .unwrap_or_else(|_| path.to_string()) +#[allow(clippy::too_many_arguments)] +fn print_envelope( + status: &str, + npm: &[UpdateResult], + py: &[PthEditResult], + npm_pm: PackageManager, + py_plan: Option<&PythonPlan>, + warnings: &[String], + _common: &GlobalArgs, +) { + let updated = npm.iter().filter(|r| r.status == UpdateStatus::Updated).count() + + py.iter().filter(|r| r.status == PthStatus::Updated).count(); + let already = npm + .iter() + .filter(|r| r.status == UpdateStatus::AlreadyConfigured) + .count() + + py.iter().filter(|r| r.status == PthStatus::AlreadyConfigured).count(); + let errors = npm.iter().filter(|r| r.status == UpdateStatus::Error).count() + + py.iter().filter(|r| r.status == PthStatus::Error).count(); + + let mut files: Vec = npm + .iter() + .map(|r| { + serde_json::json!({ + "kind": "package_json", + "path": r.path, + "status": update_status_str(&r.status), + "error": r.error, + }) + }) + .collect(); + files.extend(py.iter().map(|r| { + serde_json::json!({ + "kind": "pth", + "path": r.path, + "status": pth_status_str(&r.status), + "error": r.error, + }) + })); + + let mut obj = serde_json::json!({ + "status": status, + "updated": updated, + "alreadyConfigured": already, + "errors": errors, + "packageManager": manager_name(npm_pm), + "files": files, + }); + // Preserve the dry-run envelope schema consumers rely on. 
+ if status == "dry_run" { + obj["dryRun"] = serde_json::json!(true); + obj["wouldUpdate"] = serde_json::json!(updated); + } + if let Some(plan) = py_plan { + obj["pythonPackageManager"] = serde_json::json!(plan.pm.as_str()); + } + if !warnings.is_empty() { + obj["warnings"] = serde_json::json!(warnings); + } + println!("{}", serde_json::to_string_pretty(&obj).unwrap()); } diff --git a/crates/socket-patch-cli/tests/cli_parse_setup.rs b/crates/socket-patch-cli/tests/cli_parse_setup.rs index 3de483d9..4f117624 100644 --- a/crates/socket-patch-cli/tests/cli_parse_setup.rs +++ b/crates/socket-patch-cli/tests/cli_parse_setup.rs @@ -74,6 +74,12 @@ fn json_long_form() { assert!(args.common.json); } +#[test] +fn remove_defaults_false_and_long_form() { + assert!(!parse_setup(&[]).remove); + assert!(parse_setup(&["--remove"]).remove); +} + #[test] fn all_flags_combined() { let args = parse_setup(&["--cwd", "/tmp/x", "--dry-run", "-y", "--json"]); @@ -112,6 +118,7 @@ async fn run_empty_tempdir_exits_zero() { json: true, ..socket_patch_cli::args::GlobalArgs::default() }, + remove: false, }; let exit = run(args).await; assert_eq!( diff --git a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs index d2fe02c1..7aca45e9 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs @@ -25,6 +25,7 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use std::sync::OnceLock; /// Path to the built binary under test (host mode passes this to the /// driver via `SOCKET_PATCH_BIN`). @@ -32,6 +33,34 @@ fn binary() -> PathBuf { env!("CARGO_BIN_EXE_socket-patch").into() } +/// Build the pure-python `socket-patch-hook` wheel once and cache the path. +/// The pypi cases need it to exercise the `.pth` post-install hook; returns +/// `None` if the build fails (those cases then degrade to a gap). 
Requires +/// `python3` on PATH (always present in the pypi image / host pypi runs). +fn hook_wheel() -> Option { + static CELL: OnceLock> = OnceLock::new(); + CELL.get_or_init(|| { + let root = workspace_root(); + let dist = root.join("target/setup-matrix-hook"); + std::fs::create_dir_all(&dist).ok()?; + let version = env!("CARGO_PKG_VERSION"); + let ok = Command::new("python3") + .arg(root.join("scripts/build-pypi-wheels.py")) + .args(["--version", version, "--hook-only", "--dist"]) + .arg(&dist) + .stdout(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false); + if !ok { + return None; + } + let wheel = dist.join(format!("socket_patch_hook-{version}-py3-none-any.whl")); + wheel.exists().then_some(wheel) + }) + .clone() +} + /// Workspace root = two levels up from this crate's manifest dir. fn workspace_root() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")) @@ -192,6 +221,14 @@ fn run_case(case: &Case) -> RunResult { let driver = driver_path(); let env = case.sm_env(); + // The pypi cases need the prebuilt hook wheel to exercise the `.pth` + // post-install hook; other ecosystems ignore it. + let wheel = if case.ecosystem == "pypi" { + hook_wheel() + } else { + None + }; + let output = if host_mode() { let mut cmd = Command::new("bash"); cmd.arg(&driver); @@ -199,6 +236,9 @@ fn run_case(case: &Case) -> RunResult { cmd.env(k, v); } cmd.env("SOCKET_PATCH_BIN", binary()); + if let Some(w) = &wheel { + cmd.env("SOCKET_PATCH_HOOK_WHEEL", w); + } cmd.output().expect("spawn bash driver") } else { let script = std::fs::read_to_string(&driver) @@ -208,6 +248,15 @@ fn run_case(case: &Case) -> RunResult { for (k, v) in &env { cmd.args(["-e", &format!("{k}={v}")]); } + // Mount the hook wheel into the container at a fixed path. 
+ if let Some(w) = &wheel { + cmd.args([ + "-v", + &format!("{}:/tmp/socket_patch_hook.whl:ro", w.display()), + "-e", + "SOCKET_PATCH_HOOK_WHEEL=/tmp/socket_patch_hook.whl", + ]); + } cmd.arg(format!("socket-patch-test-{}:latest", case.image)); cmd.args(["bash", "-c", &script]); cmd.output().expect("spawn docker run") diff --git a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs index b1907433..1d58ddc2 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs @@ -1,9 +1,14 @@ //! setup-matrix: pypi ecosystem (pip / uv / poetry / pdm / hatch). //! -//! Python installers have no native post-install hook and `socket-patch -//! setup` is a no-op for them, so the `baseline_with_setup` / -//! `alt_content_patchset` cases are EXPECTED to fail here (BASELINE -//! GAP). The negative-control / empty / wrong-target cases should pass. +//! Python installers have no native post-install hook, so `socket-patch +//! setup` instead commits a bare `socket-patch-hook` dependency whose wheel +//! ships a startup `.pth` that re-applies patches after install +//! (package-manager-agnostic). pip and uv are now wired for this, so their +//! `baseline_with_setup` / `alt_content_patchset` cases should APPLY (the +//! harness builds the hook wheel and the driver installs it + fires an +//! interpreter). poetry / pdm / hatch and the nested-workspace layouts are +//! not yet wired (BASELINE GAP) — a follow-up. The negative-control / +//! empty / wrong-target cases must still NOT apply. //! //! Run: `cargo test -p socket-patch-cli --features setup-e2e --test setup_matrix_pypi` #![cfg(feature = "setup-e2e")] diff --git a/crates/socket-patch-cli/tests/setup_pth_invariants.rs b/crates/socket-patch-cli/tests/setup_pth_invariants.rs new file mode 100644 index 00000000..72964dae --- /dev/null +++ b/crates/socket-patch-cli/tests/setup_pth_invariants.rs @@ -0,0 +1,164 @@ +//! 
Integration tests for `setup`'s Python `.pth`-hook branch. Like the npm +//! `setup_invariants`, these operate entirely on disk (manifest detection + +//! editing; no marker/audit file is written) and need no network. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +fn run_setup(cwd: &Path, extra: &[&str]) -> (i32, serde_json::Value) { + let mut args = vec!["setup", "--json", "--yes"]; + args.extend_from_slice(extra); + let out = Command::new(binary()) + .args(&args) + .current_dir(cwd) + .env_remove("SOCKET_API_TOKEN") + .env("SOCKET_TELEMETRY_DISABLED", "1") + .output() + .expect("run socket-patch"); + let stdout = String::from_utf8_lossy(&out.stdout).to_string(); + let v = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout must be JSON ({e}):\n{stdout}")); + (out.status.code().unwrap_or(-1), v) +} + +fn write(path: &Path, content: &str) { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).expect("create parent"); + } + std::fs::write(path, content).expect("write file"); +} + +fn read(path: &Path) -> String { + std::fs::read_to_string(path).expect("read file") +} + +#[test] +fn pip_requirements_gets_hook_dep() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests==2.31.0\n"); + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "setup should succeed; payload={v}"); + assert_eq!(v["status"], "success"); + assert_eq!(v["updated"], 1); + assert_eq!(v["pythonPackageManager"], "pip"); + let entry = &v["files"].as_array().unwrap()[0]; + assert_eq!(entry["kind"], "pth"); + + let req = read(&tmp.path().join("requirements.txt")); + assert!(req.contains("socket-patch-hook"), "got:\n{req}"); + assert!(req.contains("requests==2.31.0"), "must preserve existing deps"); + + // The committed dependency is the source of truth — no separate marker file.
+ assert!( + !tmp.path().join(".socket/hook.json").exists(), + "setup must not write a separate marker/audit file" + ); +} + +#[test] +fn uv_pyproject_array_edited_and_format_preserved() { + let tmp = tempfile::tempdir().unwrap(); + let original = "[project]\nname = \"x\"\nversion = \"0.0.0\"\ndependencies = [\n \"requests\",\n]\n\n[tool.uv]\n"; + write(&tmp.path().join("pyproject.toml"), original); + write(&tmp.path().join("uv.lock"), ""); // detected as uv + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "payload={v}"); + assert_eq!(v["pythonPackageManager"], "uv"); + + let py = read(&tmp.path().join("pyproject.toml")); + assert!(py.contains("socket-patch-hook")); + assert!(py.contains("[tool.uv]"), "unrelated tables preserved"); + assert!(py.contains("name = \"x\"")); +} + +#[test] +fn idempotent_second_run_reports_already_configured() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests\n"); + + let (_, _) = run_setup(tmp.path(), &[]); + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0); + assert_eq!(v["status"], "already_configured"); + let req = read(&tmp.path().join("requirements.txt")); + assert_eq!( + req.matches("socket-patch-hook").count(), + 1, + "must not duplicate the hook dependency" + ); +} + +#[test] +fn dry_run_does_not_modify_or_create_files() { + let tmp = tempfile::tempdir().unwrap(); + let original = "requests\n"; + write(&tmp.path().join("requirements.txt"), original); + + let (code, v) = run_setup(tmp.path(), &["--dry-run"]); + assert_eq!(code, 0); + assert_eq!(v["status"], "dry_run"); + assert_eq!(v["dryRun"], true); + assert_eq!(v["wouldUpdate"], 1); + + assert_eq!(read(&tmp.path().join("requirements.txt")), original); +} + +#[test] +fn remove_reverses_dep() { + let tmp = tempfile::tempdir().unwrap(); + write(&tmp.path().join("requirements.txt"), "requests\n"); + // Configure first. 
+ let (_, v) = run_setup(tmp.path(), &[]); + assert_eq!(v["status"], "success"); + + let (code, v) = run_setup(tmp.path(), &["--remove"]); + assert_eq!(code, 0, "payload={v}"); + let req = read(&tmp.path().join("requirements.txt")); + assert!(!req.contains("socket-patch-hook"), "got:\n{req}"); + assert!(req.contains("requests")); +} + +#[test] +fn polyglot_configures_both_npm_and_python() { + let tmp = tempfile::tempdir().unwrap(); + write( + &tmp.path().join("package.json"), + "{ \"name\": \"x\", \"version\": \"0.0.0\" }\n", + ); + write( + &tmp.path().join("pyproject.toml"), + "[project]\nname = \"x\"\nversion = \"0.0.0\"\ndependencies = []\n", + ); + + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0, "payload={v}"); + assert_eq!(v["updated"], 2); + let kinds: Vec<&str> = v["files"] + .as_array() + .unwrap() + .iter() + .map(|f| f["kind"].as_str().unwrap()) + .collect(); + assert!(kinds.contains(&"package_json")); + assert!(kinds.contains(&"pth")); + + assert!(read(&tmp.path().join("package.json")).contains("socket-patch")); + assert!(read(&tmp.path().join("pyproject.toml")).contains("socket-patch-hook")); +} + +#[test] +fn pure_python_with_no_manifest_files_is_no_op() { + // `setup.py`-only project (no pyproject/requirements): pip path would + // create requirements.txt. But an EMPTY dir with neither markers nor + // package.json must report no_files. 
+ let tmp = tempfile::tempdir().unwrap(); + let (code, v) = run_setup(tmp.path(), &[]); + assert_eq!(code, 0); + assert_eq!(v["status"], "no_files"); +} diff --git a/crates/socket-patch-core/Cargo.toml b/crates/socket-patch-core/Cargo.toml index 3aa4f268..32760aa8 100644 --- a/crates/socket-patch-core/Cargo.toml +++ b/crates/socket-patch-core/Cargo.toml @@ -18,6 +18,7 @@ thiserror = { workspace = true } walkdir = { workspace = true } uuid = { workspace = true } regex = { workspace = true } +toml_edit = { workspace = true } once_cell = { workspace = true } qbsdiff = { workspace = true } tar = { workspace = true } diff --git a/crates/socket-patch-core/src/crawlers/python_crawler.rs b/crates/socket-patch-core/src/crawlers/python_crawler.rs index 087d7437..fa9f6dbd 100644 --- a/crates/socket-patch-core/src/crawlers/python_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/python_crawler.rs @@ -503,7 +503,7 @@ pub async fn get_global_python_site_packages() -> Vec { /// * `requirements.txt` — pip-compile / bare requirements /// * `uv.lock` — uv-managed projects (PEP 751 export sibling is /// `pylock.toml` but in practice `uv.lock` is what ships) -async fn is_python_project(cwd: &Path) -> bool { +pub async fn is_python_project(cwd: &Path) -> bool { let markers = [ "pyproject.toml", "setup.py", diff --git a/crates/socket-patch-core/src/lib.rs b/crates/socket-patch-core/src/lib.rs index 3d5871bb..44b8f890 100644 --- a/crates/socket-patch-core/src/lib.rs +++ b/crates/socket-patch-core/src/lib.rs @@ -5,5 +5,6 @@ pub mod hash; pub mod manifest; pub mod package_json; pub mod patch; +pub mod pth_hook; pub mod utils; pub mod vex; diff --git a/crates/socket-patch-core/src/pth_hook/detect.rs b/crates/socket-patch-core/src/pth_hook/detect.rs new file mode 100644 index 00000000..525f4b39 --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/detect.rs @@ -0,0 +1,182 @@ +//! Detect a Python project's dependency manager and probe for the hook dep. 
+ +use std::path::Path; + +/// The dependency `setup` adds to activate the hook: the standalone, version- +/// agnostic hook wheel (it has no dependency on the CLI — the hook runs whatever +/// `socket-patch` is on PATH). A bare token so the committed line never needs a +/// version bump. +pub const HOOK_DEP: &str = "socket-patch-hook"; + +/// Substrings (space-insensitive, lower-cased) that mean the hook is already +/// declared — the standalone wheel, the `socket-patch[hook]` convenience extra, +/// or the underscore spelling. +const HOOK_MARKERS: &[&str] = &["socket-patch-hook", "socket_patch_hook", "socket-patch[hook]"]; + +/// Which Python dependency-management style a project uses. Drives both which +/// manifest/table `setup` edits and which lockfile (if any) to refresh. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PythonPackageManager { + Uv, + Poetry, + Pdm, + Hatch, + Pip, +} + +impl PythonPackageManager { + pub fn as_str(&self) -> &'static str { + match self { + Self::Uv => "uv", + Self::Poetry => "poetry", + Self::Pdm => "pdm", + Self::Hatch => "hatch", + Self::Pip => "pip", + } + } + + /// The lockfile-refresh command `(program, args)` for managers whose frozen + /// CI install reads a lockfile that must be regenerated after editing the + /// dependency list. `None` for managers that resolve dependencies directly + /// from the manifest at install time (pip, hatch). + pub fn lock_command(&self) -> Option<(&'static str, &'static [&'static str])> { + match self { + Self::Uv => Some(("uv", &["lock"])), + Self::Poetry => Some(("poetry", &["lock"])), + Self::Pdm => Some(("pdm", &["lock"])), + Self::Hatch | Self::Pip => None, + } + } +} + +/// Detect the dependency manager from lockfiles and `pyproject.toml` tables. +/// +/// Lockfiles are the strongest signal; `[tool.*]` tables come next; a project +/// with only `requirements.txt` / a PEP 621 `pyproject.toml` falls through to +/// `Pip`. 
+pub async fn detect_python_pm(cwd: &Path) -> PythonPackageManager { + if tokio::fs::metadata(cwd.join("uv.lock")).await.is_ok() { + return PythonPackageManager::Uv; + } + if tokio::fs::metadata(cwd.join("pdm.lock")).await.is_ok() { + return PythonPackageManager::Pdm; + } + if tokio::fs::metadata(cwd.join("poetry.lock")).await.is_ok() { + return PythonPackageManager::Poetry; + } + if let Ok(content) = tokio::fs::read_to_string(cwd.join("pyproject.toml")).await { + // Header-anchored checks so a stray substring in a value/comment does + // not misclassify. + if has_table(&content, "tool.uv") { + return PythonPackageManager::Uv; + } + if has_table(&content, "tool.poetry") { + return PythonPackageManager::Poetry; + } + if has_table(&content, "tool.pdm") { + return PythonPackageManager::Pdm; + } + if has_table(&content, "tool.hatch") { + return PythonPackageManager::Hatch; + } + } + PythonPackageManager::Pip +} + +/// True if a `[prefix]` or `[prefix.*]` table header appears in the TOML text. +fn has_table(content: &str, prefix: &str) -> bool { + content.lines().any(|line| { + let l = line.trim(); + if let Some(rest) = l.strip_prefix('[') { + let header = rest.trim_start_matches('[').trim_end_matches(']'); + header == prefix || header.starts_with(&format!("{prefix}.")) + } else { + false + } + }) +} + +/// True if the given manifest text already declares the hook dependency, in any +/// form. Space- and case-insensitive so `socket-patch [hook]` / `Socket-Patch` +/// are recognised. +pub fn deps_contain_hook(text: &str) -> bool { + let normalized: String = text.to_lowercase().chars().filter(|c| !c.is_whitespace()).collect(); + HOOK_MARKERS + .iter() + .any(|m| normalized.contains(&m.to_lowercase())) +} + +/// True if a single PEP 508 dependency spec is the hook dependency. 
+pub fn spec_is_hook(spec: &str) -> bool { + deps_contain_hook(spec) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deps_contain_hook_positive_forms() { + assert!(deps_contain_hook("socket-patch[hook]")); + assert!(deps_contain_hook("socket-patch [hook]")); + assert!(deps_contain_hook("Socket-Patch[hook]>=3.3.0")); + assert!(deps_contain_hook("socket-patch-hook==3.3.0")); + assert!(deps_contain_hook("socket_patch_hook")); + } + + #[test] + fn test_deps_contain_hook_negative() { + // A plain socket-patch dependency is NOT the hook. + assert!(!deps_contain_hook("socket-patch>=3.3.0")); + assert!(!deps_contain_hook("requests==2.31.0")); + assert!(!deps_contain_hook("")); + } + + #[test] + fn test_has_table() { + let toml = "[tool.poetry]\nname='x'\n[tool.poetry.dependencies]\n"; + assert!(has_table(toml, "tool.poetry")); + assert!(!has_table(toml, "tool.pdm")); + assert!(has_table("[project]\n", "project")); + // not fooled by a value that contains the text + assert!(!has_table("name = \"tool.poetry helper\"\n", "tool.poetry")); + } + + #[tokio::test] + async fn test_detect_uv_by_lock() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("uv.lock"), "").await.unwrap(); + assert_eq!(detect_python_pm(dir.path()).await, PythonPackageManager::Uv); + } + + #[tokio::test] + async fn test_detect_poetry_by_table() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[tool.poetry]\nname = \"x\"\n", + ) + .await + .unwrap(); + assert_eq!( + detect_python_pm(dir.path()).await, + PythonPackageManager::Poetry + ); + } + + #[tokio::test] + async fn test_detect_pip_fallback() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("requirements.txt"), "requests\n") + .await + .unwrap(); + assert_eq!(detect_python_pm(dir.path()).await, PythonPackageManager::Pip); + } + + #[test] + fn test_lock_command() { + assert_eq!(PythonPackageManager::Uv.lock_command(), 
Some(("uv", &["lock"][..]))); + assert_eq!(PythonPackageManager::Pip.lock_command(), None); + assert_eq!(PythonPackageManager::Hatch.lock_command(), None); + } +} diff --git a/crates/socket-patch-core/src/pth_hook/edit.rs b/crates/socket-patch-core/src/pth_hook/edit.rs new file mode 100644 index 00000000..0a6c9e0c --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/edit.rs @@ -0,0 +1,538 @@ +//! Add / remove the `socket-patch-hook` dependency in a project's manifest. +//! +//! Two manifest kinds are supported: +//! * **pyproject.toml** — edited with `toml_edit` so the user's existing +//! formatting and comments are preserved. Targets the PEP 621 +//! `[project].dependencies` array, or a classic Poetry +//! `[tool.poetry.dependencies]` table when that is the only dependency +//! surface present. +//! * **requirements.txt** — a plain line append / removal. +//! +//! All operations are idempotent and honour `dry_run` (compute the result and +//! report status without writing). This mirrors the contracts of +//! [`crate::package_json::update`] for the npm side. + +use std::path::Path; +use tokio::fs; +use toml_edit::{Array, DocumentMut, Item, Table, Value}; + +use super::detect::{deps_contain_hook, spec_is_hook, HOOK_DEP}; + +/// Which manifest format a path is. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ManifestKind { + Pyproject, + Requirements, +} + +/// Outcome of editing one manifest. Mirrors `package_json::update::UpdateStatus`.
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PthStatus { + Updated, + AlreadyConfigured, + Error, +} + +#[derive(Debug, Clone)] +pub struct PthEditResult { + pub path: String, + pub kind: ManifestKind, + pub status: PthStatus, + pub error: Option, +} + +impl PthEditResult { + fn ok(path: &Path, kind: ManifestKind, status: PthStatus) -> Self { + Self { + path: path.display().to_string(), + kind, + status, + error: None, + } + } + fn err(path: &Path, kind: ManifestKind, msg: impl Into) -> Self { + Self { + path: path.display().to_string(), + kind, + status: PthStatus::Error, + error: Some(msg.into()), + } + } +} + +/// Add the hook dependency to a manifest. Idempotent. +pub async fn add_hook_dependency(path: &Path, kind: ManifestKind, dry_run: bool) -> PthEditResult { + let content = match fs::read_to_string(path).await { + Ok(c) => c, + // A missing requirements.txt is created (the pip-from-scratch path); + // a missing pyproject.toml is an error (we don't synthesize one). + Err(e) + if e.kind() == std::io::ErrorKind::NotFound + && kind == ManifestKind::Requirements => + { + String::new() + } + Err(e) => return PthEditResult::err(path, kind, e.to_string()), + }; + + let outcome = match kind { + ManifestKind::Pyproject => pyproject_add(&content), + ManifestKind::Requirements => requirements_add(&content), + }; + + match outcome { + Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured), + Ok(Some(new_content)) => { + if !dry_run { + if let Err(e) = fs::write(path, &new_content).await { + return PthEditResult::err(path, kind, e.to_string()); + } + } + PthEditResult::ok(path, kind, PthStatus::Updated) + } + Err(e) => PthEditResult::err(path, kind, e), + } +} + +/// Remove the hook dependency from a manifest. Idempotent (already-absent -> +/// `AlreadyConfigured`, i.e. nothing to do). 
+pub async fn remove_hook_dependency( + path: &Path, + kind: ManifestKind, + dry_run: bool, +) -> PthEditResult { + let content = match fs::read_to_string(path).await { + Ok(c) => c, + // Nothing on disk → nothing to remove (idempotent no-op). + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + return PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured) + } + Err(e) => return PthEditResult::err(path, kind, e.to_string()), + }; + + let outcome = match kind { + ManifestKind::Pyproject => pyproject_remove(&content), + ManifestKind::Requirements => requirements_remove(&content), + }; + + match outcome { + Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured), + Ok(Some(new_content)) => { + if !dry_run { + if let Err(e) = fs::write(path, &new_content).await { + return PthEditResult::err(path, kind, e.to_string()); + } + } + PthEditResult::ok(path, kind, PthStatus::Updated) + } + Err(e) => PthEditResult::err(path, kind, e), + } +} + +// ── requirements.txt ──────────────────────────────────────────────────────── + +/// The file's dominant newline style, so edits don't rewrite CRLF as LF. +fn newline_of(content: &str) -> &'static str { + if content.contains("\r\n") { + "\r\n" + } else { + "\n" + } +} + +/// Returns `Some(new_content)` if a line was appended, `None` if already there. +fn requirements_add(content: &str) -> Result, String> { + if content + .lines() + .any(|l| deps_contain_hook(strip_requirement_comment(l))) + { + return Ok(None); + } + let nl = newline_of(content); + let mut new = content.to_string(); + if !new.is_empty() && !new.ends_with('\n') { + new.push_str(nl); + } + new.push_str(HOOK_DEP); + new.push_str(nl); + Ok(Some(new)) +} + +/// Returns `Some(new_content)` if any hook line was removed, `None` otherwise. 
+fn requirements_remove(content: &str) -> Result, String> { + let kept: Vec<&str> = content + .lines() + .filter(|l| !deps_contain_hook(strip_requirement_comment(l))) + .collect(); + if kept.len() == content.lines().count() { + return Ok(None); + } + let nl = newline_of(content); + let mut new = kept.join(nl); + if !new.is_empty() { + new.push_str(nl); + } + Ok(Some(new)) +} + +/// Strip a trailing `# comment` so we match against the requirement spec only. +fn strip_requirement_comment(line: &str) -> &str { + match line.find('#') { + Some(i) => &line[..i], + None => line, + } +} + +// ── pyproject.toml ─────────────────────────────────────────────────────────── + +/// Returns `Some(new_content)` if the doc was modified, `None` if the hook dep +/// was already present, or `Err` on malformed TOML / wrong-typed tables. +fn pyproject_add(content: &str) -> Result, String> { + let mut doc = content + .parse::() + .map_err(|e| format!("Invalid pyproject.toml: {e}"))?; + + // Prefer PEP 621 `[project].dependencies` when there is a *real* PEP 621 + // surface; otherwise fall back to a classic Poetry `[tool.poetry]` table. + // A `[project]` table that exists only implicitly (e.g. conjured by a + // `[project.urls]` sub-table in a Poetry-1.x project) is NOT a real PEP 621 + // surface — routing such a project to PEP 621 would add a + // `[project].dependencies` that Poetry ignores at install time. The inner + // helpers detect an already-present hook dependency on the parsed document: + // a `[project].dependencies` entry matching the hook markers, or a + // `socket-patch-hook` key in the Poetry dependencies table. + let real_pep621 = doc + .get("project") + .and_then(Item::as_table) + .map(|t| !t.is_implicit() || t.contains_key("dependencies")) + .unwrap_or(false); + let has_poetry = doc + .get("tool") + .and_then(Item::as_table) + .and_then(|t| t.get("poetry")) + .and_then(Item::as_table) + .is_some(); + + let changed = if has_poetry && !real_pep621 { + poetry_add(&mut doc)? + } else { + pep621_add(&mut doc)? 
+ }; + Ok(if changed { Some(doc.to_string()) } else { None }) +} + +fn pyproject_remove(content: &str) -> Result, String> { + let mut doc = content + .parse::() + .map_err(|e| format!("Invalid pyproject.toml: {e}"))?; + + let mut changed = false; + changed |= pep621_remove(&mut doc); + changed |= poetry_remove(&mut doc); + + Ok(if changed { Some(doc.to_string()) } else { None }) +} + +/// Ensure `parent[key]` is a table, creating it if absent. Errors if present +/// but a non-table. +fn ensure_table<'a>(parent: &'a mut Table, key: &str, implicit: bool) -> Result<&'a mut Table, String> { + if !parent.contains_key(key) { + let mut t = Table::new(); + t.set_implicit(implicit); + parent.insert(key, Item::Table(t)); + } + parent + .get_mut(key) + .and_then(Item::as_table_mut) + .ok_or_else(|| format!("`{key}` is not a table")) +} + +fn pep621_add(doc: &mut DocumentMut) -> Result { + let root = doc.as_table_mut(); + let project = ensure_table(root, "project", false)?; + if !project.contains_key("dependencies") { + project.insert("dependencies", Item::Value(Value::Array(Array::new()))); + } + let deps = project + .get_mut("dependencies") + .and_then(Item::as_array_mut) + .ok_or("`project.dependencies` is not an array")?; + if deps + .iter() + .any(|v| v.as_str().map(spec_is_hook).unwrap_or(false)) + { + return Ok(false); + } + deps.push(HOOK_DEP); + Ok(true) +} + +fn pep621_remove(doc: &mut DocumentMut) -> bool { + let deps = match doc + .get_mut("project") + .and_then(Item::as_table_mut) + .and_then(|p| p.get_mut("dependencies")) + .and_then(Item::as_array_mut) + { + Some(d) => d, + None => return false, + }; + let before = deps.len(); + deps.retain(|v| !v.as_str().map(spec_is_hook).unwrap_or(false)); + deps.len() != before +} + +fn poetry_add(doc: &mut DocumentMut) -> Result { + let root = doc.as_table_mut(); + let tool = ensure_table(root, "tool", true)?; + let poetry = ensure_table(tool, "poetry", true)?; + let deps = ensure_table(poetry, "dependencies", false)?; + + 
// The hook is a standalone, version-agnostic dependency — add it as its own + // key rather than mutating the user's `socket-patch` entry. `"*"` because + // the hook needs no specific version (it runs whatever CLI is on PATH). + if deps.contains_key("socket-patch-hook") { + return Ok(false); + } + deps.insert("socket-patch-hook", Item::Value(Value::from("*"))); + Ok(true) +} + +fn poetry_remove(doc: &mut DocumentMut) -> bool { + let deps = match doc + .get_mut("tool") + .and_then(Item::as_table_mut) + .and_then(|t| t.get_mut("poetry")) + .and_then(Item::as_table_mut) + .and_then(|p| p.get_mut("dependencies")) + .and_then(Item::as_table_mut) + { + Some(d) => d, + None => return false, + }; + deps.remove("socket-patch-hook").is_some() +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── requirements.txt ───────────────────────────────────────────── + + #[test] + fn test_requirements_add() { + let out = requirements_add("requests==2.31.0\n").unwrap().unwrap(); + assert!(out.contains("requests==2.31.0")); + assert!(out.contains("socket-patch-hook")); + assert!(out.ends_with('\n')); + } + + #[test] + fn test_requirements_add_no_trailing_newline() { + let out = requirements_add("requests").unwrap().unwrap(); + assert_eq!(out, "requests\nsocket-patch-hook\n"); + } + + #[test] + fn test_requirements_add_idempotent() { + // Both the standalone wheel and the legacy `[hook]` extra are recognized. 
+ assert!(requirements_add("socket-patch-hook\n").unwrap().is_none()); + assert!(requirements_add("socket-patch-hook==3.3.0\n").unwrap().is_none()); + assert!(requirements_add("socket-patch[hook]\n").unwrap().is_none()); + } + + #[test] + fn test_requirements_remove() { + let out = requirements_remove("requests\nsocket-patch-hook\n") + .unwrap() + .unwrap(); + assert_eq!(out, "requests\n"); + } + + #[test] + fn test_requirements_remove_absent() { + assert!(requirements_remove("requests\n").unwrap().is_none()); + } + + // ── pyproject PEP 621 ──────────────────────────────────────────── + + #[test] + fn test_pep621_add_to_existing_array() { + let toml = "[project]\nname = \"x\"\ndependencies = [\"requests\"]\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + assert!(out.contains("socket-patch-hook")); + assert!(out.contains("requests")); + // Re-parse to confirm validity + idempotency. + assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_pep621_add_creates_dependencies() { + let toml = "[project]\nname = \"x\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + let deps = doc["project"]["dependencies"].as_array().unwrap(); + assert!(deps.iter().any(|v| v.as_str() == Some("socket-patch-hook"))); + } + + #[test] + fn test_pep621_preserves_other_content() { + let toml = "[build-system]\nrequires = [\"setuptools\"]\n\n[project]\nname = \"x\"\nversion = \"1.0\"\ndependencies = [\n \"requests\",\n]\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + assert!(out.contains("[build-system]")); + assert!(out.contains("version = \"1.0\"")); + assert!(out.contains("requests")); + assert!(out.contains("socket-patch-hook")); + } + + #[test] + fn test_pep621_remove() { + let toml = "[project]\ndependencies = [\"requests\", \"socket-patch-hook\"]\n"; + let out = pyproject_remove(toml).unwrap().unwrap(); + assert!(!out.contains("socket-patch-hook")); + assert!(out.contains("requests")); + } + + // ── 
pyproject Poetry (standalone hook key, no extras-merging) ───── + + #[test] + fn test_poetry_add_new_key() { + let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert_eq!( + doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), + Some("*") + ); + // Idempotent. + assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_poetry_leaves_existing_socket_patch_untouched() { + // An existing `socket-patch` dependency must NOT be mutated; we only add + // the standalone `socket-patch-hook` key. + let toml = "[tool.poetry]\nname = \"x\"\n[tool.poetry.dependencies]\nsocket-patch = \"^3.3.0\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert_eq!( + doc["tool"]["poetry"]["dependencies"]["socket-patch"].as_str(), + Some("^3.3.0"), + "existing socket-patch dep must be left intact" + ); + assert_eq!( + doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), + Some("*") + ); + } + + #[test] + fn test_poetry_subtable_dependency_preserved() { + // A `[tool.poetry.dependencies.socket-patch]` sub-table (version/source) + // must survive untouched; only the standalone hook key is added. + let toml = "[tool.poetry.dependencies.socket-patch]\nversion = \"^3.3.0\"\ngit = \"https://example.com/x.git\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + let sp = &doc["tool"]["poetry"]["dependencies"]["socket-patch"]; + assert_eq!( + sp.as_table_like().and_then(|t| t.get("git")).and_then(Item::as_str), + Some("https://example.com/x.git"), + "sub-table keys must survive" + ); + assert_eq!( + doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), + Some("*") + ); + // Idempotent. 
+ assert!(pyproject_add(&out).unwrap().is_none()); + } + + #[test] + fn test_poetry_remove() { + let toml = "[tool.poetry.dependencies]\nsocket-patch-hook = \"*\"\npython = \"^3.9\"\n"; + let out = pyproject_remove(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!(doc["tool"]["poetry"]["dependencies"] + .get("socket-patch-hook") + .is_none()); + assert!(doc["tool"]["poetry"]["dependencies"].get("python").is_some()); + } + + #[test] + fn test_pep621_preferred_when_both_present() { + // poetry 2.x: both [project] and [tool.poetry] — edit the PEP 621 array. + let toml = "[project]\nname = \"x\"\ndependencies = []\n\n[tool.poetry]\nname = \"x\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert!(doc["project"]["dependencies"] + .as_array() + .unwrap() + .iter() + .any(|v| v.as_str() == Some("socket-patch-hook"))); + } + + #[test] + fn test_invalid_toml_errors() { + assert!(pyproject_add("this is = = not toml [[[").is_err()); + } + + #[test] + fn test_classic_poetry_with_project_urls_routes_to_poetry() { + // `[project.urls]` conjures an implicit `[project]` table; a Poetry 1.x + // project must still be edited in the Poetry table, not given a + // `[project].dependencies` Poetry ignores. 
+ let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n\n[project.urls]\nHome = \"https://example.com\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + let doc = out.parse::().unwrap(); + assert_eq!( + doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), + Some("*"), + "must edit the poetry table, not create [project].dependencies; got:\n{out}" + ); + assert!(doc.get("project").and_then(|p| p.get("dependencies")).is_none()); + } + + #[test] + fn test_requirements_preserves_crlf() { + let out = requirements_add("requests\r\n").unwrap().unwrap(); + assert_eq!(out, "requests\r\nsocket-patch-hook\r\n"); + let removed = requirements_remove(&out).unwrap().unwrap(); + assert_eq!(removed, "requests\r\n"); + } + + // ── file-level NotFound handling (the create / no-op paths) ────── + + #[tokio::test] + async fn test_add_creates_missing_requirements() { + let dir = tempfile::tempdir().unwrap(); + let req = dir.path().join("requirements.txt"); // does not exist + let res = add_hook_dependency(&req, ManifestKind::Requirements, false).await; + assert_eq!(res.status, PthStatus::Updated); + let body = tokio::fs::read_to_string(&req).await.unwrap(); + assert_eq!(body, "socket-patch-hook\n"); + } + + #[tokio::test] + async fn test_add_missing_pyproject_is_error() { + let dir = tempfile::tempdir().unwrap(); + let py = dir.path().join("pyproject.toml"); // does not exist + let res = add_hook_dependency(&py, ManifestKind::Pyproject, false).await; + assert_eq!(res.status, PthStatus::Error); + } + + #[tokio::test] + async fn test_remove_missing_file_is_noop() { + let dir = tempfile::tempdir().unwrap(); + let req = dir.path().join("requirements.txt"); // does not exist + let res = remove_hook_dependency(&req, ManifestKind::Requirements, false).await; + assert_eq!(res.status, PthStatus::AlreadyConfigured); + } + + #[tokio::test] + async fn test_add_dry_run_does_not_create() { + let dir = tempfile::tempdir().unwrap(); + let req 
= dir.path().join("requirements.txt"); + let res = add_hook_dependency(&req, ManifestKind::Requirements, true).await; + assert_eq!(res.status, PthStatus::Updated); + assert!(!req.exists(), "dry-run must not create the file"); + } +} diff --git a/crates/socket-patch-core/src/pth_hook/mod.rs b/crates/socket-patch-core/src/pth_hook/mod.rs new file mode 100644 index 00000000..1fbf6278 --- /dev/null +++ b/crates/socket-patch-core/src/pth_hook/mod.rs @@ -0,0 +1,25 @@ +//! Python `.pth` post-install hook setup. +//! +//! Where npm-family ecosystems get an automatic post-install patch hook via a +//! `package.json` `postinstall` script ([`crate::package_json`]), Python has no +//! universal installer hook. Instead, `socket-patch setup` declares a committed +//! dependency on the standalone `socket-patch-hook` wheel (a bare, unversioned +//! requirement); installing that wheel lays a startup `.pth` into site-packages that +//! re-applies patches after any install — package-manager-agnostic, because it +//! rides on the interpreter's startup hook rather than any one installer. +//! +//! This module is the Rust side: detecting the project's dependency manager +//! ([`detect`]) and editing its manifest(s) to add/remove the hook dependency +//! ([`edit`]). All actual patching stays in `socket-patch apply`. +//! +//! The committed dependency line is the single source of truth that the hook is +//! active — there is no separate marker/audit file (git history is the audit +//! trail), so nothing can drift out of sync with the manifest.
+ +pub mod detect; +pub mod edit; + +pub use detect::{deps_contain_hook, detect_python_pm, PythonPackageManager, HOOK_DEP}; +pub use edit::{ + add_hook_dependency, remove_hook_dependency, ManifestKind, PthEditResult, PthStatus, +}; diff --git a/pypi/socket-patch-hook/README.md b/pypi/socket-patch-hook/README.md new file mode 100644 index 00000000..b53b4e05 --- /dev/null +++ b/pypi/socket-patch-hook/README.md @@ -0,0 +1,51 @@ +# socket-patch-hook + +A tiny, package-manager-agnostic **post-install hook** for +[`socket-patch`](https://pypi.org/project/socket-patch/). + +Python package managers (pip, uv, poetry, pdm, hatch) have no universal +post-install step, so a `pip install` / `--force-reinstall` can silently revert +files that `socket-patch` previously patched. This package closes that gap. + +## How it works + +Installing this wheel lays down a startup `.pth` file in `site-packages` +(RECORD-tracked, so `pip uninstall` removes it cleanly). At interpreter startup +the hook does a microsecond-cheap check of whether the set of installed +distributions changed since the last run; only then does it re-apply your +project's **committed** patches by invoking `socket-patch apply --offline`. All +real patching (hash verification, atomic writes, locking) is done by the +`socket-patch` binary — this package only *triggers* it. + +Because it rides on Python's interpreter-startup `.pth` mechanism (not on any +one installer's hooks), it works the same under every Python package manager. + +## Version-agnostic by design + +This package has **no dependency** on `socket-patch`. At runtime it invokes +whatever `socket-patch` CLI is on `PATH` (or pip-installed in the environment), +so the committed `socket-patch-hook` dependency never needs a version bump and +the CLI can be upgraded independently. If no `socket-patch` is found, the hook +silently does nothing. 
+ +Provision the CLI however you like — `pip install socket-patch`, `pipx install +socket-patch`, a system package, or a CI/GitHub Action step. (Or use the +all-in-one `pip install socket-patch[hook]`, which installs both.) + +## Activating it + +Don't add this by hand. Run, in your project: + +``` +socket-patch setup +``` + +That commits a `socket-patch-hook` dependency to your repo (the dependency +itself is the source of truth — there's no separate marker file), so the hook +activates automatically in CI after install. Remove it with `socket-patch setup +--remove` followed by `pip uninstall socket-patch-hook`. + +## Disabling at runtime + +Set `SOCKET_PATCH_HOOK=off` (or `SOCKET_NO_HOOK=1`) to fully bypass the hook for +a given interpreter — checked before any hook code runs. diff --git a/pypi/socket-patch-hook/pyproject.toml b/pypi/socket-patch-hook/pyproject.toml new file mode 100644 index 00000000..bbf2dd4a --- /dev/null +++ b/pypi/socket-patch-hook/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["setuptools>=64"] +build-backend = "setuptools.build_meta" + +[project] +name = "socket-patch-hook" +version = "3.3.0" +description = "Auto-apply Socket security patches after install via a package-manager-agnostic .pth startup hook" +readme = "README.md" +license = "MIT" +requires-python = ">=3.8" +authors = [ + { name = "Socket Security" } +] +keywords = ["security", "patch", "hook", "dependencies", "pth"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Topic :: Security", + "Topic :: Software Development :: Build Tools", +] +# Intentionally NO dependency on socket-patch: the hook is version-agnostic and +# invokes whatever `socket-patch` CLI is on PATH (or pip-installed in the env), +# no-opping if none is present. 
This keeps the committed `socket-patch-hook` +# dependency a single stable token that never needs a version bump, and lets the +# CLI be provisioned independently (pip, pipx, a GitHub Action, system install). +# (The canonical build is scripts/build-pypi-wheels.py, which also lays down the +# startup .pth; this block keeps the directory a valid project for `pip install .`.) +dependencies = [] + +[project.urls] +Homepage = "https://github.com/SocketDev/socket-patch" +Repository = "https://github.com/SocketDev/socket-patch" + +[tool.setuptools] +packages = ["socket_patch_hook"] diff --git a/pypi/socket-patch-hook/socket_patch_hook.pth b/pypi/socket-patch-hook/socket_patch_hook.pth new file mode 100644 index 00000000..4c36d642 --- /dev/null +++ b/pypi/socket-patch-hook/socket_patch_hook.pth @@ -0,0 +1 @@ +import os; exec("try:\n import socket_patch_hook as _h; _h.run()\nexcept Exception: pass") if (os.environ.get('SOCKET_PATCH_HOOK','').strip().lower() not in ('off','0','false','no') and os.environ.get('SOCKET_NO_HOOK','').strip().lower() not in ('1','true','yes','on')) else None diff --git a/pypi/socket-patch-hook/socket_patch_hook/__init__.py b/pypi/socket-patch-hook/socket_patch_hook/__init__.py new file mode 100644 index 00000000..a09e7bcb --- /dev/null +++ b/pypi/socket-patch-hook/socket_patch_hook/__init__.py @@ -0,0 +1,285 @@ +"""socket-patch post-install hook (package-manager-agnostic). + +This module is imported at Python interpreter startup by a wheel-shipped +``socket_patch_hook.pth`` file (the same ``.pth`` ``import``-line mechanism +coverage.py uses). When the set of installed distributions has changed since the +last run -- e.g. ``pip install`` / ``--force-reinstall`` / ``uv sync`` reverted a +file that Socket had patched -- it re-applies the project's committed patches by +invoking the hardened ``socket-patch apply`` binary in offline mode. 
All actual +patching (hash verification, atomic writes, locking) stays in that binary; this +module only *triggers* it. + +Hard safety contract: + * ``run()`` must NEVER raise into ``site.py`` (a raise here would hit every + interpreter start in the environment). Every step is failure-swallowing. + * The common, no-change path must cost only a few syscalls (it does: a bounded + parent walk, one ``scandir`` of site-packages, and one small file read). + * The worst outcome of any bug here is that patches are simply not re-applied. + +Disable entirely with ``SOCKET_PATCH_HOOK=off`` (also checked in the ``.pth`` +line before this module is even imported) or ``SOCKET_NO_HOOK=1``. +""" + +import os +import sys + +__all__ = ["run"] + +# Set in the environment of the spawned ``apply`` process so a nested +# interpreter started underneath it does not re-trigger the hook. (The apply +# binary itself is native Rust, but it -- or a tool it shells out to -- may +# invoke ``python``, which would re-process the ``.pth``.) +_REENTRANCY_ENV = "_SOCKET_PATCH_HOOK_ACTIVE" + +# Upper bound on the parent-directory walk used to locate the project root. +_MAX_PARENTS = 40 + +# Generous safety net for a single hook-triggered apply. The apply is offline +# and local, so this only ever fires if something is badly wrong; it exists so a +# hung apply can never wedge interpreter startup forever. 
+_APPLY_TIMEOUT_SECONDS = 120 + + +def _truthy(value): + return str(value or "").strip().lower() in ("1", "true", "yes", "on") + + +def _disabled(): + """True if the user has switched the hook off via env var.""" + if _truthy(os.environ.get("SOCKET_NO_HOOK")): + return True + return os.environ.get("SOCKET_PATCH_HOOK", "").strip().lower() in ( + "off", + "0", + "false", + "no", + ) + + +def _site_packages_dir(): + # __file__ == /socket_patch_hook/__init__.py + return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +def _find_project_root(): + """Locate the directory containing ``.socket/manifest.json``. + + Tries, in order: an upward walk from the current working directory, then the + parents of ``VIRTUAL_ENV`` and ``sys.prefix`` (covering the common deploy + shape where the venv lives at ``/.venv``). Returns ``None`` when no + committed manifest is found -- in which case the hook is a no-op, so a wheel + that happens to be installed in an unrelated environment does nothing. + """ + starts = [] + try: + starts.append(os.getcwd()) + except OSError: + pass + for env_dir in (os.environ.get("VIRTUAL_ENV"), getattr(sys, "prefix", None)): + if env_dir: + starts.append(os.path.dirname(os.path.abspath(env_dir))) + + seen = set() + for start in starts: + try: + d = os.path.abspath(start) + except OSError: + continue + for _ in range(_MAX_PARENTS): + if d in seen: + break + seen.add(d) + if os.path.isfile(os.path.join(d, ".socket", "manifest.json")): + return d + parent = os.path.dirname(d) + if parent == d: # reached the filesystem root + break + d = parent + return None + + +def _fingerprint(site_dir): + """Cheap signature of the installed distributions in ``site_dir``. + + A SHA-1 of the sorted ``(name, mtime)`` of every ``*.dist-info`` / + ``*.egg-info`` entry. 
This changes on any install / reinstall / uninstall, + but is deliberately immune to: + * our own patch writes (which touch package *files*, not the metadata + dirs), so the fingerprint is stable across an apply -- no re-apply loop; + * the stamp file (kept in a user cache, outside site-packages); + * ``__pycache__`` / ``.pyc`` churn. + Returns ``"?"`` on error so we fail toward a (harmless, idempotent) re-apply. + """ + import hashlib + + try: + items = [] + with os.scandir(site_dir) as it: + for entry in it: + name = entry.name + if name.endswith(".dist-info") or name.endswith(".egg-info"): + try: + mtime = entry.stat().st_mtime_ns + except OSError: + mtime = 0 + items.append("%s:%d" % (name, mtime)) + items.sort() + return hashlib.sha1( + "\n".join(items).encode("utf-8", "replace") + ).hexdigest() + except OSError: + return "?" + + +def _cache_dir(): + if os.name == "nt": + base = os.environ.get("LOCALAPPDATA") or os.path.expanduser("~") + else: + base = os.environ.get("XDG_CACHE_HOME") or os.path.join( + os.path.expanduser("~"), ".cache" + ) + return os.path.join(base, "socket-patch", "hook-stamps") + + +def _stamp_path(site_dir): + """Per-site-packages stamp file, in a user cache so writing it never + perturbs the site-packages fingerprint and never dirties the repo.""" + import hashlib + + key = hashlib.sha1( + os.path.abspath(site_dir).encode("utf-8", "replace") + ).hexdigest() + return os.path.join(_cache_dir(), key) + + +def _read_stamp(path): + try: + with open(path, "r") as f: + return f.read().strip() + except OSError: + return None + + +def _write_stamp(path, value): + tmp = None + try: + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp = "%s.%d.tmp" % (path, os.getpid()) + with open(tmp, "w") as f: + f.write(value) + os.replace(tmp, path) + except OSError: + if tmp: + try: + os.unlink(tmp) + except OSError: + pass + + +def _resolve_binary(): + """Locate the ``socket-patch`` binary, version-agnostically. 
+ + Prefers whatever ``socket-patch`` is on ``PATH`` (a pip/pipx/system/GitHub + Action-provisioned CLI), then falls back to the binary bundled in a + pip-installed ``socket_patch`` package if one happens to be in the env. The + hook does not depend on the CLI, so either may be absent — in which case we + return ``None`` and the hook no-ops. + """ + try: + import shutil + + found = shutil.which("socket-patch") + if found: + return found + except Exception: + pass + try: + import socket_patch + + resolver = getattr(socket_patch, "_resolve_binary", None) + if resolver is not None: + path = resolver() + if path: + return path + except Exception: + pass + return None + + +def _apply(binary, project_root): + """Run ``socket-patch apply`` synchronously, offline, best-effort. + + Synchronous so the patched bytes are in place before the interpreter + proceeds to user imports. Offline so it only ever re-heals from the + committed ``.socket/`` cache and never blocks startup on the network. + ``--lock-timeout 0`` so a parallel interpreter that loses the apply lock + (e.g. under ``pytest -n``) skips instantly instead of piling up. + + Returns ``True`` only if apply exited 0. A non-zero exit (e.g. losing the + apply lock to a sibling interpreter) returns ``False`` so the caller does + NOT stamp the state as handled and the heal is retried on the next start. + """ + import subprocess + + argv = [ + binary, + "apply", + "--offline", + "--silent", + "--ecosystems", + "pypi", + "--cwd", + project_root, + "--lock-timeout", + "0", + ] + env = dict(os.environ) + env[_REENTRANCY_ENV] = "1" + kwargs = { + "cwd": project_root, + "env": env, + "stdin": subprocess.DEVNULL, + "stdout": subprocess.DEVNULL, + "stderr": subprocess.DEVNULL, + "timeout": _APPLY_TIMEOUT_SECONDS, + } + # Don't flash a console window for a pythonw-hosted (no-console) app. 
+ if os.name == "nt": + kwargs["creationflags"] = getattr(subprocess, "CREATE_NO_WINDOW", 0) + try: + return subprocess.run(argv, **kwargs).returncode == 0 + except Exception: + # Includes TimeoutExpired and OSError (binary vanished mid-run). + return False + + +def run(): + """Entry point invoked by the ``.pth`` line. Never raises.""" + try: + # Cheapest possible bail-outs first. + if os.environ.get(_REENTRANCY_ENV): + return + if _disabled(): + return + project_root = _find_project_root() + if project_root is None: + return + site_dir = _site_packages_dir() + fp = _fingerprint(site_dir) + stamp_path = _stamp_path(site_dir) + if _read_stamp(stamp_path) == fp: + return # nothing installed/reinstalled since the last apply + binary = _resolve_binary() + if not binary: + return + # Stamp only on a successful apply. The dist-info fingerprint is + # unchanged by an apply (which patches package files, not metadata + # dirs), so storing the pre-apply value is correct -- and gating on + # success means a lock-contended / failed apply is retried next start + # rather than being silently marked as handled. + if _apply(binary, project_root): + _write_stamp(stamp_path, fp) + except Exception: + # Final backstop. The .pth wrapper also guards, but a raise here would + # hit every interpreter start, so never rely on a single layer. + return diff --git a/pypi/socket-patch-hook/test_hook.py b/pypi/socket-patch-hook/test_hook.py new file mode 100644 index 00000000..f901f40e --- /dev/null +++ b/pypi/socket-patch-hook/test_hook.py @@ -0,0 +1,206 @@ +"""Tests for the socket-patch startup hook. + +Run with: ``python -m unittest test_hook`` (no third-party deps required). + +The overriding contract under test is *safety*: the hook must never raise, must +no-op cheaply when there is nothing to do, and must invoke ``socket-patch +apply`` with the right offline arguments only when the installed distributions +have changed. 
+""" + +import os +import sys +import unittest +from unittest import mock + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import socket_patch_hook as hook # noqa: E402 + + +class HookTestBase(unittest.TestCase): + def setUp(self): + self._cwd = os.getcwd() + # Isolate env: clear switches + reentrancy + cache redirect. + self._saved_env = dict(os.environ) + for k in ("SOCKET_PATCH_HOOK", "SOCKET_NO_HOOK", hook._REENTRANCY_ENV): + os.environ.pop(k, None) + self._tmp = self._mkdtemp() + os.environ["XDG_CACHE_HOME"] = os.path.join(self._tmp, "cache") + os.environ["LOCALAPPDATA"] = os.path.join(self._tmp, "cache") + + def tearDown(self): + os.chdir(self._cwd) + os.environ.clear() + os.environ.update(self._saved_env) + + def _mkdtemp(self): + import tempfile + + d = tempfile.mkdtemp() + self.addCleanup(self._rmtree, d) + return d + + @staticmethod + def _rmtree(path): + import shutil + + shutil.rmtree(path, ignore_errors=True) + + def _make_project(self): + """A temp dir that looks like a socket-patch project (has a manifest).""" + root = self._mkdtemp() + os.makedirs(os.path.join(root, ".socket")) + with open(os.path.join(root, ".socket", "manifest.json"), "w") as f: + f.write('{"patches": {}}') + return root + + +class TestRunSpawning(HookTestBase): + def test_applies_when_manifest_present_and_state_changed(self): + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: + hook.run() + self.assertEqual(run.call_count, 1) + argv = run.call_args[0][0] + self.assertEqual(argv[0], "/fake/socket-patch") + self.assertIn("apply", argv) + self.assertIn("--offline", argv) + self.assertIn("--silent", argv) + # --ecosystems pypi + self.assertEqual(argv[argv.index("--ecosystems") + 1], "pypi") + # --cwd + self.assertEqual( + os.path.realpath(argv[argv.index("--cwd") + 1]), + os.path.realpath(root), + ) + # 
--lock-timeout 0 (skip instantly if another apply holds the lock) + self.assertEqual(argv[argv.index("--lock-timeout") + 1], "0") + # Re-entrancy guard set in the child env. + env = run.call_args[1]["env"] + self.assertEqual(env[hook._REENTRANCY_ENV], "1") + + def test_second_run_is_a_noop_when_state_unchanged(self): + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: + hook.run() # first run applies + writes the stamp (success) + hook.run() # second run: fingerprint matches stamp -> skip + self.assertEqual(run.call_count, 1) + + def test_failed_apply_does_not_stamp_so_it_retries(self): + # A non-zero apply (e.g. lost the lock) must NOT be recorded as handled. + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", return_value=mock.Mock(returncode=1)) as run: + hook.run() + hook.run() + self.assertEqual(run.call_count, 2, "a failed apply must be retried next start") + + def test_noop_without_manifest(self): + root = self._mkdtemp() # no .socket/manifest.json + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + def test_noop_when_binary_missing(self): + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value=None), \ + mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + +class TestDisableSwitches(HookTestBase): + def test_socket_patch_hook_off(self): + root = self._make_project() + os.chdir(root) + os.environ["SOCKET_PATCH_HOOK"] = "off" + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run") as run: + hook.run() + 
run.assert_not_called() + + def test_socket_no_hook(self): + root = self._make_project() + os.chdir(root) + os.environ["SOCKET_NO_HOOK"] = "1" + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + def test_reentrancy_guard(self): + root = self._make_project() + os.chdir(root) + os.environ[hook._REENTRANCY_ENV] = "1" + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run") as run: + hook.run() + run.assert_not_called() + + +class TestNeverRaises(HookTestBase): + def test_run_swallows_resolver_errors(self): + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", side_effect=RuntimeError("boom")): + # Must not propagate. + hook.run() + + def test_run_swallows_subprocess_errors(self): + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch("subprocess.run", side_effect=OSError("no such binary")): + hook.run() # must not raise + + def test_apply_timeout_is_swallowed(self): + import subprocess + + root = self._make_project() + os.chdir(root) + with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + mock.patch( + "subprocess.run", + side_effect=subprocess.TimeoutExpired(cmd="x", timeout=1), + ): + hook.run() # must not raise + + +class TestPthLine(unittest.TestCase): + """The .pth one-liner must be valid Python and obey the kill switch.""" + + def _pth_line(self): + here = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(here, "socket_patch_hook.pth")) as f: + return f.read().strip() + + def test_pth_line_executes_and_calls_run(self): + line = self._pth_line() + with mock.patch.object(hook, "run") as run: + os.environ.pop("SOCKET_PATCH_HOOK", None) + os.environ.pop("SOCKET_NO_HOOK", None) + exec(compile(line, 
"socket_patch_hook.pth", "exec"), {}) + run.assert_called_once() + + def test_pth_line_respects_off_switch(self): + line = self._pth_line() + with mock.patch.object(hook, "run") as run: + os.environ["SOCKET_PATCH_HOOK"] = "off" + try: + exec(compile(line, "socket_patch_hook.pth", "exec"), {}) + finally: + os.environ.pop("SOCKET_PATCH_HOOK", None) + run.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/pypi/socket-patch/pyproject.toml b/pypi/socket-patch/pyproject.toml index 2a6cec1c..e816b868 100644 --- a/pypi/socket-patch/pyproject.toml +++ b/pypi/socket-patch/pyproject.toml @@ -21,6 +21,14 @@ classifiers = [ "Topic :: Software Development :: Build Tools", ] +[project.optional-dependencies] +# `pip install socket-patch[hook]` additionally installs the +# package-manager-agnostic .pth startup hook that re-applies patches after +# install. Unpinned so the hook updates independently of the CLI. `setup` +# itself commits a bare `socket-patch-hook` dependency (the hook needs no +# specific CLI version — it runs whatever `socket-patch` is on PATH). +hook = ["socket-patch-hook"] + [project.urls] Homepage = "https://github.com/SocketDev/socket-patch" Repository = "https://github.com/SocketDev/socket-patch" diff --git a/pypi/socket-patch/socket_patch/__init__.py b/pypi/socket-patch/socket_patch/__init__.py index bfcb9d2f..b4cf04ec 100644 --- a/pypi/socket-patch/socket_patch/__init__.py +++ b/pypi/socket-patch/socket_patch/__init__.py @@ -3,20 +3,42 @@ import subprocess -def main(): - bin_dir = os.path.join(os.path.dirname(__file__), "bin") +def _resolve_binary(): + """Locate the bundled socket-patch binary, or return ``None``. + + Single source of truth for binary discovery, reused by both ``main()`` (the + console-script entry point) and the ``socket_patch_hook`` startup hook. Never + raises: returns ``None`` if the binary can't be found, so callers that run at + interpreter startup stay safe. 
+ """ + bin_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") try: entries = os.listdir(bin_dir) except OSError: - entries = [] + return None bins = [e for e in entries if e.startswith("socket-patch")] if len(bins) != 1: + return None + bin_path = os.path.join(bin_dir, bins[0]) + try: + if not os.access(bin_path, os.X_OK): + os.chmod(bin_path, os.stat(bin_path).st_mode | 0o111) + except OSError: + return None + return bin_path + + +def main(): + bin_path = _resolve_binary() + if bin_path is None: + bin_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") + try: + count = len([e for e in os.listdir(bin_dir) if e.startswith("socket-patch")]) + except OSError: + count = 0 print( - f"Expected exactly one socket-patch binary in {bin_dir}, found {len(bins)}", + f"Expected exactly one socket-patch binary in {bin_dir}, found {count}", file=sys.stderr, ) sys.exit(1) - bin_path = os.path.join(bin_dir, bins[0]) - if not os.access(bin_path, os.X_OK): - os.chmod(bin_path, os.stat(bin_path).st_mode | 0o111) raise SystemExit(subprocess.call([bin_path] + sys.argv[1:])) diff --git a/scripts/build-pypi-wheels.py b/scripts/build-pypi-wheels.py index ab9f03b0..e1fcd392 100755 --- a/scripts/build-pypi-wheels.py +++ b/scripts/build-pypi-wheels.py @@ -190,6 +190,11 @@ def build_wheel( f"Summary: {metadata['description']}\n" f"License: {metadata['license']}\n" f"Requires-Python: {metadata['requires_python']}\n" + # `pip install socket-patch[hook]` additionally installs the + # package-manager-agnostic .pth post-install hook (a separate + # pure-python wheel). Unpinned so the hook can update independently. 
+ f"Provides-Extra: hook\n" + f'Requires-Dist: socket-patch-hook; extra == "hook"\n' ) if metadata.get("readme"): metadata_header += "Description-Content-Type: text/markdown\n" @@ -237,6 +242,79 @@ def build_wheel( return wheel_path +DIST_NAME_HOOK = "socket_patch_hook" +PKG_NAME_HOOK = "socket-patch-hook" + + +def build_hook_wheel(version: str, hook_dir: Path, dist_dir: Path) -> Path: + """Build the pure-python ``socket-patch-hook`` wheel (``py3-none-any``). + + Unlike the platform wheels, this ships no binary. It contains the + ``socket_patch_hook`` package and — crucially — a top-level + ``socket_patch_hook.pth`` that pip installs into the site-packages root, so + Python executes it at interpreter startup. It declares no dependency on + ``socket-patch``: at runtime the hook runs whatever CLI is on ``PATH``. + """ + init_path = hook_dir / "socket_patch_hook" / "__init__.py" + pth_path = hook_dir / "socket_patch_hook.pth" + readme_path = hook_dir / "README.md" + init_py = init_path.read_bytes() + pth = pth_path.read_bytes() + readme = readme_path.read_text() if readme_path.exists() else "" + + wheel_name = f"{DIST_NAME_HOOK}-{version}-py3-none-any.whl" + wheel_path = dist_dir / wheel_name + dist_info = f"{DIST_NAME_HOOK}-{version}.dist-info" + + files = [] + # The package module. + files.append((f"{DIST_NAME_HOOK}/__init__.py", init_py, False)) + # The startup hook — at the wheel root so it installs to site-packages. + files.append(("socket_patch_hook.pth", pth, False)) + + # No Requires-Dist on socket-patch: the hook is version-agnostic and finds + # whatever `socket-patch` CLI is on PATH at runtime (provisioned separately). 
+ metadata_content = ( + f"Metadata-Version: 2.1\n" + f"Name: {PKG_NAME_HOOK}\n" + f"Version: {version}\n" + f"Summary: Package-manager-agnostic post-install patch hook for socket-patch\n" + f"License: MIT\n" + f"Requires-Python: >=3.8\n" + ) + if readme: + metadata_content += "Description-Content-Type: text/markdown\n" + metadata_content += f"\n{readme}" + files.append((f"{dist_info}/METADATA", metadata_content.encode(), False)) + + # Pure-python: Root-Is-Purelib true so the .pth lands in site-packages. + wheel_content = ( + "Wheel-Version: 1.0\n" + "Generator: build-pypi-wheels.py\n" + "Root-Is-Purelib: true\n" + "Tag: py3-none-any\n" + ).encode() + files.append((f"{dist_info}/WHEEL", wheel_content, False)) + + record_lines = [] + for name, data, _ in files: + record_lines.append(f"{name},{sha256_digest(data)},{len(data)}") + record_name = f"{dist_info}/RECORD" + record_lines.append(f"{record_name},,") + files.append((record_name, "\n".join(record_lines).encode(), False)) + + with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as zf: + for name, data, _ in files: + info_obj = zipfile.ZipInfo(name) + info_obj.external_attr = ( + stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH + ) << 16 + info_obj.compress_type = zipfile.ZIP_DEFLATED + zf.writestr(info_obj, data) + + return wheel_path + + def main(): parser = argparse.ArgumentParser( description="Build platform-tagged PyPI wheels for socket-patch" @@ -248,8 +326,8 @@ def main(): ) parser.add_argument( "--artifacts", - required=True, - help="Directory containing build artifacts", + default=None, + help="Directory containing build artifacts (required unless --hook-only)", ) parser.add_argument( "--dist", @@ -261,23 +339,52 @@ def main(): default=None, help="Directory containing pyproject.toml (default: pypi/socket-patch relative to script)", ) + parser.add_argument( + "--hook-dir", + default=None, + help="Directory of the socket-patch-hook package (default: pypi/socket-patch-hook)", + ) + 
parser.add_argument( + "--hook-only", + action="store_true", + help="Build only the pure-python socket-patch-hook wheel (no binary artifacts needed)", + ) + parser.add_argument( + "--skip-hook", + action="store_true", + help="Skip building the socket-patch-hook wheel", + ) args = parser.parse_args() - artifacts_dir = Path(args.artifacts) dist_dir = Path(args.dist) dist_dir.mkdir(parents=True, exist_ok=True) + repo_root = Path(__file__).resolve().parent.parent + hook_dir = Path(args.hook_dir) if args.hook_dir else repo_root / "pypi" / "socket-patch-hook" + + built = [] + skipped = [] + + # The pure-python hook wheel needs no platform artifacts. + if args.hook_only: + wheel_path = build_hook_wheel(args.version, hook_dir, dist_dir) + size_kb = wheel_path.stat().st_size / 1024 + print(f"Built hook wheel: {wheel_path.name} ({size_kb:.1f} KB)") + return + + if not args.artifacts: + parser.error("--artifacts is required unless --hook-only is given") + + artifacts_dir = Path(args.artifacts) + if args.pyproject_dir: pyproject_dir = Path(args.pyproject_dir) else: - pyproject_dir = Path(__file__).resolve().parent.parent / "pypi" / "socket-patch" + pyproject_dir = repo_root / "pypi" / "socket-patch" metadata = read_pyproject_metadata(pyproject_dir) init_py = read_init_py(pyproject_dir) - built = [] - skipped = [] - for target, info in TARGETS.items(): archive_ext = info["archive_ext"] archive_path = artifacts_dir / f"socket-patch-{target}.{archive_ext}" @@ -300,6 +407,12 @@ def main(): print(f" -> {wheel_path.name} ({size_mb:.1f} MB)") built.append(wheel_path) + if not args.skip_hook: + hook_wheel = build_hook_wheel(args.version, hook_dir, dist_dir) + size_kb = hook_wheel.stat().st_size / 1024 + print(f" -> {hook_wheel.name} ({size_kb:.1f} KB) [pure-python hook]") + built.append(hook_wheel) + print(f"\nBuilt {len(built)} wheel(s) in {dist_dir}/") if skipped: print(f"Skipped {len(skipped)} target(s) (artifact not found): {', '.join(skipped)}") diff --git 
a/tests/setup_matrix/matrix.json b/tests/setup_matrix/matrix.json index ed6ccbdc..a457cc72 100644 --- a/tests/setup_matrix/matrix.json +++ b/tests/setup_matrix/matrix.json @@ -92,14 +92,14 @@ }, { - "ecosystem": "pypi", "pm": "pip", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "pip", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, { - "ecosystem": "pypi", "pm": "uv", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "uv", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, diff --git a/tests/setup_matrix/run-case.sh b/tests/setup_matrix/run-case.sh index 328587be..9382ce13 100755 --- a/tests/setup_matrix/run-case.sh +++ b/tests/setup_matrix/run-case.sh @@ -205,7 +205,22 @@ EOF { "name": "sm-proj", "version": "0.0.0", "nodeModulesDir": "auto" } EOF ;; - pip|uv) : ;; + pip) + # A requirements.txt makes `setup` detect a pip project and add the + # `socket-patch[hook]` dependency to it. + printf '%s==%s\n' "$SM_PACKAGE" "$SM_VERSION" > requirements.txt ;; + uv) + # A PEP 621 pyproject + uv.lock makes `setup` detect a uv project. + cat > pyproject.toml < uv.lock ;; poetry) cat > pyproject.toml </dev/null || true +} + +# Start an interpreter so the `.pth` hook fires (models a CI app start / +# the next python invocation after install). No-op if there is no venv. 
+pth_trigger() { # $1=venv dir + local venv="$1" + [ -x "$venv/bin/python" ] || return 0 + PATH="$PWD/$venv/bin:$PATH" "$venv/bin/python" -c "pass" >/dev/null 2>&1 || true +} + # --- per-PM native install (the hook, if configured, fires here) ------ run_install() { case "$SM_PM" in @@ -281,8 +323,16 @@ run_install() { pnpm) pnpm install --no-frozen-lockfile ;; bun) bun add "$SM_PACKAGE@$SM_VERSION" ;; deno) deno install --allow-scripts ;; - pip) python3 -m venv venv && ./venv/bin/pip install --disable-pip-version-check --quiet --no-cache-dir "$SM_PACKAGE==$SM_VERSION" ;; - uv) uv venv venv && uv pip install --python venv/bin/python --quiet "$SM_PACKAGE==$SM_VERSION" ;; + pip) + python3 -m venv venv + pth_install_into_venv venv pip + ./venv/bin/pip install --disable-pip-version-check --quiet --no-cache-dir "$SM_PACKAGE==$SM_VERSION" + pth_trigger venv ;; + uv) + uv venv venv + pth_install_into_venv venv uv + uv pip install --python venv/bin/python --quiet "$SM_PACKAGE==$SM_VERSION" + pth_trigger venv ;; poetry) poetry config virtualenvs.in-project true --local && poetry add --no-interaction "$SM_PACKAGE@$SM_VERSION" ;; pdm) pdm config python.use_venv true >/dev/null 2>&1; pdm add "$SM_PACKAGE==$SM_VERSION" ;; hatch) HATCH_DATA_DIR="$PWD/.hatch" hatch env create && HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "import ${SM_PACKAGE//-/_}" ;; @@ -302,7 +352,10 @@ resolve_target() { local base; base="$(basename "$rel")" case "$SM_ECOSYSTEM" in npm|deno) printf '%s\n' "$PWD/node_modules/$SM_PACKAGE/$rel" ;; - pypi) find "$PWD" -name "$base" 2>/dev/null | head -1 ;; + # Exclude vendored copies (pip/setuptools bundle their own six.py under + # */_vendor/*); the patch lands in the installed package at the + # site-packages root. 
+ pypi) find "$PWD" -name "$base" -not -path '*/_vendor/*' 2>/dev/null | head -1 ;; cargo) find "${CARGO_HOME:-$HOME/.cargo}/registry/src" -path "*/${SM_PACKAGE}-${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; gem) find "$PWD/vendor" -path "*/${SM_PACKAGE}-${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; golang) local gmc; gmc="$(go env GOMODCACHE 2>/dev/null || echo "${GOPATH:-$HOME/go}/pkg/mod")"; find "$gmc" -path "*/$(basename "$SM_PACKAGE")@${SM_VERSION}/${rel}" 2>/dev/null | head -1 ;; @@ -443,7 +496,7 @@ resolve_targets() { fi case "$SM_ECOSYSTEM" in npm|deno|monorepo) find "$PWD" -path "*/node_modules/$SM_PACKAGE/$rel" 2>/dev/null ;; - pypi) find "$PWD" -name "$base" 2>/dev/null ;; + pypi) find "$PWD" -name "$base" -not -path '*/_vendor/*' 2>/dev/null ;; *) resolve_target ;; esac } @@ -484,6 +537,9 @@ fi # "1". export SOCKET_OFFLINE=true SOCKET_FORCE=true SOCKET_API_TOKEN=fake SOCKET_ORG_SLUG=test-org export SOCKET_TELEMETRY_DISABLED=1 SOCKET_EXPERIMENTAL_MAVEN=1 SOCKET_EXPERIMENTAL_NUGET=1 +# Isolate the pypi `.pth` hook's change-detection stamp per case so runs +# don't bleed into each other (the stamp lives under XDG_CACHE_HOME). +export XDG_CACHE_HOME="$WORKDIR/.cache" # NOTE: deliberately do NOT export SOCKET_CWD. 
The install hook's apply # must run with whatever cwd the package manager sets for the lifecycle # script — the project root for a single project, and the *member* dir From b52742790030af0c97b756e711ad585de159d327 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Tue, 2 Jun 2026 18:29:47 -0400 Subject: [PATCH 2/5] test(setup-matrix): make the pypi .pth matrix Docker-runnable; add hatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validated the pypi setup matrix in Docker (rebuilt image from this branch) and fixed two real bugs that broke the Docker path: - Hook wheel was mounted into the container under a non-PEP-427 filename (/tmp/socket_patch_hook.whl), which pip/uv/pdm reject ("not a valid wheel filename"). Mount it preserving its real {name}-{ver}-{tags}.whl filename. - verify ran the patched module via the package manager's `run` wrapper (`uv run python`, `poetry run python`, `pdm run python`), which re-resolves the project — now including the committed (and, in the hermetic test, unpublished) socket-patch-hook dependency → resolve failure unrelated to whether the file is patched. Run the file with the in-project venv interpreter directly instead (faithful: still executes the patched code + checks the marker). Result: pip, uv and hatch pass in Docker (flip hatch to baseline_supported). poetry/pdm stay documented gaps: their add/install/run re-resolve the manifest against an index the hermetic test can't provide; the .pth mechanism itself is package-manager-agnostic (proven by pip/uv/hatch).
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/setup_matrix_common/mod.rs | 13 ++++-- .../tests/setup_matrix_pypi.rs | 19 +++++---- tests/setup_matrix/matrix.json | 4 +- tests/setup_matrix/run-case.sh | 41 ++++++++++++++++--- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs index 63e8e6da..050e2399 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_common/mod.rs @@ -255,13 +255,20 @@ fn run_case(case: &Case) -> RunResult { for (k, v) in &env { cmd.args(["-e", &format!("{k}={v}")]); } - // Mount the hook wheel into the container at a fixed path. + // Mount the hook wheel into the container, PRESERVING its PEP 427 + // filename (pip/uv/pdm reject a wheel whose filename isn't a valid + // `{name}-{ver}-{tags}.whl`, so we must not rename it on mount). if let Some(w) = &wheel { + let name = w + .file_name() + .and_then(|n| n.to_str()) + .expect("hook wheel filename"); + let dest = format!("/tmp/{name}"); cmd.args([ "-v", - &format!("{}:/tmp/socket_patch_hook.whl:ro", w.display()), + &format!("{}:{}:ro", w.display(), dest), "-e", - "SOCKET_PATCH_HOOK_WHEEL=/tmp/socket_patch_hook.whl", + &format!("SOCKET_PATCH_HOOK_WHEEL={dest}"), ]); } cmd.arg(format!("socket-patch-test-{}:latest", case.image)); diff --git a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs index 1d58ddc2..9763af79 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs @@ -1,14 +1,17 @@ //! setup-matrix: pypi ecosystem (pip / uv / poetry / pdm / hatch). //! //! Python installers have no native post-install hook, so `socket-patch -//! setup` instead commits a `socket-patch[hook]` dependency whose wheel -//! ships a startup `.pth` that re-applies patches after install -//! 
(package-manager-agnostic). pip and uv are now wired for this, so their -//! `baseline_with_setup` / `alt_content_patchset` cases should APPLY (the -//! harness builds the hook wheel and the driver installs it + fires an -//! interpreter). poetry / pdm / hatch and the nested-workspace layouts are -//! not yet wired (BASELINE GAP) — a follow-up. The negative-control / -//! empty / wrong-target cases must still NOT apply. +//! setup` instead commits a `socket-patch-hook` dependency whose wheel ships +//! a startup `.pth` that re-applies patches after install +//! (package-manager-agnostic). pip, uv and hatch are wired + verified in +//! Docker: their `baseline_with_setup` / `alt_content_patchset` cases APPLY +//! (the harness builds the hook wheel and the driver installs it + fires an +//! interpreter). poetry / pdm are resolver-based — their `add`/`install`/`run` +//! re-resolve the whole manifest (now incl. the committed `socket-patch-hook`) +//! against a package index, which the hermetic test can't provide, so they +//! remain BASELINE GAPs (the mechanism is PM-agnostic and proven by the +//! others). Nested-workspace layouts are also still gaps. The negative-control +//! / empty / wrong-target cases must NOT apply for any of them. //! //! 
Run: `cargo test -p socket-patch-cli --features setup-e2e --test setup_matrix_pypi` #![cfg(feature = "setup-e2e")] diff --git a/tests/setup_matrix/matrix.json b/tests/setup_matrix/matrix.json index a457cc72..1776f878 100644 --- a/tests/setup_matrix/matrix.json +++ b/tests/setup_matrix/matrix.json @@ -116,8 +116,8 @@ "manifest_key": "six.py", "apply_ecosystems": "pypi" }, { - "ecosystem": "pypi", "pm": "hatch", "image": "pypi", "hook_family": "none", - "baseline_supported": false, + "ecosystem": "pypi", "pm": "hatch", "image": "pypi", "hook_family": "pth", + "baseline_supported": true, "package": "six", "version": "1.16.0", "purl": "pkg:pypi/six@1.16.0", "manifest_key": "six.py", "apply_ecosystems": "pypi" }, diff --git a/tests/setup_matrix/run-case.sh b/tests/setup_matrix/run-case.sh index 88f80a59..c6448818 100755 --- a/tests/setup_matrix/run-case.sh +++ b/tests/setup_matrix/run-case.sh @@ -322,8 +322,14 @@ pth_install_into_venv() { # $1=venv dir $2=pip|uv [ -n "${SOCKET_PATCH_HOOK_WHEEL:-}" ] && [ -f "${SOCKET_PATCH_HOOK_WHEEL:-}" ] || { note "no SOCKET_PATCH_HOOK_WHEEL; pth hook not installed (gap)"; return 0; } case "$flavor" in - pip) "$venv/bin/pip" install --quiet --no-deps "$SOCKET_PATCH_HOOK_WHEEL" || note "hook wheel install failed" ;; uv) uv pip install --python "$venv/bin/python" --quiet --no-deps "$SOCKET_PATCH_HOOK_WHEEL" || note "hook wheel install failed" ;; + *) # pip-compatible venv (pip / poetry / pdm). Use the venv's own + # python so it targets that interpreter's site-packages; ensurepip + # if the venv was created without pip. + "$venv/bin/python" -m pip --version >/dev/null 2>&1 \ + || "$venv/bin/python" -m ensurepip --upgrade >/dev/null 2>&1 || true + "$venv/bin/python" -m pip install --quiet --no-deps "$SOCKET_PATCH_HOOK_WHEEL" \ + || note "hook wheel install failed" ;; esac # The hook resolves `socket-patch` off PATH (it isn't pip-installed here). 
ln -sf "$SP_BIN" "$venv/bin/socket-patch" 2>/dev/null || true @@ -355,9 +361,24 @@ run_install() { pth_install_into_venv venv uv uv pip install --python venv/bin/python --quiet "$SM_PACKAGE==$SM_VERSION" pth_trigger venv ;; + # poetry / pdm are resolver-based: `add` re-resolves the whole manifest + # (which setup edited to add `socket-patch-hook`) against a package index. + # In this hermetic test the hook wheel isn't published, so resolution + # fails — these PMs can't be exercised without a local index, so they stay + # documented gaps (baseline_supported:false). The .pth mechanism itself is + # package-manager-agnostic (proven by pip/uv/hatch). poetry) poetry config virtualenvs.in-project true --local && poetry add --no-interaction "$SM_PACKAGE@$SM_VERSION" ;; pdm) pdm config python.use_venv true >/dev/null 2>&1; pdm add "$SM_PACKAGE==$SM_VERSION" ;; - hatch) HATCH_DATA_DIR="$PWD/.hatch" hatch env create && HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "import ${SM_PACKAGE//-/_}" ;; + hatch) + HATCH_DATA_DIR="$PWD/.hatch" hatch env create + HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "import ${SM_PACKAGE//-/_}" + # hatch manages its env outside .venv; install the hook + fire an + # interpreter through `hatch run`. + if [ "$SM_RUN_SETUP" = 1 ] && [ -n "${SOCKET_PATCH_HOOK_WHEEL:-}" ] && [ -f "${SOCKET_PATCH_HOOK_WHEEL:-}" ]; then + HATCH_DATA_DIR="$PWD/.hatch" hatch run pip install --no-deps "$SOCKET_PATCH_HOOK_WHEEL" \ + || note "hatch hook wheel install failed" + fi + HATCH_DATA_DIR="$PWD/.hatch" hatch run python -c "pass" || true ;; cargo) cargo fetch ;; bundler) bundle config set --local path vendor/bundle && bundle install ;; go) GOFLAGS=-mod=mod go mod download "$SM_PACKAGE@$SM_VERSION" ;; @@ -556,11 +577,19 @@ run_file() { # $1 = absolute path to the resolved package file esac ;; deno) deno run -A "$1" ;; pypi) + # Run the patched module with the in-project venv interpreter directly. 
+ # Going through the package manager's `run` wrapper (`uv run`, `poetry run`, + # `pdm run`) re-resolves the project, which (after setup) + # includes the committed `socket-patch-hook` dependency — unpublished in + # this hermetic test, so the resolve would fail for a reason unrelated to + # whether six.py is patched. Direct execution faithfully runs the on-disk + # patched file and observes its marker. (hatch manages its env outside + # an in-project .venv, and its skip-install env doesn't re-resolve, so it + # keeps using `hatch run`.) case "$SM_PM" in - uv) uv run python "$1" ;; - poetry) poetry run python "$1" ;; - pdm) pdm run python "$1" ;; - hatch) hatch run python "$1" ;; + uv) ./venv/bin/python "$1" ;; + poetry) ./.venv/bin/python "$1" ;; + pdm) ./.venv/bin/python "$1" ;; + hatch) HATCH_DATA_DIR="$PWD/.hatch" hatch run python "$1" ;; pip) ./venv/bin/python "$1" ;; *) python3 "$1" ;; esac ;; From 43f2d62d7c3ceb11aaba291cec3aeb4258e36d62 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Tue, 2 Jun 2026 21:26:55 -0400 Subject: [PATCH 3/5] feat(setup,release): commit the socket-patch[hook] extra; publish socket-patch-hook to PyPI setup now commits the `socket-patch[hook]` extra (one line that pulls both the CLI and the socket-patch-hook .pth wheel) instead of a bare socket-patch-hook dep. PEP 621 / requirements.txt get the literal `socket-patch[hook]`; classic Poetry can't express an extra as a bare key, so edit.rs writes the equivalent `socket-patch = { extras = ["hook"] }`, merged into any existing socket-patch dep with its version/source preserved. The separate socket-patch-hook wheel remains the irreducible .pth carrier behind the extra (an extra can only pull a dependency, not ship a file); users never reference it directly. Release: publish socket-patch and socket-patch-hook as separate PyPI projects, each from its own dist dir so trusted publishing mints a correctly-scoped OIDC token per project. (socket-patch-hook needs its own pending trusted publisher registered on PyPI before the first release.)
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 21 ++- .../tests/setup_pth_invariants.rs | 10 +- .../socket-patch-core/src/pth_hook/detect.rs | 19 ++- crates/socket-patch-core/src/pth_hook/edit.rs | 148 ++++++++++++------ pypi/socket-patch-hook/README.md | 12 +- 5 files changed, 143 insertions(+), 67 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a9f1844..d8449c29 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -421,12 +421,27 @@ jobs: - name: Copy README for PyPI package run: cp README.md pypi/socket-patch/README.md - - name: Build platform wheels + - name: Build wheels (platform socket-patch + pure-python socket-patch-hook) run: | VERSION="${{ needs.version.outputs.version }}" + # Builds the platform-tagged socket-patch wheels AND the pure-python + # socket-patch-hook wheel (the .pth carrier behind `socket-patch[hook]`). python scripts/build-pypi-wheels.py --version "$VERSION" --artifacts artifacts --dist dist - - - name: Publish to PyPI + # socket-patch and socket-patch-hook are two distinct PyPI projects. + # Publish each from its own dir so trusted publishing mints an OIDC + # token scoped to the right project (one upload spanning both projects + # can be rejected). Each project needs its own trusted publisher on + # PyPI; register a "pending" publisher for socket-patch-hook before the + # first release (repo + workflow `release.yml` + this environment). 
+ mkdir -p dist-hook + mv dist/socket_patch_hook-*.whl dist-hook/ + + - name: Publish socket-patch to PyPI uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: packages-dir: dist/ + + - name: Publish socket-patch-hook to PyPI + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 + with: + packages-dir: dist-hook/ diff --git a/crates/socket-patch-cli/tests/setup_pth_invariants.rs b/crates/socket-patch-cli/tests/setup_pth_invariants.rs index 72964dae..bc2e5acd 100644 --- a/crates/socket-patch-cli/tests/setup_pth_invariants.rs +++ b/crates/socket-patch-cli/tests/setup_pth_invariants.rs @@ -50,7 +50,7 @@ fn pip_requirements_gets_hook_dep() { assert_eq!(entry["kind"], "pth"); let req = read(&tmp.path().join("requirements.txt")); - assert!(req.contains("socket-patch-hook"), "got:\n{req}"); + assert!(req.contains("socket-patch[hook]"), "got:\n{req}"); assert!(req.contains("requests==2.31.0"), "must preserve existing deps"); // The committed dependency is the source of truth — no separate marker file. 
@@ -72,7 +72,7 @@ fn uv_pyproject_array_edited_and_format_preserved() { assert_eq!(v["pythonPackageManager"], "uv"); let py = read(&tmp.path().join("pyproject.toml")); - assert!(py.contains("socket-patch-hook")); + assert!(py.contains("socket-patch[hook]")); assert!(py.contains("[tool.uv]"), "unrelated tables preserved"); assert!(py.contains("name = \"x\"")); } @@ -88,7 +88,7 @@ fn idempotent_second_run_reports_already_configured() { assert_eq!(v["status"], "already_configured"); let req = read(&tmp.path().join("requirements.txt")); assert_eq!( - req.matches("socket-patch-hook").count(), + req.matches("socket-patch[hook]").count(), 1, "must not duplicate the hook dependency" ); @@ -120,7 +120,7 @@ fn remove_reverses_dep() { let (code, v) = run_setup(tmp.path(), &["--remove"]); assert_eq!(code, 0, "payload={v}"); let req = read(&tmp.path().join("requirements.txt")); - assert!(!req.contains("socket-patch-hook"), "got:\n{req}"); + assert!(!req.contains("socket-patch[hook]"), "got:\n{req}"); assert!(req.contains("requests")); } @@ -149,7 +149,7 @@ fn polyglot_configures_both_npm_and_python() { assert!(kinds.contains(&"pth")); assert!(read(&tmp.path().join("package.json")).contains("socket-patch")); - assert!(read(&tmp.path().join("pyproject.toml")).contains("socket-patch-hook")); + assert!(read(&tmp.path().join("pyproject.toml")).contains("socket-patch[hook]")); } #[test] diff --git a/crates/socket-patch-core/src/pth_hook/detect.rs b/crates/socket-patch-core/src/pth_hook/detect.rs index 525f4b39..9756f2f3 100644 --- a/crates/socket-patch-core/src/pth_hook/detect.rs +++ b/crates/socket-patch-core/src/pth_hook/detect.rs @@ -2,16 +2,19 @@ use std::path::Path; -/// The dependency `setup` adds to activate the hook: the standalone, version- -/// agnostic hook wheel (it has no dependency on the CLI — the hook runs whatever -/// `socket-patch` is on PATH). A bare token so the committed line never needs a -/// version bump. 
-pub const HOOK_DEP: &str = "socket-patch-hook"; +/// The dependency `setup` adds (PEP 508 form, used for `requirements.txt` and +/// PEP 621 `[project].dependencies`): the `socket-patch[hook]` extra, which +/// pulls both the socket-patch CLI and the socket-patch-hook wheel (the `.pth` +/// carrier). A single, familiar line. Classic Poetry can't express an extra as +/// a bare key, so [`super::edit`] emits the equivalent +/// `socket-patch = { extras = ["hook"] }` there instead. +pub const HOOK_DEP: &str = "socket-patch[hook]"; /// Substrings (space-insensitive, lower-cased) that mean the hook is already -/// declared — the standalone wheel, the `socket-patch[hook]` convenience extra, -/// or the underscore spelling. -const HOOK_MARKERS: &[&str] = &["socket-patch-hook", "socket_patch_hook", "socket-patch[hook]"]; +/// declared — the `socket-patch[hook]` extra, the standalone wheel, or the +/// underscore spelling. (The Poetry `extras = ["hook"]` form is detected +/// structurally by [`super::edit`], not by this textual check.) +const HOOK_MARKERS: &[&str] = &["socket-patch[hook]", "socket-patch-hook", "socket_patch_hook"]; /// Which Python dependency-management style a project uses. Drives both which /// manifest/table `setup` edits and which lockfile (if any) to refresh. 
diff --git a/crates/socket-patch-core/src/pth_hook/edit.rs b/crates/socket-patch-core/src/pth_hook/edit.rs index 0a6c9e0c..11c89cc3 100644 --- a/crates/socket-patch-core/src/pth_hook/edit.rs +++ b/crates/socket-patch-core/src/pth_hook/edit.rs @@ -14,7 +14,7 @@ use std::path::Path; use tokio::fs; -use toml_edit::{Array, DocumentMut, Item, Table, Value}; +use toml_edit::{Array, DocumentMut, InlineTable, Item, Table, Value}; use super::detect::{deps_contain_hook, spec_is_hook, HOOK_DEP}; @@ -287,13 +287,33 @@ fn poetry_add(doc: &mut DocumentMut) -> Result { let poetry = ensure_table(tool, "poetry", true)?; let deps = ensure_table(poetry, "dependencies", false)?; - // The hook is a standalone, version-agnostic dependency — add it as its own - // key rather than mutating the user's `socket-patch` entry. `"*"` because - // the hook needs no specific version (it runs whatever CLI is on PATH). + // Classic Poetry can't express `socket-patch[hook]` as a key, so declare + // the equivalent: `socket-patch` carrying the `hook` extra. Already wired + // if a bare `socket-patch-hook` key exists or the extra is already present. if deps.contains_key("socket-patch-hook") { return Ok(false); } - deps.insert("socket-patch-hook", Item::Value(Value::from("*"))); + if let Some(item) = deps.get_mut("socket-patch") { + if item_has_hook_extra(item) { + return Ok(false); + } + // An existing `socket-patch` dep (bare string or a table): merge the + // `hook` extra in place, preserving its version / source / markers. 
+ if let Some(tbl) = item.as_table_like_mut() { + let mut extras = tbl + .get("extras") + .and_then(Item::as_array) + .cloned() + .unwrap_or_default(); + extras.push("hook"); + tbl.insert("extras", Item::Value(Value::Array(extras))); + } else { + let version = item.as_str().map(str::to_string).unwrap_or_else(|| "*".to_string()); + deps.insert("socket-patch", Item::Value(hook_inline_table(&version))); + } + return Ok(true); + } + deps.insert("socket-patch", Item::Value(hook_inline_table("*"))); Ok(true) } @@ -309,7 +329,47 @@ fn poetry_remove(doc: &mut DocumentMut) -> bool { Some(d) => d, None => return false, }; - deps.remove("socket-patch-hook").is_some() + + let mut changed = false; + // Drop a legacy bare `socket-patch-hook` key if present. + if deps.remove("socket-patch-hook").is_some() { + changed = true; + } + // Strip the `hook` extra from a `socket-patch` dep table, leaving the rest + // of the spec intact. + if let Some(tbl) = deps.get_mut("socket-patch").and_then(Item::as_table_like_mut) { + if let Some(extras) = tbl.get_mut("extras").and_then(Item::as_array_mut) { + let before = extras.len(); + extras.retain(|v| v.as_str() != Some("hook")); + if extras.len() != before { + changed = true; + } + if extras.is_empty() { + tbl.remove("extras"); + } + } + } + changed +} + +/// Build `{ version = "", extras = ["hook"] }`. +fn hook_inline_table(version: &str) -> Value { + let mut it = InlineTable::new(); + it.insert("version", Value::from(version)); + let mut extras = Array::new(); + extras.push("hook"); + it.insert("extras", Value::Array(extras)); + Value::InlineTable(it) +} + +/// True if a dependency item (inline table or sub-table) already carries the +/// `hook` extra. 
+fn item_has_hook_extra(item: &Item) -> bool { + item.as_table_like() + .and_then(|t| t.get("extras")) + .and_then(Item::as_array) + .map(|a| a.iter().any(|v| v.as_str() == Some("hook"))) + .unwrap_or(false) } #[cfg(test)] @@ -322,27 +382,27 @@ mod tests { fn test_requirements_add() { let out = requirements_add("requests==2.31.0\n").unwrap().unwrap(); assert!(out.contains("requests==2.31.0")); - assert!(out.contains("socket-patch-hook")); + assert!(out.contains("socket-patch[hook]")); assert!(out.ends_with('\n')); } #[test] fn test_requirements_add_no_trailing_newline() { let out = requirements_add("requests").unwrap().unwrap(); - assert_eq!(out, "requests\nsocket-patch-hook\n"); + assert_eq!(out, "requests\nsocket-patch[hook]\n"); } #[test] fn test_requirements_add_idempotent() { - // Both the standalone wheel and the legacy `[hook]` extra are recognized. + // The extra, the standalone wheel, and a pinned variant are all recognized. + assert!(requirements_add("socket-patch[hook]\n").unwrap().is_none()); assert!(requirements_add("socket-patch-hook\n").unwrap().is_none()); assert!(requirements_add("socket-patch-hook==3.3.0\n").unwrap().is_none()); - assert!(requirements_add("socket-patch[hook]\n").unwrap().is_none()); } #[test] fn test_requirements_remove() { - let out = requirements_remove("requests\nsocket-patch-hook\n") + let out = requirements_remove("requests\nsocket-patch[hook]\n") .unwrap() .unwrap(); assert_eq!(out, "requests\n"); @@ -359,7 +419,7 @@ mod tests { fn test_pep621_add_to_existing_array() { let toml = "[project]\nname = \"x\"\ndependencies = [\"requests\"]\n"; let out = pyproject_add(toml).unwrap().unwrap(); - assert!(out.contains("socket-patch-hook")); + assert!(out.contains("socket-patch[hook]")); assert!(out.contains("requests")); // Re-parse to confirm validity + idempotency. 
assert!(pyproject_add(&out).unwrap().is_none()); @@ -371,7 +431,7 @@ mod tests { let out = pyproject_add(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); let deps = doc["project"]["dependencies"].as_array().unwrap(); - assert!(deps.iter().any(|v| v.as_str() == Some("socket-patch-hook"))); + assert!(deps.iter().any(|v| v.as_str() == Some("socket-patch[hook]"))); } #[test] @@ -381,79 +441,74 @@ mod tests { assert!(out.contains("[build-system]")); assert!(out.contains("version = \"1.0\"")); assert!(out.contains("requests")); - assert!(out.contains("socket-patch-hook")); + assert!(out.contains("socket-patch[hook]")); } #[test] fn test_pep621_remove() { - let toml = "[project]\ndependencies = [\"requests\", \"socket-patch-hook\"]\n"; + let toml = "[project]\ndependencies = [\"requests\", \"socket-patch[hook]\"]\n"; let out = pyproject_remove(toml).unwrap().unwrap(); - assert!(!out.contains("socket-patch-hook")); + assert!(!out.contains("socket-patch[hook]")); assert!(out.contains("requests")); } - // ── pyproject Poetry (standalone hook key, no extras-merging) ───── + // ── pyproject Poetry (the `socket-patch[hook]` equivalent: the + // `socket-patch` dep carrying the `hook` extra) ───────────────── #[test] - fn test_poetry_add_new_key() { + fn test_poetry_add_new_dep() { let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n"; let out = pyproject_add(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); - assert_eq!( - doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), - Some("*") + assert!( + item_has_hook_extra(&doc["tool"]["poetry"]["dependencies"]["socket-patch"]), + "poetry dep must carry the hook extra; got:\n{out}" ); // Idempotent. assert!(pyproject_add(&out).unwrap().is_none()); } #[test] - fn test_poetry_leaves_existing_socket_patch_untouched() { - // An existing `socket-patch` dependency must NOT be mutated; we only add - // the standalone `socket-patch-hook` key. 
+ fn test_poetry_merges_extra_into_existing_dep() { + // An existing `socket-patch = "^3.3.0"` gains the hook extra, version kept. let toml = "[tool.poetry]\nname = \"x\"\n[tool.poetry.dependencies]\nsocket-patch = \"^3.3.0\"\n"; let out = pyproject_add(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); + let item = &doc["tool"]["poetry"]["dependencies"]["socket-patch"]; + assert!(item_has_hook_extra(item), "hook extra must be added"); assert_eq!( - doc["tool"]["poetry"]["dependencies"]["socket-patch"].as_str(), + item.as_table_like().and_then(|t| t.get("version")).and_then(Item::as_str), Some("^3.3.0"), - "existing socket-patch dep must be left intact" - ); - assert_eq!( - doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), - Some("*") + "existing version must be preserved" ); } #[test] fn test_poetry_subtable_dependency_preserved() { - // A `[tool.poetry.dependencies.socket-patch]` sub-table (version/source) - // must survive untouched; only the standalone hook key is added. + // A `[tool.poetry.dependencies.socket-patch]` sub-table gains the hook + // extra while keeping its version / source. let toml = "[tool.poetry.dependencies.socket-patch]\nversion = \"^3.3.0\"\ngit = \"https://example.com/x.git\"\n"; let out = pyproject_add(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); let sp = &doc["tool"]["poetry"]["dependencies"]["socket-patch"]; + assert!(item_has_hook_extra(sp), "hook extra must be added"); assert_eq!( sp.as_table_like().and_then(|t| t.get("git")).and_then(Item::as_str), Some("https://example.com/x.git"), "sub-table keys must survive" ); - assert_eq!( - doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), - Some("*") - ); // Idempotent. 
assert!(pyproject_add(&out).unwrap().is_none()); } #[test] - fn test_poetry_remove() { - let toml = "[tool.poetry.dependencies]\nsocket-patch-hook = \"*\"\npython = \"^3.9\"\n"; + fn test_poetry_remove_strips_extra() { + let toml = "[tool.poetry.dependencies]\nsocket-patch = {version = \"*\", extras = [\"hook\"]}\npython = \"^3.9\"\n"; let out = pyproject_remove(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); - assert!(doc["tool"]["poetry"]["dependencies"] - .get("socket-patch-hook") - .is_none()); + assert!(!item_has_hook_extra( + &doc["tool"]["poetry"]["dependencies"]["socket-patch"] + )); assert!(doc["tool"]["poetry"]["dependencies"].get("python").is_some()); } @@ -467,7 +522,7 @@ mod tests { .as_array() .unwrap() .iter() - .any(|v| v.as_str() == Some("socket-patch-hook"))); + .any(|v| v.as_str() == Some("socket-patch[hook]"))); } #[test] @@ -483,9 +538,8 @@ mod tests { let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n\n[project.urls]\nHome = \"https://example.com\"\n"; let out = pyproject_add(toml).unwrap().unwrap(); let doc = out.parse::().unwrap(); - assert_eq!( - doc["tool"]["poetry"]["dependencies"]["socket-patch-hook"].as_str(), - Some("*"), + assert!( + item_has_hook_extra(&doc["tool"]["poetry"]["dependencies"]["socket-patch"]), "must edit the poetry table, not create [project].dependencies; got:\n{out}" ); assert!(doc.get("project").and_then(|p| p.get("dependencies")).is_none()); @@ -494,7 +548,7 @@ mod tests { #[test] fn test_requirements_preserves_crlf() { let out = requirements_add("requests\r\n").unwrap().unwrap(); - assert_eq!(out, "requests\r\nsocket-patch-hook\r\n"); + assert_eq!(out, "requests\r\nsocket-patch[hook]\r\n"); let removed = requirements_remove(&out).unwrap().unwrap(); assert_eq!(removed, "requests\r\n"); } @@ -508,7 +562,7 @@ mod tests { let res = add_hook_dependency(&req, ManifestKind::Requirements, false).await; assert_eq!(res.status, PthStatus::Updated); let body = 
tokio::fs::read_to_string(&req).await.unwrap(); - assert_eq!(body, "socket-patch-hook\n"); + assert_eq!(body, "socket-patch[hook]\n"); } #[tokio::test] diff --git a/pypi/socket-patch-hook/README.md b/pypi/socket-patch-hook/README.md index b53b4e05..c4d858e1 100644 --- a/pypi/socket-patch-hook/README.md +++ b/pypi/socket-patch-hook/README.md @@ -40,10 +40,14 @@ Don't add this by hand. Run, in your project: socket-patch setup ``` -That commits a `socket-patch-hook` dependency to your repo (the dependency -itself is the source of truth — there's no separate marker file), so the hook -activates automatically in CI after install. Remove it with `socket-patch setup ---remove` followed by `pip uninstall socket-patch-hook`. +That commits a `socket-patch[hook]` dependency to your repo — the `[hook]` +extra on the main `socket-patch` package, which pulls in both the CLI and this +wheel (you never reference `socket-patch-hook` directly). The committed +dependency is the source of truth — there's no separate marker file. The hook +then activates automatically in CI after install. Remove it with `socket-patch +setup --remove` followed by `pip uninstall socket-patch-hook`. (Classic Poetry +can't express an extra as a bare key, so there `setup` writes the equivalent +`socket-patch = { extras = ["hook"] }`.) ## Disabling at runtime From b935c185726a70ecd75170f319050538dac7c699 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Tue, 2 Jun 2026 21:29:59 -0400 Subject: [PATCH 4/5] docs(socket-patch-hook): drop the "version-agnostic / provision the CLI" section It contradicted the recommended flow: setup commits `socket-patch[hook]`, which pulls the socket-patch package, so "no dependency / provision the CLI yourself" was misleading. The README now just covers how it works, activating it via `socket-patch setup`, and the disable switch. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pypi/socket-patch-hook/README.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pypi/socket-patch-hook/README.md b/pypi/socket-patch-hook/README.md index c4d858e1..64fd15a4 100644 --- a/pypi/socket-patch-hook/README.md +++ b/pypi/socket-patch-hook/README.md @@ -20,18 +20,6 @@ real patching (hash verification, atomic writes, locking) is done by the Because it rides on Python's interpreter-startup `.pth` mechanism (not on any one installer's hooks), it works the same under every Python package manager. -## Version-agnostic by design - -This package has **no dependency** on `socket-patch`. At runtime it invokes -whatever `socket-patch` CLI is on `PATH` (or pip-installed in the environment), -so the committed `socket-patch-hook` dependency never needs a version bump and -the CLI can be upgraded independently. If no `socket-patch` is found, the hook -silently does nothing. - -Provision the CLI however you like — `pip install socket-patch`, `pipx install -socket-patch`, a system package, or a CI/GitHub Action step. (Or use the -all-in-one `pip install socket-patch[hook]`, which installs both.) - ## Activating it Don't add this by hand. Run, in your project: From 98c4ee635a846f1d0b93f0212eb97f9388762aa6 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Tue, 2 Jun 2026 21:59:47 -0400 Subject: [PATCH 5/5] fix(security): harden the .pth hook + apply path validation; document the model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acted on an adversarial security review of the feature. Fixes: - CRITICAL (apply engine): reject manifest file keys that escape the package directory (absolute paths or `..`). A committed/poisoned .socket/manifest.json could otherwise make `apply` write outside site-packages (arbitrary-file write -> code execution) via pkg_path.join(key). 
New is_safe_relative_subpath() guards apply_package_patch (hard abort, not --force-skippable), apply_file_patch, and verify_file_patch. This hardens all callers (apply/scan/rollback), and the auto-running hook made it reachable from a committed manifest. - HIGH (hook): anchor project discovery to the virtualenv (sys.prefix) instead of cwd, so a `python` started from an unrelated dir can't pull in a foreign .socket/manifest.json (cross-project contamination). Falls back to cwd only for non-venv (system/container) interpreters. - HIGH (hook): resolve the socket-patch binary from the installed socket_patch package first, then PATH — avoids running a malicious `socket-patch` placed earlier on PATH at every interpreter startup. Also: add an explanatory comment block to the .pth (purpose, disable, remove, link; site.py ignores `#` lines), and document the hook's safety model + opt-out / disable in the root README `setup` section and the socket-patch-hook README. Low-severity findings (stamp-poisoning needs cache write access; the release pending-publisher is a one-time manual step; the 120s apply timeout is an intentional fail-open backstop) are accepted/documented, not code-changed. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 37 ++++- crates/socket-patch-core/src/patch/apply.rs | 110 ++++++++++++ pypi/socket-patch-hook/README.md | 19 +++ pypi/socket-patch-hook/socket_patch_hook.pth | 12 ++ .../socket_patch_hook/__init__.py | 73 ++++---- pypi/socket-patch-hook/test_hook.py | 156 ++++++++++++------ 6 files changed, 320 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 6f4b51aa..a359395d 100644 --- a/README.md +++ b/README.md @@ -383,18 +383,44 @@ socket-patch repair --json ### `setup` -Configure `package.json` postinstall scripts to automatically apply patches after `npm install`. +Configure your project so patches are **re-applied automatically after install** — no manual `socket-patch apply` step in CI. 
`setup` is a one-time operation: run it, commit the change together with your `.socket/` patches, and every later install handles the rest. It is strictly **opt-in** — nothing is hooked unless you run `setup` and commit the result. + +- **npm / yarn / pnpm / bun** — writes a `postinstall` script into `package.json` so any install re-applies patches. +- **Python (pip / uv / poetry / pdm / hatch)** — Python has no universal post-install hook, so `setup` instead commits a **`socket-patch[hook]`** dependency (for classic Poetry, the equivalent `socket-patch = { extras = ["hook"] }`). Installing it lays down a startup `.pth` (shipped by the small `socket-patch-hook` wheel) that re-applies your committed `.socket/` patches the next time the interpreter runs. It is package-manager-agnostic (it rides the interpreter, not any one installer) and **fail-open** — a hook error can never break interpreter startup. **Usage:** ```bash -socket-patch setup [options] +socket-patch setup # configure (interactive) +socket-patch setup --check # verify configured; non-zero exit if not (CI gate) +socket-patch setup --remove # revert what setup added ``` -No command-specific options — see [Global Options](#global-options) (`--dry-run`, `--yes`, `--json`, `--cwd` are the relevant ones). +**Command-specific options** (plus all [Global Options](#global-options) — `--dry-run`, `--yes`, `--json`, `--cwd`): +| Flag | Description | +|------|-------------| +| `--check` | Read-only verification that every manifest is configured; exits non-zero if any still needs setup. Never writes (safe in CI). Conflicts with `--remove`. | +| `--remove` | Revert the install hooks `setup` added (npm `package.json` scripts and the Python `socket-patch[hook]` dependency). | + +#### Disabling / opting out (Python hook) + +The Python hook is designed to be easy to skip or remove: + +- **Per interpreter / CI step:** set `SOCKET_PATCH_HOOK=off` (or `SOCKET_NO_HOOK=1`). 
This is checked *before any hook code runs*, so it fully bypasses the hook for that process. +- **Remove from a project:** `socket-patch setup --remove`, then `pip uninstall socket-patch-hook`. +- **Never opted in:** if you don't run `setup`, there is no hook — it is opt-in by design. + +#### What the Python hook does, and its safety model + +On interpreter startup, *only when the set of installed packages changed*, the hook runs `socket-patch apply --offline --ecosystems pypi` for the project that owns the current virtualenv, re-applying only the patches committed in that project's `.socket/`. Specifically: + +- It is **anchored to the virtualenv** it is installed in (not the working directory), so a `python` started from an unrelated directory cannot pull in a foreign `.socket/manifest.json`. Only a non-venv (system / container) interpreter, which has no venv to anchor to, falls back to searching from the working directory. +- It **verifies each file's hash before patching** and **never writes outside the installed package directory** (path-escaping manifest keys are refused). +- It resolves the `socket-patch` binary from the **installed `socket-patch` package** first, falling back to `PATH` only when that package is not installed, so a binary placed earlier on `PATH` cannot shadow the bundled one. +- It runs **offline** (no network at startup) and is **fail-open** (any error is swallowed; it can never abort the interpreter).
**Examples:** ```bash -# Interactive setup +# Interactive setup (npm and/or Python, auto-detected) socket-patch setup # Non-interactive @@ -403,6 +429,9 @@ socket-patch setup -y # Preview changes socket-patch setup --dry-run +# Verify configuration in CI (exits non-zero if not set up) +socket-patch setup --check + # JSON output for scripting socket-patch setup --json -y ``` diff --git a/crates/socket-patch-core/src/patch/apply.rs b/crates/socket-patch-core/src/patch/apply.rs index 761b6694..3fd4249e 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -117,6 +117,28 @@ pub fn normalize_file_path(file_name: &str) -> &str { } } +/// True if a (post-`normalize_file_path`) manifest key is a safe relative path +/// that stays inside the package directory when joined to it. +/// +/// SECURITY: manifest file keys come from a committed `.socket/manifest.json`, +/// which the auto-running install hook applies without explicit user action. An +/// unvalidated key like `../../home/u/.bashrc` or `/etc/cron.d/x` would let a +/// poisoned manifest write OUTSIDE site-packages (arbitrary-file write → code +/// execution) via `pkg_path.join(key)` — `Path::join` discards the base on an +/// absolute key, and `..` components walk out. We reject anything that isn't a +/// plain relative path (no absolute/root/prefix components, no `..`, no NUL). +pub fn is_safe_relative_subpath(normalized: &str) -> bool { + use std::path::Component; + if normalized.is_empty() || normalized.contains('\0') { + return false; + } + let path = Path::new(normalized); + if path.is_absolute() { + return false; + } + path.components().all(|c| matches!(c, Component::Normal(_) | Component::CurDir)) +} + /// Verify a single file can be patched. 
pub async fn verify_file_patch( pkg_path: &Path, @@ -124,6 +146,17 @@ pub async fn verify_file_patch( file_info: &PatchFileInfo, ) -> VerifyResult { let normalized = normalize_file_path(file_name); + // SECURITY: never resolve a key that escapes the package directory. + if !is_safe_relative_subpath(normalized) { + return VerifyResult { + file: file_name.to_string(), + status: VerifyStatus::NotFound, + message: Some("Unsafe patch path (escapes package directory)".to_string()), + current_hash: None, + expected_hash: None, + target_hash: None, + }; + } let filepath = pkg_path.join(normalized); let is_new_file = file_info.before_hash.is_empty(); @@ -297,6 +330,13 @@ pub async fn apply_file_patch( expected_hash: &str, ) -> Result<(), std::io::Error> { let normalized = normalize_file_path(file_name); + // SECURITY: refuse to write through a key that escapes the package dir. + if !is_safe_relative_subpath(normalized) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsafe patch path (escapes package directory): {file_name}"), + )); + } let filepath = pkg_path.join(normalized); // Hash-check the in-memory content BEFORE touching disk. Removes @@ -608,6 +648,18 @@ pub async fn apply_package_patch( // First, verify all files for (file_name, file_info) in files { + // SECURITY: reject any manifest key that would escape the package dir + // (absolute path or `..`). Abort the whole package apply before any + // disk write — NOT skippable by `--force`, since a path escape is never + // a legitimate patch target. 
+ if !is_safe_relative_subpath(normalize_file_path(file_name)) { + result.success = false; + result.error = Some(format!( + "Refusing patch with unsafe file path (escapes package directory): {file_name}" + )); + return result; + } + let mut verify_result = verify_file_patch(pkg_path, file_name, file_info).await; if verify_result.status != VerifyStatus::Ready @@ -948,6 +1000,64 @@ mod tests { ); } + #[test] + fn test_is_safe_relative_subpath() { + // Legitimate manifest keys (post-normalize) are accepted. + for ok in [ + "six.py", + "index.js", + "lib/server.js", + "pydantic_ai/models/openai.py", + "./a.py", + ] { + assert!(is_safe_relative_subpath(ok), "should accept {ok:?}"); + } + // Path escapes are rejected on every platform. + for bad in [ + "../etc/passwd", + "../../home/u/.bashrc", + "/etc/passwd", + "a/../../b", + "foo/..", + "", + "with\0null", + "/", + ] { + assert!(!is_safe_relative_subpath(bad), "should reject {bad:?}"); + } + // Windows drive/UNC prefixes are absolute only on Windows (on Unix a + // backslash is an ordinary filename char, so the path stays under the + // package dir and is harmless). + #[cfg(windows)] + for bad in ["\\\\server\\share\\x", "C:\\Windows\\x"] { + assert!(!is_safe_relative_subpath(bad), "should reject {bad:?}"); + } + // The `package/`-prefixed escape that previously slipped through: + // `package//etc/passwd` normalizes to `/etc/passwd`. + assert!(!is_safe_relative_subpath(normalize_file_path("package//etc/passwd"))); + } + + #[tokio::test] + async fn test_apply_file_patch_rejects_escaping_path() { + // apply_file_patch must refuse to write outside the package dir even if + // the (attacker-chosen) content hashes to the declared afterHash. 
+ let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("site-packages"); + tokio::fs::create_dir_all(&pkg).await.unwrap(); + let content = b"pwned\n"; + let after = compute_git_sha256_from_bytes(content); + for key in ["../escape.txt", "../../etc/whatever", "/abs/whatever"] { + let res = apply_file_patch(&pkg, key, content, &after).await; + assert!(res.is_err(), "must reject {key:?}"); + assert!( + res.unwrap_err().to_string().contains("Unsafe patch path"), + "wrong error for {key:?}" + ); + } + // Nothing was written outside the package dir. + assert!(!dir.path().join("escape.txt").exists()); + } + #[tokio::test] async fn test_verify_file_patch_not_found() { let dir = tempfile::tempdir().unwrap(); diff --git a/pypi/socket-patch-hook/README.md b/pypi/socket-patch-hook/README.md index 64fd15a4..3058d030 100644 --- a/pypi/socket-patch-hook/README.md +++ b/pypi/socket-patch-hook/README.md @@ -20,6 +20,25 @@ real patching (hash verification, atomic writes, locking) is done by the Because it rides on Python's interpreter-startup `.pth` mechanism (not on any one installer's hooks), it works the same under every Python package manager. +## Safety + +A `.pth` that runs code at startup deserves a careful safety model. This one: + +- **Fail-open** — every code path is wrapped so it can never raise into the + interpreter; the worst outcome of any bug is that patches aren't re-applied. +- **Venv-anchored** — inside a virtualenv it applies only the `.socket/manifest.json` + of the project that owns that virtualenv, never whatever `.socket/` sits above the + current working directory (only a non-venv interpreter falls back to the cwd). +- **Hash-verified, in-tree only** — the underlying `socket-patch apply` verifies + each file's hash before patching and refuses manifest keys that would write + outside the installed package directory. +- **Trusted binary** — it prefers the `socket-patch` binary bundled in the installed + `socket-patch` package and falls back to `PATH` only when that package is absent.
+- **Offline + cheap** — no network at startup; the no-change path is a couple of + syscalls. It only spawns `socket-patch` when installed packages changed. +- **Opt-in + easy off** — present only when a project committed it; disable any + interpreter with `SOCKET_PATCH_HOOK=off`. + ## Activating it Don't add this by hand. Run, in your project: diff --git a/pypi/socket-patch-hook/socket_patch_hook.pth b/pypi/socket-patch-hook/socket_patch_hook.pth index 4c36d642..38c4307f 100644 --- a/pypi/socket-patch-hook/socket_patch_hook.pth +++ b/pypi/socket-patch-hook/socket_patch_hook.pth @@ -1 +1,13 @@ +# socket-patch post-install hook — installed by the `socket-patch-hook` wheel. +# Re-applies this project's committed Socket security patches (.socket/) after a +# pip/uv/poetry/etc. install reverts a patched file. At interpreter startup it +# does a cheap "did the installed packages change?" check and, only then, runs +# `socket-patch apply --offline`. Fail-open: every error is swallowed so it can +# never break interpreter startup, and it does nothing unless this environment's +# project has a committed .socket/manifest.json. +# Disable (this interpreter): SOCKET_PATCH_HOOK=off (or SOCKET_NO_HOOK=1) +# Remove (this project): socket-patch setup --remove then pip uninstall socket-patch-hook +# Details: https://github.com/SocketDev/socket-patch +# (Lines starting with `#` are ignored by Python's site module; the single +# `import` line below is the only code it executes.) 
import os; exec("try:\n import socket_patch_hook as _h; _h.run()\nexcept Exception: pass") if (os.environ.get('SOCKET_PATCH_HOOK','').strip().lower() not in ('off','0','false','no') and os.environ.get('SOCKET_NO_HOOK','').strip().lower() not in ('1','true','yes','on')) else None diff --git a/pypi/socket-patch-hook/socket_patch_hook/__init__.py b/pypi/socket-patch-hook/socket_patch_hook/__init__.py index a09e7bcb..9e3dee76 100644 --- a/pypi/socket-patch-hook/socket_patch_hook/__init__.py +++ b/pypi/socket-patch-hook/socket_patch_hook/__init__.py @@ -62,25 +62,35 @@ def _site_packages_dir(): def _find_project_root(): - """Locate the directory containing ``.socket/manifest.json``. - - Tries, in order: an upward walk from the current working directory, then the - parents of ``VIRTUAL_ENV`` and ``sys.prefix`` (covering the common deploy - shape where the venv lives at ``/.venv``). Returns ``None`` when no - committed manifest is found -- in which case the hook is a no-op, so a wheel - that happens to be installed in an unrelated environment does nothing. + """Locate the project whose committed ``.socket/manifest.json`` this + environment opted into. Returns ``None`` (hook no-ops) if none is found. + + SECURITY — which manifest do we trust? When running inside a virtualenv we + anchor the search to the **venv** (``sys.prefix``), NOT the current working + directory: the committed ``socket-patch[hook]`` dependency installed this + hook into THIS venv, so the owning project is an ancestor of the venv (e.g. + ``/.venv``). Anchoring to the venv ties the patches we apply to the + project that opted in, instead of whatever ``.socket/`` happens to sit above + the cwd — which could belong to an unrelated or hostile parent/sibling + project (a `python` started from elsewhere must not pull in a foreign + manifest). Only when there is no venv (a system / container interpreter, + where there is nothing to anchor to) do we fall back to the cwd. 
""" - starts = [] - try: - starts.append(os.getcwd()) - except OSError: - pass - for env_dir in (os.environ.get("VIRTUAL_ENV"), getattr(sys, "prefix", None)): - if env_dir: - starts.append(os.path.dirname(os.path.abspath(env_dir))) + in_venv = getattr(sys, "prefix", "") != getattr(sys, "base_prefix", getattr(sys, "prefix", "")) + anchors = [] + if in_venv: + anchors.append(sys.prefix) + env_venv = os.environ.get("VIRTUAL_ENV") + if env_venv: + anchors.append(env_venv) + else: + try: + anchors.append(os.getcwd()) + except OSError: + pass seen = set() - for start in starts: + for start in anchors: try: d = os.path.abspath(start) except OSError: @@ -177,22 +187,16 @@ def _write_stamp(path, value): def _resolve_binary(): - """Locate the ``socket-patch`` binary, version-agnostically. - - Prefers whatever ``socket-patch`` is on ``PATH`` (a pip/pipx/system/GitHub - Action-provisioned CLI), then falls back to the binary bundled in a - pip-installed ``socket_patch`` package if one happens to be in the env. The - hook does not depend on the CLI, so either may be absent — in which case we - return ``None`` and the hook no-ops. + """Locate the ``socket-patch`` binary to run. + + SECURITY — order matters. We prefer the binary **bundled in the installed + ``socket_patch`` package** (the one `socket-patch[hook]` pulls in: a + RECORD-tracked file resolved by the dependency solver) and only fall back to + ``PATH`` if that package isn't present. Resolving via ``PATH`` first would + let a malicious ``socket-patch`` placed earlier on ``PATH`` (or `.` on PATH) + be executed at every interpreter startup. Returns ``None`` if neither is + found, in which case the hook no-ops. 
""" - try: - import shutil - - found = shutil.which("socket-patch") - if found: - return found - except Exception: - pass try: import socket_patch @@ -203,7 +207,12 @@ def _resolve_binary(): return path except Exception: pass - return None + try: + import shutil + + return shutil.which("socket-patch") + except Exception: + return None def _apply(binary, project_root): diff --git a/pypi/socket-patch-hook/test_hook.py b/pypi/socket-patch-hook/test_hook.py index f901f40e..e843f981 100644 --- a/pypi/socket-patch-hook/test_hook.py +++ b/pypi/socket-patch-hook/test_hook.py @@ -3,9 +3,10 @@ Run with: ``python -m unittest test_hook`` (no third-party deps required). The overriding contract under test is *safety*: the hook must never raise, must -no-op cheaply when there is nothing to do, and must invoke ``socket-patch -apply`` with the right offline arguments only when the installed distributions -have changed. +no-op cheaply when there is nothing to do, must invoke ``socket-patch apply`` +with the right offline arguments only when the installed distributions have +changed, and must only ever apply the manifest of the project that owns this +environment (never a foreign one above the cwd). """ import os @@ -21,9 +22,9 @@ class HookTestBase(unittest.TestCase): def setUp(self): self._cwd = os.getcwd() - # Isolate env: clear switches + reentrancy + cache redirect. + # Isolate env: clear switches + reentrancy + venv + cache redirect. self._saved_env = dict(os.environ) - for k in ("SOCKET_PATCH_HOOK", "SOCKET_NO_HOOK", hook._REENTRANCY_ENV): + for k in ("SOCKET_PATCH_HOOK", "SOCKET_NO_HOOK", "VIRTUAL_ENV", hook._REENTRANCY_ENV): os.environ.pop(k, None) self._tmp = self._mkdtemp() os.environ["XDG_CACHE_HOME"] = os.path.join(self._tmp, "cache") @@ -57,10 +58,12 @@ def _make_project(self): class TestRunSpawning(HookTestBase): + # These exercise the spawn/guard/stamp logic; project discovery is mocked + # (it has its own tests in TestProjectRootDiscovery). 
def test_applies_when_manifest_present_and_state_changed(self): root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: hook.run() self.assertEqual(run.call_count, 1) @@ -69,96 +72,82 @@ def test_applies_when_manifest_present_and_state_changed(self): self.assertIn("apply", argv) self.assertIn("--offline", argv) self.assertIn("--silent", argv) - # --ecosystems pypi self.assertEqual(argv[argv.index("--ecosystems") + 1], "pypi") - # --cwd self.assertEqual( os.path.realpath(argv[argv.index("--cwd") + 1]), os.path.realpath(root), ) - # --lock-timeout 0 (skip instantly if another apply holds the lock) self.assertEqual(argv[argv.index("--lock-timeout") + 1], "0") - # Re-entrancy guard set in the child env. env = run.call_args[1]["env"] self.assertEqual(env[hook._REENTRANCY_ENV], "1") def test_second_run_is_a_noop_when_state_unchanged(self): root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run", return_value=mock.Mock(returncode=0)) as run: hook.run() # first run applies + writes the stamp (success) hook.run() # second run: fingerprint matches stamp -> skip self.assertEqual(run.call_count, 1) def test_failed_apply_does_not_stamp_so_it_retries(self): - # A non-zero apply (e.g. lost the lock) must NOT be recorded as handled. 
root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run", return_value=mock.Mock(returncode=1)) as run: hook.run() hook.run() self.assertEqual(run.call_count, 2, "a failed apply must be retried next start") def test_noop_without_manifest(self): - root = self._mkdtemp() # no .socket/manifest.json - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=None), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run") as run: hook.run() run.assert_not_called() def test_noop_when_binary_missing(self): root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value=None), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value=None), \ mock.patch("subprocess.run") as run: hook.run() run.assert_not_called() class TestDisableSwitches(HookTestBase): - def test_socket_patch_hook_off(self): + def _run_disabled(self): root = self._make_project() - os.chdir(root) - os.environ["SOCKET_PATCH_HOOK"] = "off" - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run") as run: hook.run() - run.assert_not_called() + return run + + def test_socket_patch_hook_off(self): + os.environ["SOCKET_PATCH_HOOK"] = "off" + self._run_disabled().assert_not_called() def test_socket_no_hook(self): - root = self._make_project() - os.chdir(root) 
os.environ["SOCKET_NO_HOOK"] = "1" - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ - mock.patch("subprocess.run") as run: - hook.run() - run.assert_not_called() + self._run_disabled().assert_not_called() def test_reentrancy_guard(self): - root = self._make_project() - os.chdir(root) os.environ[hook._REENTRANCY_ENV] = "1" - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ - mock.patch("subprocess.run") as run: - hook.run() - run.assert_not_called() + self._run_disabled().assert_not_called() class TestNeverRaises(HookTestBase): def test_run_swallows_resolver_errors(self): root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", side_effect=RuntimeError("boom")): - # Must not propagate. - hook.run() + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", side_effect=RuntimeError("boom")): + hook.run() # must not propagate def test_run_swallows_subprocess_errors(self): root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch("subprocess.run", side_effect=OSError("no such binary")): hook.run() # must not raise @@ -166,25 +155,90 @@ def test_apply_timeout_is_swallowed(self): import subprocess root = self._make_project() - os.chdir(root) - with mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ + with mock.patch.object(hook, "_find_project_root", return_value=root), \ + mock.patch.object(hook, "_resolve_binary", return_value="/fake/socket-patch"), \ mock.patch( "subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="x", timeout=1), ): hook.run() # must not raise + def test_run_swallows_discovery_errors(self): + with 
mock.patch.object(hook, "_find_project_root", side_effect=RuntimeError("boom")), \ + mock.patch("subprocess.run") as run: + hook.run() # must not raise + run.assert_not_called() + + +class TestProjectRootDiscovery(HookTestBase): + """The hook must apply only the manifest of the project that OWNS this + environment — anchored to the venv, not whatever .socket/ sits above cwd.""" + + def _socket(self, d): + os.makedirs(os.path.join(d, ".socket")) + with open(os.path.join(d, ".socket", "manifest.json"), "w") as f: + f.write('{"patches": {}}') + + def test_anchors_to_venv_not_cwd(self): + # venv at /.venv; manifest at ; cwd is elsewhere. + proj = os.path.join(self._tmp, "proj") + self._socket(proj) + venv = os.path.join(proj, ".venv") + elsewhere = os.path.join(self._tmp, "elsewhere") + os.makedirs(elsewhere) + os.chdir(elsewhere) + with mock.patch.object(sys, "prefix", venv), \ + mock.patch.object(sys, "base_prefix", self._tmp): # in_venv = True + got = hook._find_project_root() + self.assertEqual(os.path.realpath(got), os.path.realpath(proj)) + + def test_in_venv_ignores_unrelated_cwd_manifest(self): + # SECURITY: a hostile .socket/ above the cwd must NOT be picked up when + # running inside a venv whose project committed no manifest. + proj = os.path.join(self._tmp, "proj") # venv's project: NO .socket + os.makedirs(proj) + venv = os.path.join(proj, ".venv") + attacker = os.path.join(self._tmp, "attacker") + self._socket(attacker) + os.chdir(attacker) + with mock.patch.object(sys, "prefix", venv), \ + mock.patch.object(sys, "base_prefix", self._tmp): # in_venv = True + got = hook._find_project_root() + self.assertIsNone(got, "must not apply a foreign manifest found above cwd") + + def test_system_python_falls_back_to_cwd(self): + # No venv (sys.prefix == base_prefix): the container/system case, where + # the project is wherever the process runs from. 
+ proj = os.path.join(self._tmp, "proj") + self._socket(proj) + os.chdir(proj) + with mock.patch.object(sys, "prefix", "/usr"), \ + mock.patch.object(sys, "base_prefix", "/usr"): # in_venv = False + got = hook._find_project_root() + self.assertEqual(os.path.realpath(got), os.path.realpath(proj)) + class TestPthLine(unittest.TestCase): - """The .pth one-liner must be valid Python and obey the kill switch.""" + """The .pth must be valid: comment lines are ignored by site.py, the import + line execs, and the kill switch short-circuits before importing.""" - def _pth_line(self): + def _pth_import_line(self): + # site.py execs only lines starting with `import`; `#` lines are + # comments. Mirror that: run the import line(s) the way site would. here = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(here, "socket_patch_hook.pth")) as f: - return f.read().strip() + lines = [ + ln.rstrip("\n") + for ln in f + if ln.strip() and not ln.lstrip().startswith("#") + ] + # Exactly one executable (import) line. + assert len(lines) == 1, f"expected one import line, got {lines!r}" + assert lines[0].startswith("import "), lines[0] + return lines[0] def test_pth_line_executes_and_calls_run(self): - line = self._pth_line() + line = self._pth_import_line() with mock.patch.object(hook, "run") as run: os.environ.pop("SOCKET_PATCH_HOOK", None) os.environ.pop("SOCKET_NO_HOOK", None) @@ -192,7 +246,7 @@ def test_pth_line_executes_and_calls_run(self): run.assert_called_once() def test_pth_line_respects_off_switch(self): - line = self._pth_line() + line = self._pth_import_line() with mock.patch.object(hook, "run") as run: os.environ["SOCKET_PATCH_HOOK"] = "off" try: