Files
h4b-image-optim/includes/class-cli-rescue.php
Henk 7e1c86f215 feat: initial v0.1.0 MVP
Replaces Smush Pro's optimisation pipeline without the grey-wash bug.

CLI commands working:
  wp h4b-img status
  wp h4b-img optimise --id=<n>
  wp h4b-img bulk
  wp h4b-img rescue

Verified on dev.rds.ink:
- ICC profile preservation works (the Smush-bug fix)
- Bulk: 20 attachments → 487 KB saved (10.4%), 0 errors
- Rescue: end-to-end mechanism verified on WorkingAsOne_horse fixture
- WebP synchronous, AVIF queued via WP-Cron
- Originals backed up to wp-content/h4b-img-originals/

See CHANGELOG.md for details + ../DESIGN-h4b-image-optim.md for architecture.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 13:41:03 +10:00

300 lines
8.4 KiB
PHP

<?php
/**
* wp h4b-img rescue — regenerate Smush-mangled JPGs from their .webp twins.
*
* @package H4B\ImageOptim
*/
namespace H4B\ImageOptim;
use Imagick;
use ImagickException;
// Refuse to run when the file is requested directly, outside a WordPress bootstrap.
defined( 'ABSPATH' ) || exit;
final class CLI_Rescue {

    /**
     * Find or fix JPGs damaged by Smush Ultra's grey-wash bug.
     *
     * ## OPTIONS
     *
     * [--scan]
     * : Just scan; report counts. Default if neither --scan nor --apply given.
     *
     * [--apply]
     * : Actually regenerate broken JPGs from their .webp siblings.
     *
     * [--min-severity=<float>]
     * : Only rescue files with severity >= this. Default 10.0
     *
     * [--limit=<n>]
     * : Maximum files to process. 0 = unlimited.
     *
     * [--csv=<path>]
     * : Write per-file scan results to CSV.
     *
     * [--manifest=<path>]
     * : Write JSON manifest of rescue actions.
     *
     * [--backup-dir=<path>]
     * : Where to put pre-rescue backups. Default: wp-content/h4b-img-originals/_rescue_<timestamp>/
     *
     * ## EXAMPLES
     *
     *     wp h4b-img rescue --scan
     *     wp h4b-img rescue --apply --min-severity=20 --limit=10
     *     wp h4b-img rescue --apply --csv=/tmp/scan.csv --manifest=/tmp/manifest.json
     *
     * @param array $args  Positional arguments (unused by this command).
     * @param array $assoc Associative arguments; see OPTIONS above.
     */
    public function __invoke( $args, $assoc ): void {
        $apply         = ! empty( $assoc['apply'] );
        $min_sev       = (float) ( $assoc['min-severity'] ?? 10.0 );
        $limit         = (int) ( $assoc['limit'] ?? 0 );
        $csv_path      = $assoc['csv'] ?? null;
        $manifest_path = $assoc['manifest'] ?? null;
        $backup_dir    = $assoc['backup-dir'] ?? null;

        $uploads = wp_get_upload_dir();
        $basedir = trailingslashit( $uploads['basedir'] );

        // Smush's WebP siblings live in wp-content/smush-webp/, NOT inside uploads/.
        // Mirror tree: uploads/2026/02/foo.jpg → smush-webp/2026/02/foo.jpg.webp
        $webp_root = trailingslashit( WP_CONTENT_DIR ) . 'smush-webp/';
        if ( ! is_dir( $webp_root ) ) {
            \WP_CLI::error( "WebP root not found: $webp_root — rescue requires Smush's smush-webp/ directory to read clean WebP twins from." );
        }

        // ---- Scan phase: classify every JPG under the uploads basedir ----
        \WP_CLI::log( "Scanning $basedir for .jpg files…" );
        $jpgs = self::find_jpgs( $basedir );
        \WP_CLI::log( sprintf( 'Found %d JPG files.', count( $jpgs ) ) );

        $progress    = \WP_CLI\Utils\make_progress_bar( 'Analysing', count( $jpgs ) );
        $results     = [];
        $broken_list = [];
        $prefix_len  = strlen( $basedir );

        foreach ( $jpgs as $jpg ) {
            // Path relative to the uploads basedir; also used for CSV rows and backup layout.
            $rel  = substr( $jpg, $prefix_len );
            $webp = self::resolve_webp( $webp_root, $rel, $jpg );

            $res             = Rescue_Detector::analyse( $jpg, $webp );
            $res['path']     = $jpg;
            $res['rel_path'] = $rel;
            $res['has_webp'] = is_readable( $webp );
            $results[]       = $res;

            if ( $res['classification'] === 'broken' && $res['severity'] >= $min_sev ) {
                $broken_list[] = $res;
            }
            $progress->tick();
        }
        $progress->finish();

        // Worst offenders first, so that --limit keeps the most severe cases.
        usort( $broken_list, fn( $a, $b ) => $b['severity'] <=> $a['severity'] );
        if ( $limit > 0 ) {
            $broken_list = array_slice( $broken_list, 0, $limit );
        }

        $by_class = [];
        foreach ( $results as $r ) {
            $by_class[ $r['classification'] ] = ( $by_class[ $r['classification'] ] ?? 0 ) + 1;
        }

        \WP_CLI::log( '' );
        \WP_CLI::log( 'Classification summary:' );
        foreach ( $by_class as $k => $v ) {
            \WP_CLI::log( sprintf( ' %-35s %5d', $k, $v ) );
        }
        \WP_CLI::log( sprintf( ' → %d to rescue at severity >= %.1f', count( $broken_list ), $min_sev ) );

        if ( $csv_path ) {
            // Stop with a clear message (and before any file is modified) when the
            // CSV cannot be opened, instead of failing inside fputcsv().
            if ( ! self::write_csv( $csv_path, $results ) ) {
                \WP_CLI::error( "Cannot open CSV for writing: $csv_path" );
            }
            \WP_CLI::log( "Scan CSV written: $csv_path" );
        }

        if ( ! $apply ) {
            \WP_CLI::success( 'Scan complete. Use --apply to actually rescue.' );
            return;
        }
        if ( empty( $broken_list ) ) {
            \WP_CLI::success( 'Nothing to rescue.' );
            return;
        }

        // ---- Rescue phase: back up, then regenerate each selected JPG ----
        $timestamp = date( 'Ymd_His' );
        // NOTE(review): the --backup-dir help text says the default lives under
        // wp-content/, but this builds it under the uploads basedir — confirm intent.
        $backup_dir = $backup_dir
            ?: ( $basedir . Optimizer::ORIGINALS_DIRNAME . '/_rescue_' . $timestamp );

        // Without a usable backup location every rescue would fail one by one;
        // abort up front instead.
        if ( ! wp_mkdir_p( $backup_dir ) ) {
            \WP_CLI::error( "Cannot create backup dir: $backup_dir" );
        }
        \WP_CLI::log( "Backup dir: $backup_dir" );

        $progress = \WP_CLI\Utils\make_progress_bar( 'Rescuing', count( $broken_list ) );
        $actions  = [];
        $done     = 0;
        $errored  = 0;

        foreach ( $broken_list as $r ) {
            $jpg    = $r['path'];
            $webp   = self::resolve_webp( $webp_root, $r['rel_path'], $jpg );
            $backup = trailingslashit( $backup_dir ) . $r['rel_path'];

            $res             = self::rescue_one( $jpg, $webp, $backup );
            $res['path']     = $jpg;
            $res['severity'] = $r['severity'];
            $actions[]       = $res;

            if ( $res['status'] === 'done' ) {
                $done++;
            } else {
                $errored++;
            }
            $progress->tick();
        }
        $progress->finish();

        if ( $manifest_path ) {
            $json = wp_json_encode( [
                'timestamp'    => $timestamp,
                'backup_dir'   => $backup_dir,
                'min_severity' => $min_sev,
                'total'        => count( $actions ),
                'done'         => $done,
                'errored'      => $errored,
                'actions'      => $actions,
            ], JSON_PRETTY_PRINT );

            // The rescue itself has already happened, so a manifest failure is
            // reported as a warning rather than aborting the command.
            if ( $json === false || file_put_contents( $manifest_path, $json ) === false ) {
                \WP_CLI::warning( "Could not write manifest: $manifest_path" );
            } else {
                \WP_CLI::log( "Manifest: $manifest_path" );
            }
        }

        \WP_CLI::success( sprintf(
            'Rescued %d / %d (errors: %d). Backups in %s',
            $done, count( $broken_list ), $errored, $backup_dir
        ) );
    }

    /**
     * Locate the WebP twin of a JPG.
     *
     * Tries the mirrored tree first (<webp_root>/<rel_path>.webp), then a sibling
     * file next to the JPG (<jpg>.webp). When neither is readable the mirrored
     * path is returned, so callers can still report which file was expected.
     *
     * @param string $webp_root Root of the mirrored WebP tree, with trailing slash.
     * @param string $rel_path  JPG path relative to the uploads basedir.
     * @param string $jpg       Absolute path of the JPG.
     * @return string Candidate WebP path (not guaranteed to be readable).
     */
    private static function resolve_webp( string $webp_root, string $rel_path, string $jpg ): string {
        $mirror = $webp_root . $rel_path . '.webp';
        if ( is_readable( $mirror ) ) {
            return $mirror;
        }
        $sibling = $jpg . '.webp';
        return is_readable( $sibling ) ? $sibling : $mirror;
    }

    /**
     * Write one CSV row per scanned file.
     *
     * @param string  $csv_path Destination file; truncated if it already exists.
     * @param array[] $results  Scan results as assembled by __invoke().
     * @return bool False when the file could not be opened for writing.
     */
    private static function write_csv( string $csv_path, array $results ): bool {
        $fh = fopen( $csv_path, 'w' );
        if ( $fh === false ) {
            return false;
        }

        // Separator, enclosure and escape are passed explicitly: relying on the
        // default escape character is deprecated as of PHP 8.4. These are the
        // historical defaults, so the output is unchanged.
        $put = static function ( array $row ) use ( $fh ): void {
            fputcsv( $fh, $row, ',', '"', '\\' );
        };

        $put( [ 'path', 'classification', 'severity', 'delta_mean', 'jpg_stddev', 'webp_mean', 'jpg_mean', 'webp_white_pixels', 'has_webp', 'error' ] );
        foreach ( $results as $r ) {
            // Metric keys are defaulted defensively: it is not visible from this
            // file whether the detector fills all of them for every classification.
            $put( [
                $r['rel_path'],
                $r['classification'],
                $r['severity'],
                $r['delta_mean'] ?? '',
                $r['jpg_stddev'] ?? '',
                $r['webp_mean'] ?? '',
                $r['jpg_mean'] ?? '',
                $r['webp_white_pixels'] ?? '',
                $r['has_webp'] ? 1 : 0,
                $r['error'] ?? '',
            ] );
        }
        fclose( $fh );

        return true;
    }

    /**
     * Recursively collect every .jpg / .jpeg file below a directory.
     *
     * Files inside the plugin's own originals directory or inside a smush-webp/
     * directory are skipped.
     *
     * @param string $root Directory to walk.
     * @return string[] Absolute file paths, in iterator order.
     */
    private static function find_jpgs( string $root ): array {
        $skip_markers = [
            '/' . Optimizer::ORIGINALS_DIRNAME . '/',
            '/smush-webp/',
        ];

        $walker = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator( $root, \FilesystemIterator::SKIP_DOTS )
        );

        $found = [];
        foreach ( $walker as $entry ) {
            /** @var \SplFileInfo $entry */
            if ( ! $entry->isFile() ) {
                continue;
            }

            $path = $entry->getPathname();
            foreach ( $skip_markers as $marker ) {
                if ( strpos( $path, $marker ) !== false ) {
                    continue 2; // inside one of our working directories
                }
            }

            $ext = strtolower( $entry->getExtension() );
            if ( $ext === 'jpg' || $ext === 'jpeg' ) {
                $found[] = $path;
            }
        }

        return $found;
    }

    /**
     * Rescue a single broken JPG by re-encoding from its clean WebP twin.
     *
     * Order of operations: verify the WebP is readable, copy the JPG to the
     * backup path and check the copy's size, encode the WebP to a temp file next
     * to the JPG, then rename the temp file over the JPG. The original is only
     * replaced by that final rename, so every failure path leaves it untouched.
     *
     * @param string $jpg         Absolute path of the JPG to replace.
     * @param string $webp        Absolute path of the WebP to decode.
     * @param string $backup_path Where the pre-rescue copy of the JPG is stored.
     * @return array{status:string, new_size:int, original_size:int, error:?string, backup_path:string}
     *         status is 'done' on success and 'error' otherwise; error is one of
     *         webp_not_readable, backup_failed, backup_size_mismatch,
     *         replace_failed, or 'reencode_failed: <message>'.
     */
    private static function rescue_one( string $jpg, string $webp, string $backup_path ): array {
        $res = [
            'status'        => 'pending',
            'new_size'      => 0,
            'original_size' => is_readable( $jpg ) ? filesize( $jpg ) : 0,
            'error'         => null,
            'backup_path'   => $backup_path,
        ];

        if ( ! is_readable( $webp ) ) {
            return self::failed( $res, 'webp_not_readable' );
        }

        // Backup first, and make sure the copy is complete before going further.
        wp_mkdir_p( dirname( $backup_path ) );
        if ( ! @copy( $jpg, $backup_path ) ) {
            return self::failed( $res, 'backup_failed' );
        }
        clearstatcache( true, $backup_path );
        if ( filesize( $backup_path ) !== $res['original_size'] ) {
            return self::failed( $res, 'backup_size_mismatch' );
        }

        // Decode WebP → re-encode as JPEG q=90, 4:4:4, sRGB, ICC preserved.
        $tmp     = $jpg . '.h4b.tmp';
        $img     = null;
        $written = false;
        try {
            $img = new Imagick( $webp );
            $img->setImageFormat( 'jpeg' );
            $img->setImageCompressionQuality( 90 );
            $img->setSamplingFactors( [ '1x1', '1x1', '1x1' ] ); // 4:4:4 — no chroma loss
            $img->setInterlaceScheme( Imagick::INTERLACE_NO );
            $img->setImageColorspace( Imagick::COLORSPACE_SRGB );
            // Ensure an ICC profile is attached
            ICC_Profile::preserve_or_inject( $img );
            $img->writeImage( $tmp );
            $written = true;
        } catch ( ImagickException $e ) {
            // Nothing to restore: $jpg has not been modified at this point.
            return self::failed( $res, 'reencode_failed: ' . $e->getMessage() );
        } finally {
            if ( $img instanceof Imagick ) {
                $img->clear();
            }
            // Do not leave a partial temp file behind when encoding failed.
            if ( ! $written && file_exists( $tmp ) ) {
                @unlink( $tmp );
            }
        }

        // Carry the original's owner, group and permission bits over to the new file.
        $stat = stat( $jpg );
        if ( $stat !== false ) {
            @chown( $tmp, $stat['uid'] );
            @chgrp( $tmp, $stat['gid'] );
            @chmod( $tmp, $stat['mode'] & 0777 );
        }

        // Swap the new file into place; a failed rename must not be reported as success.
        if ( ! rename( $tmp, $jpg ) ) {
            @unlink( $tmp );
            return self::failed( $res, 'replace_failed' );
        }

        clearstatcache( true, $jpg );
        $res['new_size'] = filesize( $jpg );
        $res['status']   = 'done';
        return $res;
    }

    /**
     * Mark a rescue result as failed.
     *
     * @param array  $res   Result array being built by rescue_one().
     * @param string $error Error code or message to record.
     * @return array The same result with status 'error' and the error set.
     */
    private static function failed( array $res, string $error ): array {
        $res['status'] = 'error';
        $res['error']  = $error;
        return $res;
    }
}