Add files without passwords and other unwanted fluff.

Mon, 23 Jan 2023 21:37:02 +0100

author
Peter Gervai <grin@grin.hu>
date
Mon, 23 Jan 2023 21:37:02 +0100
changeset 0
3b714bbb1347
child 1
ab6634389318

Add files without passwords and other unwanted fluff.

delinker_fixer/BotSecrets.pm-sample file | annotate | diff | comparison | revisions
delinker_fixer/defixer.pl file | annotate | diff | comparison | revisions
delinquent_files/debug.inc file | annotate | diff | comparison | revisions
delinquent_files/delinker_job.yaml file | annotate | diff | comparison | revisions
delinquent_files/demon.php file | annotate | diff | comparison | revisions
delinquent_files/matcher.inc file | annotate | diff | comparison | revisions
delinquent_files/shared.inc file | annotate | diff | comparison | revisions
delinquent_files/test/_t_desc1.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_desc1.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_desc1.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_desc1.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_gallery-closing-tag.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_gallery-closing-tag.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_gallery-closing-tag.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_gallery-closing-tag.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_jap.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_jap.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_jap.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_jap.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_latest.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_latest.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_latest.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_latest.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_movingspace.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_movingspace.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_movingspace.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_movingspace.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_no-text-removal.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_no-text-removal.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_no-text-removal.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_no-text-removal.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_normal.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_normal.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_normal.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_normal.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_punctuation.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_punctuation.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_punctuation.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_punctuation.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni1.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni1.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni1.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni1.unlink file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni2.data file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni2.in file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni2.replace file | annotate | diff | comparison | revisions
delinquent_files/test/_t_uni2.unlink file | annotate | diff | comparison | revisions
delinquent_files/test_regex.php file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinker_fixer/BotSecrets.pm-sample	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,13 @@
+package BotSecrets;
+
+# Sample credentials module (shipped as BotSecrets.pm-sample with masked values).
+# All four variables below are exported into the caller's namespace by default.
+
+use strict;
+use warnings;
+
+use Exporter;
+our @ISA    = ('Exporter');
+our @EXPORT = ('$db_user', '$db_pw', '$bu', '$bp');
+
+# Database account: user name and password.
+our $db_user = "s5****";
+our $db_pw   = "***";
+
+# Bot account: user name and password.
+our $bu = '****';
+our $bp = '****';
+
+1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinker_fixer/defixer.pl	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,262 @@
+#!/usr/bin/perl
+#$Id$
+#
+# cleanup after delinker
+#
+# (c)Peter [[grin]] Gervai, 2022
+# cc-by-sa-4.0-int / gplv3+
+#
+
+use strict;
+use warnings;
+use utf8;
+
+use File::Basename;
+use lib dirname(__FILE__);
+use BotSecrets;
+
+use MediaWiki::Bot qw(:constants);
+use DBI;
+
+binmode( STDOUT, ':utf8' );
+
+# Unbuffered STDOUT so log lines show up immediately.
+$|=1;
+
+my ($db_name, $db_host, $db_port) = ("s52421__commonsdelinquent_p", "tools.db.svc.wikimedia.cloud", 3306);
+## $db_user / $db_pw (and $bu / $bp below) are imported from BotSecrets
+
+&d("Start");
+
+## connect db
+# RaiseError is off, so every DBI call that can fail is checked by hand;
+# otherwise a failed connect would only surface as an "undefined value" crash later.
+my $dsn = "DBI:mysql:database=$db_name;host=$db_host;port=$db_port";
+my $dbh = DBI->connect( $dsn, $db_user, $db_pw, { mysql_enable_utf8=>1, RaiseError=>0, AutoCommit=>0 } )
+    or die "Cannot connect to database: " . $DBI::errstr;
+$dbh->{mysql_enable_utf8} = 1;
+
+## prepare sql
+#my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND file=? AND done=? AND timestamp BETWEEN ? AND ?" );
+my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND done=? AND timestamp BETWEEN ? AND ?" )
+    or die "Cannot prepare select: " . $dbh->errstr;
+
+my $sth_update = $dbh->prepare( "UPDATE event SET done=? WHERE id=?" )
+    or die "Cannot prepare update: " . $dbh->errstr;
+
+## connect bot (enwp)
+#my ($bu,$bp) = ('xxxxxxxxxxxxx', 'xxxxxxxxxxxxxx' );
+our $bot = MediaWiki::Bot->new({
+    host => 'en.wikipedia.org',
+    login_data => { username => $bu, password=> $bp },
+    do_sul => 1,
+    operator => 'grin',
+    protocol => 'https',
+    debug => 2,
+    assert => 'user',
+});
+my $last_wiki='huwiki';
+
+die "Cannot login" unless $bot;
+
+## rev
+my $revert_message = "Undoing CommonsDelinker bad replace, will be retried later.";
+
+## prepare search
+# Select the 'replace' events flagged done=127 inside the given timestamp window.
+my $action = 'replace';
+#my $file = 'S-3A_MAD_DN-SC-87-05743.JPEG';
+my $done = 127;
+my ($ts_from, $ts_to) = ('20220502000000', '20220506130000');
+#my $res = $sth->execute( $action, $file, $done, $ts_from, $ts_to );
+my $res = $sth->execute( $action, $done, $ts_from, $ts_to );
+if( $dbh->err ) {
+    die "Error doing SQL: " . $dbh->errstr;
+}
+print $sth->rows . " rows found.\n";
+
+## results
+# For every selected event: point the bot at the event's wiki, check whether the
+# page is still at the revision recorded for the event and, if so, revert it and
+# set the event back to done=0. Pages that changed meanwhile get done=42, a failed
+# re-login gets done=43.
+#
+# Wiki the bot is currently pointed at. Starts empty so the first event always
+# selects its wiki explicitly instead of relying on the login host.
+my $current_wiki = '';
+while( my $row = $sth->fetchrow_hashref ) {
+    for my $key (sort keys %$row) {
+        print "$key=" . $row->{$key} . " ";
+    }
+    print "\n";
+
+    # switch the bot over when this event belongs to another wiki
+    if( $current_wiki ne $row->{wiki} ) {
+        my $wikidata = &get_wikidata( $row->{wiki} );
+        next unless $wikidata;
+        $bot->set_wiki( $wikidata );
+        # Remember the switch. Without this the comparison above is always made
+        # against a stale name and events can be applied to the wrong wiki.
+        $current_wiki = $row->{wiki};
+    }
+
+    my $revid = $bot->get_last( $row->{page}, 'CommonsDelinker' );
+    if( !defined( $revid ) ) {
+        &d("Revid is missing!! skipping $row->{wiki}:$row->{page}!!");
+        #&error("missing revid");
+        next;
+    }
+
+    if( $revid == $row->{revision} ) {
+        &d(" Page unchanged, undo possible! REVERTING $row->{wiki}:$row->{page}");
+
+        # revert
+        if( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
+            &d( "Success. Updating.");
+            $sth_update->execute( 0, $row->{id} );
+            if( $dbh->err ) {
+                # the wiki edit is done but the db does not know it: stop right here
+                &d( "*** Error updating db for $row->{wiki}:$row->{page} id=$row->{id}!!");
+                $dbh->commit;
+                exit;
+            }
+
+        } else {
+            # the session may have expired: log in again and retry once
+            &d( " Revert #1 failed, try to login.");
+            if( $bot->login( { username=>$bu, password=>$bp } ) ) {
+                if( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
+                    &d( "Success^2. Updating.");
+                    $sth_update->execute( 0, $row->{id} );
+                    if( $dbh->err ) {
+                        &d( "*** Error updating db for $row->{wiki}:$row->{page} id=$row->{id}!!");
+                        $dbh->commit;
+                        exit;
+                    }
+                }
+
+            } else {
+                &d( "Login failed into $row->{wiki}. Skipping");
+                &error("login failed");
+                $sth_update->execute( 43, $row->{id} );
+            }
+        }
+
+    } else {
+        &d( " Page changed, oldid $row->{revision} - newid $revid; skipping (update db).");
+        $sth_update->execute( 42, $row->{id} );
+        if( $dbh->err ) {
+            &d( "*** Error updating db for non-changed $row->{wiki}:$row->{page} id=$row->{id}!!");
+        }
+    }
+
+    $dbh->commit;
+}
+&d( "Commit.");
+$dbh->commit;
+
+exit;
+
+# Print one log line: local time, process id in brackets, then the message.
+sub d {
+    my $message = shift;
+    my $stamp   = localtime;    # scalar context gives the readable date string
+    print "$stamp [$$] $message\n";
+}
+
+
+# Report a bot error. Currently switched off: the sub returns before doing
+# anything, so the reporting code below never runs.
+sub error {
+    return; # doesn't work
+    my $message = shift;
+    my $details = $bot->{error};
+    print "error: $message; " . $details->{code} . "; " . $details->{details} . "\n";
+
+    #use Data::Dumper;
+    #die Dumper($bot);
+    #exit;
+}
+
+## decypher short wikinames
+#
+# Turn a database-style wiki name into the hashref expected by $bot->set_wiki.
+#
+#   $name   database name of the wiki, as stored in the event table
+#
+# Returns { host => <domain> }, or nothing (undef in scalar context) when the
+# bot cannot map the name to a domain, so the caller can skip the event.
+#
+# The old hand-written name patterns that used to follow the lookup were
+# unreachable and have been removed; the bot's own lookup is the only source.
+sub get_wikidata {
+    my ($name) = @_;
+
+    &d("Decode $name");
+
+    my $host = $bot->db_to_domain( $name );
+    if( !defined( $host ) || $host eq '' ) {
+        # never hand an empty host to set_wiki
+        &d("*** $name not implemented yet!!! ***");
+        return;
+    }
+
+    return { host => $host };
+}
+
+exit; # everything below is unreachable scratch code for trying out MediaWiki::Bot calls
+
+my $article_name = 'a';
+
+my $options = { revid => 13849803 };
+my $txt = $bot->get_text($article_name, $options);
+die "error something" unless defined $options->{pageid};
+warn "page doesn't exist" if $options->{pageid} == MediaWiki::Bot::PAGE_NONEXISTENT;
+print "Page length is ". length($txt) . "!\n";
+
+my $pageid = $bot->get_id($article_name);
+die "error something else" unless defined $pageid;
+printf "Page id is %s\n", $pageid;
+
+# last _not_ by user
+my $revid = $bot->get_last($article_name,'no such user');
+printf "Last revid is %s\n", $revid;
+
+$revid = $bot->get_last($article_name,'FoBe');
+printf "Last revid-2 is %s\n", $revid;
+
+$options = { oldid=> 20300641, revid=>20300648 };
+my $diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## commons
+$bot->set_wiki({
+    host => 'commons.wikimedia.org'
+});
+die "Cannot login to commons" unless $bot;
+print "Logged over commons.\n";
+
+$options = { revid=> 568734018, oldid=>628016329 };
+$diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## the bug: the last word of the previous line + \n ended up in the replacement
+## fix:
+##  - if this is the last edit
+##  - revert
+
+__END__
+<tr>
+    <td class="diff-marker" data-marker="−"></td>
+    <td class="diff-deletedline diff-side-deleted"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian, <del class="diffchange diffchange-inline">1935</del></div></td>
+    <td class="diff-marker" data-marker="+"></td>
+    <td class="diff-addedline diff-side-added"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian,<ins class="diffchange diffchange-inline">|{{c|Georgy</ins> <ins class="diffchange diffchange-inline">Malenkov}} and Beria, 1940</ins></div></td>
+</tr>
+<tr>
+    <td class="diff-marker" data-marker="−"></td>
+    <td class="diff-deletedline diff-side-deleted"><div>File:Берия в суде.jpg|{{c|Georgy Malenkov}} and Beria, 1940</div></td>
+    <td colspan="2" class="diff-empty diff-side-added"></td>
+</tr>
+
+
+## generic revert
+## - take the events with done=42 from the database
+## - look at the stored revision
+## - if that is still the latest revision, then:
+##  - revert
+##  - done=0 (pending)
+## - if it is not, then:
+##  - done=666 (fixx it felix)
+##
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/debug.inc	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,54 @@
+<?php
+
+## simple debug framework
+## (grin)
+##
+## $Id$
+
+class Debug {
+    // Messages whose level is above this threshold are dropped.
+    private $debuglevel = 2;
+
+    public function __construct() {
+        // nothing to set up
+    }
+
+    // Change the threshold, then announce the new value at warn level.
+    public function set_level($l) {
+        $this->debuglevel = $l;
+        $this->warn( "Set debug level to $this->debuglevel" );
+    }
+
+    // One wrapper per severity; each forwards to log() with a fixed level:
+    // msg=0, error=1, warn=2, info=5, debug=8, trace=9.
+    public function msg ($s)   { $this->log( 0, $s ); }
+
+    public function error ($s) { $this->log( 1, $s ); }
+
+    public function warn ($s)  { $this->log( 2, $s ); }
+
+    public function info ($s)  { $this->log( 5, $s ); }
+
+    public function debug ($s) { $this->log( 8, $s ); }
+
+    public function trace ($s) { $this->log( 9, $s ); }
+
+    // Print "<ISO 8601 time> [<level>] <message>" unless the level is above the threshold.
+    public function log ($level,$msg) {
+        #$now = strftime("%Y-%m-%d %T");
+        if( !( $level > $this->debuglevel ) ) {
+            $stamp = date('c');
+            print( "$stamp [$level] $msg\n" );
+        }
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/delinker_job.yaml	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,17 @@
+##
+## start: 
+##	become commons-delinquent
+##	toolforge-jobs load delinker_job.yaml
+## stop:
+##      toolforge-jobs flush
+## 
+---
+# continuous job
+- image: tf-php74
+  name: delinker
+  command: ./demon.php
+  continuous: true
+  emails: none
+  # mem 512M (max 305)
+  #mem: 512Mi
+  mem: 768Mi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/demon.php	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,706 @@
+#!/usr/bin/php
+<?PHP
+
+chdir ( '/data/project/commons-delinquent' ) ;
+
+error_reporting(E_ERROR|E_CORE_ERROR|E_COMPILE_ERROR); # E_ALL|
+ini_set('display_errors', 'On');
+
+require_once ( './shared.inc' ) ;
+require_once( './matcher.inc' );
+require_once( './debug.inc' );
+
+class CommonsDelinquentDemon extends CommonsDelinquent {
+
+	var $avoidNamespaceOnWiki = [
+		'dewiki' => [4]
+	] ;
+
+	private $d;
+	private $matcher;
+
+	var $maximum_file_usage_limit = 65535; # prevent memory explosion by extreme used files (65535 ~ 650 MB)
+	var $delay_minutes = 10 ;  # Wait after deletion
+	var $fallback_minutes = 120 ; # Only used if DB is empty
+	var $max_text_diff = 1500 ; # Max char diff
+	var $min_faux_template_icon = 500 ;
+	var $comments = array() ;
+	var $comments_default = array (
+		'summary' => 'Removing [[:c:File:$1|$1]], it has been deleted from Commons by [[:c:User:$2|$2]] because: $3.' ,
+		'replace' => 'Replacing $1 with [[File:$2]] (by [[:c:User:$3|$3]] because: $4).' ,
+		'by' => ' Requested by [[User:$1|]].'
+	) ;
+
+	// Initialise the parent tool, then create the logger and hand it to a new matcher.
+	function __construct() {
+		parent::__construct();
+		$logger = new Debug();
+		$this->d = $logger;
+		$this->matcher = new Matcher($logger);
+	}
+
+	function set_debug_mode($l) { // Forwards the requested level $l to the Debug logger's set_level().
+		$this->d->set_level( $l );
+	}
+
+	function debug($msg) { // Logs $msg through the Debug logger's debug() method.
+		$this->d->debug($msg);
+	}
+
+	// Returns the highest Commons log timestamp among finished events (done=1) in the
+	// tool database, or a dummy (current time minus fallback_minutes) when there is none.
+	function getLastTimestamp () {
+		$tool_db = $this->getToolDB() ;
+
+		# This is the timestamp of the Commons logging table, NOT the tool edit timestamp!
+		$query = "SELECT max(log_timestamp) AS max_ts FROM event WHERE done=1" ;
+		$rows = $this->runQuery ( $tool_db , $query ) ;
+
+		$latest = '' ;
+		while ( $row = $rows->fetch_object() ) {
+			$latest = $row->max_ts ;
+		}
+		$tool_db->close() ;
+
+		if ( $latest != '' ) return $latest ;
+
+		# Nothing stored yet: start from "now minus X minutes"
+		return date ( 'YmdHis' , time() - $this->fallback_minutes*60 ) ;
+	}
+	
+	// Tells whether a file usage row ($o, with gil_* fields) is on a page that must be left alone.
+	// Returns true for such pages, false otherwise.
+	function isBadPage ( $o , $filename ) {
+		$ns = $o->gil_page_namespace_id ;
+
+		if ( $o->gil_wiki == 'commonswiki' ) {
+			if ( $ns == 6 and $o->gil_to == $filename ) return true ; // Self-reference
+			if ( $ns == 2 and preg_match ( '/^\w+Bot\b/' , $o->gil_page_title ) ) return true ; // Bot subpage on Commons
+			if ( $ns == 4 and preg_match ( '/(Deletion(_| )requests\/.*|Undeletion(_| )requests\/.*)\b/' , $o->gil_page_title ) ) return true ; // DR and UDR on Commons
+		}
+
+		// Per-wiki namespaces configured in $avoidNamespaceOnWiki
+		foreach ( $this->avoidNamespaceOnWiki AS $wiki => $namespaces ) {
+			if ( $o->gil_wiki == $wiki and in_array ( $ns , $namespaces ) ) return true ;
+		}
+		return false ;
+	}
+	
+	// Collects the file deletions logged on Commons from $max_ts up to "now minus delay_minutes"
+	// and attaches to each one, as ->usage, the usage rows that are not filtered out.
+	// Returns the array of log row objects.
+	function getRecentDeletedFiles ( $max_ts ) {
+		# Open Commons database replica
+		$commons = $this->getCommonsDB() ;
+		$cutoff = date ( 'YmdHis' , time() - $this->delay_minutes*60 ) ;
+
+		# Get all file deletions
+		$query = implode ( '' , array (
+			"SELECT * FROM logging_compat LEFT JOIN comment ON comment_id = log_comment_id WHERE log_type='delete' AND log_action='delete' AND log_timestamp>='$max_ts' AND log_timestamp<'$cutoff' AND log_namespace=6" ,
+			" AND NOT EXISTS (SELECT * FROM image WHERE img_name=log_title)" ,
+			" AND NOT EXISTS (SELECT * FROM page WHERE page_title=log_title AND page_namespace=6 AND page_is_redirect=1)" , # Do not remove redirects. Is that OK???
+			" ORDER BY log_timestamp ASC"
+		) ) ;
+
+		$deletions = array() ; # Files to delink
+		$rows = $this->runQuery ( $commons , $query ) ;
+		while ( $row = $rows->fetch_object() ) {
+			$deletions[] = $row ;
+		}
+
+		# Look up where each deleted file is still in use
+		foreach ( $deletions AS $deletion ) {
+			$name = $deletion->log_title ;
+			$usage_query = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($name) . "'" ;
+			$deletion->usage = array() ; # Usage instances for this file
+			$links = $this->runQuery ( $commons , $usage_query ) ;
+			while ( $link = $links->fetch_object() ) {
+				$skip = $this->isBadWiki($link->gil_wiki) || $this->isBadPage($link,$name) ;
+				if ( !$skip ) $deletion->usage[] = $link ;
+			}
+		}
+		$commons->close() ;
+		return $deletions ;
+	}
+
+	// Returns the globalimagelinks rows for $filename, leaving out wikis rejected by isBadWiki().
+	// When there are more than maximum_file_usage_limit rows the lookup is abandoned and the
+	// single-element array [ "*OVERFLOW*" ] is returned instead.
+	function getFileUsage ( $filename ) {
+		$this->d->trace("Get file usage for $filename");
+		$ret = array() ;
+		$db_co = $this->getCommonsDB() ;
+		$sql = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . "'" ;
+		$this->d->trace("Try running: $sql");
+		$result = $this->runQuery ( $db_co , $sql ) ;
+		$this->d->trace("Got result, looping through");
+		while($o = $result->fetch_object()){
+			if ( $this->isBadWiki($o->gil_wiki) ) continue ;
+			$ret[] = $o ;
+			# limit the maximum matches because we use more memory than toolforge allows
+			if( count($ret) > $this->maximum_file_usage_limit ) {
+				$this->d->error("Overflow!! We cannot get usage for $filename, too much hits (>$this->maximum_file_usage_limit)!");
+				$db_co->close();
+				return array ( "*OVERFLOW*" ) ;
+			}
+		}
+		$this->d->trace("Processed " . count($ret) . " usage points");
+		$db_co->close() ;
+		return $ret ;
+	}
+	
+	// True when the usage row's namespace id is neither odd (talk page) nor negative.
+	function canUnlinkFromNamespace ( $usage ) {
+		$ns = $usage->gil_page_namespace_id ;
+		$is_talk = ( $ns % 2 > 0 ) ; // Skip talk pages
+		$is_negative = ( $ns < 0 ) ; // Paranoia
+		return !( $is_talk || $is_negative ) ;
+	}
+
+	// Returns true when event $e may go ahead. When the file exists on the event's wiki
+	// (or, with $check_commons set, on Commons) the event is marked done=2 with a note
+	// and false is returned.
+	function fileExistenceSanityCheck ( $e , $check_commons ) {
+		$reason = null ;
+		if ( $this->hasLocalFile ( $e->wiki , $e->file ) ) {
+			$reason = 'Skipped: Local file exists' ;
+		} else if ( $check_commons and $this->hasLocalFile ( 'commonswiki' , $e->file ) ) {
+			$reason = 'Skipped: Commons file exists' ;
+		}
+
+		if ( $reason === null ) return true ;
+
+		$this->setDone ( $e->id , 2 , $reason ) ;
+		return false ;
+	}
+
+
+	// Returns the content of the latest revision of $pagename on $wiki,
+	// or false when no API object or no latest revision is available.
+	function getTextFromWiki ( $wiki , $pagename ) {
+		$api = $this->getAPI ( $wiki ) ;
+		if ( !$api ) return false ;
+
+		$factory = new \Mediawiki\Api\MediawikiFactory( $api );
+		$latest = $factory->newPageGetter()->getFromTitle( $pagename )->getRevisions()->getLatest();
+		if ( !$latest ) return false ;
+
+		return $latest->getContent()->getData() ;
+	}
+	
+	/**
+		Returns the edit summary pattern for $wiki, cached per mode and wiki.
+		The text of User:CommonsDelinker/<mode>-I18n on that wiki wins over the built-in default.
+		mode	"summary" or "replace"
+	*/
+	function getLocalizedCommentPattern ( $wiki , $mode = 'summary') {
+		if ( !isset($mode) ) $mode = 'summary' ;
+
+		if ( !isset ( $this->comments[$mode][$wiki] ) ) {
+			$fallback = $this->comments_default[$mode] ; # Default
+
+			# Try local translation
+			$translated = $this->getTextFromWiki ( $wiki , 'User:CommonsDelinker/' . $mode . '-I18n' ) ;
+			$this->comments[$mode][$wiki] = ( $translated !== false ) ? $translated : $fallback ;
+		}
+
+		return $this->comments[$mode][$wiki] ;
+	}
+	
+	// Builds the edit summary for removing a deleted file from one page.
+	//   $file   deletion log row (log_title, log_user_text, comment_text are read)
+	//   $usage  globalimagelinks row of the page (gil_wiki is read)
+	// Returns the localized pattern with $1 = file, $2 = deleting user, $3 = deletion reason.
+	function constructUnlinkComment ( $file , $usage ) {
+		$pattern = $this->getLocalizedCommentPattern ( $usage->gil_wiki ) ;
+		
+		$c = $file->comment_text ;
+		# Usage rows carry the wiki in gil_wiki; there is no plain "wiki" field on them
+		if ( $usage->gil_wiki != 'commonswiki' ) { # Point original comment links to Commons
+			$c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
+			$c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
+		}
+
+		# strtr fills all placeholders in one pass and takes the values literally, so a
+		# "$" or "\" in a file name or comment is not mistaken for a regex backreference
+		return strtr ( $pattern , array (
+			'$1' => (string) $file->log_title ,
+			'$2' => (string) $file->log_user_text ,
+			'$3' => (string) $c
+		) ) ;
+	}
+
+	// Builds the edit summary for replacing one file with another.
+	//   $params  array with 'wiki', 'comment', 'file', 'replace_with_file' and optionally 'user'
+	// Returns the localized pattern with $1 = old file, $2 = new file, $3 = 'CommonsDelinker',
+	// $4 = reason; when a requesting user is given, the localized "by" text is appended.
+	function constructReplaceComment ( $params ) {
+		$pattern = $this->getLocalizedCommentPattern ( $params['wiki'] , 'replace' ) ;
+		
+		$c = $params['comment'] ;
+		if ( $params['wiki'] != 'commonswiki' ) { # Point original comment links to Commons
+			$c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
+			$c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
+		}
+
+		# strtr fills all placeholders in one pass and takes the values literally: a "$" or "\"
+		# in a file name is not read as a backreference, and a value that itself contains
+		# "$2" etc. is not substituted again
+		$pattern = strtr ( $pattern , array (
+			'$1' => (string) $params['file'] ,
+			'$2' => (string) $params['replace_with_file'] ,
+			'$3' => 'CommonsDelinker' ,
+			'$4' => (string) $c
+		) ) ;
+		
+		if ( isset($params['user']) and $params['user'] != '' ) {
+			$by = $this->getLocalizedCommentPattern ( $params['wiki'] , 'by' ) ;
+			$by = str_replace ( '$1' , (string) $params['user'] , $by ) ;
+			$pattern .= ' ' . $by ;
+		}
+		
+		return $pattern ;
+	}
+	
+	// Appends to $sqls the INSERT statement that queues an 'unlink' event for one usage of a
+	// deleted file. Nothing is added when the namespace is excluded or the wiki has a local file.
+	function addUnlinkEvent ( $file , $usage , &$sqls ) {
+		if ( !$this->canUnlinkFromNamespace ( $usage ) ) return ;
+		if ( $this->hasLocalFile ( $usage->gil_wiki , $usage->gil_to ) ) return ;
+
+		# Page title, prefixed with the namespace name when there is one
+		$title = $usage->gil_page_title ;
+		if ( $usage->gil_page_namespace != '' ) $title = $usage->gil_page_namespace . ":$title" ;
+
+		$event = array (
+			'action' => 'unlink' ,
+			'file' => $usage->gil_to ,
+			'wiki' => $usage->gil_wiki ,
+			'page' => $title ,
+			'namespace' => $usage->gil_page_namespace_id ,
+			'comment' => $this->constructUnlinkComment ( $file , $usage ) ,
+			'timestamp' => date ( 'YmdHis' ) ,
+			'log_id' => $file->log_id ,
+			'log_timestamp' => $file->log_timestamp ,
+			'done' => 0
+		) ;
+
+		# Column list and quoted, escaped value list in the same order
+		$columns = array_keys ( $event ) ;
+		$values = array() ;
+		foreach ( $event AS $value ) {
+			$values[] = "'" . $this->getDBsafe($value) . "'" ;
+		}
+
+		$sqls[] = "INSERT IGNORE INTO event (" . implode ( ',' , $columns ) . ") VALUES (" . implode ( "," , $values ) . ")" ;
+	}
+	
+	// Builds the unlink INSERT statements for every usage of every deleted file,
+	// then runs them against the tool database.
+	function addUnlinkEvents ( $delink_files ) {
+		$queries = array() ;
+		foreach ( $delink_files AS $deleted ) {
+			foreach ( $deleted->usage AS $link ) {
+				$this->addUnlinkEvent ( $deleted , $link , $queries ) ;
+			}
+		}
+
+		$tool_db = $this->getToolDB() ;
+		foreach ( $queries AS $query ) {
+			$this->runQuery ( $tool_db , $query ) ;
+		}
+		$tool_db->close() ;
+	}
+	
+	// Fetches the entity named by $e->page through the Wikidata wbgetentities API.
+	// Returns the decoded JSON object, or false after marking the event done=2 when the
+	// request fails, the item is missing, or the item has no claims.
+	function getJSON4Q ( $e ) {
+		$q = $e->page ;
+		$url = "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=" . urlencode ( $q ) ;
+
+		# Tell a failed request / broken answer apart from "item has no claims"
+		$raw = file_get_contents ( $url ) ;
+		$j = ( $raw === false ) ? null : json_decode ( $raw ) ;
+		if ( !is_object ( $j ) ) {
+			$this->d->error ( "Could not load $q from Wikidata" ) ;
+			$this->setDone ( $e->id , 2 , "Could not load $q from Wikidata" ) ;
+			return false ;
+		}
+
+		if ( isset ( $j->entities->$q->missing ) ) { # No such item
+			$this->setDone ( $e->id , 2 , "No such item $q" ) ;
+			return false ;
+		}
+		if ( !isset ( $j->entities->$q->claims ) ) {
+			$this->setDone ( $e->id , 2 , "Did not find " . $e->file . " on " . $q ) ;
+			return false ;
+		}
+		return $j ;
+	}
+
+	// Removes from the Wikidata item $e->page every commonsMedia statement whose value is
+	// $e->file, then marks the event done=1. The event is left untouched when the removal fails.
+	function performEditUnlinkWikidata ( $e ) {
+		$json = $this->getJSON4Q ( $e ) ;
+		if ( $json === false ) return ;
+
+		$item = $e->page ;
+		$claim_ids = array() ;
+		foreach ( $json->entities->$item->claims AS $prop => $statements ) {
+			foreach ( $statements AS $statement ) {
+				if ( $statement->type != 'statement' ) continue ;
+				$snak = $statement->mainsnak ;
+				if ( $snak->datatype != 'commonsMedia' ) continue ;
+				# Bring the stored value into the same form as $e->file before comparing
+				$claimed_file = str_replace ( ' ' , '_' , ucfirst ( trim ( $snak->datavalue->value ) ) ) ;
+				if ( $claimed_file == $e->file ) $claim_ids[] = $statement->id ;
+			}
+		}
+
+		if ( count($claim_ids) > 0 ) {
+			$request = array ( 'claim'=>implode('|',$claim_ids) , 'summary' => $e->comment ) ;
+			if ( !$this->editWikidata ( 'wbremoveclaims' , $request ) ) return ;
+		}
+
+		$this->setDone ( $e->id , 1 ) ; # OK!
+	}
+
+
+	// Replaces $e->file with $e->replace_with_file on the Wikidata item $e->page: the matching
+	// commonsMedia claims are removed first, then one new claim per removed one is created on
+	// the same property. Marks the event done=1 on success, done=2 when no claim matched;
+	// on an API failure the error is logged and the event is left untouched.
+	function performEditReplaceWikidata ( $e ) {
+		$json = $this->getJSON4Q ( $e ) ;
+		if ( $json === false ) return ;
+
+		$item = $e->page ;
+
+		# Collect (claim id, property) for each statement that points at the old file
+		$matches = array() ;
+		foreach ( $json->entities->$item->claims AS $prop => $statements ) {
+			foreach ( $statements AS $statement ) {
+				if ( $statement->type != 'statement' ) continue ;
+				$snak = $statement->mainsnak ;
+				if ( $snak->datatype != 'commonsMedia' ) continue ;
+				$claimed_file = str_replace ( ' ' , '_' , ucfirst ( trim ( $snak->datavalue->value ) ) ) ;
+				if ( $claimed_file == $e->file ) $matches[] = array ( $statement->id , $prop ) ;
+			}
+		}
+
+		if ( count($matches) == 0 ) {
+			$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
+			return ;
+		}
+
+		# Remove old image entries
+		$ids = array() ;
+		foreach ( $matches AS $match ) $ids[] = $match[0] ;
+		if ( !$this->editWikidata ( 'wbremoveclaims' , array ( 'claim'=>implode('|',$ids) ) ) ) {
+			$this->d->error("performEditReplaceWikidata:1 failed");
+			return ;
+		}
+
+		# Add new image entries
+		foreach ( $matches AS $match ) {
+			$claim = array(
+				'snaktype' => 'value' ,
+				'property' => $match[1] ,
+				'value' => json_encode(str_replace('_',' ',$e->replace_with_file)) ,
+				'entity' => $e->page ,
+				'summary' => $e->comment
+			) ;
+
+			if ( !$this->editWikidata ( 'wbcreateclaim' , $claim ) ) {
+				$this->d->error( "performEditReplaceWikidata:2 failed" );
+				return ;
+			}
+		}
+
+		$this->setDone ( $e->id , 1 ) ; # OK!
+	}
+
+	
+	##################################################################
+	##
+	## Here we make the changes.
+	## Get the page, replace content and upload again.
+	##
+	##################################################################
+	function performEditText ( $e ) {
+		// Loads the latest revision of $e->page on $e->wiki, unlinks or replaces $e->file in
+		// its text through the matcher and saves the result. The outcome is recorded with
+		// setDone(): 1 plus the revision id that was read on success, 2 plus a note otherwise.
+		$this->d->debug( "performEditText $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
+		$api = $this->getAPI ( $e->wiki ) ;
+		if ( $api === false ) {
+			$this->setDone ( $e->id , 2 , "Could not connect to API" ) ;
+			return ;
+		}
+		$services = new \Mediawiki\Api\MediawikiFactory( $api );
+		try {
+			$page = $services->newPageGetter()->getFromTitle( $e->page );
+		} catch (Exception $ex) {
+			$this->setDone ( $e->id , 2 , "Page not found" ) ;
+			$this->d->debug("Page '$e->page' not found ($ex), id='$e->id' wiki=$e->wiki file=$e->file action=$e->action");
+			return ;
+		}
+		$revision = $page->getRevisions()->getLatest();
+		
+		if ( !$revision ) {
+			$this->setDone ( $e->id , 2 , "Latest revision not found" ) ;
+			return ;
+		}
+		
+		$rev_id = $revision->getId() ;
+		$text = $revision->getContent()->getData() ;
+		
+		$file = $e->file ;
+		$pattern = $this->matcher->matcher_prepare_pattern( $file );
+	
+		$new_text = $text ;
+		$new_file = '' ; // only set for 'replace'; defined here because the log line below always uses it
+
+		if ( $e->action == 'unlink' ) {
+			## remove image references in text; see ./matcher.inc
+			$this->d->info("Try to unlink '$pattern' in $e->wiki: $e->page");
+			$new_text = $this->matcher->matcher_do_unlink( $new_text, $pattern );
+
+		} else if ( $e->action == 'replace' ) {
+			## replace image with new_file in text; see ./matcher.inc
+			$new_file = $e->replace_with_file;
+			$this->d->info("Try to replace '$pattern' with '$new_file' in $e->wiki: $e->page");
+			$new_text = $this->matcher->matcher_do_replacement( $new_text, $pattern, $new_file );
+		}
+		
+		// strict comparison: with == two different numeric-looking texts could compare as equal
+		if ( $text === $new_text ) { # No change
+			$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
+			$this->d->info( "Article unchanged. id=$e->id; $e->wiki: $e->page" );
+			return ;
+		}
+		
+		// Safety net: never blank a page and never change more than max_text_diff characters
+		if ( strlen(trim($new_text)) == 0 or abs(strlen($text)-strlen($new_text)) > $this->max_text_diff ) {
+			$this->setDone ( $e->id , 2 , 'Text change too big' ) ;
+			$this->d->warn( "Article text change is too big. id=$e->id; $e->wiki: $e->page" );
+			return ;
+		}
+		
+		if ( !isset($e->comment) ) $e->comment = '' ;
+		$e->comment = (string)$e->comment ;
+
+		$this->d->info( "Editing $e->wiki: $e->page to $e->action $e->file (w/ $new_file) AS $e->comment") ;
+		
+		$params = array (
+			'title' => $e->page ,
+			'text' => trim($new_text) ,
+			'summary' => $e->comment ,
+			'bot' => 1
+		) ;
+		
+		$x = $this->editWiki ( $e->wiki , 'edit' , $params ) ;
+		if ( $x and $x['edit']['result'] == 'Success' ) {
+			$this->setDone ( $e->id , 1 , array('revision'=>$rev_id) ) ;
+		} else {
+			$this->d->error( "Cannot edit wiki ($e->wiki: $e->page): " . $this->last_exception );
+			$this->setDone ( $e->id , 2 , $this->last_exception ) ;
+		}
+
+	}
+	
+	// Runs a 'replace' event: Wikidata items (wikidatawiki, namespace 0) go through the
+	// claim editor, every other page through the text editor.
+	function performEditReplace ( $e ) {
+		if ( !$this->fileExistenceSanityCheck($e,false) ) return ; # Nothing to do
+		if ( !isset($e->namespace) ) return ; # Paranoia
+
+		$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
+		if ( !$is_wikidata_item ) { # "Normal" edit
+			$this->performEditText ( $e ) ;
+			return ;
+		}
+		$this->performEditReplaceWikidata ( $e ) ;
+	}
+	
+	// Runs an 'unlink' event: Wikidata items (wikidatawiki, namespace 0) go through the
+	// claim editor, every other page through the text editor.
+	function performEditUnlink ( $e ) {
+		if ( !$this->fileExistenceSanityCheck($e,true) ) return ; # Nothing to do
+
+		$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
+		if ( $is_wikidata_item ) {
+			$this->performEditUnlinkWikidata ( $e ) ;
+			return ;
+		}
+
+		# "Normal" edit
+		$this->d->debug( "performEditUnlink $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
+		$this->performEditText ( $e ) ;
+	}
+	
+	// Dispatches one event to the handler for its action; an unknown action is fatal.
+	function performEdit ( $e ) {
+		$this->d->debug( "performEdit $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
+		switch ( $e->action ) {
+			case 'unlink' :
+				$this->performEditUnlink ( $e ) ;
+				break ;
+			case 'replace' :
+				$this->performEditReplace ( $e ) ;
+				break ;
+			default :
+				$this->d->error( "PerformEdit got unknown action $e->action" );
+				print_r ( $e ) ;
+				die ( "Unknown action " . $e->action ) ;
+		}
+	}
+	
+	// Re-queues events that were skipped only because of a rate limit or an edit conflict.
+	// Values of "done": 0=pending, 1=done, 2=skipped.
+	function clearBogusIssues ( $db ) {
+		foreach ( array ( 'rate limit' , 'edit conflict' ) AS $issue ) {
+			$sql = "update `event` set done=0,note='' where note like '%" . $issue . "%' and done=2" ;
+			$this->d->debug("Set done=0 (pending) on '" . $issue . "' events where done=2(skipped)");
+			$this->runQuery ( $db , $sql ) ;
+		}
+	}
+	
+	# Work through the queue: load every pending event (done=0), oldest first, perform them
+	# one by one, finally re-queue the events skipped over a rate limit or an edit conflict.
+	function performEdits () {
+		$this->d->debug("Connecting to DB");
+		$db = $this->getToolDB() ;
+		$this->d->debug("Get work events (pending events)");
+		$sql = "SELECT * FROM `event` WHERE done=0 ORDER BY timestamp ASC,log_timestamp ASC" ;
+		$rows = $this->runQuery ( $db , $sql ) ;
+		$pending = array() ;
+		while ( $row = $rows->fetch_object() ) $pending[] = $row ;
+		$db->close() ;
+
+		$previous_wiki = '' ;
+		foreach ( $pending AS $event ) {
+			# Edit rate limiter: pause before editing the same wiki twice in a row
+			if ( $previous_wiki == $event->wiki ) sleep ( 5 ) ;
+			$this->d->debug("Perform an edit in $event->wiki");
+			try {
+				$this->performEdit ( $event ) ;
+			} catch (Exception $ex) {
+				# a failing event must not keep the remaining ones from being processed
+				echo 'Caught exception: ',  $ex->getMessage(), "\n";
+			}
+			$previous_wiki = $event->wiki ;
+		}
+
+		# Clearing the bogus issues is slow, so it is done here at the end
+		$this->d->debug("Connecting to DB");
+		$db = $this->getToolDB() ;
+		$this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)");
+		$this->clearBogusIssues ( $db ) ;
+		$db->close() ;
+	}
+	
+	# Turn the {{universal replace|old|new|reason=...|user=...}} lines of the Commons
+	# command page into pending 'replace' events, one per page using the old file, and
+	# take the accepted lines off the page. Lines that cannot be executed stay on the
+	# page, prefixed with the reason. A {{Stop}} template on the page disables everything.
+	function addReplaceEvents () {
+		$cmd_page = 'User:CommonsDelinker/commands' ;
+		$this->d->trace("getText from User:CommonsDelinker/commands");
+		$t = $this->getTextFromWiki ( 'commonswiki' , $cmd_page ) ;
+		if ( $t === false ) {
+			$this->d->error( "Could not open commands page") ;
+			return ;
+		}
+		
+		if ( preg_match ( '/\{\{[Ss]top\}\}/' , $t ) ) return ; // STOP
+		
+		$sqls = array() ;
+#		$t = "{{/front}}\n{{universal replace|Overzicht - Hulst - 20118655 - RCE.jpg|Red Weaver Ant, Oecophylla smaragdina.jpg|reason=Testing}}" ; # TESTING
+		
+		$this->d->trace("Processing page content...");
+		$ts = date ( 'YmdHis' ) ;
+		$t = explode ( "\n" , $t ) ;
+		$nt = array() ;
+		# Common start of both command forms: {{universal replace|<old>|<new>|reason=
+		$re_head = '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*' ;
+		foreach ( $t AS $l ) {
+			# The form with |user= has to be tried first: the lazy reason group of the plain
+			# form matches such lines as well and would swallow "|user=..." into the reason.
+			if ( !preg_match ( $re_head . '((?:(?!\}\}).)+?)\s*\|\s*user\s*=\s*(\S.*?)\s*\}\}/' , $l , $m ) ) {
+				if ( !preg_match ( $re_head . '(\S.*?)\s*\}\}/' , $l , $m ) ) {
+					$nt[] = $l ; # not a command, keep the line as it is
+					continue ;
+				}
+			}
+			$old_file = ucfirst(str_replace(' ','_',trim($m[1]))) ;
+			$new_file = ucfirst(str_replace(' ','_',trim($m[2]))) ;
+			$this->d->trace("Process line; old:$old_file new:$new_file.");
+			
+			$comment = trim($m[3]) ;
+			$user = '' ;
+			if ( isset($m[4]) ) {
+				$user = str_replace(' ','_',trim($m[4])) ;
+				# Reduce a wiki link such as [[User:Name|label]] to the bare Name
+				$user = preg_replace ( '/^\s*\[\[(?:[^|\]]*:)?(.+?)\s*(\||\]\]).*$/' , '$1' , $user ) ;
+			}
+			
+			if ( !$this->hasLocalFile ( 'commonswiki' , $new_file ) ) {
+				$nt[] = "No such replacement file: " . $l ;
+				continue ;
+			}
+
+			if ( !preg_match('/\.svg$/i',$old_file) and preg_match('/\.svg$/i',$new_file) ) {
+				$nt[] = "Non-SVG to SVG replacement: " . $l ;
+				continue ;
+			}
+
+			$this->d->trace("Get file usage for $old_file");
+			$usages = $this->getFileUsage ( $old_file ) ;
+			if( isset($usages[0]) and $usages[0] == "*OVERFLOW*" ) { # isset: $usages may be empty
+				$nt[] = "File is used on too many pages: " . $l;
+				$this->d->info("Skipping file $old_file; high usage (>$this->maximum_file_usage_limit)!");
+				continue;
+			}
+			
+			$this->d->trace("Get db handle for TooDB");
+			$db = $this->getToolDB() ; # getDBsafe() below escapes through the latest connection
+			$this->d->trace("Generate replaces...");
+			foreach ( $usages AS $usage ) {
+				$page = $usage->gil_page_title ;
+				if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ':' . $page ;
+				$params = array (
+					'action' => 'replace' ,
+					'file' => $old_file ,
+					'wiki' => $usage->gil_wiki ,
+					'page' => $page ,
+					'namespace' => $usage->gil_page_namespace_id ,
+					'timestamp' => $ts ,
+					'comment' => $comment ,
+					'log_id' => -1 ,
+					'log_timestamp' => $ts ,
+					'user' => $user ,
+					'done' => 0 ,
+					'replace_with_file' => $new_file
+				) ;
+				$params['comment'] = $this->constructReplaceComment ( $params ) ;
+
+				$values = array() ;
+				foreach ( $params AS $v ) $values[] = "'" . $this->getDBsafe($v) . "'" ;
+				$this->d->trace("Add SQL to sqls[] array, len=" . count($sqls));
+				$sql = "INSERT IGNORE INTO event (" . implode ( ',' , array_keys($params) ) . ") VALUES (" . implode ( "," , $values ) . ")" ;
+				$sqls[] = $sql ;
+				$this->d->trace("SQL:$sql");
+			}
+			
+			$db->close() ;
+		}
+		
+		$t = implode ( "\n" , $t ) ;
+		$nt = implode ( "\n" , $nt ) ;
+		if ( $t === $nt ) return ; // No change
+		
+		# Run SQL
+		$db = $this->getToolDB() ;
+		foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ;
+		$db->close() ;
+		
+		# Save new text to Wiki
+		$params = array (
+			'title' => $cmd_page ,
+			'text' => trim($nt) ,
+			'summary' => 'Removing replace commands, will be executed soon' ,
+			'bot' => 1
+		) ;
+		
+		$this->d->info( "Editing $cmd_page...") ;
+		$x = $this->editWiki ( 'commonswiki' , 'edit' , $params ) ;
+		if ( $x === false ) $this->d->error( "Could not save $cmd_page, its commands are queued but still listed there") ;
+		$this->d->debug( "Editing $cmd_page done.") ;
+	}
+	
+	# Delete empty events, then mark each file/wiki/namespace group with more than
+	# min_faux_template_icon pending events as skipped (done=2): likely a template icon.
+	function fixFauxTemplateReplacements () {
+		$todo = array() ;
+		$db = $this->getToolDB() ;
+		$this->runQuery ( $db , "DELETE FROM event WHERE action='' and file=''" ) ;
+		$sql = 'select file,wiki, count(*) as cnt,namespace from event where done=0 group by file,wiki,namespace having cnt>' . $this->min_faux_template_icon ;
+		$result = $this->runQuery ( $db , $sql ) ;
+		while($o = $result->fetch_object()){
+			$file = $this->getDBsafe ( $o->file ) ;
+			$wiki = $this->getDBsafe ( $o->wiki ) ;
+			$ns = isset ( $o->namespace ) ? 'namespace=' . intval ( $o->namespace ) : 'namespace IS NULL' ; # a NULL must not yield the broken SQL "namespace="
+			$todo[] = "UPDATE event SET done=2,note='Likely template icon, skipping' WHERE file='$file' AND wiki='$wiki' AND $ns" ;
+		}
+		foreach ( $todo AS $sql ) $this->runQuery ( $db , $sql ) ;
+		$db->close() ;
+	}
+
+	// Unlinks deleted files
+	# Full polling pass: queue unlink and replace events, flag likely template icons, then edit.
+	function run () {
+		$this->d->debug("Get last timestamp");
+		$newest_ts = $this->getLastTimestamp() ;
+		$this->d->debug("Get recent deleted files");
+		$deleted_files = $this->getRecentDeletedFiles ( $newest_ts ) ;
+		$this->d->debug("Add unlink events for recently deleted files");
+		$this->addUnlinkEvents ( $deleted_files ) ;
+		$steps = array ( "Add replace events" => 'addReplaceEvents' , "Fix bogus template replacements" => 'fixFauxTemplateReplacements' , "Perform the queued edits" => 'performEdits' ) ;
+		foreach ( $steps AS $label => $method ) {
+			$this->d->debug($label);
+			$this->$method() ;
+		}
+	}
+	
+	# Debug entry point: performs nothing but the "add replace events" step.
+	function debug_run0() {
+		$this->d->debug("Add replace events");
+		$this->addReplaceEvents () ;
+	}
+
+}
+
+# Daemon entry point: create the bot, work off the edits that are already queued,
+# then poll for new work in an endless loop, resting 30 seconds between the
+# passes.
+print "Bot is starting.\n";
+$demon = new CommonsDelinquentDemon() ;
+$demon->set_debug_mode(8);
+
+# Single steps for manual testing; enable one when needed:
+//$demon->addReplaceEvents () ;
+//$demon->performEdits() ;
+//$demon->fixFauxTemplateReplacements() ;
+//$demon->debug_run0();
+
+$demon->debug("Performing edits...");
+$demon->performEdits() ;
+for ( ; ; ) {
+	$demon->debug("Calling run loop...");
+	$demon->run() ;
+	$demon->debug("Sleeping 30...");
+	sleep ( 30 ) ;
+}
+
+?>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/matcher.inc	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,117 @@
+<?php
+
+## This file contains the regex replacement functions.
+## It is separated to be testable, by test_regex.php. 
+## This has been written by Peter 'grin' Gervai about 2022.
+## Licensed under GPLv3+ and CC_By_Sa-4.0. 
+##
+## $Id$
+##
+
+class Matcher {
+    private $d;
+
+    # $debug: the debug framework object; its msg() and trace() receive all diagnostics
+    function __construct($debug) {
+        $this->d = $debug;
+        $this->d->msg("Matcher debug initialized");
+    }
+
+    ## prepare a filename regex pattern from filename: first letter in either case,
+    ## '_' and ' ' interchangeable, the rest quoted for '/' delimiters; match it with /u.
+    function matcher_prepare_pattern( $file ) {
+        # split by character, not by byte: substr() cuts a multibyte first letter in half,
+        # so names starting with a non-ASCII letter never got their [Xx] class
+        $first_letter = mb_substr ( $file , 0 , 1 , 'UTF-8' ) ;
+        $pattern = mb_substr ( $file , 1 , null , 'UTF-8' ) ;
+        $upper = mb_strtoupper ( $first_letter , 'UTF-8' ) ;
+        $lower = mb_strtolower ( $first_letter , 'UTF-8' ) ;
+        # If first letter have upper/lowercase include both as [Aa]; case mappings growing to
+        # several characters (e.g. ß to SS) are left alone, their class would match other names
+        if ( $upper !== $lower && mb_strlen ( $upper . $lower , 'UTF-8' ) == 2 ) {
+            $first_letter = "[" . $upper . $lower . "]" ;
+        } else {
+            # escape special characters and also '/'
+            $first_letter = preg_quote ( $first_letter , '/' ) ; # can be metacharacter
+        }
+        ## normalise mediawiki filenames: _ to space, first letter anycase, then space to [_ ]
+        $pattern = str_replace ( '_' , ' ' , $pattern ) ;
+        $pattern = $first_letter . preg_quote ( $pattern, '/' ) ;
+        $pattern = str_replace ( ' ' , '[_ ]' , $pattern ) ;
+        return $pattern;
+    }
+
+    ## remove the filename from various places in the text body: [[File:...]] links,
+    ## gallery lines and "= file" template values. $pattern: see matcher_prepare_pattern()
+    function matcher_do_unlink( $text, $pattern ) {
+        $new_text = $text;
+        # unicode \w
+        $w='[\pL\pM]';
+        # unicode \s
+        $s='\pZ';
+        # word end separator (a lookahead, used instead of \b)
+        $we='(?=[^\pL\pM\n]|$)';
+        
+        # filename: " : Image : name.ext"
+        ### 20220523- request not to remove [[:File:....]] -g
+        ###$pattern_file = "$s*(: *)?$w+ *: *$pattern" ; # e.g. File:x.jog, Tập_tin:x.jpg
+        $pattern_file = "$s*$w+ *: *$pattern" ; # e.g. File:x.jpg, Tập_tin:x.jpg
+        # filename in galleries (leading : cannot stand w/o namespace)
+        $pattern_gfile= "((: *)?$w+ *: *)?$pattern$we" ; 
+        # links
+        # [[ : image : foo.jpg | pip=pop | flip [[flop]] [http://example.com x] [[zig]] zag ]]
+        $pattern_link = '\[\[ *' . $pattern_file . "(\[\[.*?\]\]$w*|\[.*?\]|[^\pL\pM\\n\]].*?)*\]\]";
+        # if we had to remove the whole line, eat LF, too.
+        $pattern_link_wholeline = '^\[\[ *' . $pattern_file . "(\[\[.*?\]\]$w*|\[.*?\]|[^\pL\pM\\n\]].*?)*\]\]$s*\\n";
+        # gallery entries
+        $pattern_gallery = '\n?^' . $s .'*'. $pattern_gfile .'[^\n]*?((?<gal><\/gallery *>)|$)' ;
+        # files within templates
+        $pattern_template = '= *' . $pattern_gfile . ' *';
+
+        $this->d->trace(" PatternLink WL : $pattern_link_wholeline");
+        $this->d->trace(" PatternLink    : $pattern_link");
+        $this->d->trace(" PatternGallery : $pattern_gallery");
+        $this->d->trace(" PatternTemplate: $pattern_template");
+
+        # in normal link (non-multiline pattern)
+        #  if we have to remove the whole line, do it first
+        $new_text = $this->replace_or_keep ( "/$pattern_link_wholeline/um" , '' , $new_text ) ;
+        $this->d->trace("Text after link replacement (wholeline): \n>>>$new_text<<<");
+        #  otherwise leave one space to keep word separation
+        $new_text = $this->replace_or_keep ( "/ *$pattern_link */u" , ' ' , $new_text ) ;
+        $this->d->trace("Text after link replacement: \n>>>$new_text<<<");
+        # in gallery: drop the entry, but keep a closing </gallery> tag found on its line
+        $keep_gallery_end = function ($matches) {
+            return array_key_exists( 'gal', $matches ) ? $matches['gal'] : '';
+        };
+        $new_text = $this->replace_or_keep ( "/$pattern_gallery/um" , $keep_gallery_end , $new_text ) ;
+        $this->d->trace("Text after gallery replacement: \n>>>$new_text<<<");
+        # in template
+        $new_text = $this->replace_or_keep ( "/$pattern_template/um" , '=' , $new_text ) ;
+        $this->d->trace("Text after template replacement: \n>>>$new_text<<<");
+        return $new_text;
+    }
+
+    ## preg_replace() that accepts a Closure as $replacement (preg_replace_callback) as well.
+    ## preg_* functions return NULL on a PCRE error (e.g. backtrack limit reached); in that
+    ## case $text is handed back unchanged rather than a NULL that would wipe the text.
+    private function replace_or_keep( $regex, $replacement, $text ) {
+        if ( $replacement instanceof Closure ) $result = preg_replace_callback ( $regex , $replacement , $text ) ;
+        else $result = preg_replace ( $regex , $replacement , $text ) ;
+        if ( $result !== null ) return $result;
+        $this->d->msg("Regex failed (PCRE error " . preg_last_error() . "), text kept unchanged: $regex");
+        return $text;
+    }
+
+    ## replace file, don't care much about the context
+    function matcher_do_replacement( $text, $pattern, $replacement_file ) {
+        # there is no mb_ucfirst
+        $new_file = ucfirst ( trim ( str_replace ( '_' , ' ' , $replacement_file ) ) ) ;
+        $pattern = '(?<=^|[^\pL\pM\n])'.$pattern.'(?=$|[^\pL\pM])';
+        $this->d->trace(" ReplMatch: $pattern"); 
+        # a callback inserts the name verbatim; given as replacement string, a "$1" or "\1"
+        # inside the file name would be expanded as a backreference
+        $insert_name = function ($matches) use ($new_file) { return $new_file; } ;
+        return $this->replace_or_keep ( "/$pattern/um" , $insert_name , $text ) ;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/shared.inc	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,193 @@
+<?PHP
+
+# Wikipedia bot components
+require_once ( __DIR__.'/vendor/mediawiki-api/vendor/autoload.php' ) ;
+
+# "the usual" tools routines, mostly database opening ones; these should probably be replaced by local ones!
+require_once ( __DIR__.'/public_html/php/common.php' ) ;
+
+
+# Shared base of the bot: database access, mapping of wiki database names to host
+# names and logged-in MediaWiki API requests.
+class CommonsDelinquent {
+	var $debugging = false ;	# when true, SQL statements and API traffic are printed
+	var $config = array() ;		# settings read from bot.cnf; 'name' and 'password' are used to log in
+	var $last_db = null ;		# most recently opened connection; getDBsafe() escapes through it
+	var $last_exception = null ;	# why the latest editWiki() call failed; null if it did not fail
+	
+	# Read the bot settings from bot.cnf in the directory of this file.
+	function __construct () {
+		$this->config = parse_ini_file ( __DIR__.'/bot.cnf' ) ;
+	}
+
+	// Runs a MySQL query and returns its result. Optional debugging output; on error throws an Exception showing the DB error and the query
+	function runQuery ( $db , $sql ) {
+		if ( $this->debugging ) print "$sql\n" ;
+		if(!$result = $db->query($sql)) throw new Exception('There was an error running the query [' . $db->error . ']:'."\n$sql\n");
+		return $result ;
+	}
+	
+	# Open the 'commons' database via openDB() and remember it as the current connection.
+	function getCommonsDB () {
+		$this->last_db = openDB ( 'commons' , 'wikimedia' ) ;
+		return $this->last_db ;
+	}
+	
+	# Open the bot's own database (it holds the event table) and remember it as the current connection.
+	function getToolDB () {
+		$this->last_db = openToolDB ( 'commonsdelinquent_p' ) ;
+		return $this->last_db ;
+	}
+	
+	# Escape $s for use inside a quoted SQL string; needs a connection opened beforehand.
+	function getDBsafe ( $s ) {
+		if ( !isset ( $this->last_db ) ) die ( "getDBsafe called before database was opened!" ) ;
+		return $this->last_db->real_escape_string ( $s ) ;
+	}
+	
+	# Tell whether $wiki (a database name such as 'enwikinews') is excluded from editing.
+	function isBadWiki ( $wiki ) {
+		# Excluded by name fragment (regular expression)
+		$bad_patterns = array (
+			'/^wikimania/' , '/strategy/' , '/foundation/' , '/usability/' ,
+#			'/outreach/' , '/^suwiki$/' ,
+			'/mxwikimedia/' , '/rswikimedia/' , '/tenwiki/' , '/stqwiki/' ,
+			'/enwikinews/' , '/testwikidatawiki/'
+		) ;
+		foreach ( $bad_patterns AS $re ) if ( preg_match ( $re , $wiki ) ) return true ;
+		# Excluded by exact database name
+		$bad_wikis = array (
+			'ukwikimedia' ,
+			# SUL LOGIN not working
+			'donatewiki' , 'idwikimedia' , 'bdwikimedia' , 'maiwikimedia' , 'amwikimedia' , 'gewikimedia' ,
+			'mniwiki' ,		# -grin 2022-02-05
+#			'vecwiki' ,		# -grin 2022-10-03
+			'mnwwiktionary' ,	# -grin 2022-03-10
+			'mniwiktionary' ,	# -grin 2022-03-10
+			'shnwiktionary' ,	# -grin 2022-07-18
+			'niawiktionary' ,	# -grin 2022-09-06
+			'wawikisource' ,	# -grin 2022-05-02
+			'banwikisource'		# -grin 2022-08-08
+#			'fiwikivoyage' , 'brwikisource' , 'liwikibooks' , 'liwikisource'
+		) ;
+		return in_array ( $wiki , $bad_wikis ) ; // false: Wiki is OK
+	}
+	
+	# Tell whether $wiki has a file named $file, i.e. a row in its image table.
+	# Returns false, too, when the database cannot be opened or the name cannot be escaped.
+	function hasLocalFile ( $wiki , $file ) {
+		$ret = false ;
+		$db = openDBwiki ( $wiki ) ;
+		if ( $db === false ) {
+			print "FAILED TO OPEN $wiki - returning false\n" ;
+			return false ;
+		}
+		$this->last_db = $db ; # from now on getDBsafe() escapes through this connection
+		/// hack by grin, 2021-03-01; getDBsafe may return empty!
+		$sql_name = $this->getDBsafe(str_replace(' ','_',$file));
+		if( $sql_name == "" )  return false;
+		$sql = "SELECT * FROM image WHERE img_name='" . $sql_name . "' LIMIT 1" ;
+		try {
+			$result = $this->runQuery ( $this->last_db , $sql ) ;
+			while($o = $result->fetch_object()) $ret = true ;
+		} catch (Exception $e) {
+			echo 'Caught exception: ',  $e->getMessage(), "\n";
+		}
+		return $ret ;
+	}
+	
+	# Set event.done of event $id to $value. $meta is either a note text or an array of
+	# column => value pairs to store along with it; only the values are escaped.
+	function setDone ( $id , $value , $meta = null) {
+		$db = $this->getToolDB() ;
+		$sql = "UPDATE event SET done=" . ($value*1) ;
+		if ( isset ( $meta ) ) {
+			if ( !is_array ( $meta ) ) $meta = array ( 'note' => $meta ) ;
+			foreach ( $meta AS $k => $v ) $sql .= ",$k='" . $this->getDBsafe($v) . "'" ;
+		}
+		$sql .= " WHERE id=" . ($id*1) ;
+		if ( $value != 1 ) print "$sql\n" ;
+		$this->runQuery ( $db , $sql ) ;
+		$db->close() ;
+	}
+	
+	# Map a wiki database name to its host name, e.g. 'commonswiki' to 'commons.wikimedia.org'.
+	# Returns false when the name fits none of the schemes below.
+	function wiki2server ( $wiki ) {
+		if ( $wiki == 'wikidatawiki' ) return 'www.wikidata.org' ;
+		if ( $wiki == 'commonswiki' ) return 'commons.wikimedia.org' ;
+		if ( $wiki == 'mediawikiwiki' ) return 'www.mediawiki.org' ;
+		if ( $wiki == 'metawiki' ) return 'meta.wikimedia.org' ;
+		if ( $wiki == 'outreachwiki' ) return 'outreach.wikimedia.org' ;
+		if ( $wiki == 'incubatorwiki' ) return 'incubator.wikimedia.org' ;
+		if ( $wiki == 'sourceswiki' ) return 'wikisource.org' ;
+		if ( $wiki == 'specieswiki' ) return 'species.wikimedia.org' ;
+		/// fix by grin 2021-03-01: missing close re separator
+		if ( preg_match ( '/(.+)wikimedia/' , $wiki , $m ) ) return $m[1] . ".wikimedia.org" ;
+		if ( preg_match ( '/^(wikimania\d+)wiki$/' , $wiki , $m ) ) return $m[1] . ".wikimedia.org" ;
+		# <language code><project>: '_' becomes '-', a bare 'wiki' means Wikipedia
+		if ( preg_match ( '/^(.+?)(wik.+)$/' , $wiki , $m ) ) {
+			$server = str_replace('_','-',$m[1]) . "." ;
+			if ( $server == 'be-x-old.' ) $server = 'be-tarask.' ;
+			if ( $m[2] == 'wiki' ) $server .= 'wikipedia' ;
+			else $server .= $m[2] ;
+			$server .= '.org' ;
+			return $server ;
+		}
+		return false ;
+	}
+	
+	# Create an API client for $wiki and log the bot in with the credentials from bot.cnf.
+	# Returns false when the host name is unknown or login() reports failure.
+	function getAPI ( $wiki ) {
+		# TODO check if re-opening same API, cache in object
+		$server = $this->wiki2server ( $wiki ) ;
+		if ( $server === false ) return false ;
+		$api = new \Mediawiki\Api\MediawikiApi( 'https://'.$server.'/w/api.php' );
+		if ( !$api->isLoggedin() ) {
+			print "Logging into https://$server/w/api.php\n";  #  --grin 2022-02-05
+			$x = $api->login( new \Mediawiki\Api\ApiUser( $this->config['name'], $this->config['password'] ) );
+			if ( !$x ) return false ;
+		}
+		return $api ;
+	}
+	
+	# Post $action with $params to the API of $wiki as the logged-in bot, then log out.
+	# Returns the API result, or false on failure with the reason in $this->last_exception.
+	function editWiki ( $wiki , $action , $params ) {
+		$this->last_exception = null ; # never report the failure of an earlier call
+		$api = $this->getAPI ( $wiki ) ;
+		if ( $api === false ) {
+			$this->last_exception = "No logged-in API available for $wiki" ;
+			return false ;
+		}
+		$params['token'] = $api->getToken() ;
+		$params['bot'] = 1 ;
+		if ( $this->debugging ) print_r ( $params ) ;
+		try {
+			$x = $api->postRequest( new \Mediawiki\Api\SimpleRequest( $action, $params ) );
+		} catch (Exception $e) {
+			echo 'Caught exception: ',  $e->getMessage(), "\n";
+			$this->last_exception = $e->getMessage() ;
+			$x = false ;
+		}
+		if ( $this->debugging ) print_r ( $x ) ;
+		try {
+			# Logout takes nothing but a token; a failure here must not discard the edit result
+			$api->postRequest( new \Mediawiki\Api\SimpleRequest( "logout", array ( 'token' => $api->getToken() ) ) );
+		} catch (Exception $e) {
+			echo 'Caught exception: ',  $e->getMessage(), "\n";
+		}
+		return $x ;
+	}
+
+	# Run editWiki() against Wikidata; returns the 'success' value of the reply, false on any failure.
+	function editWikidata ( $action , $params ) {
+		$ret = $this->editWiki ( 'wikidatawiki' , $action , $params ) ;
+		$success = ( is_array($ret) && isset($ret['success']) ) ? $ret['success'] : false ;
+		if ( $this->debugging ) print is_array($ret) ? "RET:" . $success . "\n" : "RET: FALSE\n" ;
+		return $success ;
+	}
+}
+
+?>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_desc1.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+img: 10 TND obverse.jpg
+pattern: 10[_ ]TND[_ ]obverse\.jpg
+repl_img: Халдун.jxr
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_desc1.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.<ref>A., Ibn Khaldun: His life and Works for Mohammad Enan</ref>
+[[Файл:10 TND obverse.jpg|мини|250px|Изображение на Ибн Халдун върху банкнота от 10 [[тунизийски денар]]а]]
+Като цяло е известно че Ибн Халдун е роден в [[Тунис]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_desc1.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.<ref>A., Ibn Khaldun: His life and Works for Mohammad Enan</ref>
+[[Файл:Халдун.jxr|мини|250px|Изображение на Ибн Халдун върху банкнота от 10 [[тунизийски денар]]а]]
+Като цяло е известно че Ибн Халдун е роден в [[Тунис]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_desc1.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,2 @@
+Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.<ref>A., Ibn Khaldun: His life and Works for Mohammad Enan</ref>
+Като цяло е известно че Ибн Халдун е роден в [[Тунис]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_gallery-closing-tag.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+img: Aung San Suu Kyi 1951.jpg
+pattern: [Aa]ung[_ ]San[_ ]Suu[_ ]Kyi[_ ]1951\.jpg
+repl_img: NothingAtAll.jxr
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_gallery-closing-tag.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,7 @@
+Text page
+<gallery>
+Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001
+Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010
+Archivo:Aung San Suu Kyi 1951.jpg|Aung San Suu Kyi en 1951</gallery>
+
+== more comes ==
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_gallery-closing-tag.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,7 @@
+Text page
+<gallery>
+Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001
+Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010
+Archivo:NothingAtAll.jxr|Aung San Suu Kyi en 1951</gallery>
+
+== more comes ==
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_gallery-closing-tag.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,6 @@
+Text page
+<gallery>
+Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001
+Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010</gallery>
+
+== more comes ==
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_jap.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+# test data
+img: Yasukuni Radha Binod Pal Commending Stele.jpg
+pattern: [Yy]asukuni[_ ]Radha[_ ]Binod[_ ]Pal[_ ]Commending[_ ]Stele\.jpg
+repl_img: Remix1.jxr
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_jap.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]]
+[[画像:Yasukuni Radha Binod Pal Commending Stele.jpg|thumb|パール判事の顕彰碑(東京九段・[[靖国神社]]内・[[遊就館]]前にて撮影)]]
+[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_jap.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]]
+[[画像:Remix1.jxr|thumb|パール判事の顕彰碑(東京九段・[[靖国神社]]内・[[遊就館]]前にて撮影)]]
+[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_jap.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,2 @@
+[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]]
+[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_latest.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+# test data
+img: Charles_Turner04.jpg
+pattern: [Cc]harles[_ ]Turner04\.jpg
+repl_img: Charles_Turner_-_Portrait_of_Charlotte_Cholmondeley_and_son_Henry.jpg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_latest.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,17 @@
+https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290
+
+[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]]
+He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:<ref>{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}}</ref>
+*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767)
+*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758)
+*a son (born and died 14 September 1759)
+*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779)
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+[[File:Charles Turner04.jpg|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]]
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
+
+On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]].
+
+He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_latest.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,17 @@
+https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290
+
+[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]]
+He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:<ref>{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}}</ref>
+*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767)
+*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758)
+*a son (born and died 14 September 1759)
+*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779)
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+[[File:Charles Turner - Portrait of Charlotte Cholmondeley and son Henry.jpg|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]]
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
+
+On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]].
+
+He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_latest.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,16 @@
+https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290
+
+[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]]
+He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:<ref>{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}}</ref>
+*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767)
+*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758)
+*a son (born and died 14 September 1759)
+*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779)
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
+
+On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]].
+
+He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_movingspace.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,3 @@
+img: Original-Spezi-Flaschen.jpg
+pattern: [Oo]riginal\-Spezi\-Flaschen\.jpg
+repl_img: Keine.webp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_movingspace.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,12 @@
+
+{{Infobox Marke
+|Name             = Spezi
+|Logo             = 
+|Besitzer         = [[Brauhaus Riegele]]
+|Einfuehrungsjahr = [[1965]]
+|Produkte         = Erfrischungsgetränke
+|Maerkte          = Deutschland
+|Website          = [https://www.spezi.com/ www.spezi.com]
+}}
+[[File:Original-Spezi-Flaschen.jpg|mini|Original-Spezi 0,5 l Flaschen mit Schraubverschluss sowie [[Kronkorken]]. Der Vertrieb der Schraubverschlussflaschen erfolgt in den blauben [[Getränkekiste]], mit Kronkorken in orangen Kisten.]]
+'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,<ref>[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister]</ref> einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. „Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]].
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_movingspace.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,12 @@
+
+{{Infobox Marke
+|Name             = Spezi
+|Logo             = 
+|Besitzer         = [[Brauhaus Riegele]]
+|Einfuehrungsjahr = [[1965]]
+|Produkte         = Erfrischungsgetränke
+|Maerkte          = Deutschland
+|Website          = [https://www.spezi.com/ www.spezi.com]
+}}
+[[File:Keine.webp|mini|Original-Spezi 0,5 l Flaschen mit Schraubverschluss sowie [[Kronkorken]]. Der Vertrieb der Schraubverschlussflaschen erfolgt in den blauben [[Getränkekiste]], mit Kronkorken in orangen Kisten.]]
+'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,<ref>[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister]</ref> einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. „Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]].
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_movingspace.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,11 @@
+
+{{Infobox Marke
+|Name             = Spezi
+|Logo             = 
+|Besitzer         = [[Brauhaus Riegele]]
+|Einfuehrungsjahr = [[1965]]
+|Produkte         = Erfrischungsgetränke
+|Maerkte          = Deutschland
+|Website          = [https://www.spezi.com/ www.spezi.com]
+}}
+'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,<ref>[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister]</ref> einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. „Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]].
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_no-text-removal.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,6 @@
+#
+# TODO: this needs fixing! Complete rewrite of replacement engine.
+#
+img: Liane Foly (c) Pingouin.png
+pattern: [Ll]iane[_ ]Foly[_ ]\(c\)[_ ]Pingouin\.png
+repl_img: NoneAtAll.webp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_no-text-removal.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,5 @@
+Marker
+:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:Liane Foly (c) Pingouin.png]] et le nouveau nom [[VRT]] ici pour éviter toute interruption.
+But remove [[File:Liane Foly (c) Pingouin.png]]
+And remove [[File:Liane Foly (c) Pingouin.png|this one]] too.
+rekraM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_no-text-removal.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,5 @@
+Marker
+:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:NoneAtAll.webp]] et le nouveau nom [[VRT]] ici pour éviter toute interruption.
+But remove [[File:NoneAtAll.webp]]
+And remove [[File:NoneAtAll.webp|this one]] too.
+rekraM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_no-text-removal.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,5 @@
+Marker
+:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:Liane Foly (c) Pingouin.png]] et le nouveau nom [[VRT]] ici pour éviter toute interruption.
+But remove 
+And remove too.
+rekraM
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_normal.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+# test data
+img: foo bar.jxs
+pattern: [Ff]oo[_ ]bar\.jxs
+repl_img: zig zag.png
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_normal.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,23 @@
+we love us foo and bar and jxs
+gallery
+file:foo bar.jxs | adder |badder
+file:elso.png
+Fájl:Foo_bar.jxs
+file:masodik.png
+image:foo_bar.jxs|flop flop
+image:harom.png
+foo_bar.jxs | what
+image:negy.png
+foo_bar.jxs
+aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is.
+legyen [[kep:foo bar.jxs]] vagy [[Image:foo bar.jxs|110px|foo=bar]] esetleg [[:file:foo bar.jxs|pix|bpx]] is.
+{{template
+ize = kép:foo bar.jxs
+mize = foo bar.jxs
+micsoda = [[image:foo bar.jxs]]
+mindegy
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+[[File:foo bar.jxs|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]]
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_normal.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,23 @@
+we love us foo and bar and jxs
+gallery
+file:Zig zag.png | adder |badder
+file:elso.png
+Fájl:Zig zag.png
+file:masodik.png
+image:Zig zag.png|flop flop
+image:harom.png
+Zig zag.png | what
+image:negy.png
+Zig zag.png
+aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is.
+legyen [[kep:Zig zag.png]] vagy [[Image:Zig zag.png|110px|foo=bar]] esetleg [[:file:Zig zag.png|pix|bpx]] is.
+{{template
+ize = kép:Zig zag.png
+mize = Zig zag.png
+micsoda = [[image:Zig zag.png]]
+mindegy
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+[[File:Zig zag.png|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]]
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_normal.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,17 @@
+we love us foo and bar and jxs
+gallery
+file:elso.png
+file:masodik.png
+image:harom.png
+image:negy.png
+aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is.
+legyen vagy esetleg [[:file:foo bar.jxs|pix|bpx]] is.
+{{template
+ize =
+mize =
+micsoda = 
+mindegy
+*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828)
+
+
+*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_punctuation.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+# test data
+img: Remix0.png
+pattern: [Rr]emix0\.png
+repl_img: Remix1.jxr
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_punctuation.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,8 @@
+| logo alt               = 
+| screenshot             = Remix0.png<!-- filename only (no wikilink, no Image:/File:) -->
+| screenshot_size        = 
+
+| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]]
+| image00 = Remix0.png{{!}}border| caption16 = [[نسرین ستوده]]
+| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]]
+| image17 = Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_punctuation.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,8 @@
+| logo alt               = 
+| screenshot             = Remix1.jxr<!-- filename only (no wikilink, no Image:/File:) -->
+| screenshot_size        = 
+
+| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]]
+| image00 = Remix1.jxr{{!}}border| caption16 = [[نسرین ستوده]]
+| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]]
+| image17 = Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_punctuation.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,8 @@
+| logo alt               = 
+| screenshot             =<!-- filename only (no wikilink, no Image:/File:) -->
+| screenshot_size        = 
+
+| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]]
+| image00 ={{!}}border| caption16 = [[نسرین ستوده]]
+| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]]
+| image17 = Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni1.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,4 @@
+# test data
+img: delta747jtsarkis.jpg
+pattern: [Dd]elta747jtsarkis\.jpg
+repl_img: zig চিত্ৰ zag.png
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni1.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,15 @@
+== Flota ==
+=== Delta Air Lines Company ===
+==== Delta Airlines ====
+[[Archivo:Delta747jtsarkis.jpg|300px|thumb|[[Boeing 747]] de [[Delta Airlines]] ]]
+<center>
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\"
+|+ ```Flota de Delta Air Lines```
+imago = চিত্ৰ:delta747jtsarkis.jpg
+pitturo = delta747jtsarkis.jpg
+|- bgcolor=lightblue
+[[চিত্ৰ:delta747jtsarkis.jpg|thumb|বাবৰী মছজিদ]]
+!Tipo de Avión
+!Total
+!Pasajeros<br /><small>(Primera*/Economica)</small>
+!Rutas
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni1.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,15 @@
+== Flota ==
+=== Delta Air Lines Company ===
+==== Delta Airlines ====
+[[Archivo:Zig চিত্ৰ zag.png|300px|thumb|[[Boeing 747]] de [[Delta Airlines]] ]]
+<center>
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\"
+|+ ```Flota de Delta Air Lines```
+imago = চিত্ৰ:Zig চিত্ৰ zag.png
+pitturo = Zig চিত্ৰ zag.png
+|- bgcolor=lightblue
+[[চিত্ৰ:Zig চিত্ৰ zag.png|thumb|বাবৰী মছজিদ]]
+!Tipo de Avión
+!Total
+!Pasajeros<br /><small>(Primera*/Economica)</small>
+!Rutas
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni1.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,13 @@
+== Flota ==
+=== Delta Air Lines Company ===
+==== Delta Airlines ====
+<center>
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\"
+|+ ```Flota de Delta Air Lines```
+imago =
+pitturo =
+|- bgcolor=lightblue
+!Tipo de Avión
+!Total
+!Pasajeros<br /><small>(Primera*/Economica)</small>
+!Rutas
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni2.data	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,5 @@
+# test data
+img: delta747jtsarkis.jpg
+pattern: [Dd]elta747jtsarkis\.jpg
+repl_img: zig চিত্ৰ zag.png
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni2.in	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,10 @@
+assami [[চিত্ৰ:delta747jtsarkis.jpg|thumb|বাবৰী মছজিদ]] two
+also 
+{|
+|table
+|-
+| bgcolor=#DDEEFF | ৬
+| bgcolor=#DDEEFF | [[চিত্ৰ:delta747jtsarkis.jpg|100px|centre]]
+| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]]
+| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]]
+|}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni2.replace	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,10 @@
+assami [[চিত্ৰ:Zig চিত্ৰ zag.png|thumb|বাবৰী মছজিদ]] two
+also 
+{|
+|table
+|-
+| bgcolor=#DDEEFF | ৬
+| bgcolor=#DDEEFF | [[চিত্ৰ:Zig চিত্ৰ zag.png|100px|centre]]
+| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]]
+| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]]
+|}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test/_t_uni2.unlink	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,10 @@
+assami two
+also 
+{|
+|table
+|-
+| bgcolor=#DDEEFF | ৬
+| bgcolor=#DDEEFF | 
+| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]]
+| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]]
+|}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinquent_files/test_regex.php	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,177 @@
+#!/usr/bin/php
+<?php
+## By Peter 'grin' Gervai, 2022
+## GPLv3+ and CC-By-Sa-4.0 
+##
+## $Id$
+##
+## This file is the testing "environment" for the regex matching.
+## Tries to test unlink (removal) and replacement rules.
+##
+## The input files are in _t_*.in, and the unlink and replace
+## expected results are in _t_*.unlink and _t_*.replace.
+## The tests are discovered from the _t_*.data files; each of them
+## gives the original image, its expected matching pattern (which is
+## also verified) and the replacement image.
+##
+
+# set to 1 to print the generated pattern of every test
+$DEBUG=0;
+
+# the includes and $dir_tests use ./ paths, i.e. they are resolved against
+# the current working directory
+require_once( './matcher.inc' );
+require_once( './debug.inc' );
+
+$d = new Debug;
+#$d->set_level(9); # trace
+
+$matcher = new Matcher($d);
+
+
+# files are in
+#  input:            _t_{id}.in
+#  test data:        _t_{id}.data
+#  good replacement: _t_{id}.replace
+#  good unlink:      _t_{id}.unlink
+
+$dir_tests = './test';
+# fall back to an empty list so the loops stay valid when no test is found
+$testnames = collect_test_names($dir_tests) ?: array();
+
+# keys every _t_{id}.data file has to provide
+$data_keys = array( 'img', 'pattern', 'repl_img' );
+
+# test id => settings read from its data file; initialised here so the run
+# loop never sees an undefined variable
+$tests = array();
+foreach ($testnames as $t) {
+    $tests[$t] = get_test_data( $dir_tests, $t, $data_keys );
+}
+
+# number of passed and failed checks
+$test_res = array( 'ok' => 0, 'bad' => 0 );
+
+# run every test: verify the generated pattern first, then the unlink and
+# the replacement results against the expected files
+foreach ($tests as $key => $t) {
+    #print( "Running test '$key'..." );
+
+    $img = $t["img"];
+    $pattern = test_prepare_pattern( $img );
+    if( $DEBUG ) {print(" Pat=$pattern\n");}
+
+    # test_result() returns 1 for a failed check and 0 for a passed one
+    $verdicts = array();
+    $verdicts[] = test_result( $pattern, $t["pattern"], "pattern", $img, $key );
+
+    # "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
+    $text = read_text_from_file( "{$dir_tests}/_t_{$key}.in" );
+    $t_unlink = read_text_from_file( "{$dir_tests}/_t_{$key}.unlink" );
+    $t_replace = read_text_from_file( "{$dir_tests}/_t_{$key}.replace" );
+
+    $res = test_regex_unlink( $text, $pattern );
+    $verdicts[] = test_result( $res, $t_unlink, "unlink", $img, $key );
+
+    $res = test_regex_replace( $text, $pattern, $t["repl_img"] );
+    $verdicts[] = test_result( $res, $t_replace, "replace", $img, $key );
+
+    # tally the three checks of this test
+    foreach ($verdicts as $failed) {
+        if( $failed == 1 ) {
+            $test_res['bad']++;
+        } else {
+            $test_res['ok']++;
+        }
+    }
+}
+
+print( "\nResults:\n" );
+print( "OK : " . $test_res['ok'] . "\nBAD: " . $test_res['bad'] ."\n" );
+
+
+## Collect the ids of the tests found in directory $dir.
+##
+## A test id is the {id} part of a non-directory entry named _t_{id}.data.
+##
+## @param string $dir directory to scan
+## @return array list of test ids in scandir() order, empty when none is found
+function collect_test_names($dir) {
+    # start with an empty list so the caller always gets an array back
+    $tests = array();
+
+    $dlist = scandir($dir);
+    if( $dlist === false ) {
+        die( "Cannot read directory $dir" );
+    }
+
+    foreach ($dlist as $val) {
+        if( !is_dir( $dir . '/' . $val  ) &&  preg_match( '/^_t_(.+)\.data$/', $val, $matches ) ) {
+            $tests[] = $matches[1];
+        }
+    }
+    return $tests;
+}
+
+
+## Read the settings of test $t from the file {dir}/_t_{t}.data.
+##
+## Every line shaped like "key: value" is stored; lines that do not have
+## this shape are skipped. When a key occurs more than once the last value
+## wins.
+##
+## @param string $dir  directory holding the test files
+## @param string $t    test id
+## @param array  $keys keys that are both allowed and required in the file
+## @return array key => value map
+##
+## Raises E_USER_ERROR when the file cannot be opened, when it contains a
+## key that is not in $keys, or when one of $keys is missing.
+function get_test_data($dir, $t, $keys) {
+    # defined up front so the required-key check below also works for a
+    # data file without any "key: value" line
+    $test = array();
+
+    $f = fopen( "{$dir}/_t_{$t}.data", 'r');
+    if( $f ) {
+        while( ($line = fgets($f)) !== false ) {
+            // process a line
+            if( preg_match( '/^(\S+)\s*:\s*(.+)$/', $line, $matches ) ) {
+                # print( "Test $t {$matches[1]} => {$matches[2]}\n" );
+                if( !in_array( $matches[1], $keys ) ) {
+                    trigger_error( "Unknown key '{$matches[1]}' in {$t}.data", E_USER_ERROR );
+                }
+                # "." also matches the "\r" of a CRLF line ending; drop it
+                $test[$matches[1]] = rtrim( $matches[2], "\r\n" );
+            }
+        }
+        fclose( $f );
+    } else {
+        trigger_error( "test $t data file is missing", E_USER_ERROR );
+    }
+
+    foreach ($keys as $k) {
+        if( !array_key_exists( $k, $test ) ) {
+            trigger_error( "test $t data is missing $k key", E_USER_ERROR );
+        }
+    }
+
+    return $test;
+}
+
+
+## Compare the outcome of one check with its expected value and report it.
+##
+## A one-line verdict is printed in both cases. On a mismatch the actual
+## result is also written to _bad_{id}.{name} in the current directory, so
+## that it can be compared with the expected file.
+##
+## @param mixed  $result   value produced by the code under test
+## @param string $expected value the check should have produced
+## @param string $name     kind of check; 'pattern' gets its own message
+## @param string $img      image name, used only in the printed message
+## @param string $id       test id, used in the message and the file names
+## @return int 1 when the result differs from the expectation, 0 otherwise
+function test_result( $result, $expected, $name, $img, $id ) {
+    global $dir_tests;
+
+    # Compare strictly: the loose <> operator treats numeric-looking strings
+    # such as "1e3" and "1000" as equal and cannot tell null from "".
+    if( $result === $expected ) {
+        print( "{$id} OK $name ($img)!\n" );
+        return 0;
+    }
+
+    if( $name === 'pattern' ) {
+        # the _bad_ file receives $result, so the message calls it the result
+        print( "{$id} BAD $name ($img)! expected '$expected', result '$result', fix in {$dir_tests}/_t_{$id}.data, result is in _bad_{$id}.{$name}\n" );
+    } else {
+        print( "{$id} BAD $name ($img)! diff -u {$dir_tests}/_t_{$id}.{$name} _bad_{$id}.{$name}\n" );
+    }
+    write_text_to_file( "_bad_{$id}.{$name}", $result );
+    return 1;
+}
+
+
+## Return the complete content of file $fname as a string.
+##
+## Uses file_get_contents() so that the file is read in full regardless of
+## its size. Terminates the script with a message when the file cannot be
+## read.
+##
+## @param string $fname path of the file to read
+## @return string the file content ("" for an empty file)
+function read_text_from_file( $fname ) {
+    $text = file_get_contents( $fname );
+    if( $text === false ) {
+        die( "Cannot read file $fname" );
+    }
+    return $text;
+}
+
+
+## Write $text into file $fname, replacing any previous content.
+##
+## Terminates the script with a message when the file cannot be created or
+## the text cannot be written completely.
+##
+## @param string $fname path of the file to write
+## @param string $text  content to store
+function write_text_to_file($fname, $text) {
+    #print("Recording into $fname text '$text'");
+    # file_put_contents() returns false on open and on write failures alike
+    if( file_put_contents( $fname, $text ) === false ) {
+        die( "Cannot create file $fname" );
+    }
+}
+
+
+## Hand $file to the global $matcher object and return what its
+## matcher_prepare_pattern() method gives back.
+## NOTE(review): presumably the regex pattern for that image name; the
+## method is defined in matcher.inc - confirm there.
+function test_prepare_pattern( $file ) {
+    global $matcher;
+    $prepared = $matcher->matcher_prepare_pattern( $file );
+    return $prepared;
+}
+
+
+## Run matcher_do_unlink() of the global $matcher object on $text with
+## $pattern and return its result unchanged.
+## NOTE(review): presumably the text with the matching image references
+## removed; the method is defined in matcher.inc - confirm there.
+function test_regex_unlink( $text, $pattern ) {
+    global $matcher;
+    $unlinked = $matcher->matcher_do_unlink( $text, $pattern );
+    return $unlinked;
+}
+
+
+## Run matcher_do_replacement() of the global $matcher object on $text with
+## $pattern and $newimg and return its result unchanged.
+## NOTE(review): presumably the text with the matching image references
+## pointed at $newimg; the method is defined in matcher.inc - confirm there.
+function test_regex_replace( $text, $pattern, $newimg ) {
+    global $matcher;
+    $replaced = $matcher->matcher_do_replacement( $text, $pattern, $newimg );
+    return $replaced;
+}

mercurial