# HG changeset patch
# User Peter Gervai
# Date 1674506222 -3600
# Node ID 3b714bbb13471d7793a2052b9d993babf7e24b63
Add files without passwords and other unwanted fluff.

diff -r 000000000000 -r 3b714bbb1347 delinker_fixer/BotSecrets.pm-sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/delinker_fixer/BotSecrets.pm-sample Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,13 @@
+package BotSecrets;
+
+use strict;
+use warnings;
+
+use Exporter;
+our @ISA = qw/ Exporter /;
+our @EXPORT = qw/ $db_user $db_pw $bu $bp /;
+
+our ($db_user, $db_pw) = ("s5****", "***");
+#my ($bu,$bp) = ('****', '****' );
+our ($bu,$bp) = ('****', '****' );
+1;
diff -r 000000000000 -r 3b714bbb1347 delinker_fixer/defixer.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/delinker_fixer/defixer.pl Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,281 @@
+#!/usr/bin/perl
+#$Id$
+#
+# cleanup after delinker
+#
+# Selects 'replace' events from the CommonsDelinquent tool database (fixed
+# done value and timestamp window, see "prepare search" below).  For every
+# event whose page was not edited by anyone else since, $bot->revert() is
+# called with the revision recorded in the event and the event is set to done=0.
+#
+# (c)Peter [[grin]] Gervai, 2022
+# cc-by-sa-4.0-int / gplv3+
+#
+
+use strict;
+use warnings;
+use utf8;
+
+use File::Basename;
+use lib dirname(__FILE__);
+use BotSecrets;
+
+use MediaWiki::Bot qw(:constants);
+use DBI;
+
+binmode( STDOUT, ':utf8' );
+
+$|=1;
+
+my ($db_name, $db_host, $db_port) = ("s52421__commonsdelinquent_p", "tools.db.svc.wikimedia.cloud", 3306);
+## my ($db_user, $db_pw) = ("xxxxxxxxx", "xxxxxxxxxx");
+
+## values this script writes into event.done
+my $DONE_REQUEUED     = 0;     # bad edit reverted
+my $DONE_PAGE_CHANGED = 42;    # somebody edited the page since, cannot undo
+my $DONE_LOGIN_FAILED = 43;    # revert failed and re-login failed as well
+
+d("Start");
+
+## connect db
+my $dsn = "DBI:mysql:database=$db_name;host=$db_host;port=$db_port";
+my $dbh = DBI->connect( $dsn, $db_user, $db_pw, { mysql_enable_utf8=>1, RaiseError=>0, AutoCommit=>0 } )
+    or die "Cannot connect to DB: $DBI::errstr";
+$dbh->{mysql_enable_utf8} = 1;
+
+## prepare sql
+#my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND file=? AND done=? AND timestamp BETWEEN ? AND ?" );
+my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND done=? AND timestamp BETWEEN ? AND ?" );
+
+my $sth_update = $dbh->prepare( "UPDATE event SET done=? WHERE id=?" );
+
+## connect bot (enwp)
+#my ($bu,$bp) = ('xxxxxxxxxxxxx', 'xxxxxxxxxxxxxx' );
+our $bot = MediaWiki::Bot->new({
+    host => 'en.wikipedia.org',
+    login_data => { username => $bu, password=> $bp },
+    do_sul => 1,
+    operator => 'grin',
+    protocol => 'https',
+    debug => 2,
+    assert => 'user',
+});
+
+die "Cannot login" unless $bot;
+
+## database name of the wiki the bot was last pointed at.  Starts empty so
+## that the first row always selects its wiki explicitly.
+my $last_wiki = '';
+
+## rev
+my $revert_message = "Undoing CommonsDelinker bad replace, will be retried later.";
+
+## prepare search
+my $action = 'replace';
+#my $file = 'S-3A_MAD_DN-SC-87-05743.JPEG';
+my $done = 127;
+my ($ts_from, $ts_to) = ('20220502000000', '20220506130000');
+#my $res = $sth->execute( $action, $file, $done, $ts_from, $ts_to );
+my $res = $sth->execute( $action, $done, $ts_from, $ts_to );
+if( $dbh->err ) {
+    die "Error doing SQL: " . $dbh->errstr;
+}
+print $sth->rows . " rows found.\n";
+
+## results
+while( my $row = $sth->fetchrow_hashref ) {
+    for my $key (sort keys %$row) {
+        my $value = defined $row->{$key} ? $row->{$key} : '';    # NULL column
+        print "$key=" . $value . " ";
+    }
+    print "\n";
+
+    # point the bot at the wiki of this row
+    if( $last_wiki ne $row->{wiki} ) {
+        my $wikidata = get_wikidata( $row->{wiki} );
+        next unless $wikidata;
+        $bot->set_wiki( $wikidata );
+        $last_wiki = $row->{wiki};
+    }
+
+    # last revision not made by CommonsDelinker
+    my $revid = $bot->get_last( $row->{page}, 'CommonsDelinker' );
+    if( !defined( $revid ) ) {
+        d("Revid is missing!! skipping $row->{wiki}:$row->{page}!!");
+        #error("missing revid");
+        next;
+    }
+
+    if( $revid != $row->{revision} ) {
+        d( " Page changed, oldid $row->{revision} - newid $revid; skipping (update db).");
+        set_done( $row, $DONE_PAGE_CHANGED );
+        $dbh->commit;
+        next;
+    }
+
+    d(" Page unchanged, undo possible! REVERTING $row->{wiki}:$row->{page}");
+
+    # revert
+    if( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
+        d( "Success. Updating.");
+        requeue_or_exit( $row );
+    } else {
+        d( " Revert #1 failed, try to login.");
+        if( !$bot->login( { username=>$bu, password=>$bp } ) ) {
+            d( "Login failed into $row->{wiki}. Skipping");
+            error("login failed");
+            set_done( $row, $DONE_LOGIN_FAILED );
+        } elsif( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
+            d( "Success^2. Updating.");
+            requeue_or_exit( $row );
+        } else {
+            d( " Revert #2 failed, leaving $row->{wiki}:$row->{page} id=$row->{id} untouched.");
+        }
+    }
+
+    $dbh->commit;
+}
+d( "Commit.");
+$dbh->commit;
+
+exit;
+
+## print one log line, prefixed with local time and pid
+sub d {
+    my ($s) = @_;
+    print scalar(localtime) . " [$$] $s\n";
+}
+
+
+## report the bot's last error -- disabled, returns immediately
+sub error {
+    return; # doesn't work
+    my ($s) = @_;
+    print "error: $s; " . $bot->{error}->{code} . "; " . $bot->{error}->{details} . "\n";
+
+    #use Data::Dumper;
+    #die Dumper($bot);
+    #exit;
+}
+
+## Write $value into event.done for the event in $row.
+## Returns true on success; logs and returns false on a DB error.
+sub set_done {
+    my ($row, $value) = @_;
+
+    $sth_update->execute( $value, $row->{id} );
+    return 1 unless $dbh->err;
+
+    d( "*** Error updating db for $row->{wiki}:$row->{page} id=$row->{id}!!");
+    return 0;
+}
+
+## Put a successfully reverted event back to done=0.  If that update fails
+## the script commits what it has and stops, as it cannot record its work.
+sub requeue_or_exit {
+    my ($row) = @_;
+
+    return if set_done( $row, $DONE_REQUEUED );
+    $dbh->commit;
+    exit;
+}
+
+## decypher short wikinames
+##
+## Turns a wiki database name ('huwiki') into the { host => ... } hash passed
+## to $bot->set_wiki().  Returns undef when the name cannot be resolved.
+sub get_wikidata {
+    my ($name) = @_;
+
+    d("Decode $name");
+
+    my $host = $bot->db_to_domain( $name );
+    return { host => $host } if defined $host;
+
+    # the bot could not resolve the name: guess from the project suffix
+    if( $name =~ /^(.{2,3}|simple)wiki$/ ) {
+        return { host => "$1.wikipedia.org" };
+    }
+
+    for my $project (qw( wikivoyage wikisource wikiquote wikibooks wiktionary )) {
+        if( $name =~ m/^(.{2,3})\Q$project\E$/ ) {
+            return { host => "$1.$project.org" };
+        }
+    }
+
+    if( $name eq 'wikidatawiki' ) {
+        # wikidata probably not fucked up
+        return;
+    }
+
+    d("*** $name not implemented yet!!! ***");
+    return;
+    #die "decode '$name' isn't implemented yet.";
+}
+
+exit;
+
+## --- never reached: scratch code for trying out MediaWiki::Bot calls ---
+
+my $article_name = 'a';
+
+my $options = { revid => 13849803 };
+my $txt = $bot->get_text($article_name, $options);
+die "error something" unless defined $options->{pageid};
+warn "page doesn't exist" if $options->{pageid} == MediaWiki::Bot::PAGE_NONEXISTENT;
+print "Page length is ". length($txt) . "!\n";
+
+my $pageid = $bot->get_id($article_name);
+die "error something else" unless defined $pageid;
+printf "Page id is %s\n", $pageid;
+
+# last _not_ by user
+my $revid = $bot->get_last($article_name,'no such user');
+printf "Last revid is %s\n", $revid;
+
+$revid = $bot->get_last($article_name,'FoBe');
+printf "Last revid-2 is %s\n", $revid;
+
+$options = { oldid=> 20300641, revid=>20300648 };
+my $diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## commons
+$bot->set_wiki({
+    host => 'commons.wikimedia.org'
+});
+die "Cannot login to commons" unless $bot;
+print "Logged over commons.\n";
+
+$options = { revid=> 568734018, oldid=>628016329 };
+$diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## the bug: the last word of the previous line + \n got into the replacement
+## fix:
+## - if this is the last edit
+##   - revert
+
+__END__
+
+
+
File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian, 1935
+ +
File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian,|{{c|Georgy Malenkov}} and Beria, 1940
+ + + +
File:Берия в суде.jpg|{{c|Georgy Malenkov}} and Beria, 1940
+ + + + +## generic revert +## - adatbazisbol ami done=42 +## - revision +## - ha az az utolso akkor: +## - revert +## - done=0 (pending) +## - ha nem akkor +## - done=666 (fixx it felix) +## diff -r 000000000000 -r 3b714bbb1347 delinquent_files/debug.inc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/debug.inc Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,54 @@ +debuglevel = $l; + $this->warn( "Set debug level to $this->debuglevel" ); + } + + function msg ($s) { + $this->log( 0, $s ); + } + + function error ($s) { + $this->log( 1, $s ); + } + + function warn ($s) { + $this->log( 2, $s ); + } + + function info ($s) { + $this->log( 5, $s ); + } + + function debug ($s) { + $this->log( 8, $s ); + } + + function trace ($s) { + $this->log( 9, $s ); + } + + function log ($level,$msg) { + #$now = strftime("%Y-%m-%d %T"); + if( $level > $this->debuglevel ) { + return; + } + + $now = date('c'); + print( "$now [$level] $msg\n" ); + } +} + diff -r 000000000000 -r 3b714bbb1347 delinquent_files/delinker_job.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/delinker_job.yaml Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,17 @@ +## +## start: +## become commons-delinquent +## toolforge-jobs load delinker_job.yaml +## stop: +## toolforge-jobs flush +## +--- +# continuous job +- image: tf-php74 + name: delinker + command: ./demon.php + continuous: true + emails: none + # mem 512M (max 305) + #mem: 512Mi + mem: 768Mi diff -r 000000000000 -r 3b714bbb1347 delinquent_files/demon.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/demon.php Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,706 @@ +#!/usr/bin/php + [4] + ] ; + + private $d; + private $matcher; + + var $maximum_file_usage_limit = 65535; # prevent memory explosion by extreme used files (65535 ~ 650 MB) + var $delay_minutes = 10 ; # Wait after deletion + var $fallback_minutes = 120 ; # Only used if DB is empty + var $max_text_diff = 1500 ; # Max char diff + var 
$min_faux_template_icon = 500 ; + var $comments = array() ; + var $comments_default = array ( + 'summary' => 'Removing [[:c:File:$1|$1]], it has been deleted from Commons by [[:c:User:$2|$2]] because: $3.' , + 'replace' => 'Replacing $1 with [[File:$2]] (by [[:c:User:$3|$3]] because: $4).' , + 'by' => ' Requested by [[User:$1|]].' + ) ; + + function __construct() { + parent::__construct(); + $this->d = new Debug; + $this->matcher = new Matcher($this->d); + } + + function set_debug_mode($l) { + $this->d->set_level( $l ); + } + + function debug($msg) { + $this->d->debug($msg); + } + + // Returns the last timestamp in the tool database, or a dummy (current time - X min) + function getLastTimestamp () { + # Open tool database + $db = $this->getToolDB() ; + + # Get highest timestamp in tool DB as a starting point + $max_ts = '' ; + $sql = "SELECT max(log_timestamp) AS max_ts FROM event WHERE done=1" ; # Timestamp of Commons logging table, NOT tool edit timestamp! + $result = $this->runQuery ( $db , $sql ) ; + while($o = $result->fetch_object()){ + $max_ts = $o->max_ts ; + } + $db->close() ; + if ( $max_ts == '' ) $max_ts = date ( 'YmdHis' , time() - $this->fallback_minutes*60 ) ; # Fallback to current date minus X min + return $max_ts ; + } + + function isBadPage ( $o , $filename ) { + if ( $o->gil_page_namespace_id == 6 and $o->gil_wiki == 'commonswiki' and $o->gil_to == $filename ) return true ; // Self-reference + if ( $o->gil_page_namespace_id == 2 and $o->gil_wiki == 'commonswiki' and preg_match ( '/^\w+Bot\b/' , $o->gil_page_title ) ) return true ; // Bot subpage on Commons + if ( $o->gil_page_namespace_id == 4 and $o->gil_wiki == 'commonswiki' and preg_match ( '/(Deletion(_| )requests\/.*|Undeletion(_| )requests\/.*)\b/' , $o->gil_page_title ) ) return true ; // DR and UDR on Commons + foreach ( $this->avoidNamespaceOnWiki AS $wiki => $namespaces ) { + if ( $o->gil_wiki != $wiki ) continue ; + foreach ( $namespaces AS $namespace ) { + if ( $namespace == 
$o->gil_page_namespace_id ) return true ; + } + } + return false ; + } + + function getRecentDeletedFiles ( $max_ts ) { + # Open Commons database replica + $db_co = $this->getCommonsDB() ; + $cur_ts = date ( 'YmdHis' , time() - $this->delay_minutes*60 ) ; + + # Get all file deletions + $delink_files = array() ; # Files to delink + $sql = "SELECT * FROM logging_compat LEFT JOIN comment ON comment_id = log_comment_id WHERE log_type='delete' AND log_action='delete' AND log_timestamp>='$max_ts' AND log_timestamp<'$cur_ts' AND log_namespace=6" ; + $sql .= " AND NOT EXISTS (SELECT * FROM image WHERE img_name=log_title)" ; + $sql .= " AND NOT EXISTS (SELECT * FROM page WHERE page_title=log_title AND page_namespace=6 AND page_is_redirect=1)" ; # Do not remove redirects. Is that OK??? + $sql .= " ORDER BY log_timestamp ASC" ; + $result = $this->runQuery ( $db_co , $sql ) ; + while($o = $result->fetch_object()){ + $delink_files[] = $o ; + } + foreach ( $delink_files AS $deletion ) { + $filename = $deletion->log_title ; + $sql = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . "'" ; + $deletion->usage = array() ; # Usage instances for this file + $result = $this->runQuery ( $db_co , $sql ) ; + while($o = $result->fetch_object()){ + if ( $this->isBadWiki($o->gil_wiki) ) continue ; + if ( $this->isBadPage($o,$filename) ) continue ; + $deletion->usage[] = $o ; + } + } + $db_co->close() ; +// print_r ( $delink_files ) ; + return $delink_files ; + } + + function getFileUsage ( $filename ) { + $this->d->trace("Get file usage for $filename"); + $ret = array() ; + $db_co = $this->getCommonsDB() ; + $cur_ts = date ( 'YmdHis' , time() - $this->delay_minutes*60 ) ; + $sql = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . 
"'" ; + $this->d->trace("Try running: $sql"); + $result = $this->runQuery ( $db_co , $sql ) ; + $this->d->trace("Got result, looping through"); + while($o = $result->fetch_object()){ + if ( $this->isBadWiki($o->gil_wiki) ) continue ; + $ret[] = $o ; + // $this->d->trace("Added to ret, len=" . count($ret)); + # limit the maximum matches because we use more memory than toolforge allows + if( count($ret) > $this->maximum_file_usage_limit ) { + $this->d->error("Overflow!! We cannot get usage for $filename, too much hits (>$this->maximum_file_usage_limit)!"); + $ret = array(); + $ret[] = "*OVERFLOW*"; + $db_co->close(); + return $ret; + } + } + $this->d->trace("Processed " . count($ret) . " usage points"); + $db_co->close() ; + return $ret ; + } + + function canUnlinkFromNamespace ( $usage ) { + if ( $usage->gil_page_namespace_id % 2 > 0 ) return false ; // Skip talk pages + if ( $usage->gil_page_namespace_id < 0 ) return false ; // Paranoia + return true ; + } + + function fileExistenceSanityCheck ( $e , $check_commons ) { + if ( $this->hasLocalFile ( $e->wiki , $e->file ) ) { + $this->setDone ( $e->id , 2 , 'Skipped: Local file exists' ) ; + return false ; + } + if ( $check_commons and $this->hasLocalFile ( 'commonswiki' , $e->file ) ) { + $this->setDone ( $e->id , 2 , 'Skipped: Commons file exists' ) ; + return false ; + } + return true ; + } + + + function getTextFromWiki ( $wiki , $pagename ) { + $ret = false ; + $api = $this->getAPI ( $wiki ) ; + if ( $api ) { + $services = new \Mediawiki\Api\MediawikiFactory( $api ); + $page = $services->newPageGetter()->getFromTitle( $pagename ); + $revision = $page->getRevisions()->getLatest(); + + if ( $revision ) { + $ret = $revision->getContent()->getData() ; + } + } + return $ret ; + } + + /** + mode "summary" or "replace" + */ + function getLocalizedCommentPattern ( $wiki , $mode = 'summary') { + if ( !isset($mode) ) $mode = 'summary' ; + if ( isset ( $this->comments[$mode][$wiki] ) ) return $this->comments[$mode][$wiki] ; 
+ $pattern = $this->comments_default[$mode] ; # Default + + # Try local translation + $local = $this->getTextFromWiki ( $wiki , 'User:CommonsDelinker/' . $mode . '-I18n' ) ; + if ( $local !== false ) $pattern = $local ; + + $this->comments[$mode][$wiki] = $pattern ; + return $pattern ; + } + + function constructUnlinkComment ( $file , $usage ) { + $pattern = $this->getLocalizedCommentPattern ( $usage->gil_wiki ) ; + + $c = $file->comment_text ; + if ( $usage->wiki != 'commonswiki' ) { # Point original comment links to Commons + $c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe) + $c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe) + } + + $pattern = preg_replace ( '/\$1/' , $file->log_title , $pattern ) ; + $pattern = preg_replace ( '/\$2/' , $file->log_user_text , $pattern ) ; + $pattern = preg_replace ( '/\$3/' , $c , $pattern ) ; +# print "\n$pattern\n" ; exit ( 0 ) ; // TESTING + return $pattern ; + } + + function constructReplaceComment ( $params ) { + $pattern = $this->getLocalizedCommentPattern ( $params['wiki'] , 'replace' ) ; + + $c = $params['comment'] ; + if ( $params['wiki'] != 'commonswiki' ) { # Point original comment links to Commons + $c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe) + $c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe) + } + + $pattern = preg_replace ( '/\$1/' , $params['file'] , $pattern ) ; + $pattern = preg_replace ( '/\$2/' , $params['replace_with_file'] , $pattern ) ; + $pattern = preg_replace ( '/\$3/' , 'CommonsDelinker' , $pattern ) ; + $pattern = preg_replace ( '/\$4/' , $c , $pattern ) ; + + if ( isset($params['user']) and $params['user'] != '' ) { + $by = $this->getLocalizedCommentPattern ( $params['wiki'] , 'by' ) ; + $by = preg_replace ( '/\$1/' , $params['user'] , $by ) ; + $pattern .= ' ' . 
$by ; + } + + return $pattern ; + } + + function addUnlinkEvent ( $file , $usage , &$sqls ) { + if ( !$this->canUnlinkFromNamespace ( $usage ) ) return ; + if ( $this->hasLocalFile ( $usage->gil_wiki , $usage->gil_to ) ) return ; + + $page = $usage->gil_page_title ; + if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ":$page" ; + $params = array ( + 'action' => 'unlink' , + 'file' => $usage->gil_to , + 'wiki' => $usage->gil_wiki , + 'page' => $page , + 'namespace' => $usage->gil_page_namespace_id , + 'comment' => $this->constructUnlinkComment ( $file , $usage ) , + 'timestamp' => date ( 'YmdHis' ) , + 'log_id' => $file->log_id , + 'log_timestamp' => $file->log_timestamp , + 'done' => 0 + ) ; +# print_r ( $params ) ; + + $s1 = array() ; + $s2 = array() ; + foreach ( $params AS $k => $v ) { + $s1[] = $k ; + $s2[] = "'" . $this->getDBsafe($v) . "'" ; + } + + $sql = "INSERT IGNORE INTO event (" . implode ( ',' , $s1 ) . ") VALUES (" . implode ( "," , $s2 ) . ")" ; + $sqls[] = $sql ; + } + + function addUnlinkEvents ( $delink_files ) { + $sqls = array() ; + foreach ( $delink_files AS $file ) { + foreach ( $file->usage AS $usage ) { + $this->addUnlinkEvent ( $file , $usage , $sqls ) ; + } + } + + $db = $this->getToolDB() ; + foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ; + $db->close() ; + } + + function getJSON4Q ( $e ) { + $q = $e->page ; + $url = "http://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=" . $q ; + $j = json_decode ( file_get_contents ( $url ) ) ; + if ( isset ( $j->entities->$q->missing ) ) { # No such item + $this->setDone ( $e->id , 2 , "No such item $q" ) ; + return false ; + } + if ( !isset ( $j->entities->$q->claims ) ) { + $this->setDone ( $e->id , 2 , "Did not find " . $e->file . " on " . 
$q ) ; + return false ; + } + return $j ; + } + + function performEditUnlinkWikidata ( $e ) { + $j = $this->getJSON4Q ( $e ) ; + if ( $j === false ) return ; + + $q = $e->page ; + $j = $j->entities->$q->claims ; + $remove = array() ; + foreach ( $j AS $prop => $claims ) { + foreach ( $claims AS $c ) { + if ( $c->type != 'statement' ) continue ; + if ( $c->mainsnak->datatype != 'commonsMedia' ) continue ; + if ( str_replace ( ' ' , '_' , ucfirst ( trim ( $c->mainsnak->datavalue->value ) ) ) != $e->file ) continue ; + $remove[] = $c->id ; + } + } + + if ( count($remove) > 0 ) { + $ok = $this->editWikidata ( 'wbremoveclaims' , array ( 'claim'=>implode('|',$remove) , 'summary' => $e->comment ) ) ; + if ( !$ok ) return ; + } + + $this->setDone ( $e->id , 1 ) ; # OK! + } + + + function performEditReplaceWikidata ( $e ) { + $j = $this->getJSON4Q ( $e ) ; + if ( $j === false ) return ; + + $q = $e->page ; + + $j = $j->entities->$q->claims ; + $remove = array() ; + foreach ( $j AS $prop => $claims ) { + foreach ( $claims AS $c ) { + if ( $c->type != 'statement' ) continue ; + if ( $c->mainsnak->datatype != 'commonsMedia' ) continue ; + if ( str_replace ( ' ' , '_' , ucfirst ( trim ( $c->mainsnak->datavalue->value ) ) ) != $e->file ) continue ; + $remove[] = array ( $c->id , $prop ) ; + } + } + + if ( count($remove) > 0 ) { + + # Remove old image entries + $ids = array() ; + foreach ( $remove AS $r ) $ids[] = $r[0] ; + $ok = $this->editWikidata ( 'wbremoveclaims' , array ( 'claim'=>implode('|',$ids) ) ) ; + if ( !$ok ) { + $this->d->error("performEditReplaceWikidata:1 failed"); + return ; + } + + # Add new image entries + foreach ( $remove AS $r ) { + $params = array( + 'snaktype' => 'value' , + 'property' => $r[1] , + 'value' => json_encode(str_replace('_',' ',$e->replace_with_file)) , + 'entity' => $e->page , + 'summary' => $e->comment + ) ; + + $ok = $this->editWikidata ( 'wbcreateclaim' , $params ) ; + if ( !$ok ) { + $this->d->error( "performEditReplaceWikidata:2 
failed" ); + return ; + } + + } + } else { + $this->setDone ( $e->id , 2 , 'File link not found in page' ) ; + return ; + } + + $this->setDone ( $e->id , 1 ) ; # OK! + } + + + ################################################################## + ## + ## Here we make the changes. + ## Get the page, replace content and upload again. + ## + ################################################################## + function performEditText ( $e ) { + $this->d->debug( "performEditText $e->action on id=$e->id wiki=$e->wiki page=$e->page." ); + $api = $this->getAPI ( $e->wiki ) ; + if ( $api === false ) { + $this->setDone ( $e->id , 2 , "Could not connect to API" ) ; + return ; + } + $services = new \Mediawiki\Api\MediawikiFactory( $api ); + try { + $page = $services->newPageGetter()->getFromTitle( $e->page ); + } catch (Exception $ex) { + $this->setDone ( $e->id , 2 , "Page not found" ) ; + $this->d->debug("Page '$e->page' not found ($ex), id='$e->id' wiki=$e->wiki file=$e->file action=$e->action"); + return ; + } + $revision = $page->getRevisions()->getLatest(); + + if ( !$revision ) { + $this->setDone ( $e->id , 2 , "Latest revision not found" ) ; + return ; + } + + $rev_id = $revision->getId() ; + $text = $revision->getContent()->getData() ; + + $file = $e->file ; + $pattern = $this->matcher->matcher_prepare_pattern( $file ); + + $new_text = $text ; + + if ( $e->action == 'unlink' ) { + ## remove image references in text; see ./matcher.inc + $this->d->info("Try to unlink '$pattern' in $e->wiki: $e->page"); + $new_text = $this->matcher->matcher_do_unlink( $new_text, $pattern ); + + } else if ( $e->action == 'replace' ) { + ## replace image with new_file in text; see ./matcher.inc + $new_file = $e->replace_with_file; + $this->d->info("Try to replace '$pattern' with '$new_file' in $e->wiki: $e->page"); + $new_text = $this->matcher->matcher_do_replacement( $new_text, $pattern, $new_file ); + } + + if ( $text == $new_text ) { # No change + $this->setDone ( $e->id , 2 , 'File link 
not found in page' ) ; + $this->d->info( "Article unchanged. id=$e->id; $e->wiki: $e->page" ); + return ; + } + + if ( strlen(trim($new_text)) == 0 or abs(strlen($text)-strlen($new_text)) > $this->max_text_diff ) { + $this->setDone ( $e->id , 2 , 'Text change too big' ) ; + $this->d->warn( "Article text change is too big. id=$e->id; $e->wiki: $e->page" ); + return ; + } + + if ( !isset($e->comment) ) $e->comment = '' ; + $e->comment = (string)$e->comment ; + + $this->d->info( "Editing $e->wiki: $e->page to $e->action $e->file (w/ $new_file) AS $e->comment") ; + + $params = array ( + 'title' => $e->page , + 'text' => trim($new_text) , + 'summary' => $e->comment , + 'bot' => 1 + ) ; + + $x = $this->editWiki ( $e->wiki , 'edit' , $params ) ; + if ( $x and $x['edit']['result'] == 'Success' ) { + $this->setDone ( $e->id , 1 , array('revision'=>$rev_id) ) ; + } else { + $this->d->error( "Cannot edit wiki ($e->wiki: $e->page): " . $this->last_exception ); + $this->setDone ( $e->id , 2 , $this->last_exception ) ; + } + + } + + function performEditReplace ( $e ) { + if ( !$this->fileExistenceSanityCheck($e,false) ) return ; # Nothing to do + if ( !isset($e->namespace) ) return ; # Paranoia + if ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) { # Wikidata item + $this->performEditReplaceWikidata ( $e ) ; + } else { # "Normal" edit + $this->performEditText ( $e ) ; + } + } + + function performEditUnlink ( $e ) { + if ( !$this->fileExistenceSanityCheck($e,true) ) return ; # Nothing to do + if ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) { # Wikidata item + $this->performEditUnlinkWikidata ( $e ) ; + } else { # "Normal" edit + $this->d->debug( "performEditUnlink $e->action on id=$e->id wiki=$e->wiki page=$e->page." ); + $this->performEditText ( $e ) ; + } + } + + function performEdit ( $e ) { + $this->d->debug( "performEdit $e->action on id=$e->id wiki=$e->wiki page=$e->page." 
); + if ( $e->action == 'unlink' ) $this->performEditUnlink ( $e ) ; + else if ( $e->action == 'replace' ) $this->performEditReplace ( $e ) ; + else { + $this->d->error( "PerformEdit got unknown action $e->action" ); + print_r ( $e ) ; + die ( "Unknown action " . $e->action ) ; + } + } + + function clearBogusIssues ( $db ) { + # Clear some previous issues + // 0=pending + // 1=done + // 2=skipped + $sql = "update `event` set done=0,note='' where note like '%rate limit%' and done=2" ; + $this->d->debug("Set done=0 (pending) on 'rate limit' events where done=2(skipped)"); + $this->runQuery ( $db , $sql ) ; + $sql = "update `event` set done=0,note='' where note like '%edit conflict%' and done=2" ; + $this->d->debug("Set done=0 (pending) on 'edit conflict' events where done=2(skipped)"); + $this->runQuery ( $db , $sql ) ; + } + + function performEdits () { + $edits = array() ; + $this->d->debug("Connecting to DB"); + $db = $this->getToolDB() ; + ## this is slow, let's do it at the end + // $this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)"); + // $this->clearBogusIssues ( $db ) ; + $this->d->debug("Get work events (pending events)"); + $sql = "SELECT * FROM `event` WHERE done=0 ORDER BY timestamp ASC,log_timestamp ASC" ; + $result = $this->runQuery ( $db , $sql ) ; + while($o = $result->fetch_object()){ + $edits[] = $o ; + } + $db->close() ; + + $last_wiki = '' ; + foreach ( $edits AS $o ) { + if ( $last_wiki == $o->wiki ) sleep ( 5 ) ; // Edit rate limiter + $this->d->debug("Perform an edit in $o->wiki"); + try { + $this->performEdit ( $o ) ; + } catch (Exception $e) { + echo 'Caught exception: ', $e->getMessage(), "\n"; + } + $last_wiki = $o->wiki ; + } + + $this->d->debug("Connecting to DB"); + $db = $this->getToolDB() ; + $this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)"); + $this->clearBogusIssues ( $db ) ; + $db->close() ; + } + + function addReplaceEvents () { + $cmd_page = 
'User:CommonsDelinker/commands' ; + $this->d->trace("getText from User:CommonsDelinker/commands"); + $t = $this->getTextFromWiki ( 'commonswiki' , $cmd_page ) ; + if ( $t === false ) { + $this->d->error( "Could not open commands page") ; + return ; + } + + if ( preg_match ( '/\{\{[Ss]top\}\}/' , $t ) ) return ; // STOP + + $sqls = array() ; + +# $t = "{{/front}}\n{{universal replace|Overzicht - Hulst - 20118655 - RCE.jpg|Red Weaver Ant, Oecophylla smaragdina.jpg|reason=Testing}}" ; # TESTING + + $this->d->trace("Processing page content..."); + $ts = date ( 'YmdHis' ) ; + $t = explode ( "\n" , $t ) ; + $nt = array() ; + foreach ( $t AS $l ) { + if ( !preg_match ( '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*(\S.*?)\s*\}\}/' , $l , $m ) ) { + if ( !preg_match ( '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*(.+?)\s*\|\s*user\s*=\s*(\S.*?)\s*\}\}/' , $l , $m ) ) { + $nt[] = $l ; + continue ; + } + } + $old_file = ucfirst(str_replace(' ','_',trim($m[1]))) ; + $new_file = ucfirst(str_replace(' ','_',trim($m[2]))) ; + $this->d->trace("Process line; old:$old_file new:$new_file."); + + $comment = trim($m[3]) ; + $user = '' ; + if ( isset($m[4]) ) { + $user = str_replace(' ','_',trim($m[4])) ; + $user = preg_replace ( '/^\s*\[\[[^:]+(.+?)\s*(\||\]\]).*$/' , '$1' , $user ) ; + } + + if ( !$this->hasLocalFile ( 'commonswiki' , $new_file ) ) { + $nt[] = "No such replacement file: " . $l ; + continue ; + } + + if ( !preg_match('/\.svg$/i',$old_file) and preg_match('/\.svg$/i',$new_file) ) { + $nt[] = "Non-SVG to SVG replacement: " . $l ; + continue ; + } + + $this->d->trace("Get file usage for $old_file"); + $usages = $this->getFileUsage ( $old_file ) ; + if( $usages[0] == "*OVERFLOW*" ) { + $nt[] = "File is used on too many pages: " . 
$l; + $this->d->info("Skipping file $old_file; high usage (>$this->maximum_file_usage_limit)!"); + continue; + } + + $this->d->trace("Get db handle for TooDB"); + $db = $this->getToolDB() ; + + $this->d->trace("Generate replaces..."); + foreach ( $usages AS $usage ) { + $page = $usage->gil_page_title ; + if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ':' . $page ; + $params = array ( + 'action' => 'replace' , + 'file' => $old_file , + 'wiki' => $usage->gil_wiki , + 'page' => $page , + 'namespace' => $usage->gil_page_namespace_id , + 'timestamp' => $ts , + 'comment' => $comment , + 'log_id' => -1 , + 'log_timestamp' => $ts , + 'user' => $user , + 'done' => 0 , + 'replace_with_file' => $new_file + ) ; + $params['comment'] = $this->constructReplaceComment ( $params ) ; +// print_r ( $params ) ; + + $s1 = array() ; + $s2 = array() ; + foreach ( $params AS $k => $v ) { + $s1[] = $k ; + $s2[] = "'" . $this->getDBsafe($v) . "'" ; + } + + $this->d->trace("Add SQL to sqls[] array, len=" . count($sqls)); + $sql = "INSERT IGNORE INTO event (" . implode ( ',' , $s1 ) . ") VALUES (" . implode ( "," , $s2 ) . 
")" ; + $sqls[] = $sql ; + $this->d->trace("SQL:$sql"); + + } + + $db->close() ; + + } + + $t = implode ( "\n" , $t ) ; + $nt = implode ( "\n" , $nt ) ; + if ( $t == $nt ) return ; // No change + + # Run SQL + $db = $this->getToolDB() ; + foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ; + $db->close() ; + + # Save new text to Wiki + $params = array ( + 'title' => $cmd_page , + 'text' => trim($nt) , + 'summary' => 'Removing replace commands, will be executed soon' , + 'bot' => 1 + ) ; + + $this->d->info( "Editing $cmd_page...") ; + $x = $this->editWiki ( 'commonswiki' , 'edit' , $params ) ; + $this->d->debug( "Editing $cmd_page done.") ; + } + + function fixFauxTemplateReplacements () { + $todo = array() ; + $db = $this->getToolDB() ; + $sql = "DELETE FROM event WHERE action='' and file=''" ; + $result = $this->runQuery ( $db , $sql ) ; + $sql = 'select file,wiki, count(*) as cnt,namespace from event where done=0 group by file,wiki,namespace having cnt>' . $this->min_faux_template_icon ; + $result = $this->runQuery ( $db , $sql ) ; + while($o = $result->fetch_object()){ + $file = $this->getDBsafe ( $o->file ) ; + $wiki = $this->getDBsafe ( $o->wiki ) ; + $todo[] = "UPDATE event SET done=2,note='Likely template icon, skipping' WHERE file='$file' AND wiki='$wiki' AND namespace=" . 
$o->namespace ; + } + foreach ( $todo AS $sql ) { + $this->runQuery ( $db , $sql ) ; + } + $db->close() ; + } + + // Unlinks deleted files + function run () { + $this->d->debug("Get last timestamp"); + $max_ts = $this->getLastTimestamp() ; + $this->d->debug("Get recent deleted files"); + $delink_files = $this->getRecentDeletedFiles ( $max_ts ) ; + $this->d->debug("Add unlink events for recently deleted files"); + $this->addUnlinkEvents ( $delink_files ) ; + $this->d->debug("Add replace events"); + $this->addReplaceEvents () ; + $this->d->debug("Fix bogus template replacements"); + $this->fixFauxTemplateReplacements() ; + $this->d->debug("Perform the queued edits"); + $this->performEdits() ; + } + + function debug_run0() { + $this->d->debug("Add replace events"); + $this->addReplaceEvents () ; + + } + +} + +print "Bot is starting.\n"; +$demon = new CommonsDelinquentDemon ; + +//$demon->addReplaceEvents () ; +//$demon->performEdits() ; +//$demon->fixFauxTemplateReplacements() ; + +$demon->set_debug_mode(8); + + // test +# $demon->debug_run0(); + //\\\\\\ + +$demon->debug("Performing edits..."); +$demon->performEdits() ; +while ( 1 ) { + $demon->debug("Calling run loop..."); + $demon->run() ; + $demon->debug("Sleeping 30..."); + sleep ( 30 ) ; +} + +?> diff -r 000000000000 -r 3b714bbb1347 delinquent_files/matcher.inc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/matcher.inc Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,117 @@ +d = $debug; + $this->d->msg("Matcher debug initialized"); + } + + ## prepare a filename regex pattern from filename + ## $file: file name without namespace prefix, UTF-8 + ## returns a regex fragment (quoted for the '/' delimiter, used with the /u flag below): + ## first letter in either case when it is a cased letter, the rest quoted, '_' and ' ' interchangeable + function matcher_prepare_pattern( $file ) { + # cut on characters, not bytes: substr() would split a multibyte first letter and the [Aa] class would never be built for it + $first_letter = mb_substr ( $file , 0 , 1 , 'UTF-8' ) ; + $pattern = mb_substr ( $file , 1 , null , 'UTF-8' ) ; + $upper = mb_strtoupper ( $first_letter , 'UTF-8' ) ; + $lower = mb_strtolower ( $first_letter , 'UTF-8' ) ; + # If first letter have upper/lowercase include both as [Aa] + # (only when both forms are one character: a letter that uppercases to two characters would widen the class to unrelated letters) + if ( $upper !== $lower && mb_strlen ( $upper , 'UTF-8' ) == 1 && mb_strlen ( $lower , 'UTF-8' ) == 1 ) { + $first_letter = "[" . $upper . $lower . 
"]" ; + } else { + # escape special characters and also '/' + $first_letter = preg_quote ( $first_letter , '/' ) ; # can be metacharacter + } + ## normalise mediawiki filenames: _ to space, first letter anycase, then space to [_ ] + $pattern = str_replace ( '_' , ' ' , $pattern ) ; + $pattern = $first_letter . preg_quote ( $pattern, '/' ) ; + $pattern = str_replace ( ' ' , '[_ ]' , $pattern ) ; + return $pattern; + } + + ## + ## remove the filename from various places in the text body + ## + function matcher_do_unlink( $text, $pattern ) { + $new_text = $text; + + # unicode \w + $w='[\pL\pM]'; + # unicode \s + $s='\pZ'; + # word end separator (instead of \b, but a zero-width assertion would be nicer) + $we='(?=[^\pL\pM\n]|$)'; + + # filename: " : Image : name.ext" + ### 20220523- request not to remove [[:File:....]] -g + ###$pattern_file = "$s*(: *)?$w+ *: *$pattern" ; # e.g. File:x.jog, Tập_tin:x.jpg + $pattern_file = "$s*$w+ *: *$pattern" ; # e.g. File:x.jog, Tập_tin:x.jpg + # filename in galleries (leading : cannot stand w/o namespace) + $pattern_gfile= "((: *)?$w+ *: *)?$pattern$we" ; + # links + # [[ : image : foo.jpg | pip=pop | flip [[flop]] [http://example.com x] [[zig]] zag ]] + $pattern_link = '\[\[ *' . $pattern_file . "(\[\[.*?\]\]$w*|\[.*?\]|[^\pL\pM\\n\]].*?)*\]\]"; + # if we had to remove the whole line, eat LF, too. + $pattern_link_wholeline = '^\[\[ *' . $pattern_file . "(\[\[.*?\]\]$w*|\[.*?\]|[^\pL\pM\\n\]].*?)*\]\]$s*\\n"; + # gallery entries + # a closing gallery tag sharing the line is captured as named group 'gal', the replace callback puts it back + $pattern_gallery = '\n?^' . $s .'*'. $pattern_gfile .'[^\n]*?((?<gal><\/gallery *>)|$)' ; + # plain gallery entry (not used now) + $pattern_gallery2 = '\n?^'. $s .'*'. $pattern .'[ \t]*\|[^\n]*$' ; + # files within templates + $pattern_template = '= *' . $pattern_gfile . 
' *'; + + $this->d->trace(" PatternLink WL : $pattern_link_wholeline"); + $this->d->trace(" PatternLink : $pattern_link"); + $this->d->trace(" PatternGallery : $pattern_gallery"); + $this->d->trace(" PatternTemplate: $pattern_template"); + + # in normal link (non-multiline pattern) + # if we have to remove the whole line, do it first + $new_text = preg_replace ( "/$pattern_link_wholeline/um" , '' , $new_text ) ; + $this->d->trace("Text after link replacement (wholeline): \n>>>$new_text<<<"); + # otherwise leave one space to keep word separation + $new_text = preg_replace ( "/ *$pattern_link */u" , ' ' , $new_text ) ; + $this->d->trace("Text after link replacement: \n>>>$new_text<<<"); + # in gallery + #$new_text = preg_replace ( "/$pattern_gallery/um" , '' , $new_text ) ; + $new_text = preg_replace_callback ( "/$pattern_gallery/um", + function ($matches) { + # original if no match (doesn't get called), ${gal} if group match, empty if doesn't + if( array_key_exists( 'gal', $matches ) ) { + return $matches['gal']; + } else { + return ''; + } + }, + $new_text ) ; + + $this->d->trace("Text after gallery replacement: \n>>>$new_text<<<"); + # $new_text = preg_replace ( "/$pattern_gallery2/um" , '' , $new_text ) ; + # ? 
+ # $new_text = preg_replace ( "/ *$pattern_file */u" , ' ' , $new_text ) ; + # in template + $new_text = preg_replace ( "/$pattern_template/um" , '=' , $new_text ) ; + $this->d->trace("Text after template replacement: \n>>>$new_text<<<"); + + return $new_text; + } + + ## + ## replace file, don't care much about the context + ## + ## $text: page wikitext; $pattern: regex fragment for the old file name (the form matcher_prepare_pattern() builds); + ## $replacement_file: new file name, '_' or ' ' separated + ## returns the text with every stand-alone occurrence of the old name replaced, or the unchanged text when the regex engine fails + function matcher_do_replacement( $text, $pattern, $replacement_file ) { + # there is no mb_ucfirst + $new_file = ucfirst ( trim ( str_replace ( '_' , ' ' , $replacement_file ) ) ) ; + # the name has to stand alone: line start or a non-letter before it, line end or a non-letter after it + $pattern = '(?<=^|[^\pL\pM\n])'.$pattern.'(?=$|[^\pL\pM])'; + $this->d->trace(" ReplMatch: $pattern"); + # callback instead of a replacement string: '$1', '\1' or '${1}' inside a file name must be inserted literally, not expanded as backreferences + $new_text = preg_replace_callback ( "/$pattern/um" , function ($matches) use ($new_file) { + return $new_file; + } , $text ) ; + if ( $new_text === null ) { + # PCRE error (e.g. invalid UTF-8 in the text): do not hand a null page text back to the caller + $this->d->trace(" Replacement failed, preg error code " . preg_last_error()); + return $text; + } + return $new_text; + } +} diff -r 000000000000 -r 3b714bbb1347 delinquent_files/shared.inc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/shared.inc Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,193 @@ +config = parse_ini_file ( __DIR__.'/bot.cnf' ) ; + } + + // Runs a MySQL query. Optional debugging output, and output of query on error + function runQuery ( $db , $sql ) { + if ( $this->debugging ) print "$sql\n" ; + if(!$result = $db->query($sql)) throw new Exception('There was an error running the query [' . $db->error . ']:'."\n$sql\n"); + return $result ; + } + + function getCommonsDB () { + $this->last_db = openDB ( 'commons' , 'wikimedia' ) ; + return $this->last_db ; + } + + function getToolDB () { + $this->last_db = openToolDB ( 'commonsdelinquent_p' ) ; + return $this->last_db ; + } + + function getDBsafe ( $s ) { + if ( !isset ( $this->last_db ) ) die ( "getDBsafe called before database was opened!" 
) ; + return $this->last_db->real_escape_string ( $s ) ; + } + + function isBadWiki ( $wiki ) { + if ( $wiki == 'ukwikimedia' ) return true ; + if ( preg_match ( '/^wikimania/' , $wiki ) ) return true ; + if ( preg_match ( '/strategy/' , $wiki ) ) return true ; + if ( preg_match ( '/foundation/' , $wiki ) ) return true ; +# if ( preg_match ( '/outreach/' , $wiki ) ) return true ; + + if ( preg_match ( '/mxwikimedia/' , $wiki ) ) return true ; + if ( preg_match ( '/rswikimedia/' , $wiki ) ) return true ; + + if ( preg_match ( '/tenwiki/' , $wiki ) ) return true ; + if ( preg_match ( '/stqwiki/' , $wiki ) ) return true ; + + if ( preg_match ( '/enwikinews/' , $wiki ) ) return true ; + + if ( preg_match ( '/testwikidatawiki/' , $wiki ) ) return true ; +# if ( preg_match ( '/^suwiki$/' , $wiki ) ) return true ; + if ( preg_match ( '/usability/' , $wiki ) ) return true ; + # SUL LOGIN not working + if ( $wiki == 'donatewiki' ) return true ; + if ( $wiki == 'idwikimedia' ) return true ; + if ( $wiki == 'bdwikimedia' ) return true ; + if ( $wiki == 'maiwikimedia' ) return true ; + if ( $wiki == 'amwikimedia' ) return true ; + if ( $wiki == 'gewikimedia' ) return true ; + + if ( $wiki == 'mniwiki' ) return true ; # -grin 2022-02-05 +# if ( $wiki == 'vecwiki' ) return true; # -grin 2022-10-03 + + if ( $wiki == 'mnwwiktionary' ) return true ; # -grin 2022-03-10 + if ( $wiki == 'mniwiktionary' ) return true ; # -grin 2022-03-10 + if ( $wiki == 'shnwiktionary' ) return true ; # -grin 2022-07-18 + if ( $wiki == 'niawiktionary' ) return true ; # -grin 2022-09-06 + + if ( $wiki == 'wawikisource' ) return true; # -grin 2022-05-02 + if ( $wiki == 'banwikisource' ) return true; # -grin 2022-08-08 + +# if ( $wiki == 'fiwikivoyage' ) return true ; +# if ( $wiki == 'brwikisource' ) return true ; +# if ( $wiki == 'liwikibooks' ) return true ; +# if ( $wiki == 'liwikisource' ) return true ; + return false ; // Wiki is OK + } + + function hasLocalFile ( $wiki , $file ) { + $ret = false ; 
+# print "OPENING 1: $wiki\n" ; + $db = openDBwiki ( $wiki ) ; + if ( $db === false ) { + print "FAILED TO OPEN $wiki - returning false\n" ; + return false ; + } + $this->last_db = $db ; + /// hack by grin, 2021-03-01; getDBsafe may return empty! + $sql_name = $this->getDBsafe(str_replace(' ','_',$file)); + if( $sql_name == "" ) return false; + /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + $sql = "SELECT * FROM image WHERE img_name='" . $sql_name . "' LIMIT 1" ; + try { + $result = $this->runQuery ( $this->last_db , $sql ) ; + while($o = $result->fetch_object()) $ret = true ; + } catch (Exception $e) { + echo 'Caught exception: ', $e->getMessage(), "\n"; + } + return $ret ; + } + + function setDone ( $id , $value , $meta = null) { + $db = $this->getToolDB() ; + $sql = "UPDATE event SET done=" . ($value*1) ; + if ( isset ( $meta ) ) { + if ( !is_array ( $meta ) ) $meta = array ( 'note' => $meta ) ; + foreach ( $meta AS $k => $v ) { + $sql .= ",$k='" . $this->getDBsafe($v) . "'" ; + } + } + $sql .= " WHERE id=" . ($id*1) ; + if ( $value != 1 ) print "$sql\n" ; + $this->runQuery ( $db , $sql ) ; + $db->close() ; + } + + function wiki2server ( $wiki ) { + if ( $wiki == 'wikidatawiki' ) return 'www.wikidata.org' ; + if ( $wiki == 'commonswiki' ) return 'commons.wikimedia.org' ; + if ( $wiki == 'mediawikiwiki' ) return 'www.mediawiki.org' ; + if ( $wiki == 'metawiki' ) return 'meta.wikimedia.org' ; + if ( $wiki == 'outreachwiki' ) return 'outreach.wikimedia.org' ; + if ( $wiki == 'incubatorwiki' ) return 'incubator.wikimedia.org' ; + if ( $wiki == 'sourceswiki' ) return 'wikisource.org' ; + if ( $wiki == 'specieswiki' ) return 'species.wikimedia.org' ; + /// fix by grin 2021-03-01: missing close re separator + if ( preg_match ( '/(.+)wikimedia/' , $wiki , $m ) ) return $m[1] . ".wikimedia.org" ; + if ( preg_match ( '/^(wikimania\d+)wiki$/' , $wiki , $m ) ) return $m[1] . 
".wikimedia.org" ; + + if ( preg_match ( '/^(.+?)(wik.+)$/' , $wiki , $m ) ) { + $server = str_replace('_','-',$m[1]) . "." ; + if ( $server == 'be-x-old.' ) $server = 'be-tarask.' ; + + if ( $m[2] == 'wiki' ) $server .= 'wikipedia' ; + else $server .= $m[2] ; + $server .= '.org' ; + return $server ; + } + return false ; + } + + function getAPI ( $wiki ) { + # TODO check if re-opening same API, cache in object + $server = $this->wiki2server ( $wiki ) ; + if ( $server === false ) return false ; + $api = new \Mediawiki\Api\MediawikiApi( 'https://'.$server.'/w/api.php' ); + if ( !$api->isLoggedin() ) { + print "Logging into https://$server/w/api.php\n"; # --grin 2022-02-05 + $x = $api->login( new \Mediawiki\Api\ApiUser( $this->config['name'], $this->config['password'] ) ); + if ( !$x ) return false ; + } + return $api ; + } + + // Posts one API action to a wiki as the bot, then logs out again. + // $wiki: wiki database name (e.g. 'commonswiki'); $action: API action; $params: its parameters ('token' and 'bot' are filled in here). + // Returns the API result, or false when no API object could be obtained or the request threw (message kept in $this->last_exception). + function editWiki ( $wiki , $action , $params ) { + $api = $this->getAPI ( $wiki ) ; + if ( $api === false ) return false ; + $params['token'] = $api->getToken() ; + $params['bot'] = 1 ; + $x = false ; + if ( $this->debugging ) print_r ( $params ) ; + try { + $x = $api->postRequest( new \Mediawiki\Api\SimpleRequest( $action, $params ) ); + } catch (Exception $e) { + echo 'Caught exception: ', $e->getMessage(), "\n"; + $this->last_exception = $e->getMessage() ; + $x = false ; + } + if ( $this->debugging ) print_r ( $x ) ; + //$api->logout() ; + // Log out with a request of its own: only a fresh token, not the edit parameters (title, text, ...) a second time. + // A failing logout must not throw away the result of the edit that was already made, so it is only reported. + try { + $api->postRequest( new \Mediawiki\Api\SimpleRequest( "logout", array ( 'token' => $api->getToken() ) ) ); + } catch (Exception $e) { + echo 'Caught exception during logout: ', $e->getMessage(), "\n"; + } + return $x ; + } + + function editWikidata ( $action , $params ) { + $ret = $this->editWiki ( 'wikidatawiki' , $action , $params ) ; + if ( $this->debugging ) { + if ( is_array($ret) ) print "RET:" . $ret['success'] . 
"\n" ; + else print "RET: FALSE\n" ; + } + if ( is_array($ret) ) return $ret['success'] ; + return false ; + } + +} + +?> diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_desc1.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_desc1.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +img: 10 TND obverse.jpg +pattern: 10[_ ]TND[_ ]obverse\.jpg +repl_img: Халдун.jxr + diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_desc1.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_desc1.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.A., Ibn Khaldun: His life and Works for Mohammad Enan +[[Файл:10 TND obverse.jpg|мини|250px|Изображение на Ибн Халдун върху банкнота от 10 [[тунизийски денар]]а]] +Като цяло е известно че Ибн Халдун е роден в [[Тунис] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_desc1.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_desc1.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.A., Ibn Khaldun: His life and Works for Mohammad Enan +[[Файл:Халдун.jxr|мини|250px|Изображение на Ибн Халдун върху банкнота от 10 [[тунизийски денар]]а]] +Като цяло е известно че Ибн Халдун е роден в [[Тунис] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_desc1.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_desc1.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,2 @@ +Ибн Халдун твърди, че е с арабски произход с цел да придобие висок социален статус.A., Ibn Khaldun: His life and Works for Mohammad Enan +Като цяло е известно че Ибн Халдун е роден в [[Тунис] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_gallery-closing-tag.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/delinquent_files/test/_t_gallery-closing-tag.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +img: Aung San Suu Kyi 1951.jpg +pattern: [Aa]ung[_ ]San[_ ]Suu[_ ]Kyi[_ ]1951\.jpg +repl_img: NothingAtAll.jxr diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_gallery-closing-tag.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_gallery-closing-tag.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,7 @@ +Text page + +Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001 +Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010 +Archivo:Aung San Suu Kyi 1951.jpg|Aung San Suu Kyi en 1951 + +== more comes == diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_gallery-closing-tag.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_gallery-closing-tag.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,7 @@ +Text page + +Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001 +Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010 +Archivo:NothingAtAll.jxr|Aung San Suu Kyi en 1951 + +== more comes == diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_gallery-closing-tag.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_gallery-closing-tag.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,6 @@ +Text page + +Archivo:David_Thewlis_2008.jpg|[[David Thewlis]] en 2001 +Archivo:Aung San Suu Kyi 17 November 2011.jpg| [[Aung San Suu Kyi]] en 2010 + +== more comes == diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_jap.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_jap.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +# test data +img: Yasukuni Radha Binod Pal Commending Stele.jpg +pattern: [Yy]asukuni[_ ]Radha[_ ]Binod[_ ]Pal[_ ]Commending[_ ]Stele\.jpg +repl_img: Remix1.jxr diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_jap.in --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_jap.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]] +[[画像:Yasukuni Radha Binod Pal Commending Stele.jpg|thumb|パール判事の顕彰碑(東京九段・[[靖国神社]]内・[[遊就館]]前にて撮影)]] +[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_jap.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_jap.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]] +[[画像:Remix1.jxr|thumb|パール判事の顕彰碑(東京九段・[[靖国神社]]内・[[遊就館]]前にて撮影)]] +[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_jap.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_jap.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,2 @@ +[[画像:Radha Binod Pal01.jpg|thumb|パール判事の顕彰碑(京都霊山護国神社にて撮影)]] +[[1886年]]に[[イギリス領インド帝国|英領インド]]・ベンガル州ノディア県クシュティヤ diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_latest.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_latest.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +# test data +img: Charles_Turner04.jpg +pattern: [Cc]harles[_ ]Turner04\.jpg +repl_img: Charles_Turner_-_Portrait_of_Charlotte_Cholmondeley_and_son_Henry.jpg diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_latest.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_latest.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,17 @@ +https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290 + +[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]] +He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. 
He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}} +*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767) +*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758) +*a son (born and died 14 September 1759) +*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779) +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + +[[File:Charles Turner04.jpg|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]] + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. + +On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]]. + +He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772. 
diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_latest.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_latest.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,17 @@ +https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290 + +[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]] +He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}} +*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767) +*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758) +*a son (born and died 14 September 1759) +*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779) +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + +[[File:Charles Turner - Portrait of Charlotte Cholmondeley and son Henry.jpg|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]] + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. 
+ +On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]]. + +He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_latest.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_latest.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,16 @@ +https://en.wikipedia.org/w/index.php?title=Peregrine_Bertie,_3rd_Duke_of_Ancaster_and_Kesteven&action=edit&oldid=1079060290 + +[[Image:Mary, Duchess of Ancaster and Kesteven.jpg|thumb|200px|Mary, Duchess of Ancaster and Kesteven, wife of the 3rd Duke of Ancaster and Kesteven.]] +He married, firstly, Elizabeth Blundell (died 1743), widow of [[Charles Gounter Nicoll]], on 22 May 1735. He married, secondly, [[Mary Bertie, Duchess of Ancaster and Kesteven|Mary Panton]], on 27 November 1750. They had six children:{{cite journal | title=Monumental Memoirs of the Bertie Family | journal=The Gentleman's Magazine | volume=78 | year=1808 | pages=21–22 | url=https://books.google.com/books?id=9hJEAQAAMAAJ&pg=PA21}} +*Lady Mary Catherine Bertie (14 April 1754 – 12 April 1767) +*Peregrine Thomas Bertie, Marquess of Lindsey (21 May 1755 – 12 December 1758) +*a son (born and died 14 September 1759) +*[[Robert Bertie, 4th Duke of Ancaster and Kesteven]] (1756–1779) +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. 
+ +On the death of his father in 1742, he succeeded him in the dukedom and as [[Lord Great Chamberlain]] and [[Lord Lieutenant of Lincolnshire]], and was appointed to the [[Privy Council of Great Britain|Privy Council]]. + +He gained the rank of [[Major-General]] on 19 January 1755, [[Lieutenant-General]] on 3 February 1759 and [[General]] on 25 May 1772. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_movingspace.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_movingspace.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,3 @@ +img: Original-Spezi-Flaschen.jpg +pattern: [Oo]riginal\-Spezi\-Flaschen\.jpg +repl_img: Keine.webp diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_movingspace.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_movingspace.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,12 @@ + +{{Infobox Marke +|Name = Spezi +|Logo = +|Besitzer = [[Brauhaus Riegele]] +|Einfuehrungsjahr = [[1965]] +|Produkte = Erfrischungsgetränke +|Maerkte = Deutschland +|Website = [https://www.spezi.com/ www.spezi.com] +}} +[[File:Original-Spezi-Flaschen.jpg|mini|Original-Spezi 0,5 l Flaschen mit Schraubverschluss sowie [[Kronkorken]]. Der Vertrieb der Schraubverschlussflaschen erfolgt in den blauben [[Getränkekiste]], mit Kronkorken in orangen Kisten.]] +'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister] einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. „Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]]. 
diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_movingspace.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_movingspace.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,12 @@ + +{{Infobox Marke +|Name = Spezi +|Logo = +|Besitzer = [[Brauhaus Riegele]] +|Einfuehrungsjahr = [[1965]] +|Produkte = Erfrischungsgetränke +|Maerkte = Deutschland +|Website = [https://www.spezi.com/ www.spezi.com] +}} +[[File:Keine.webp|mini|Original-Spezi 0,5 l Flaschen mit Schraubverschluss sowie [[Kronkorken]]. Der Vertrieb der Schraubverschlussflaschen erfolgt in den blauben [[Getränkekiste]], mit Kronkorken in orangen Kisten.]] +'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister] einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. „Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]]. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_movingspace.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_movingspace.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,11 @@ + +{{Infobox Marke +|Name = Spezi +|Logo = +|Besitzer = [[Brauhaus Riegele]] +|Einfuehrungsjahr = [[1965]] +|Produkte = Erfrischungsgetränke +|Maerkte = Deutschland +|Website = [https://www.spezi.com/ www.spezi.com] +}} +'''Spezi''' ist der [[Marke (Recht)|Markenname]] eines [[koffein]]haltigen [[Erfrischungsgetränk]]s,[https://register.dpma.de/DPMAregister/marke/register/889780/DE Markenregister] einem Mischgetränk aus [[Cola]] und [[Limonade|Orangenlimonade]]. 
„Spezi“ wird auch oft als [[Gattungsname#Schutz für Produktnamen (generalisierter oder generischer Markenname)|Gattungsbegriff]] verwendet. Die Marke wurde 1956 durch das [[Brauhaus Riegele]] in [[Augsburg]] eingetragen und bezeichnete anfänglich ein [[Bier]]. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_no-text-removal.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_no-text-removal.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,6 @@ +# +# TODO: this needs fixing! Complete rewrite of replacement engine. +# +img: Liane Foly (c) Pingouin.png +pattern: [Ll]iane[_ ]Foly[_ ]\(c\)[_ ]Pingouin\.png +repl_img: NoneAtAll.webp diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_no-text-removal.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_no-text-removal.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,5 @@ +Marker +:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:Liane Foly (c) Pingouin.png]] et le nouveau nom [[VRT]] ici pour éviter toute interruption. +But remove [[File:Liane Foly (c) Pingouin.png]] +And remove [[File:Liane Foly (c) Pingouin.png|this one]] too. +rekraM diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_no-text-removal.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_no-text-removal.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,5 @@ +Marker +:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:NoneAtAll.webp]] et le nouveau nom [[VRT]] ici pour éviter toute interruption. +But remove [[File:NoneAtAll.webp]] +And remove [[File:NoneAtAll.webp|this one]] too. 
+rekraM diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_no-text-removal.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_no-text-removal.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,5 @@ +Marker +:{{re|Chatsam}} Merci pour le rapport. Le fichier a été supprimé par {{u|Gbawden}}. Veuillez utiliser [[:mw:Help:Links/fr#Liens internes|liens internes]] comme [[:File:Liane Foly (c) Pingouin.png]] et le nouveau nom [[VRT]] ici pour éviter toute interruption. +But remove +And remove too. +rekraM diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_normal.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_normal.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +# test data +img: foo bar.jxs +pattern: [Ff]oo[_ ]bar\.jxs +repl_img: zig zag.png diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_normal.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_normal.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,23 @@ +we love us foo and bar and jxs +gallery +file:foo bar.jxs | adder |badder +file:elso.png +Fájl:Foo_bar.jxs +file:masodik.png +image:foo_bar.jxs|flop flop +image:harom.png +foo_bar.jxs | what +image:negy.png +foo_bar.jxs +aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is. +legyen [[kep:foo bar.jxs]] vagy [[Image:foo bar.jxs|110px|foo=bar]] esetleg [[:file:foo bar.jxs|pix|bpx]] is. 
+{{template +ize = kép:foo bar.jxs +mize = foo bar.jxs +micsoda = [[image:foo bar.jxs]] +mindegy +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + +[[File:foo bar.jxs|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]] + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_normal.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_normal.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,23 @@ +we love us foo and bar and jxs +gallery +file:Zig zag.png | adder |badder +file:elso.png +Fájl:Zig zag.png +file:masodik.png +image:Zig zag.png|flop flop +image:harom.png +Zig zag.png | what +image:negy.png +Zig zag.png +aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is. +legyen [[kep:Zig zag.png]] vagy [[Image:Zig zag.png|110px|foo=bar]] esetleg [[:file:Zig zag.png|pix|bpx]] is. 
+{{template +ize = kép:Zig zag.png +mize = Zig zag.png +micsoda = [[image:Zig zag.png]] +mindegy +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + +[[File:Zig zag.png|thumb|left|Lady Cholmondeley and her son [[William Cholmondeley, 3rd Marquess of Cholmondeley|William Henry Hugh Cholmondeley, 3rd Marquess of Cholmondeley]] (1805), by [[Charles Turner (engraver)|Charles Turner]]]] + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_normal.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_normal.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,17 @@ +we love us foo and bar and jxs +gallery +file:elso.png +file:masodik.png +image:harom.png +image:negy.png +aztan [[file:foo bar.jxsgz]] marad meg [[nemfoo bar.jxs]] is. +legyen vagy esetleg [[:file:foo bar.jxs|pix|bpx]] is. +{{template +ize = +mize = +micsoda = +mindegy +*[[Priscilla Bertie, 21st Baroness Willoughby de Eresby|Priscilla Barbara Elizabeth Bertie, Baroness Willoughby de Eresby]] (16 February 1761 – 29 December 1828) + + +*[[Georgiana Charlotte Cholmondeley, Marchioness Cholmondeley|Lady Georgina Charlotte Bertie]] (7 August 1761 – 1838), married [[George Cholmondeley, 1st Marquess of Cholmondeley]], and had issue. 
diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_punctuation.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_punctuation.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +# test data +img: Remix0.png +pattern: [Rr]emix0\.png +repl_img: Remix1.jxr diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_punctuation.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_punctuation.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,8 @@ +| logo alt = +| screenshot = Remix0.png +| screenshot_size = + +| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]] +| image00 = Remix0.png{{!}}border| caption16 = [[نسرین ستوده]] +| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]] +| image17 = Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_punctuation.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_punctuation.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,8 @@ +| logo alt = +| screenshot = Remix1.jxr +| screenshot_size = + +| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]] +| image00 = Remix1.jxr{{!}}border| caption16 = [[نسرین ستوده]] +| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]] +| image17 = Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_punctuation.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_punctuation.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,8 @@ +| logo alt = +| screenshot = +| screenshot_size = + +| image15 = Azar Shiva9.jpg{{!}}border| caption15 = [[آذر شیوا]] +| image00 ={{!}}border| caption16 = [[نسرین ستوده]] +| image16 = Libérez_Nasrin_Sotoudeh!-cut.jpg{{!}}border| caption16 = [[نسرین ستوده]] +| image17 = 
Hayedeh-Persian-Singer-Tehran-1977.jpg{{!}}border| caption17 = [[هایده]] diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni1.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni1.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,4 @@ +# test data +img: delta747jtsarkis.jpg +pattern: [Dd]elta747jtsarkis\.jpg +repl_img: zig চিত্ৰ zag.png diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni1.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni1.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,15 @@ +== Flota == +=== Delta Air Lines Company === +==== Delta Airlines ==== +[[Archivo:Delta747jtsarkis.jpg|300px|thumb|[[Boeing 747]] de [[Delta Airlines]] ]] +
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\" +|+ ```Flota de Delta Air Lines``` +imago = চিত্ৰ:delta747jtsarkis.jpg +pitturo = delta747jtsarkis.jpg +|- bgcolor=lightblue +[[চিত্ৰ:delta747jtsarkis.jpg|thumb|বাবৰী মছজিদ]] +!Tipo de Avión +!Total +!Pasajeros
(Primera*/Economica) +!Rutas diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni1.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni1.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,15 @@ +== Flota == +=== Delta Air Lines Company === +==== Delta Airlines ==== +[[Archivo:Zig চিত্ৰ zag.png|300px|thumb|[[Boeing 747]] de [[Delta Airlines]] ]] +
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\" +|+ ```Flota de Delta Air Lines``` +imago = চিত্ৰ:Zig চিত্ৰ zag.png +pitturo = Zig চিত্ৰ zag.png +|- bgcolor=lightblue +[[চিত্ৰ:Zig চিত্ৰ zag.png|thumb|বাবৰী মছজিদ]] +!Tipo de Avión +!Total +!Pasajeros
(Primera*/Economica) +!Rutas diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni1.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni1.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,13 @@ +== Flota == +=== Delta Air Lines Company === +==== Delta Airlines ==== +
+{| class=\"toccolours sortable\" border=\"1\" cellpadding=\"3\" style=\"border-collapse:collapse\" +|+ ```Flota de Delta Air Lines``` +imago = +pitturo = +|- bgcolor=lightblue +!Tipo de Avión +!Total +!Pasajeros
(Primera*/Economica) +!Rutas diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni2.data --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni2.data Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,5 @@ +# test data +img: delta747jtsarkis.jpg +pattern: [Dd]elta747jtsarkis\.jpg +repl_img: zig চিত্ৰ zag.png + diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni2.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni2.in Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,10 @@ +assami [[চিত্ৰ:delta747jtsarkis.jpg|thumb|বাবৰী মছজিদ]] two +also +{| +|table +|- +| bgcolor=#DDEEFF | ৬ +| bgcolor=#DDEEFF | [[চিত্ৰ:delta747jtsarkis.jpg|100px|centre]] +| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]] +| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]] +|} diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni2.replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni2.replace Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,10 @@ +assami [[চিত্ৰ:Zig চিত্ৰ zag.png|thumb|বাবৰী মছজিদ]] two +also +{| +|table +|- +| bgcolor=#DDEEFF | ৬ +| bgcolor=#DDEEFF | [[চিত্ৰ:Zig চিত্ৰ zag.png|100px|centre]] +| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]] +| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]] +|} diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test/_t_uni2.unlink --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test/_t_uni2.unlink Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,10 @@ +assami two +also +{| +|table +|- +| bgcolor=#DDEEFF | ৬ +| bgcolor=#DDEEFF | +| bgcolor=#DDEEFF | [[দিহিং পাটকাই ৰাষ্ট্ৰীয় উদ্যান]] +| bgcolor=#DDEEFF | [[ডিব্ৰুগড় জিলা|ডিব্ৰুগড়]] আৰু [[তিনিচুকীয়া জিলা]] +|} diff -r 000000000000 -r 3b714bbb1347 delinquent_files/test_regex.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delinquent_files/test_regex.php Mon Jan 23 21:37:02 2023 +0100 @@ -0,0 +1,177 @@ +#!/usr/bin/php 
+# NOTE(review): the text between the shebang line and this statement (the
+# opening PHP tag and whatever defines $d, $DEBUG and the Matcher class) is
+# missing from this copy of the patch. "$d" is presumably the lost receiver of
+# set_level(), since it is the object handed to Matcher below -- restore the
+# preamble from the original file and confirm.
+$d->set_level(9); # trace
+
+$matcher = new Matcher($d);
+
+
+# Regression runner for the Matcher regexes.
+#
+# Fixtures live in $dir_tests, one set of files per test id:
+#   input:            _t_{id}.in
+#   test data:        _t_{id}.data
+#   good replacement: _t_{id}.replace
+#   good unlink:      _t_{id}.unlink
+
+$dir_tests = './test';
+$testnames = collect_test_names($dir_tests);
+
+$data_keys = array( 'img', 'pattern', 'repl_img' );
+
+# Start from an empty list so the run loop is well defined even when no
+# fixture is found.
+$tests = array();
+foreach ($testnames as $t) {
+    $tests[$t] = get_test_data( $dir_tests, $t, $data_keys );
+}
+
+$test_res = array( 'ok' => 0, 'bad' => 0 );
+
+# run: every fixture yields three checks (pattern, unlink, replace)
+foreach ($tests as $key => $t) {
+    #print( "Running test '$key'..." );
+    #print("IN=" . $t["in"] . "\n");
+    #print("Im=" . $t["img"] . "\n");
+    #print("RE=" . $t["unlink"] . "\n");
+
+    $img = $t["img"];
+    $pattern = test_prepare_pattern( $img );
+    # $DEBUG is not set anywhere in the visible part of the script, so do
+    # not assume it exists.
+    if( !empty( $DEBUG ) ) {print(" Pat=$pattern\n");}
+    $res = test_result( $pattern, $t["pattern"], "pattern", $img, $key );
+    $test_res[ $res == 1 ? 'bad' : 'ok' ]++;
+
+    $text = read_text_from_file( "{$dir_tests}/_t_{$key}.in" );
+    $t_unlink = read_text_from_file( "{$dir_tests}/_t_{$key}.unlink" );
+    $t_replace = read_text_from_file( "{$dir_tests}/_t_{$key}.replace" );
+
+    $res = test_regex_unlink( $text, $pattern );
+    $res = test_result( $res, $t_unlink, "unlink", $img, $key );
+    $test_res[ $res == 1 ? 'bad' : 'ok' ]++;
+
+    $res = test_regex_replace( $text, $pattern, $t["repl_img"] );
+    $res = test_result( $res, $t_replace, "replace", $img, $key );
+    $test_res[ $res == 1 ? 'bad' : 'ok' ]++;
+}
+
+print( "\nResults:\n" );
+print( "OK : " . $test_res['ok'] . "\nBAD: " . $test_res['bad'] ."\n" );
+
+
+## Collect the test ids found in $dir.
+## A test id is the {id} part of every regular file named _t_{id}.data.
+## Returns an array of ids, empty when the directory holds no fixture.
+function collect_test_names($dir) {
+    $tests = array();
+
+    $dlist = scandir($dir);
+    if( $dlist === false ) {
+        # scandir() returns false when the directory cannot be listed
+        trigger_error( "cannot list test directory $dir", E_USER_ERROR );
+        return $tests;
+    }
+
+    foreach ($dlist as $val) {
+        if( !is_dir( $dir . '/' . $val ) && preg_match( '/^_t_(.+)\.data$/', $val, $matches ) ) {
+            $tests[] = $matches[1];
+        }
+    }
+    return $tests;
+}
+
+
+## Read {dir}/_t_{t}.data and return its "key: value" lines as an array.
+## Lines that do not look like "key: value" (e.g. the "# test data" header)
+## are ignored. A key that is not listed in $keys, a key from $keys that is
+## absent from the file, or a missing file is reported with E_USER_ERROR.
+function get_test_data($dir, $t, $keys) {
+    $test = array();
+
+    $f = fopen( "{$dir}/_t_{$t}.data", 'r');
+    if( $f ) {
+        while( ($line = fgets($f)) !== false ) {
+            // process a line
+            if( preg_match( '/^(\S+)\s*:\s*(.+)$/', $line, $matches ) ) {
+                # print( "Test $t {$matches[1]} => {$matches[2]}\n" );
+                if( !in_array( $matches[1], $keys ) ) {
+                    trigger_error( "Unknown key '{$matches[1]}' in {$t}.data", E_USER_ERROR );
+                }
+                $test[$matches[1]] = $matches[2];
+            }
+        }
+        fclose($f);
+    } else {
+        trigger_error( "test $t data file is missing", E_USER_ERROR );
+    }
+
+    foreach ($keys as $k) {
+        if( !array_key_exists( $k, $test ) ) {
+            trigger_error( "test $t data is missing $k key", E_USER_ERROR );
+        }
+    }
+
+    return $test;
+}
+
+
+## Compare $result with $expected and print a one-line verdict.
+##   $name  which check this is: 'pattern', 'unlink' or 'replace'
+##   $img   image name, only used in the message
+##   $id    test id, used in the message and in the dump file name
+## On mismatch the actual result is written to _bad_{id}.{name} in the
+## current directory and 1 is returned; on match 0 is returned.
+function test_result( $result, $expected, $name, $img, $id ) {
+    global $dir_tests;
+    # Strict comparison: with the loose operator numeric-looking strings
+    # ("1e3" vs "1000") and null vs "" would count as equal.
+    if( $result !== $expected ) {
+        if( $name == 'pattern' ) {
+            print( "{$id} BAD $name ($img)! expected '$expected', result '$result', fix in {$dir_tests}/_t_{$id}.data, result is in _bad_{$id}.{$name}\n" );
+        } else {
+            print( "{$id} BAD $name ($img)! diff -u {$dir_tests}/_t_{$id}.{$name} _bad_{$id}.{$name}\n" );
+        }
+        write_text_to_file( "_bad_{$id}.{$name}", $result );
+        return 1;
+    }
+
+    print( "{$id} OK $name ($img)!\n" );
+    return 0;
+}
+
+
+## Return the whole content of $fname; terminates the script when the file
+## cannot be read.
+function read_text_from_file( $fname ) {
+    $text = file_get_contents( $fname );
+    if( $text === false ) {
+        die( "Cannot read file $fname" );
+    }
+    return $text;
+}
+
+
+## Write $text to $fname, replacing any previous content; terminates the
+## script when the file cannot be written.
+function write_text_to_file($fname, $text) {
+    #print("Recording into $fname text '$text'");
+    if( file_put_contents( $fname, $text ) === false ) {
+        die( "Cannot create file $fname" );
+    }
+}
+
+
+## Thin wrapper: returns $matcher->matcher_prepare_pattern( $file ) of the
+## global Matcher instance.
+function test_prepare_pattern( $file ) {
+    global $matcher;
+    return $matcher->matcher_prepare_pattern( $file );
+}
+
+
+## Thin wrapper: returns $matcher->matcher_do_unlink( $text, $pattern ) of
+## the global Matcher instance.
+function test_regex_unlink( $text, $pattern ) {
+    global $matcher;
+    return $matcher->matcher_do_unlink( $text, $pattern );
+}
+
+
+## Thin wrapper: returns
+## $matcher->matcher_do_replacement( $text, $pattern, $newimg ) of the
+## global Matcher instance.
+function test_regex_replace( $text, $pattern, $newimg ) {
+    global $matcher;
+    return $matcher->matcher_do_replacement( $text, $pattern, $newimg );
+}