delinquent_files/demon.php

Thu, 22 Feb 2024 20:21:22 +0100

author
Peter Gervai <grin@grin.hu>
date
Thu, 22 Feb 2024 20:21:22 +0100
changeset 8
38415be9f910
parent 6
8b4e205ffc94
child 9
43902b1af1b9
permissions
-rwxr-xr-x

Skipping {{nobots}} pages

#!/usr/bin/php
#$Id$
#

<?PHP


# Work from the tool's home directory so that the relative require paths below resolve.
chdir ( '/data/project/commons-delinquent' ) ;

# Report fatal errors only; the wider E_ALL setting is kept in the trailing comment for debugging.
error_reporting(E_ERROR|E_CORE_ERROR|E_COMPILE_ERROR); # E_ALL|
ini_set('display_errors', 'On');

# NOTE(review): shared.inc presumably declares the CommonsDelinquent base class, matcher.inc the
# Matcher class and debug.inc the Debug class used below -- confirm against those files.
require_once ( './shared.inc' ) ;
require_once( './matcher.inc' );
require_once( './debug.inc' );

print "Bot startup in progress...\n";

class CommonsDelinquentDemon extends CommonsDelinquent {

	# Namespaces never to touch, per wiki: wiki name => list of namespace ids (checked in isBadPage).
	var $avoidNamespaceOnWiki = [
		'dewiki' => [4]
	] ;

	# Debug logger (Debug instance, created in the constructor).
	private $d;
	# Wikitext matcher (Matcher instance, created in the constructor).
	private $matcher;

	var $maximum_file_usage_limit = 65535; # prevent memory explosion by extreme used files (65535 ~ 650 MB)
	var $delay_minutes = 10 ;  # Wait after deletion
	var $fallback_minutes = 120 ; # Only used if DB is empty
	var $max_text_diff = 1500 ; # Max char diff
	# fixFauxTemplateReplacements skips the pending events of a file that has more than this
	# many of them on the same wiki and namespace.
	var $min_faux_template_icon = 500 ;
	# Cache of comment patterns: [mode][wiki] => pattern, filled by getLocalizedCommentPattern.
	var $comments = array() ;
	# Built-in comment patterns per mode; the $N placeholders are filled in by
	# constructUnlinkComment ('summary') and constructReplaceComment ('replace', 'by').
	var $comments_default = array (
		'summary' => 'Removing [[:c:File:$1|$1]], it has been deleted from Commons by [[:c:User:$2|$2]] because: $3.' ,
		'replace' => 'Replacing $1 with [[File:$2]] (by [[:c:User:$3|$3]] because: $4).' ,
		'by' => ' Requested by [[User:$1|]].'
	) ;

	/**
		Initialises the parent bot, then creates the debug logger and a matcher sharing that logger.
	*/
	function __construct() {
		parent::__construct();
		$logger = new Debug();
		$this->d = $logger;
		$this->matcher = new Matcher($logger);
	}

	/**
		Forwards the requested level to the debug logger.
		l	value handed to Debug::set_level(); its meaning is defined by the Debug class
	*/
	function set_debug_mode($l) {
		$this->d->set_level( $l );
	}

	/**
		Public entry point for emitting a debug message through the private logger,
		so that code outside the class (the main script) can log as well.
		msg	text handed to Debug::debug()
	*/
	function debug($msg) {
		$this->d->debug($msg);
	}

	/**
		Returns the newest Commons log timestamp among the completed events (done=1) of the
		tool database, or "now minus fallback_minutes" (YmdHis) when there is none.
	*/
	function getLastTimestamp () {
		$tool_db = $this->getToolDB() ;

		# log_timestamp is the timestamp of the Commons logging table, NOT the tool edit timestamp!
		$query = "SELECT max(log_timestamp) AS max_ts FROM event WHERE done=1" ;
		$rows = $this->runQuery ( $tool_db , $query ) ;

		$latest = '' ;
		while ( $row = $rows->fetch_object() ) {
			$latest = $row->max_ts ;
		}
		$tool_db->close() ;

		if ( $latest != '' ) return $latest ;

		# Nothing recorded yet: fall back to the current date minus X min
		return date ( 'YmdHis' , time() - $this->fallback_minutes*60 ) ;
	}
	
	/**
		Tells whether a usage row must be left alone.
		o		globalimagelinks row (gil_wiki, gil_page_namespace_id, gil_page_title, gil_to)
		filename	name of the file being processed
		Returns true for the file's own description page on Commons, bot user pages and
		(un)deletion request pages on Commons, and for namespaces listed in avoidNamespaceOnWiki.
	*/
	function isBadPage ( $o , $filename ) {
		$ns = $o->gil_page_namespace_id ;

		if ( $o->gil_wiki == 'commonswiki' ) {
			// Self-reference
			if ( $ns == 6 and $o->gil_to == $filename ) return true ;
			// Bot subpage on Commons
			if ( $ns == 2 and preg_match ( '/^\w+Bot\b/' , $o->gil_page_title ) ) return true ;
			// DR and UDR on Commons
			if ( $ns == 4 and preg_match ( '/(Deletion(_| )requests\/.*|Undeletion(_| )requests\/.*)\b/' , $o->gil_page_title ) ) return true ;
		}

		// Per-wiki namespace exclusions
		foreach ( $this->avoidNamespaceOnWiki AS $avoid_wiki => $avoid_namespaces ) {
			if ( $o->gil_wiki == $avoid_wiki and in_array ( $ns , $avoid_namespaces ) ) return true ;
		}
		return false ;
	}

	/**
		Tells whether the page text forbids edits by this bot.
		content	wikitext of the page
		Returns true when the text contains {{nobots}}, {{bots|deny=all}} or a
		{{bots|deny=...}} list that names CommonsDelinker; false otherwise.
	*/
	function isForbiddenPage ( $content ) {
		# The pattern must be wrapped in delimiters: without them preg_match() rejects it
		# ("delimiter must not be alphanumeric or backslash"), returns false and no page is
		# ever recognised as forbidden. Template and parameter names are matched
		# case-insensitively and with optional whitespace, as wikitext allows both.
		$pattern = '/\{\{\s*nobots\s*\}\}|\{\{\s*bots\s*\|\s*deny\s*=\s*(?:[^}]*CommonsDelinker|all)/i' ;
		return preg_match ( $pattern , (string) $content ) === 1 ;
	}
	
	/**
		Collects the files deleted on Commons in the window [max_ts, now - delay_minutes).
		max_ts	lower bound (inclusive) for log_timestamp
		Returns the matching logging rows; each row gets a 'usage' array holding the
		globalimagelinks rows that passed the isBadWiki / isBadPage filters.
	*/
	function getRecentDeletedFiles ( $max_ts ) {
		# Commons database replica
		$commons = $this->getCommonsDB() ;
		$until = date ( 'YmdHis' , time() - $this->delay_minutes*60 ) ;

		# All file deletions where neither a file nor a redirect exists under that name any more
		$query = "SELECT * FROM logging_compat LEFT JOIN comment ON comment_id = log_comment_id WHERE log_type='delete' AND log_action='delete' AND log_timestamp>='$max_ts' AND log_timestamp<'$until' AND log_namespace=6"
			. " AND NOT EXISTS (SELECT * FROM image WHERE img_name=log_title)"
			. " AND NOT EXISTS (SELECT * FROM page WHERE page_title=log_title AND page_namespace=6 AND page_is_redirect=1)" # Do not remove redirects. Is that OK???
			. " ORDER BY log_timestamp ASC" ;

		$deleted = array() ; # Files to delink
		$log_rows = $this->runQuery ( $commons , $query ) ;
		while ( $log_row = $log_rows->fetch_object() ) {
			$deleted[] = $log_row ;
		}

		# Attach the usage instances to every deleted file
		foreach ( $deleted AS $entry ) {
			$name = $entry->log_title ;
			$entry->usage = array() ;
			$links = $this->runQuery ( $commons , "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($name) . "'" ) ;
			while ( $link = $links->fetch_object() ) {
				if ( $this->isBadWiki($link->gil_wiki) or $this->isBadPage($link,$name) ) continue ;
				$entry->usage[] = $link ;
			}
		}

		$commons->close() ;
		return $deleted ;
	}

	/**
		Lists the global usage of a file, leaving out wikis rejected by isBadWiki.
		filename	file name as stored in globalimagelinks.gil_to
		Returns the globalimagelinks rows, or array("*OVERFLOW*") as soon as more than
		maximum_file_usage_limit rows have been collected.
	*/
	function getFileUsage ( $filename ) {
		$this->d->trace("Get file usage for $filename");
		$commons = $this->getCommonsDB() ;
		$query = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . "'" ;
		$this->d->trace("Try running: $query");
		$rows = $this->runQuery ( $commons , $query ) ;
		$this->d->trace("Got result, looping through");

		$found = array() ;
		while ( $row = $rows->fetch_object() ) {
			if ( $this->isBadWiki($row->gil_wiki) ) continue ;
			$found[] = $row ;
			if ( count($found) <= $this->maximum_file_usage_limit ) continue ;

			# limit the maximum matches because we use more memory than toolforge allows
			$this->d->error("Overflow!! We cannot get usage for $filename, too much hits (>$this->maximum_file_usage_limit)!");
			$commons->close();
			return array ( "*OVERFLOW*" ) ;
		}

		$this->d->trace("Processed " . count($found) . " usage points");
		$commons->close() ;
		return $found ;
	}
	
	/**
		Tells whether files may be unlinked from the namespace of the given usage row.
		usage	object carrying gil_page_namespace_id
		Returns false for talk pages (odd positive ids) and for negative ids, true otherwise.
	*/
	function canUnlinkFromNamespace ( $usage ) {
		$ns = $usage->gil_page_namespace_id ;
		$is_talk = ( $ns % 2 > 0 ) ; // Skip talk pages
		$is_virtual = ( $ns < 0 ) ; // Paranoia
		return !( $is_talk or $is_virtual ) ;
	}

	/**
		Makes sure the file of an event really is gone before anything is edited.
		e		event row (id, wiki, file)
		check_commons	when true, the file must be missing on Commons as well
		Returns true when the edit may proceed; otherwise marks the event as
		skipped (done=2) with the reason and returns false.
	*/
	function fileExistenceSanityCheck ( $e , $check_commons ) {
		$reason = null ;
		if ( $this->hasLocalFile ( $e->wiki , $e->file ) ) {
			$reason = 'Skipped: Local file exists' ;
		} else if ( $check_commons and $this->hasLocalFile ( 'commonswiki' , $e->file ) ) {
			$reason = 'Skipped: Commons file exists' ;
		}

		if ( $reason === null ) return true ;

		$this->setDone ( $e->id , 2 , $reason ) ;
		return false ;
	}


	/**
		Fetches the current wikitext of a page.
		wiki		wiki name handed to getAPI()
		pagename	full page title
		Returns the content data of the latest revision, or false when no API
		is available or the page has no latest revision.
	*/
	function getTextFromWiki ( $wiki , $pagename ) {
		$this->d->debug( "Getting api of " . $wiki) ;
		// A temporary workaround from 3/22/2023 used to return false right here for
		// bclwiktionary, bjnwiktionary and gorwiktionary; it is no longer active.
		$api = $this->getAPI ( $wiki ) ;
		if ( !$api ) return false ;

		$factory = new \Mediawiki\Api\MediawikiFactory( $api );
		$latest = $factory->newPageGetter()->getFromTitle( $pagename )->getRevisions()->getLatest();
		if ( !$latest ) return false ;

		return $latest->getContent()->getData() ;
	}
	
	/**
		Returns the edit-comment pattern for a wiki, preferring a local translation.
		wiki	wiki name
		mode	key of comments_default: "summary", "replace" or "by"
		The local page User:CommonsDelinker/<mode>-I18n overrides the built-in
		default when it can be read; the result is cached per mode and wiki.
	*/
	function getLocalizedCommentPattern ( $wiki , $mode = 'summary') {
		if ( !isset($mode) ) $mode = 'summary' ;

		if ( !isset ( $this->comments[$mode][$wiki] ) ) {
			$fallback = $this->comments_default[$mode] ; # Default
			# Try local translation
			$translated = $this->getTextFromWiki ( $wiki , 'User:CommonsDelinker/' . $mode . '-I18n' ) ;
			$this->comments[$mode][$wiki] = ( $translated === false ) ? $fallback : $translated ;
		}

		return $this->comments[$mode][$wiki] ;
	}
	
	/**
		Builds the edit summary for removing a deleted file from a page.
		file	Commons logging row (log_title, log_user_text, comment_text)
		usage	globalimagelinks row of the page to edit (gil_wiki)
		Returns the localized 'summary' pattern with $1 = file name, $2 = deleting
		user and $3 = deletion reason filled in.
	*/
	function constructUnlinkComment ( $file , $usage ) {
		$pattern = $this->getLocalizedCommentPattern ( $usage->gil_wiki ) ;

		$c = (string) $file->comment_text ;
		# Usage rows come from globalimagelinks, so the wiki is in gil_wiki. The former
		# $usage->wiki does not exist there, which made this rewrite run on Commons too.
		if ( $usage->gil_wiki != 'commonswiki' ) { # Point original comment links to Commons
			$c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
			$c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
		}

		# strtr() inserts the values literally and in a single pass. preg_replace() would
		# treat "$1" or "\1" inside a file name, user name or reason as a back-reference,
		# and a later substitution could rewrite placeholders-lookalikes in earlier values.
		return strtr ( (string) $pattern , array (
			'$1' => (string) $file->log_title ,
			'$2' => (string) $file->log_user_text ,
			'$3' => (string) $c
		) ) ;
	}

	/**
		Builds the edit summary for replacing one file with another.
		params	array with 'wiki', 'comment' (reason), 'file', 'replace_with_file'
			and optionally 'user' (the requester)
		Returns the localized 'replace' pattern with $1 = old file, $2 = new file,
		$3 = 'CommonsDelinker' and $4 = reason filled in; when a user is given, the
		localized 'by' pattern (with $1 = user) is appended after a space.
	*/
	function constructReplaceComment ( $params ) {
		$pattern = $this->getLocalizedCommentPattern ( $params['wiki'] , 'replace' ) ;

		$c = (string) $params['comment'] ;
		if ( $params['wiki'] != 'commonswiki' ) { # Point original comment links to Commons
			$c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
			$c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
		}

		# strtr() inserts the values literally and in a single pass. preg_replace() would
		# treat "$1" or "\1" inside a file name or reason as a back-reference, and a later
		# substitution could rewrite placeholder-lookalikes contained in earlier values.
		$pattern = strtr ( (string) $pattern , array (
			'$1' => (string) $params['file'] ,
			'$2' => (string) $params['replace_with_file'] ,
			'$3' => 'CommonsDelinker' ,
			'$4' => (string) $c
		) ) ;

		if ( isset($params['user']) and $params['user'] != '' ) {
			$by = $this->getLocalizedCommentPattern ( $params['wiki'] , 'by' ) ;
			$pattern .= ' ' . strtr ( (string) $by , array ( '$1' => (string) $params['user'] ) ) ;
		}

		return $pattern ;
	}
	
	/**
		Queues the SQL that records one pending 'unlink' event.
		file	Commons logging row of the deleted file
		usage	globalimagelinks row of the page using the file
		sqls	list of SQL statements; the new INSERT is appended to it
		Nothing is queued for namespaces rejected by canUnlinkFromNamespace or when
		the wiki has a local file of the same name.
	*/
	function addUnlinkEvent ( $file , $usage , &$sqls ) {
		if ( !$this->canUnlinkFromNamespace ( $usage ) ) return ;
		if ( $this->hasLocalFile ( $usage->gil_wiki , $usage->gil_to ) ) return ;

		# Full page title, with the namespace prefix when there is one
		$prefix = ( $usage->gil_page_namespace != '' ) ? $usage->gil_page_namespace . ':' : '' ;

		$row = array (
			'action' => 'unlink' ,
			'file' => $usage->gil_to ,
			'wiki' => $usage->gil_wiki ,
			'page' => $prefix . $usage->gil_page_title ,
			'namespace' => $usage->gil_page_namespace_id ,
			'comment' => $this->constructUnlinkComment ( $file , $usage ) ,
			'timestamp' => date ( 'YmdHis' ) ,
			'log_id' => $file->log_id ,
			'log_timestamp' => $file->log_timestamp ,
			'done' => 0
		) ;

		$columns = array_keys ( $row ) ;
		$values = array() ;
		foreach ( $row AS $value ) {
			$values[] = "'" . $this->getDBsafe($value) . "'" ;
		}

		$sqls[] = "INSERT IGNORE INTO event (" . implode ( ',' , $columns ) . ") VALUES (" . implode ( "," , $values ) . ")" ;
	}
	
	/**
		Records pending 'unlink' events for every usage of every deleted file.
		delink_files	rows as returned by getRecentDeletedFiles (each with a 'usage' list)
		All INSERTs are collected first and then run on one tool DB connection.
	*/
	function addUnlinkEvents ( $delink_files ) {
		$pending = array() ;
		foreach ( $delink_files AS $deleted ) {
			foreach ( $deleted->usage AS $link ) {
				$this->addUnlinkEvent ( $deleted , $link , $pending ) ;
			}
		}

		$tool_db = $this->getToolDB() ;
		foreach ( $pending AS $statement ) {
			$this->runQuery ( $tool_db , $statement ) ;
		}
		$tool_db->close() ;
	}
	
	/**
		Loads the Wikidata entity named by the event's page via wbgetentities.
		e	event row (id, page = entity id, file)
		Returns the decoded API response when the entity exists and has claims.
		Otherwise marks the event as skipped (done=2) with a matching note and
		returns false.
	*/
	function getJSON4Q ( $e ) {
		$q = $e->page ;
		# HTTPS avoids the redirect from plain http; the id is encoded because it ends up in a URL
		$url = "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=" . urlencode ( $q ) ;

		# Tell a failed request or undecodable response apart from "file not on item",
		# so the note stored with the event names the real reason
		$raw = file_get_contents ( $url ) ;
		$j = ( $raw === false ) ? null : json_decode ( $raw ) ;
		if ( !is_object ( $j ) ) {
			$this->d->error( "Could not load item $q from Wikidata" );
			$this->setDone ( $e->id , 2 , "Could not load item $q from Wikidata" ) ;
			return false ;
		}

		if ( isset ( $j->entities->$q->missing ) ) { # No such item
			$this->setDone ( $e->id , 2 , "No such item $q" ) ;
			return false ;
		}
		if ( !isset ( $j->entities->$q->claims ) ) {
			$this->setDone ( $e->id , 2 , "Did not find " . $e->file . " on " . $q ) ;
			return false ;
		}
		return $j ;
	}

	/**
		Removes every commonsMedia statement of a Wikidata item that points to the event's file.
		e	event row (id, page = item id, file, comment)
		The event is marked done (1) when the removal succeeded or nothing had to be
		removed; it stays untouched when the removal call fails.
	*/
	function performEditUnlinkWikidata ( $e ) {
		$response = $this->getJSON4Q ( $e ) ;
		if ( $response === false ) return ;

		$item = $e->page ;
		$claim_ids = array() ;
		foreach ( $response->entities->$item->claims AS $statements ) {
			foreach ( $statements AS $statement ) {
				$is_media_statement = ( $statement->type == 'statement' && $statement->mainsnak->datatype == 'commonsMedia' ) ;
				if ( !$is_media_statement ) continue ;

				# Bring the stored value into the underscore form used for $e->file
				$stored = str_replace ( ' ' , '_' , ucfirst ( trim ( $statement->mainsnak->datavalue->value ) ) ) ;
				if ( $stored == $e->file ) $claim_ids[] = $statement->id ;
			}
		}

		if ( count($claim_ids) > 0 ) {
			$request = array ( 'claim'=>implode('|',$claim_ids) , 'summary' => $e->comment ) ;
			if ( !$this->editWikidata ( 'wbremoveclaims' , $request ) ) return ;
		}

		$this->setDone ( $e->id , 1 ) ; # OK!
	}


	/**
		Replaces a file in the commonsMedia statements of a Wikidata item.
		e	event row (id, page = item id, file, replace_with_file, comment)
		Matching claims are removed first, then one new claim per removed claim is
		created on the same property with the replacement file. The event is marked
		skipped (2) when no claim matched, done (1) when all calls succeeded, and is
		left untouched when an API call fails.
	*/
	function performEditReplaceWikidata ( $e ) {
		$response = $this->getJSON4Q ( $e ) ;
		if ( $response === false ) return ;

		$item = $e->page ;

		# Collect id and property of every statement that uses the old file
		$hits = array() ;
		foreach ( $response->entities->$item->claims AS $property => $statements ) {
			foreach ( $statements AS $statement ) {
				$is_media_statement = ( $statement->type == 'statement' && $statement->mainsnak->datatype == 'commonsMedia' ) ;
				if ( !$is_media_statement ) continue ;

				$stored = str_replace ( ' ' , '_' , ucfirst ( trim ( $statement->mainsnak->datavalue->value ) ) ) ;
				if ( $stored == $e->file ) $hits[] = array ( $statement->id , $property ) ;
			}
		}

		if ( count($hits) == 0 ) {
			$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
			return ;
		}

		# Remove old image entries
		$old_ids = array() ;
		foreach ( $hits AS $hit ) {
			$old_ids[] = $hit[0] ;
		}
		if ( !$this->editWikidata ( 'wbremoveclaims' , array ( 'claim'=>implode('|',$old_ids) ) ) ) {
			$this->d->error("performEditReplaceWikidata:1 failed");
			return ;
		}

		# Add new image entries
		foreach ( $hits AS $hit ) {
			$request = array(
				'snaktype' => 'value' ,
				'property' => $hit[1] ,
				'value' => json_encode(str_replace('_',' ',$e->replace_with_file)) ,
				'entity' => $e->page ,
				'summary' => $e->comment
			) ;
			if ( !$this->editWikidata ( 'wbcreateclaim' , $request ) ) {
				$this->d->error( "performEditReplaceWikidata:2 failed" );
				return ;
			}
		}

		$this->setDone ( $e->id , 1 ) ; # OK!
	}

	
	##################################################################
	##
	## Here we make the changes.
	## Get the page, replace content and upload again.
	##
	##################################################################
	/**
		Applies one 'unlink' or 'replace' event to the wikitext of a page.
		e	event row (id, wiki, page, file, action, comment, replace_with_file)
		The event is marked skipped (done=2) with a note when the API or page is
		unavailable, the page forbids bots, the text did not change, the change is
		larger than max_text_diff (or empties the page), or the edit fails.
		On success it is marked done (1) with the id of the revision that was edited.
	*/
	function performEditText ( $e ) {
		$this->d->debug( "performEditText $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
		$api = $this->getAPI ( $e->wiki ) ;
		if ( $api === false ) {
			$this->setDone ( $e->id , 2 , "Could not connect to API" ) ;
			return ;
		}
		$services = new \Mediawiki\Api\MediawikiFactory( $api );
		try {
			$page = $services->newPageGetter()->getFromTitle( $e->page );
		} catch (Exception $ex) {
			$this->setDone ( $e->id , 2 , "Page not found" ) ;
			$this->d->debug("Page '$e->page' not found ($ex), id='$e->id' wiki=$e->wiki file=$e->file action=$e->action");
			return ;
		}
		$revision = $page->getRevisions()->getLatest();
		
		if ( !$revision ) {
			$this->setDone ( $e->id , 2 , "Latest revision not found" ) ;
			return ;
		}
		
		$rev_id = $revision->getId() ;
		$text = $revision->getContent()->getData() ;
		
		# Respect {{nobots}} / {{bots|deny=...}} on the page
		if ( $this->isForbiddenPage( $text ) ) {
			$this->d->info("Skipping forbidden (nobots) page $e->wiki: $e->page");
			$this->setDone( $e->id, 2, "Change forbidden (nobots)" );
			return;
		}

		$file = $e->file ;
		$pattern = $this->matcher->matcher_prepare_pattern( $file );
	
		$new_text = $text ;
		# Only 'replace' events have a replacement file. Define the variable up front so
		# the log line further down does not read an undefined variable for 'unlink'.
		$new_file = '' ;

		if ( $e->action == 'unlink' ) {
			## remove image references in text; see ./matcher.inc
			$this->d->info("Try to unlink '$pattern' in $e->wiki: $e->page");
			$new_text = $this->matcher->matcher_do_unlink( $new_text, $pattern );

		} else if ( $e->action == 'replace' ) {
			## replace image with new_file in text; see ./matcher.inc
			$new_file = $e->replace_with_file;
			$this->d->info("Try to replace '$pattern' with '$new_file' in $e->wiki: $e->page");
			$new_text = $this->matcher->matcher_do_replacement( $new_text, $pattern, $new_file );
		}
		
		if ( $text == $new_text ) { # No change
			$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
			$this->d->info( "Article unchanged. id=$e->id; $e->wiki: $e->page" );
			return ;
		}
		
		# Safety net: never blank a page and never save a suspiciously large change
		if ( strlen(trim($new_text)) == 0 or abs(strlen($text)-strlen($new_text)) > $this->max_text_diff ) {
			$this->setDone ( $e->id , 2 , 'Text change too big' ) ;
			$this->d->warn( "Article text change is too big. id=$e->id; $e->wiki: $e->page" );
			return ;
		}
		
		if ( !isset($e->comment) ) $e->comment = '' ;
		$e->comment = (string)$e->comment ;

		$this->d->info( "Editing $e->wiki: $e->page to $e->action $e->file (w/ $new_file) AS $e->comment") ;
		
		$params = array (
			'title' => $e->page ,
			'text' => trim($new_text) ,
			'summary' => $e->comment ,
			'bot' => 1
		) ;
		
		$x = $this->editWiki ( $e->wiki , 'edit' , $params ) ;
		if ( $x and $x['edit']['result'] == 'Success' ) {
			$this->setDone ( $e->id , 1 , array('revision'=>$rev_id) ) ;
		} else {
			$this->d->error( "Cannot edit wiki ($e->wiki: $e->page): " . $this->last_exception );
			$this->setDone ( $e->id , 2 , $this->last_exception ) ;
		}

	}
	
	/**
		Carries out a 'replace' event, as a Wikidata claim edit for items
		(wikidatawiki, namespace 0) and as a wikitext edit everywhere else.
		e	event row; skipped when a local file exists or no namespace is set
	*/
	function performEditReplace ( $e ) {
		if ( !$this->fileExistenceSanityCheck($e,false) ) return ; # Nothing to do
		if ( !isset($e->namespace) ) return ; # Paranoia

		$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
		if ( !$is_wikidata_item ) {
			$this->performEditText ( $e ) ; # "Normal" edit
			return ;
		}
		$this->performEditReplaceWikidata ( $e ) ;
	}
	
	/**
		Carries out an 'unlink' event, as a Wikidata claim removal for items
		(wikidatawiki, namespace 0) and as a wikitext edit everywhere else.
		e	event row; skipped when the file exists locally or on Commons
	*/
	function performEditUnlink ( $e ) {
		if ( !$this->fileExistenceSanityCheck($e,true) ) return ; # Nothing to do

		$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
		if ( $is_wikidata_item ) {
			$this->performEditUnlinkWikidata ( $e ) ;
			return ;
		}

		# "Normal" edit
		$this->d->debug( "performEditUnlink $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
		$this->performEditText ( $e ) ;
	}
	
	/**
		Dispatches an event to the handler for its action.
		e	event row (action, id, wiki, page)
		An action other than 'unlink' or 'replace' is logged, dumped and ends the script.
	*/
	function performEdit ( $e ) {
		$this->d->debug( "performEdit $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
		switch ( $e->action ) {
			case 'unlink' :
				$this->performEditUnlink ( $e ) ;
				break ;
			case 'replace' :
				$this->performEditReplace ( $e ) ;
				break ;
			default :
				$this->d->error( "PerformEdit got unknown action $e->action" );
				print_r ( $e ) ;
				die ( "Unknown action " . $e->action ) ;
		}
	}
	
	/**
		Re-queues events of the last 7 days that were skipped for a transient reason.
		db	open tool database connection
		Events with done=2 (skipped) whose note mentions 'rate limit' or
		'edit conflict' are set back to done=0 (pending) with an empty note.
		Status values: 0=pending, 1=done, 2=skipped.
	*/
	function clearBogusIssues ( $db ) {
		$since = date ( 'YmdHis' , time() - 60*60*24*7 ); # look back 7 days only

		foreach ( array ( 'rate limit' , 'edit conflict' ) AS $issue ) {
			$statement = "update `event` set done=0,note='' where note like '%$issue%' and done=2 and timestamp>'$since'" ;
			$this->d->debug("Set done=0 (pending) on '$issue' events where done=2(skipped)");
			$this->runQuery ( $db , $statement ) ;
		}
	}
	
	/**
		Works through all pending events (done=0), oldest first, then re-queues
		events that were skipped because of rate limits or edit conflicts.
		Consecutive edits on the same wiki are spaced by 2 seconds; an exception
		thrown by one edit is printed and does not stop the remaining ones.
	*/
	function performEdits () {
		$this->d->debug("Connecting to DB");
		$tool_db = $this->getToolDB() ;
		$this->d->debug("Get work events (pending events)");
		$rows = $this->runQuery ( $tool_db , "SELECT * FROM `event` WHERE done=0 ORDER BY timestamp ASC,log_timestamp ASC" ) ;
		$queue = array() ;
		while ( $row = $rows->fetch_object() ) {
			$queue[] = $row ;
		}
		$tool_db->close() ;

		$previous_wiki = '' ;
		foreach ( $queue AS $event ) {
			if ( $previous_wiki == $event->wiki ) sleep ( 2 ) ; // Edit rate limiter
			$this->d->debug("Perform an edit in $event->wiki");
			try {
				$this->performEdit ( $event ) ;
			} catch (Exception $failure) {
				echo 'Caught exception: ',  $failure->getMessage(), "\n";
			}
			$previous_wiki = $event->wiki ;
		}

		## Reactivating skipped events is slow, so it is done at the end rather than before the edits
		$this->d->debug("Connecting to DB");
		$tool_db = $this->getToolDB() ;
		$this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)");
		$this->clearBogusIssues ( $tool_db ) ;
		$tool_db->close() ;
	}
	
	/**
		Turns the {{universal replace|old|new|reason=...[|user=...]}} commands on
		User:CommonsDelinker/commands into pending 'replace' events.
		Nothing happens when the page cannot be read or contains {{stop}}. Accepted
		commands are removed from the page; rejected ones stay, prefixed with the
		reason. The events are stored and the page is saved only when the page text
		changed.
	*/
	function addReplaceEvents () {
		$cmd_page = 'User:CommonsDelinker/commands' ;
		$this->d->trace("getText from User:CommonsDelinker/commands");
		$t = $this->getTextFromWiki ( 'commonswiki' , $cmd_page ) ;
		if ( $t === false ) {
			$this->d->error( "Could not open commands page") ;
			return ;
		}
		
		if ( preg_match ( '/\{\{[Ss]top\}\}/' , $t ) ) return ; // STOP
		
		$sqls = array() ;
		
		# The variant with "|user=" must be tried first. The variant without it also
		# matches such lines (its lazy reason group simply runs on to the closing braces),
		# so in the old order the user was never extracted and ended up in the reason.
		$cmd_head = '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*' ;
		$cmd_with_user = $cmd_head . '(.+?)\s*\|\s*user\s*=\s*(\S.*?)\s*\}\}/' ;
		$cmd_without_user = $cmd_head . '(\S.*?)\s*\}\}/' ;
		
		$this->d->trace("Processing page content...");
		$ts = date ( 'YmdHis' ) ;
		$t = explode ( "\n" , $t ) ;
		$nt = array() ;
		foreach ( $t AS $l ) {
			if ( !preg_match ( $cmd_with_user , $l , $m ) and !preg_match ( $cmd_without_user , $l , $m ) ) {
				$nt[] = $l ; # Not a command, keep the line as it is
				continue ;
			}
			$old_file = ucfirst(str_replace(' ','_',trim($m[1]))) ;
			$new_file = ucfirst(str_replace(' ','_',trim($m[2]))) ;
			$this->d->trace("Process line; old:$old_file new:$new_file.");
			
			$comment = trim($m[3]) ;
			$user = '' ;
			if ( isset($m[4]) ) {
				$user = str_replace(' ','_',trim($m[4])) ;
				# Reduce "[[User:Name|label]]" to "Name". The colon is matched outside the
				# group, otherwise it would stay at the start of the captured name.
				$user = preg_replace ( '/^\s*\[\[[^:]+:(.+?)\s*(\||\]\]).*$/' , '$1' , $user ) ;
			}
			
			if ( !$this->hasLocalFile ( 'commonswiki' , $new_file ) ) {
				$nt[] = "No such replacement file: " . $l ;
				continue ;
			}

			if ( !preg_match('/\.svg$/i',$old_file) and preg_match('/\.svg$/i',$new_file) ) {
				$nt[] = "Non-SVG to SVG replacement: " . $l ;
				continue ;
			}

			$this->d->trace("Get file usage for $old_file");
			$usages = $this->getFileUsage ( $old_file ) ;
			# getFileUsage signals too many usages with a one-element array holding this marker;
			# the list may also be empty, so check before reading element 0
			if ( isset($usages[0]) and $usages[0] === "*OVERFLOW*" ) {
				$nt[] = "File is used on too many pages: " . $l;
				$this->d->info("Skipping file $old_file; high usage (>$this->maximum_file_usage_limit)!");
				continue;
			}
			
			# The statements are only collected here; they run on one connection further down
			$this->d->trace("Generate replaces...");
			foreach ( $usages AS $usage ) {
				$page = $usage->gil_page_title ;
				if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ':' . $page ;
				$params = array (
					'action' => 'replace' ,
					'file' => $old_file ,
					'wiki' => $usage->gil_wiki ,
					'page' => $page ,
					'namespace' => $usage->gil_page_namespace_id ,
					'timestamp' => $ts ,
					'comment' => $comment ,
					'log_id' => -1 ,
					'log_timestamp' => $ts ,
					'user' => $user ,
					'done' => 0 ,
					'replace_with_file' => $new_file
				) ;
				$params['comment'] = $this->constructReplaceComment ( $params ) ;

				$s1 = array() ;
				$s2 = array() ;
				foreach ( $params AS $k => $v ) {
					$s1[] = $k ;
					$s2[] = "'" . $this->getDBsafe($v) . "'" ;
				}
				
				$this->d->trace("Add SQL to sqls[] array, len=" . count($sqls));
				$sql = "INSERT IGNORE INTO event (" . implode ( ',' , $s1 ) . ") VALUES (" . implode ( "," , $s2 ) . ")" ;
				$sqls[] = $sql ;
				$this->d->trace("SQL:$sql");

			}
			
		}
		
		$t = implode ( "\n" , $t ) ;
		$nt = implode ( "\n" , $nt ) ;
		if ( $t === $nt ) return ; // No change
		
		# Run SQL
		$db = $this->getToolDB() ;
		foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ;
		$db->close() ;
		
		# Save new text to Wiki
		$params = array (
			'title' => $cmd_page ,
			'text' => trim($nt) ,
			'summary' => 'Removing replace commands, will be executed soon' ,
			'bot' => 1
		) ;
		
		$this->d->info( "Editing $cmd_page...") ;
		$this->editWiki ( 'commonswiki' , 'edit' , $params ) ;
		$this->d->debug( "Editing $cmd_page done.") ;
	}
	
	/**
		Cleans the event queue before editing.
		Deletes events that have neither action nor file, then marks as skipped
		(done=2) all events of any file that has more than min_faux_template_icon
		pending events on one wiki and namespace.
	*/
	function fixFauxTemplateReplacements () {
		$tool_db = $this->getToolDB() ;
		$this->runQuery ( $tool_db , "DELETE FROM event WHERE action='' and file=''" ) ;

		$heavy = $this->runQuery ( $tool_db , 'select file,wiki, count(*) as cnt,namespace from event where done=0 group by file,wiki,namespace having cnt>' . $this->min_faux_template_icon ) ;

		# Collect the updates first and run them once the result set has been read
		$updates = array() ;
		while ( $row = $heavy->fetch_object() ) {
			$updates[] = "UPDATE event SET done=2,note='Likely template icon, skipping' WHERE file='" . $this->getDBsafe ( $row->file ) . "' AND wiki='" . $this->getDBsafe ( $row->wiki ) . "' AND namespace=" . $row->namespace ;
		}
		foreach ( $updates AS $statement ) {
			$this->runQuery ( $tool_db , $statement ) ;
		}
		$tool_db->close() ;
	}

	// Unlinks deleted files
	/**
		One full pass of the bot, in this order:
		1. find the files deleted on Commons since the last processed log timestamp,
		2. queue 'unlink' events for their usages,
		3. queue 'replace' events from the commands page,
		4. skip pending events of files that look like template icons,
		5. perform all pending edits.
	*/
	function run () {
		$this->d->debug("Get last timestamp");
		$max_ts = $this->getLastTimestamp() ;
		$this->d->debug("Get recent deleted files");
		$delink_files = $this->getRecentDeletedFiles ( $max_ts ) ;
		$this->d->debug("Add unlink events for recently deleted files");
		$this->addUnlinkEvents ( $delink_files ) ;
		$this->d->debug("Add replace events");
		$this->addReplaceEvents () ;
		$this->d->debug("Fix bogus template replacements");
		$this->fixFauxTemplateReplacements() ;
		$this->d->debug("Perform the queued edits");
		$this->performEdits() ;
	}
	
	/**
		Test helper: runs only the "add replace events" step of run().
		Its only call in the main script is commented out.
	*/
	function debug_run0() {
		$this->d->debug("Add replace events");
		$this->addReplaceEvents () ;
	
	}

}

print "Bot is starting.\n";
$demon = new CommonsDelinquentDemon ;

// Single steps, kept for manual testing
//$demon->addReplaceEvents () ;
//$demon->performEdits() ;
//$demon->fixFauxTemplateReplacements() ;

# Debug level handed to the Debug logger; what 8 stands for is defined by the Debug class
$demon->set_debug_mode(8);

   // test
#   $demon->debug_run0();
   //\\\\\\

# Work off events that are still pending before the first full pass
$demon->debug("Performing edits...");
$demon->performEdits() ;
# Main loop: one full pass, then a 30 second pause, forever
while ( 1 ) {
	$demon->debug("Calling run loop...");
	$demon->run() ;
	$demon->debug("Sleeping 30...");
	sleep ( 30 ) ;
}

?>

mercurial