delinquent_files/demon.php

changeset 0
3b714bbb1347
child 2
cd58c0bc21d6
equal deleted inserted replaced
-1:000000000000 0:3b714bbb1347
1 #!/usr/bin/php
2 <?PHP
3
4 chdir ( '/data/project/commons-delinquent' ) ;
5
6 error_reporting(E_ERROR|E_CORE_ERROR|E_COMPILE_ERROR); # E_ALL|
7 ini_set('display_errors', 'On');
8
9 require_once ( './shared.inc' ) ;
10 require_once( './matcher.inc' );
11 require_once( './debug.inc' );
12
13 class CommonsDelinquentDemon extends CommonsDelinquent {
14
15 var $avoidNamespaceOnWiki = [
16 'dewiki' => [4]
17 ] ;
18
19 private $d;
20 private $matcher;
21
22 var $maximum_file_usage_limit = 65535; # prevent memory explosion by extreme used files (65535 ~ 650 MB)
23 var $delay_minutes = 10 ; # Wait after deletion
24 var $fallback_minutes = 120 ; # Only used if DB is empty
25 var $max_text_diff = 1500 ; # Max char diff
26 var $min_faux_template_icon = 500 ;
27 var $comments = array() ;
28 var $comments_default = array (
29 'summary' => 'Removing [[:c:File:$1|$1]], it has been deleted from Commons by [[:c:User:$2|$2]] because: $3.' ,
30 'replace' => 'Replacing $1 with [[File:$2]] (by [[:c:User:$3|$3]] because: $4).' ,
31 'by' => ' Requested by [[User:$1|]].'
32 ) ;
33
/**
 * Initialises the parent tool, then creates the debug logger and the
 * wikitext matcher; the matcher is handed the same logger instance.
 */
function __construct() {
    parent::__construct();

    $logger = new Debug;
    $this->d = $logger;
    $this->matcher = new Matcher($logger);
}
39
/**
 * Forwards the requested verbosity level to the debug logger.
 *
 * @param mixed $l Level value passed straight to Debug::set_level().
 */
function set_debug_mode($l)
{
    $logger = $this->d;
    $logger->set_level($l);
}
43
/**
 * Emits a debug-level message through the debug logger.
 *
 * @param string $msg Message to log.
 */
function debug($msg)
{
    $logger = $this->d;
    $logger->debug($msg);
}
47
/**
 * Returns the newest Commons log timestamp among completed events in the
 * tool database, or a fallback of "now minus $fallback_minutes" when the
 * query yields no usable value.
 *
 * @return string Timestamp in YmdHis format.
 */
function getLastTimestamp()
{
    $toolDb = $this->getToolDB();

    // log_timestamp is the timestamp of the Commons logging table,
    // NOT the time at which the tool made its edit.
    $query = "SELECT max(log_timestamp) AS max_ts FROM event WHERE done=1";
    $rows = $this->runQuery($toolDb, $query);

    $latest = '';
    while ($row = $rows->fetch_object()) {
        $latest = $row->max_ts;
    }
    $toolDb->close();

    if ($latest == '') {
        // Nothing recorded yet: start a fixed number of minutes in the past.
        return date('YmdHis', time() - $this->fallback_minutes * 60);
    }
    return $latest;
}
64
/**
 * Decides whether a usage row points at a page the bot must not touch.
 *
 * @param object $o        Row from globalimagelinks (gil_* properties).
 * @param string $filename Name of the deleted file being processed.
 * @return bool True when the page has to be skipped.
 */
function isBadPage($o, $filename)
{
    $ns = $o->gil_page_namespace_id;
    $onCommons = ($o->gil_wiki == 'commonswiki');

    // The file's own description page on Commons (self-reference).
    if ($ns == 6 and $onCommons and $o->gil_to == $filename) {
        return true;
    }
    // Bot user (sub)pages on Commons.
    if ($ns == 2 and $onCommons and preg_match('/^\w+Bot\b/', $o->gil_page_title)) {
        return true;
    }
    // Deletion and undeletion request pages on Commons.
    if ($ns == 4 and $onCommons and preg_match('/(Deletion(_| )requests\/.*|Undeletion(_| )requests\/.*)\b/', $o->gil_page_title)) {
        return true;
    }

    // Per-wiki namespaces configured as off limits.
    foreach ($this->avoidNamespaceOnWiki as $wiki => $namespaces) {
        if ($o->gil_wiki != $wiki) {
            continue;
        }
        if (in_array($ns, $namespaces)) {
            return true;
        }
    }
    return false;
}
77
/**
 * Collects file deletions logged on Commons between $max_ts (inclusive)
 * and "now minus $delay_minutes" (exclusive), and attaches to each one the
 * list of pages that still use the file.
 *
 * @param string $max_ts Lower bound timestamp (YmdHis).
 * @return array Log rows, each with an added ->usage array of
 *               globalimagelinks rows that passed isBadWiki()/isBadPage().
 */
function getRecentDeletedFiles($max_ts)
{
    $commons = $this->getCommonsDB();
    $cutoff = date('YmdHis', time() - $this->delay_minutes * 60);

    // Deletions in the File namespace whose file is really gone and whose
    // title is not a redirect page.
    $query = implode(' ', array(
        "SELECT * FROM logging_compat LEFT JOIN comment ON comment_id = log_comment_id WHERE log_type='delete' AND log_action='delete' AND log_timestamp>='$max_ts' AND log_timestamp<'$cutoff' AND log_namespace=6",
        "AND NOT EXISTS (SELECT * FROM image WHERE img_name=log_title)",
        "AND NOT EXISTS (SELECT * FROM page WHERE page_title=log_title AND page_namespace=6 AND page_is_redirect=1)",
        "ORDER BY log_timestamp ASC",
    ));

    $deletions = array();
    $rows = $this->runQuery($commons, $query);
    while ($row = $rows->fetch_object()) {
        $deletions[] = $row;
    }

    // Second pass: look up the remaining usages of every deleted file.
    foreach ($deletions as $deletion) {
        $name = $deletion->log_title;
        $deletion->usage = array();

        $links = $this->runQuery(
            $commons,
            "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($name) . "'"
        );
        while ($link = $links->fetch_object()) {
            if ($this->isBadWiki($link->gil_wiki) || $this->isBadPage($link, $name)) {
                continue;
            }
            $deletion->usage[] = $link;
        }
    }

    $commons->close();
    return $deletions;
}
108
/**
 * Lists all global usages of a Commons file, skipping wikis rejected by
 * isBadWiki().
 *
 * To bound memory use, collection stops as soon as more than
 * $maximum_file_usage_limit rows have been gathered.
 *
 * @param string $filename File name as stored in globalimagelinks.gil_to.
 * @return array Usage rows, or the one-element sentinel array("*OVERFLOW*")
 *               when the limit was exceeded.
 */
function getFileUsage ( $filename ) {
    $this->d->trace("Get file usage for $filename");
    $db_co = $this->getCommonsDB() ;
    $sql = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . "'" ;
    $this->d->trace("Try running: $sql");
    $result = $this->runQuery ( $db_co , $sql ) ;
    $this->d->trace("Got result, looping through");

    $ret = array() ;
    while ( $o = $result->fetch_object() ) {
        if ( $this->isBadWiki($o->gil_wiki) ) continue ;
        $ret[] = $o ;
        # Limit the number of matches; very heavily used files need more memory than is available
        if ( count($ret) > $this->maximum_file_usage_limit ) {
            $this->d->error("Overflow!! We cannot get usage for $filename, too much hits (>$this->maximum_file_usage_limit)!");
            $db_co->close() ;
            return array ( "*OVERFLOW*" ) ;
        }
    }

    $this->d->trace("Processed " . count($ret) . " usage points");
    $db_co->close() ;
    return $ret ;
}
135
/**
 * Tells whether a usage may be edited, judged by its namespace id alone.
 *
 * @param object $usage Row exposing gil_page_namespace_id.
 * @return bool False for namespaces with a positive odd id (talk pages)
 *              and for negative ids; true otherwise.
 */
function canUnlinkFromNamespace($usage)
{
    $ns = $usage->gil_page_namespace_id;
    $isTalk = ($ns % 2 > 0);
    $isNegative = ($ns < 0);
    return !($isTalk || $isNegative);
}
141
/**
 * Verifies that the file of an event is really missing before editing.
 *
 * When the file exists on the target wiki, or (if requested) on Commons,
 * the event is marked as skipped (done=2) with an explanatory note.
 *
 * @param object $e             Event row (id, wiki, file).
 * @param bool   $check_commons Also check for the file on Commons.
 * @return bool True when the edit may proceed.
 */
function fileExistenceSanityCheck($e, $check_commons)
{
    $skipNote = null;

    if ($this->hasLocalFile($e->wiki, $e->file)) {
        $skipNote = 'Skipped: Local file exists';
    } elseif ($check_commons and $this->hasLocalFile('commonswiki', $e->file)) {
        $skipNote = 'Skipped: Commons file exists';
    }

    if ($skipNote === null) {
        return true;
    }
    $this->setDone($e->id, 2, $skipNote);
    return false;
}
153
154
/**
 * Fetches the content of the latest revision of a page.
 *
 * Exceptions raised while talking to the API are caught and reported as
 * "no text", matching how performEditText() guards its page fetch. Several
 * callers run outside any try/catch, so an escaping exception would end
 * the whole bot process.
 *
 * @param string $wiki     Wiki database name, e.g. 'commonswiki'.
 * @param string $pagename Full page title.
 * @return string|false Page text, or false when the API is unavailable,
 *                      the request failed or no latest revision exists.
 */
function getTextFromWiki ( $wiki , $pagename ) {
    $api = $this->getAPI ( $wiki ) ;
    if ( !$api ) return false ;

    try {
        $services = new \Mediawiki\Api\MediawikiFactory( $api );
        $page = $services->newPageGetter()->getFromTitle( $pagename );
        $revision = $page->getRevisions()->getLatest();
    } catch ( Exception $ex ) {
        $this->d->warn( "Could not load '$pagename' from $wiki: " . $ex->getMessage() );
        return false ;
    }

    if ( !$revision ) return false ;
    return $revision->getContent()->getData() ;
}
169
/**
 * Returns the edit-summary pattern for a wiki, using the per-process cache
 * in $this->comments when possible.
 *
 * The built-in default for $mode is replaced by the text of the on-wiki
 * page "User:CommonsDelinker/<mode>-I18n" whenever that text can be read.
 *
 * @param string $wiki Wiki database name.
 * @param string $mode Key of $comments_default: 'summary', 'replace' or 'by'.
 * @return string Pattern containing $1, $2, ... placeholders.
 */
function getLocalizedCommentPattern($wiki, $mode = 'summary')
{
    if (!isset($mode)) {
        $mode = 'summary';
    }

    if (!isset($this->comments[$mode][$wiki])) {
        $chosen = $this->comments_default[$mode];

        // A local translation, if present, overrides the default.
        $translated = $this->getTextFromWiki($wiki, 'User:CommonsDelinker/' . $mode . '-I18n');
        if ($translated !== false) {
            $chosen = $translated;
        }

        $this->comments[$mode][$wiki] = $chosen;
    }

    return $this->comments[$mode][$wiki];
}
185
/**
 * Builds the edit summary used when removing a deleted file from a page.
 *
 * Placeholders in the localized pattern: $1 = file name, $2 = deleting
 * user, $3 = deletion reason.
 *
 * @param object $file  Deletion log row (log_title, log_user_text, comment_text).
 * @param object $usage globalimagelinks row of the page being edited.
 * @return string Edit summary.
 */
function constructUnlinkComment ( $file , $usage ) {
    $pattern = $this->getLocalizedCommentPattern ( $usage->gil_wiki ) ;

    $c = (string) $file->comment_text ;
    # Usage rows carry the wiki in gil_wiki; they have no "wiki" property
    if ( $usage->gil_wiki != 'commonswiki' ) { # Point original comment links to Commons
        $c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
        $c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
    }

    # Literal, single-pass substitution: "$n" or "\n" inside the inserted
    # values is neither treated as a regex backreference nor expanded again.
    return strtr ( $pattern , array (
        '$1' => $file->log_title ,
        '$2' => $file->log_user_text ,
        '$3' => $c
    ) ) ;
}
201
/**
 * Builds the edit summary used when replacing one file with another.
 *
 * Placeholders in the localized pattern: $1 = old file, $2 = new file,
 * $3 = acting user (always 'CommonsDelinker'), $4 = reason. When a
 * requesting user is given, the localized "by" pattern is appended with
 * $1 = that user.
 *
 * @param array $params Event fields; reads 'wiki', 'comment', 'file',
 *                      'replace_with_file' and optionally 'user'.
 * @return string Edit summary.
 */
function constructReplaceComment ( $params ) {
    $pattern = $this->getLocalizedCommentPattern ( $params['wiki'] , 'replace' ) ;

    $c = $params['comment'] ;
    if ( $params['wiki'] != 'commonswiki' ) { # Point original comment links to Commons
        $c = preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
        $c = preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
    }

    # The reason is free text written by users. Substitute literally and in
    # one pass so that "$n" or "\n" in any inserted value is kept verbatim
    # instead of being read as a backreference or expanded a second time.
    $pattern = strtr ( $pattern , array (
        '$1' => $params['file'] ,
        '$2' => $params['replace_with_file'] ,
        '$3' => 'CommonsDelinker' ,
        '$4' => $c
    ) ) ;

    if ( isset($params['user']) and $params['user'] != '' ) {
        $by = $this->getLocalizedCommentPattern ( $params['wiki'] , 'by' ) ;
        $by = str_replace ( '$1' , $params['user'] , $by ) ;
        $pattern .= ' ' . $by ;
    }

    return $pattern ;
}
224
/**
 * Queues the INSERT statement for one "unlink" event.
 *
 * Nothing is queued when the page's namespace must not be edited or when
 * the target wiki has a local file of the same name.
 *
 * @param object $file  Deletion log row.
 * @param object $usage globalimagelinks row of the page using the file.
 * @param array  $sqls  Statement list to append to (by reference).
 */
function addUnlinkEvent($file, $usage, &$sqls)
{
    if (!$this->canUnlinkFromNamespace($usage)) {
        return;
    }
    if ($this->hasLocalFile($usage->gil_wiki, $usage->gil_to)) {
        return;
    }

    // Full page title: namespace prefix plus title, when there is a prefix.
    $title = $usage->gil_page_title;
    if ($usage->gil_page_namespace != '') {
        $title = $usage->gil_page_namespace . ":$title";
    }

    $event = array(
        'action' => 'unlink',
        'file' => $usage->gil_to,
        'wiki' => $usage->gil_wiki,
        'page' => $title,
        'namespace' => $usage->gil_page_namespace_id,
        'comment' => $this->constructUnlinkComment($file, $usage),
        'timestamp' => date('YmdHis'),
        'log_id' => $file->log_id,
        'log_timestamp' => $file->log_timestamp,
        'done' => 0,
    );

    $columns = array_keys($event);
    $values = array();
    foreach ($event as $value) {
        $values[] = "'" . $this->getDBsafe($value) . "'";
    }

    $sqls[] = "INSERT IGNORE INTO event (" . implode(',', $columns) . ") VALUES (" . implode(",", $values) . ")";
}
255
/**
 * Creates unlink events for every usage of every deleted file and writes
 * them to the tool database.
 *
 * @param array $delink_files Log rows as returned by getRecentDeletedFiles().
 */
function addUnlinkEvents($delink_files)
{
    // Build all statements first, then run them on one connection.
    $statements = array();
    foreach ($delink_files as $deleted) {
        foreach ($deleted->usage as $link) {
            $this->addUnlinkEvent($deleted, $link, $statements);
        }
    }

    $toolDb = $this->getToolDB();
    foreach ($statements as $statement) {
        $this->runQuery($toolDb, $statement);
    }
    $toolDb->close();
}
268
/**
 * Loads the Wikidata entity named by the event's page via wbgetentities.
 *
 * The event is marked as skipped (done=2) and false is returned when the
 * response cannot be fetched or decoded, when the item is missing, or
 * when it has no claims.
 *
 * @param object $e Event row; ->page holds the entity id, ->file the file name.
 * @return object|false Decoded API response, or false if the event was skipped.
 */
function getJSON4Q ( $e ) {
    $q = $e->page ;
    $url = "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=" . rawurlencode ( $q ) ;

    $raw = file_get_contents ( $url ) ;
    $j = ( $raw === false ) ? null : json_decode ( $raw ) ;
    if ( !is_object ( $j ) ) { # Request failed or the body was not a JSON object
        $this->setDone ( $e->id , 2 , "Could not load item $q from Wikidata" ) ;
        return false ;
    }

    if ( isset ( $j->entities->$q->missing ) ) { # No such item
        $this->setDone ( $e->id , 2 , "No such item $q" ) ;
        return false ;
    }
    if ( !isset ( $j->entities->$q->claims ) ) {
        $this->setDone ( $e->id , 2 , "Did not find " . $e->file . " on " . $q ) ;
        return false ;
    }
    return $j ;
}
283
/**
 * Removes every commonsMedia statement on a Wikidata item whose value is
 * the event's file, then marks the event as done.
 *
 * The event is also marked done when no matching statement exists. If the
 * removal call fails, the event is left untouched.
 *
 * @param object $e Event row (id, page, file, comment).
 */
function performEditUnlinkWikidata($e)
{
    $response = $this->getJSON4Q($e);
    if ($response === false) {
        return;
    }

    $itemId = $e->page;
    $claimsByProperty = $response->entities->$itemId->claims;

    $claimIds = array();
    foreach ($claimsByProperty as $prop => $claims) {
        foreach ($claims as $claim) {
            if ($claim->type != 'statement') {
                continue;
            }
            if ($claim->mainsnak->datatype != 'commonsMedia') {
                continue;
            }
            // Normalise the stored value to the underscore form used in events.
            $stored = str_replace(' ', '_', ucfirst(trim($claim->mainsnak->datavalue->value)));
            if ($stored == $e->file) {
                $claimIds[] = $claim->id;
            }
        }
    }

    if (count($claimIds) > 0) {
        $request = array('claim' => implode('|', $claimIds), 'summary' => $e->comment);
        if (!$this->editWikidata('wbremoveclaims', $request)) {
            return;
        }
    }

    $this->setDone($e->id, 1); # OK!
}
307
308
/**
 * Replaces a file on a Wikidata item: matching commonsMedia statements are
 * removed and a new statement with the replacement file is created for
 * each of them under the same property.
 *
 * The event is marked skipped (done=2) when no statement uses the file,
 * and done (1) after all statements were recreated. If an API call fails,
 * an error is logged and the event is left untouched.
 *
 * @param object $e Event row (id, page, file, replace_with_file, comment).
 */
function performEditReplaceWikidata($e)
{
    $response = $this->getJSON4Q($e);
    if ($response === false) {
        return;
    }

    $itemId = $e->page;
    $claimsByProperty = $response->entities->$itemId->claims;

    // Matching statements as (claim id, property) pairs.
    $matches = array();
    foreach ($claimsByProperty as $prop => $claims) {
        foreach ($claims as $claim) {
            if ($claim->type != 'statement') {
                continue;
            }
            if ($claim->mainsnak->datatype != 'commonsMedia') {
                continue;
            }
            $stored = str_replace(' ', '_', ucfirst(trim($claim->mainsnak->datavalue->value)));
            if ($stored == $e->file) {
                $matches[] = array($claim->id, $prop);
            }
        }
    }

    if (count($matches) == 0) {
        $this->setDone($e->id, 2, 'File link not found in page');
        return;
    }

    // Step 1: remove the old image statements.
    $claimIds = array();
    foreach ($matches as $match) {
        $claimIds[] = $match[0];
    }
    if (!$this->editWikidata('wbremoveclaims', array('claim' => implode('|', $claimIds)))) {
        $this->d->error("performEditReplaceWikidata:1 failed");
        return;
    }

    // Step 2: add one new statement per removed one.
    foreach ($matches as $match) {
        $request = array(
            'snaktype' => 'value',
            'property' => $match[1],
            'value' => json_encode(str_replace('_', ' ', $e->replace_with_file)),
            'entity' => $e->page,
            'summary' => $e->comment,
        );
        if (!$this->editWikidata('wbcreateclaim', $request)) {
            $this->d->error( "performEditReplaceWikidata:2 failed" );
            return;
        }
    }

    $this->setDone($e->id, 1); # OK!
}
361
362
/**
 * Performs a wikitext edit for one event: fetch the page, unlink or
 * replace the file in its text, and save the result.
 *
 * The event is marked skipped (done=2) with a note when the API or page is
 * unavailable, when the text is unchanged, when the result would be empty
 * or differ by more than $max_text_diff bytes, or when saving fails. On
 * success it is marked done (1) together with the id of the revision that
 * was edited.
 *
 * @param object $e Event row (id, wiki, page, file, action, comment and,
 *                  for replacements, replace_with_file).
 */
function performEditText ( $e ) {
    $this->d->debug( "performEditText $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
    $api = $this->getAPI ( $e->wiki ) ;
    if ( $api === false ) {
        $this->setDone ( $e->id , 2 , "Could not connect to API" ) ;
        return ;
    }
    $services = new \Mediawiki\Api\MediawikiFactory( $api );
    try {
        $page = $services->newPageGetter()->getFromTitle( $e->page );
    } catch (Exception $ex) {
        $this->setDone ( $e->id , 2 , "Page not found" ) ;
        $this->d->debug("Page '$e->page' not found ($ex), id='$e->id' wiki=$e->wiki file=$e->file action=$e->action");
        return ;
    }
    $revision = $page->getRevisions()->getLatest();

    if ( !$revision ) {
        $this->setDone ( $e->id , 2 , "Latest revision not found" ) ;
        return ;
    }

    $rev_id = $revision->getId() ;
    $text = $revision->getContent()->getData() ;

    $file = $e->file ;
    $pattern = $this->matcher->matcher_prepare_pattern( $file );

    $new_text = $text ;
    $new_file = '' ; # Only set for replacements; defined here so the log line below is valid for unlinks too

    if ( $e->action == 'unlink' ) {
        ## remove image references in text; see ./matcher.inc
        $this->d->info("Try to unlink '$pattern' in $e->wiki: $e->page");
        $new_text = $this->matcher->matcher_do_unlink( $new_text, $pattern );

    } else if ( $e->action == 'replace' ) {
        ## replace image with new_file in text; see ./matcher.inc
        $new_file = $e->replace_with_file;
        $this->d->info("Try to replace '$pattern' with '$new_file' in $e->wiki: $e->page");
        $new_text = $this->matcher->matcher_do_replacement( $new_text, $pattern, $new_file );
    }

    if ( $text == $new_text ) { # No change
        $this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
        $this->d->info( "Article unchanged. id=$e->id; $e->wiki: $e->page" );
        return ;
    }

    # Safety net: never blank a page and never change more than the allowed number of bytes
    if ( strlen(trim($new_text)) == 0 or abs(strlen($text)-strlen($new_text)) > $this->max_text_diff ) {
        $this->setDone ( $e->id , 2 , 'Text change too big' ) ;
        $this->d->warn( "Article text change is too big. id=$e->id; $e->wiki: $e->page" );
        return ;
    }

    if ( !isset($e->comment) ) $e->comment = '' ;
    $e->comment = (string)$e->comment ;

    $this->d->info( "Editing $e->wiki: $e->page to $e->action $e->file (w/ $new_file) AS $e->comment") ;

    $params = array (
        'title' => $e->page ,
        'text' => trim($new_text) ,
        'summary' => $e->comment ,
        'bot' => 1
    ) ;

    $x = $this->editWiki ( $e->wiki , 'edit' , $params ) ;
    # A response without edit/result counts as a failure
    if ( $x and isset ( $x['edit']['result'] ) and $x['edit']['result'] == 'Success' ) {
        $this->setDone ( $e->id , 1 , array('revision'=>$rev_id) ) ;
    } else {
        $this->d->error( "Cannot edit wiki ($e->wiki: $e->page): " . $this->last_exception );
        $this->setDone ( $e->id , 2 , $this->last_exception ) ;
    }

}
444
/**
 * Executes a "replace" event, dispatching to the Wikidata handler for
 * items (wikidatawiki, namespace 0) and to the wikitext handler otherwise.
 *
 * @param object $e Event row.
 */
function performEditReplace($e)
{
    // Only the target wiki is checked for a local file (no Commons check).
    if (!$this->fileExistenceSanityCheck($e, false)) {
        return; # Nothing to do
    }
    if (!isset($e->namespace)) {
        return; # Paranoia
    }

    $isWikidataItem = ($e->wiki == 'wikidatawiki' && $e->namespace == 0);
    if (!$isWikidataItem) {
        $this->performEditText($e); # "Normal" edit
        return;
    }
    $this->performEditReplaceWikidata($e);
}
454
/**
 * Executes an "unlink" event, dispatching to the Wikidata handler for
 * items (wikidatawiki, namespace 0) and to the wikitext handler otherwise.
 *
 * @param object $e Event row.
 */
function performEditUnlink($e)
{
    // Skip when the file exists on the target wiki or on Commons.
    if (!$this->fileExistenceSanityCheck($e, true)) {
        return; # Nothing to do
    }

    $isWikidataItem = ($e->wiki == 'wikidatawiki' && $e->namespace == 0);
    if ($isWikidataItem) {
        $this->performEditUnlinkWikidata($e);
        return;
    }

    # "Normal" edit
    $this->d->debug( "performEditUnlink $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
    $this->performEditText($e);
}
464
/**
 * Routes an event to the handler for its action; an unknown action is
 * logged, dumped and terminates the script.
 *
 * @param object $e Event row.
 */
function performEdit($e)
{
    $this->d->debug( "performEdit $e->action on id=$e->id wiki=$e->wiki page=$e->page." );

    switch ($e->action) {
        case 'unlink':
            $this->performEditUnlink($e);
            break;
        case 'replace':
            $this->performEditReplace($e);
            break;
        default:
            $this->d->error( "PerformEdit got unknown action $e->action" );
            print_r($e);
            die("Unknown action " . $e->action);
    }
}
475
/**
 * Re-queues events that were skipped for transient reasons.
 *
 * Events with done=2 (skipped) whose note mentions a rate limit or an edit
 * conflict are reset to done=0 (pending) with an empty note.
 * Status codes: 0 = pending, 1 = done, 2 = skipped.
 *
 * @param object $db Open tool database connection.
 */
function clearBogusIssues($db)
{
    foreach (array('rate limit', 'edit conflict') as $reason) {
        $sql = "update `event` set done=0,note='' where note like '%{$reason}%' and done=2";
        $this->d->debug("Set done=0 (pending) on '{$reason}' events where done=2(skipped)");
        $this->runQuery($db, $sql);
    }
}
488
/**
 * Processes all pending events (done=0) in timestamp order.
 *
 * Consecutive edits on the same wiki are spaced five seconds apart. An
 * exception raised by a single edit is printed and the loop continues.
 * Afterwards, events skipped for transient reasons are re-queued; this is
 * done last because it is slow.
 */
function performEdits()
{
    $this->d->debug("Connecting to DB");
    $db = $this->getToolDB();
    $this->d->debug("Get work events (pending events)");
    $rows = $this->runQuery($db, "SELECT * FROM `event` WHERE done=0 ORDER BY timestamp ASC,log_timestamp ASC");

    $pending = array();
    while ($row = $rows->fetch_object()) {
        $pending[] = $row;
    }
    $db->close();

    $previousWiki = '';
    foreach ($pending as $event) {
        if ($previousWiki == $event->wiki) {
            sleep(5); // Edit rate limiter
        }
        $this->d->debug("Perform an edit in $event->wiki");
        try {
            $this->performEdit($event);
        } catch (Exception $ex) {
            echo 'Caught exception: ', $ex->getMessage(), "\n";
        }
        $previousWiki = $event->wiki;
    }

    $this->d->debug("Connecting to DB");
    $db = $this->getToolDB();
    $this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)");
    $this->clearBogusIssues($db);
    $db->close();
}
522
/**
 * Reads the replace commands from User:CommonsDelinker/commands on
 * Commons, queues a "replace" event for every usage of each old file, and
 * saves the page without the commands that were accepted.
 *
 * Recognised lines (one command per line):
 *   {{universal replace|Old.jpg|New.jpg|reason=...}}
 *   {{universal replace|Old.jpg|New.jpg|reason=...|user=...}}
 *
 * Commands that cannot be executed stay on the page, prefixed with the
 * reason. Nothing happens while the page contains {{stop}}.
 */
function addReplaceEvents () {
    $cmd_page = 'User:CommonsDelinker/commands' ;
    $this->d->trace("getText from User:CommonsDelinker/commands");
    $t = $this->getTextFromWiki ( 'commonswiki' , $cmd_page ) ;
    if ( $t === false ) {
        $this->d->error( "Could not open commands page") ;
        return ;
    }

    if ( preg_match ( '/\{\{[Ss]top\}\}/' , $t ) ) return ; // STOP

    # The "user=" form has to be tried first: the plain form also matches
    # such lines and would swallow "|user=..." into the reason.
    $prefix = '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*' ;
    $re_with_user = $prefix . '(.+?)\s*\|\s*user\s*=\s*(\S.*?)\s*\}\}/' ;
    $re_plain = $prefix . '(\S.*?)\s*\}\}/' ;

    $sqls = array() ;

    $this->d->trace("Processing page content...");
    $ts = date ( 'YmdHis' ) ;
    $lines = explode ( "\n" , $t ) ;
    $nt = array() ; # Lines of the page text to save back
    foreach ( $lines AS $l ) {
        if ( !preg_match ( $re_with_user , $l , $m ) and !preg_match ( $re_plain , $l , $m ) ) {
            $nt[] = $l ; # Not a command, keep as is
            continue ;
        }
        $old_file = ucfirst(str_replace(' ','_',trim($m[1]))) ;
        $new_file = ucfirst(str_replace(' ','_',trim($m[2]))) ;
        $this->d->trace("Process line; old:$old_file new:$new_file.");

        $comment = trim($m[3]) ;
        $user = '' ;
        if ( isset($m[4]) ) {
            $user = str_replace(' ','_',trim($m[4])) ;
            # Reduce a wiki link like [[User:Name|label]] to just "Name"
            $user = preg_replace ( '/^\s*\[\[(?:[^:|\]]+:)?(.+?)\s*(\||\]\]).*$/' , '$1' , $user ) ;
        }

        if ( !$this->hasLocalFile ( 'commonswiki' , $new_file ) ) {
            $nt[] = "No such replacement file: " . $l ;
            continue ;
        }

        if ( !preg_match('/\.svg$/i',$old_file) and preg_match('/\.svg$/i',$new_file) ) {
            $nt[] = "Non-SVG to SVG replacement: " . $l ;
            continue ;
        }

        $this->d->trace("Get file usage for $old_file");
        $usages = $this->getFileUsage ( $old_file ) ;
        # An unused file yields an empty list, so check before reading index 0
        if ( isset ( $usages[0] ) and $usages[0] === "*OVERFLOW*" ) {
            $nt[] = "File is used on too many pages: " . $l;
            $this->d->info("Skipping file $old_file; high usage (>$this->maximum_file_usage_limit)!");
            continue;
        }

        $this->d->trace("Generate replaces...");
        foreach ( $usages AS $usage ) {
            $page = $usage->gil_page_title ;
            if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ':' . $page ;
            $params = array (
                'action' => 'replace' ,
                'file' => $old_file ,
                'wiki' => $usage->gil_wiki ,
                'page' => $page ,
                'namespace' => $usage->gil_page_namespace_id ,
                'timestamp' => $ts ,
                'comment' => $comment ,
                'log_id' => -1 ,
                'log_timestamp' => $ts ,
                'user' => $user ,
                'done' => 0 ,
                'replace_with_file' => $new_file
            ) ;
            $params['comment'] = $this->constructReplaceComment ( $params ) ;

            $s1 = array() ;
            $s2 = array() ;
            foreach ( $params AS $k => $v ) {
                $s1[] = $k ;
                $s2[] = "'" . $this->getDBsafe($v) . "'" ;
            }

            $this->d->trace("Add SQL to sqls[] array, len=" . count($sqls));
            $sql = "INSERT IGNORE INTO event (" . implode ( ',' , $s1 ) . ") VALUES (" . implode ( "," , $s2 ) . ")" ;
            $sqls[] = $sql ;
            $this->d->trace("SQL:$sql");
        }
    }

    $nt = implode ( "\n" , $nt ) ;
    if ( $t === $nt ) return ; // No change

    # Run SQL
    $db = $this->getToolDB() ;
    foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ;
    $db->close() ;

    # Save new text to Wiki
    $params = array (
        'title' => $cmd_page ,
        'text' => trim($nt) ,
        'summary' => 'Removing replace commands, will be executed soon' ,
        'bot' => 1
    ) ;

    $this->d->info( "Editing $cmd_page...") ;
    $x = $this->editWiki ( 'commonswiki' , 'edit' , $params ) ;
    $this->d->debug( "Editing $cmd_page done.") ;
}
641
/**
 * Cleans up the event queue before edits are performed.
 *
 * Deletes events that have neither action nor file. Then, for every
 * (file, wiki, namespace) group with more than $min_faux_template_icon
 * pending events, marks the group's events as skipped (done=2), since such
 * a file is probably an icon embedded through a template.
 */
function fixFauxTemplateReplacements()
{
    $toolDb = $this->getToolDB();

    $this->runQuery($toolDb, "DELETE FROM event WHERE action='' and file=''");

    $groups = $this->runQuery(
        $toolDb,
        'select file,wiki, count(*) as cnt,namespace from event where done=0 group by file,wiki,namespace having cnt>' . $this->min_faux_template_icon
    );

    // Collect the updates first, then run them once the result is consumed.
    $updates = array();
    while ($group = $groups->fetch_object()) {
        $safeFile = $this->getDBsafe($group->file);
        $safeWiki = $this->getDBsafe($group->wiki);
        $updates[] = "UPDATE event SET done=2,note='Likely template icon, skipping' WHERE file='$safeFile' AND wiki='$safeWiki' AND namespace=" . $group->namespace;
    }

    foreach ($updates as $update) {
        $this->runQuery($toolDb, $update);
    }
    $toolDb->close();
}
659
/**
 * One full work cycle: queue unlink events for recently deleted files,
 * queue replace events from the command page, skip likely template icons,
 * then perform all pending edits.
 */
function run()
{
    $log = $this->d;

    $log->debug("Get last timestamp");
    $since = $this->getLastTimestamp();

    $log->debug("Get recent deleted files");
    $deleted = $this->getRecentDeletedFiles($since);

    $log->debug("Add unlink events for recently deleted files");
    $this->addUnlinkEvents($deleted);

    $log->debug("Add replace events");
    $this->addReplaceEvents();

    $log->debug("Fix bogus template replacements");
    $this->fixFauxTemplateReplacements();

    $log->debug("Perform the queued edits");
    $this->performEdits();
}
675
/**
 * Debugging entry point: only runs the replace-command step.
 */
function debug_run0()
{
    $log = $this->d;
    $log->debug("Add replace events");
    $this->addReplaceEvents();
}
681
682 }
683
echo "Bot is starting.\n";
$demon = new CommonsDelinquentDemon();

// Single steps that can be enabled for manual testing:
//$demon->addReplaceEvents () ;
//$demon->performEdits() ;
//$demon->fixFauxTemplateReplacements() ;
//$demon->debug_run0();

$demon->set_debug_mode(8);

// First work off events that are still pending, then loop forever with a
// 30 second pause between cycles.
$demon->debug("Performing edits...");
$demon->performEdits();
for (;;) {
    $demon->debug("Calling run loop...");
    $demon->run();
    $demon->debug("Sleeping 30...");
    sleep(30);
}
705
706 ?>

mercurial