|
1 #!/usr/bin/php |
|
2 <?PHP |
|
3 |
|
# The relative require_once paths that follow only resolve from the tool's home
# directory, so stop right away (with a readable message) if it cannot be entered.
if ( !chdir ( '/data/project/commons-delinquent' ) ) {
	fwrite ( STDERR , "Cannot change into /data/project/commons-delinquent\n" ) ;
	exit ( 1 ) ;
}

# Report fatal errors only (E_ALL is deliberately not enabled) and print them to the console.
error_reporting ( E_ERROR | E_CORE_ERROR | E_COMPILE_ERROR ) ;
ini_set ( 'display_errors' , 'On' ) ;
|
8 |
|
9 require_once ( './shared.inc' ) ; |
|
10 require_once( './matcher.inc' ); |
|
11 require_once( './debug.inc' ); |
|
12 |
|
13 class CommonsDelinquentDemon extends CommonsDelinquent { |
|
14 |
|
15 var $avoidNamespaceOnWiki = [ |
|
16 'dewiki' => [4] |
|
17 ] ; |
|
18 |
|
19 private $d; |
|
20 private $matcher; |
|
21 |
|
22 var $maximum_file_usage_limit = 65535; # prevent memory explosion by extreme used files (65535 ~ 650 MB) |
|
23 var $delay_minutes = 10 ; # Wait after deletion |
|
24 var $fallback_minutes = 120 ; # Only used if DB is empty |
|
25 var $max_text_diff = 1500 ; # Max char diff |
|
26 var $min_faux_template_icon = 500 ; |
|
27 var $comments = array() ; |
|
28 var $comments_default = array ( |
|
29 'summary' => 'Removing [[:c:File:$1|$1]], it has been deleted from Commons by [[:c:User:$2|$2]] because: $3.' , |
|
30 'replace' => 'Replacing $1 with [[File:$2]] (by [[:c:User:$3|$3]] because: $4).' , |
|
31 'by' => ' Requested by [[User:$1|]].' |
|
32 ) ; |
|
33 |
|
/**
 * Runs the parent constructor, then creates the Debug logger and a Matcher
 * that is handed the very same logger instance.
 */
function __construct() {
	parent::__construct() ;

	$logger = new Debug() ;
	$this->d = $logger ;
	$this->matcher = new Matcher ( $logger ) ;
}
|
39 |
|
/**
 * Forwards the requested verbosity to the Debug logger.
 *
 * @param mixed $l level passed unchanged to the logger's set_level()
 */
function set_debug_mode($l) {
	$logger = $this->d ;
	$logger->set_level ( $l ) ;
}
|
43 |
|
/**
 * Writes a message through the Debug logger at debug level.
 *
 * @param mixed $msg message passed unchanged to the logger's debug()
 */
function debug($msg) {
	$logger = $this->d ;
	$logger->debug ( $msg ) ;
}
|
47 |
|
/**
 * Returns the starting point for the next scan of the Commons deletion log.
 *
 * That is the highest log_timestamp among finished events (done=1) in the tool
 * database; when there is none, "now minus fallback_minutes" formatted as YmdHis.
 */
function getLastTimestamp () {
	# log_timestamp is the timestamp of the Commons logging table, NOT the tool edit timestamp!
	$query = "SELECT max(log_timestamp) AS max_ts FROM event WHERE done=1" ;

	$tool_db = $this->getToolDB() ;
	$rows = $this->runQuery ( $tool_db , $query ) ;
	$newest = '' ;
	while ( $row = $rows->fetch_object() ) $newest = $row->max_ts ;
	$tool_db->close() ;

	if ( $newest != '' ) return $newest ;

	# Nothing finished yet: fall back to the current time minus X minutes
	return date ( 'YmdHis' , time() - $this->fallback_minutes*60 ) ;
}
|
64 |
|
/**
 * Tells whether a usage row points at a page that must never be edited.
 *
 * @param object $o        usage row; gil_wiki, gil_page_namespace_id, gil_page_title and gil_to are read
 * @param string $filename name of the file being processed
 * @return bool true when the page has to be skipped
 */
function isBadPage ( $o , $filename ) {
	$ns = $o->gil_page_namespace_id ;

	if ( $o->gil_wiki == 'commonswiki' ) {
		// Self-reference
		if ( $ns == 6 and $o->gil_to == $filename ) return true ;
		// Bot subpage on Commons
		if ( $ns == 2 and preg_match ( '/^\w+Bot\b/' , $o->gil_page_title ) ) return true ;
		// DR and UDR on Commons
		if ( $ns == 4 and preg_match ( '/(Deletion(_| )requests\/.*|Undeletion(_| )requests\/.*)\b/' , $o->gil_page_title ) ) return true ;
	}

	// Namespaces that are off limits on specific wikis
	foreach ( $this->avoidNamespaceOnWiki AS $wiki => $namespaces ) {
		if ( $o->gil_wiki == $wiki and in_array ( $ns , $namespaces ) ) return true ;
	}

	return false ;
}
|
77 |
|
/**
 * Collects the file deletions logged on Commons from $max_ts (inclusive) up to
 * "now minus delay_minutes" (exclusive), each with its remaining global usage.
 *
 * @param string $max_ts lower timestamp bound, interpolated into the SQL as given
 * @return array log rows in ascending log_timestamp order; every row gets a
 *               "usage" array of globalimagelinks rows that passed the
 *               isBadWiki()/isBadPage() filters
 */
function getRecentDeletedFiles ( $max_ts ) {
	# Commons database replica
	$commons = $this->getCommonsDB() ;
	$cur_ts = date ( 'YmdHis' , time() - $this->delay_minutes*60 ) ;

	# All file deletions whose file has not come back and is not a redirect
	$sql = implode ( '' , array (
		"SELECT * FROM logging_compat LEFT JOIN comment ON comment_id = log_comment_id WHERE log_type='delete' AND log_action='delete' AND log_timestamp>='$max_ts' AND log_timestamp<'$cur_ts' AND log_namespace=6" ,
		" AND NOT EXISTS (SELECT * FROM image WHERE img_name=log_title)" ,
		" AND NOT EXISTS (SELECT * FROM page WHERE page_title=log_title AND page_namespace=6 AND page_is_redirect=1)" , # Do not remove redirects. Is that OK???
		" ORDER BY log_timestamp ASC"
	) ) ;

	$deletions = array() ; # Files to delink
	$rows = $this->runQuery ( $commons , $sql ) ;
	while ( $row = $rows->fetch_object() ) $deletions[] = $row ;

	# Second pass: attach the usage instances of every deleted file
	foreach ( $deletions AS $deletion ) {
		$name = $deletion->log_title ;
		$deletion->usage = array() ;
		$links = $this->runQuery ( $commons , "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($name) . "'" ) ;
		while ( $link = $links->fetch_object() ) {
			if ( $this->isBadWiki($link->gil_wiki) or $this->isBadPage($link,$name) ) continue ;
			$deletion->usage[] = $link ;
		}
	}

	$commons->close() ;
	return $deletions ;
}
|
108 |
|
/**
 * Returns the global usage rows of a file, skipping wikis rejected by isBadWiki().
 *
 * To keep memory bounded, collection stops as soon as more than
 * maximum_file_usage_limit rows were gathered; the result is then the
 * one-element array [ "*OVERFLOW*" ], which callers have to test for.
 *
 * @param string $filename file name as stored in globalimagelinks.gil_to
 * @return array list of globalimagelinks rows, or the overflow sentinel array
 */
function getFileUsage ( $filename ) {
	$this->d->trace("Get file usage for $filename");
	$ret = array() ;
	$overflow = false ;
	$db_co = $this->getCommonsDB() ;
	$sql = "SELECT * FROM globalimagelinks WHERE gil_to='" . $this->getDBsafe($filename) . "'" ;
	$this->d->trace("Try running: $sql");
	$result = $this->runQuery ( $db_co , $sql ) ;
	$this->d->trace("Got result, looping through");
	while ( $o = $result->fetch_object() ) {
		if ( $this->isBadWiki($o->gil_wiki) ) continue ;
		$ret[] = $o ;
		# limit the maximum matches because we use more memory than toolforge allows
		if ( count($ret) > $this->maximum_file_usage_limit ) {
			$this->d->error("Overflow!! We cannot get usage for $filename, too much hits (>$this->maximum_file_usage_limit)!");
			$ret = array ( "*OVERFLOW*" ) ;
			$overflow = true ;
			break ;
		}
	}
	if ( !$overflow ) $this->d->trace("Processed " . count($ret) . " usage points");

	# Single exit: the connection is closed exactly once on both paths
	$db_co->close() ;
	return $ret ;
}
|
135 |
|
/**
 * Decides from the namespace id whether a usage may be edited at all.
 *
 * @param object $usage usage row; only gil_page_namespace_id is read
 * @return bool false for odd positive ids (talk pages) and for negative ids, true otherwise
 */
function canUnlinkFromNamespace ( $usage ) {
	$ns = $usage->gil_page_namespace_id ;
	$is_talk = ( $ns % 2 > 0 ) ; // Skip talk pages
	$is_negative = ( $ns < 0 ) ; // Paranoia
	return !( $is_talk or $is_negative ) ;
}
|
141 |
|
/**
 * Verifies that the file of an event is really gone before any edit is made.
 *
 * When the file exists on the event's wiki, or (only if $check_commons is set)
 * on commonswiki, the event is marked as skipped (done=2) with a matching note.
 *
 * @param object $e             event row; id, wiki and file are read
 * @param bool   $check_commons whether existence on Commons also blocks the edit
 * @return bool true when the edit may proceed, false when the event was skipped
 */
function fileExistenceSanityCheck ( $e , $check_commons ) {
	$skip_note = null ;
	if ( $this->hasLocalFile ( $e->wiki , $e->file ) ) {
		$skip_note = 'Skipped: Local file exists' ;
	} else if ( $check_commons and $this->hasLocalFile ( 'commonswiki' , $e->file ) ) {
		$skip_note = 'Skipped: Commons file exists' ;
	}

	if ( $skip_note === null ) return true ;

	$this->setDone ( $e->id , 2 , $skip_note ) ;
	return false ;
}
|
153 |
|
154 |
|
/**
 * Fetches the content of the latest revision of a page.
 *
 * Exceptions raised while talking to the API are caught and reported as
 * "no text": the callers in this class already treat false as "page not
 * available", whereas an uncaught exception would abort the whole run loop.
 *
 * @param string $wiki     wiki identifier passed to getAPI()
 * @param string $pagename full page title
 * @return mixed the revision content data, or false when there is no API, no
 *               latest revision, or the request failed
 */
function getTextFromWiki ( $wiki , $pagename ) {
	$api = $this->getAPI ( $wiki ) ;
	if ( !$api ) return false ;

	try {
		$services = new \Mediawiki\Api\MediawikiFactory( $api );
		$page = $services->newPageGetter()->getFromTitle( $pagename );
		$revision = $page->getRevisions()->getLatest();
		if ( !$revision ) return false ;
		return $revision->getContent()->getData() ;
	} catch ( Exception $ex ) {
		$this->d->debug ( "Could not read '$pagename' from $wiki: " . $ex->getMessage() ) ;
		return false ;
	}
}
|
169 |
|
/**
 * Returns the edit summary pattern for a wiki.
 *
 * The pattern is taken from the page "User:CommonsDelinker/<mode>-I18n" on the
 * target wiki when that page can be read, otherwise from comments_default.
 * Results are remembered per mode and wiki in $this->comments.
 *
 * @param string $wiki wiki identifier
 * @param string $mode key of comments_default ("summary", "replace" or "by"); null means "summary"
 * @return mixed the pattern text
 */
function getLocalizedCommentPattern ( $wiki , $mode = 'summary') {
	if ( $mode === null ) $mode = 'summary' ;

	if ( !isset ( $this->comments[$mode][$wiki] ) ) {
		$fallback = $this->comments_default[$mode] ; # Default
		# Try local translation
		$translated = $this->getTextFromWiki ( $wiki , 'User:CommonsDelinker/' . $mode . '-I18n' ) ;
		$this->comments[$mode][$wiki] = ( $translated !== false ) ? $translated : $fallback ;
	}

	return $this->comments[$mode][$wiki] ;
}
|
185 |
|
/**
 * Builds the edit summary for removing a deleted file from a page.
 *
 * Placeholders of the localized pattern: $1 = file name, $2 = deleting user,
 * $3 = deletion comment. On wikis other than Commons, wiki links inside the
 * deletion comment are rewritten to point to Commons.
 *
 * @param object $file  deletion log row; comment_text, log_title and log_user_text are read
 * @param object $usage usage row; gil_wiki is read
 * @return string the finished summary
 */
function constructUnlinkComment ( $file , $usage ) {
	$pattern = $this->getLocalizedCommentPattern ( $usage->gil_wiki ) ;

	$c = (string) $file->comment_text ;
	# Usage rows carry the wiki in gil_wiki; there is no "wiki" property on them
	if ( $usage->gil_wiki != 'commonswiki' ) { # Point original comment links to Commons
		$c = (string) preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
		$c = (string) preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
	}

	# strtr() inserts the values literally and in a single pass, so a "$" or "\"
	# followed by digits in a file name or comment is not taken as a backreference
	# and an inserted value is never scanned for further placeholders.
	return strtr ( $pattern , array (
		'$1' => (string) $file->log_title ,
		'$2' => (string) $file->log_user_text ,
		'$3' => $c
	) ) ;
}
|
201 |
|
/**
 * Builds the edit summary for replacing one file with another.
 *
 * Placeholders of the localized "replace" pattern: $1 = old file, $2 = new
 * file, $3 = 'CommonsDelinker', $4 = reason. When a requesting user is given,
 * the localized "by" pattern ($1 = user) is appended after a space.
 *
 * @param array $params reads 'wiki', 'comment', 'file', 'replace_with_file' and optionally 'user'
 * @return string the finished summary
 */
function constructReplaceComment ( $params ) {
	$pattern = $this->getLocalizedCommentPattern ( $params['wiki'] , 'replace' ) ;

	$c = (string) $params['comment'] ;
	if ( $params['wiki'] != 'commonswiki' ) { # Point original comment links to Commons
		$c = (string) preg_replace ( '/\[\[([^|]+?)\]\]/' , '[[:c:\1|]]' , $c ) ; # Pointing to Commons (no pipe)
		$c = (string) preg_replace ( '/\[\[([^:].+?)\]\]/' , '[[:c:\1]]' , $c ) ; # Pointing to Commons (with pipe)
	}

	# Literal, single-pass substitution: "$" or "\" plus digits inside file names,
	# reasons or user names must not be interpreted as regex backreferences.
	$pattern = strtr ( $pattern , array (
		'$1' => (string) $params['file'] ,
		'$2' => (string) $params['replace_with_file'] ,
		'$3' => 'CommonsDelinker' ,
		'$4' => $c
	) ) ;

	if ( isset($params['user']) and $params['user'] != '' ) {
		$by = $this->getLocalizedCommentPattern ( $params['wiki'] , 'by' ) ;
		$pattern .= ' ' . str_replace ( '$1' , (string) $params['user'] , $by ) ;
	}

	return $pattern ;
}
|
224 |
|
/**
 * Appends the INSERT statement for one pending "unlink" event to $sqls.
 *
 * Nothing is appended when the namespace may not be edited or when the wiki
 * has a local file of the same name.
 *
 * @param object $file  deletion log row (log_id, log_timestamp and the fields used for the summary)
 * @param object $usage usage row (gil_wiki, gil_to, gil_page_title, gil_page_namespace, gil_page_namespace_id)
 * @param array  $sqls  list of SQL statements, extended in place
 */
function addUnlinkEvent ( $file , $usage , &$sqls ) {
	$editable = $this->canUnlinkFromNamespace ( $usage ) && !$this->hasLocalFile ( $usage->gil_wiki , $usage->gil_to ) ;
	if ( !$editable ) return ;

	# Full page title: namespace prefix only when there is one
	$title = $usage->gil_page_title ;
	if ( $usage->gil_page_namespace != '' ) $title = $usage->gil_page_namespace . ":" . $title ;

	$event = array (
		'action' => 'unlink' ,
		'file' => $usage->gil_to ,
		'wiki' => $usage->gil_wiki ,
		'page' => $title ,
		'namespace' => $usage->gil_page_namespace_id ,
		'comment' => $this->constructUnlinkComment ( $file , $usage ) ,
		'timestamp' => date ( 'YmdHis' ) ,
		'log_id' => $file->log_id ,
		'log_timestamp' => $file->log_timestamp ,
		'done' => 0
	) ;

	$columns = implode ( ',' , array_keys ( $event ) ) ;
	$quoted = array() ;
	foreach ( $event AS $value ) $quoted[] = "'" . $this->getDBsafe($value) . "'" ;

	$sqls[] = "INSERT IGNORE INTO event (" . $columns . ") VALUES (" . implode ( "," , $quoted ) . ")" ;
}
|
255 |
|
/**
 * Queues unlink events for every usage of every deleted file.
 *
 * All statements are generated first and then executed over a single tool
 * database connection.
 *
 * @param array $delink_files deletion rows, each carrying a "usage" array
 */
function addUnlinkEvents ( $delink_files ) {
	$statements = array() ;
	foreach ( $delink_files AS $deleted ) {
		foreach ( $deleted->usage AS $instance ) $this->addUnlinkEvent ( $deleted , $instance , $statements ) ;
	}

	$tool_db = $this->getToolDB() ;
	foreach ( $statements AS $statement ) {
		$this->runQuery ( $tool_db , $statement ) ;
	}
	$tool_db->close() ;
}
|
268 |
|
/**
 * Loads the JSON of the Wikidata item an event refers to.
 *
 * Whenever false is returned, the event has already been marked as skipped
 * (done=2) with a note naming the reason.
 *
 * @param object $e event row; page (the item id), id and file are read
 * @return mixed decoded wbgetentities response, or false when the item cannot
 *               be loaded, does not exist, or has no claims
 */
function getJSON4Q ( $e ) {
	$q = $e->page ;
	$url = "https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=" . urlencode ( $q ) ;

	# A failed request or unparsable answer is a different situation from
	# "item has no claims" and gets a note of its own.
	$raw = file_get_contents ( $url ) ;
	$j = ( $raw === false ) ? null : json_decode ( $raw ) ;
	if ( !is_object ( $j ) ) {
		$this->setDone ( $e->id , 2 , "Could not load $q from Wikidata" ) ;
		return false ;
	}

	if ( isset ( $j->entities->$q->missing ) ) { # No such item
		$this->setDone ( $e->id , 2 , "No such item $q" ) ;
		return false ;
	}
	if ( !isset ( $j->entities->$q->claims ) ) {
		$this->setDone ( $e->id , 2 , "Did not find " . $e->file . " on " . $q ) ;
		return false ;
	}
	return $j ;
}
|
283 |
|
/**
 * Removes every commonsMedia statement of a Wikidata item that points to the
 * event's file, then marks the event as done.
 *
 * The event is also marked done when no matching statement exists. It is left
 * untouched when the removal request fails.
 *
 * @param object $e event row; page, file, comment and id are read
 */
function performEditUnlinkWikidata ( $e ) {
	$json = $this->getJSON4Q ( $e ) ;
	if ( $json === false ) return ;

	$item = $e->page ;
	$claim_ids = array() ;
	foreach ( $json->entities->$item->claims AS $statements ) {
		foreach ( $statements AS $statement ) {
			if ( $statement->type != 'statement' ) continue ;
			if ( $statement->mainsnak->datatype != 'commonsMedia' ) continue ;
			# Bring the stored value into the same form as the event's file name before comparing
			$linked = str_replace ( ' ' , '_' , ucfirst ( trim ( $statement->mainsnak->datavalue->value ) ) ) ;
			if ( $linked == $e->file ) $claim_ids[] = $statement->id ;
		}
	}

	if ( count($claim_ids) > 0 ) {
		$request = array ( 'claim'=>implode('|',$claim_ids) , 'summary' => $e->comment ) ;
		if ( !$this->editWikidata ( 'wbremoveclaims' , $request ) ) return ;
	}

	$this->setDone ( $e->id , 1 ) ; # OK!
}
|
307 |
|
308 |
|
/**
 * Replaces the event's file on a Wikidata item: every commonsMedia statement
 * pointing to the old file is removed and re-created, under the same property,
 * with the replacement file as value.
 *
 * Outcomes:
 *  - no matching statement: event skipped (done=2, 'File link not found in page')
 *  - removal fails: event left pending, nothing was changed
 *  - re-creation fails after the removal went through: event skipped with an
 *    explicit note, because a retry would no longer find the old statement
 *  - otherwise: event done (done=1)
 *
 * @param object $e event row; page, file, replace_with_file, comment and id are read
 */
function performEditReplaceWikidata ( $e ) {
	$j = $this->getJSON4Q ( $e ) ;
	if ( $j === false ) return ;

	$q = $e->page ;

	# Collect [ claim id , property ] of every statement that uses the old file
	$remove = array() ;
	foreach ( $j->entities->$q->claims AS $prop => $claims ) {
		foreach ( $claims AS $c ) {
			if ( $c->type != 'statement' ) continue ;
			if ( $c->mainsnak->datatype != 'commonsMedia' ) continue ;
			if ( str_replace ( ' ' , '_' , ucfirst ( trim ( $c->mainsnak->datavalue->value ) ) ) != $e->file ) continue ;
			$remove[] = array ( $c->id , $prop ) ;
		}
	}

	if ( count($remove) == 0 ) {
		$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
		return ;
	}

	# Remove old image entries; the summary is passed along just like in the unlink case
	$ids = array() ;
	foreach ( $remove AS $r ) $ids[] = $r[0] ;
	$ok = $this->editWikidata ( 'wbremoveclaims' , array ( 'claim'=>implode('|',$ids) , 'summary' => $e->comment ) ) ;
	if ( !$ok ) {
		$this->d->error("performEditReplaceWikidata:1 failed");
		return ;
	}

	# Add new image entries
	foreach ( $remove AS $r ) {
		$params = array(
			'snaktype' => 'value' ,
			'property' => $r[1] ,
			'value' => json_encode(str_replace('_',' ',$e->replace_with_file)) ,
			'entity' => $e->page ,
			'summary' => $e->comment
		) ;

		$ok = $this->editWikidata ( 'wbcreateclaim' , $params ) ;
		if ( !$ok ) {
			$this->d->error( "performEditReplaceWikidata:2 failed" );
			# The old statements are already gone; record that instead of leaving the event pending
			$this->setDone ( $e->id , 2 , 'Old claim removed, but replacement claim could not be created' ) ;
			return ;
		}
	}

	$this->setDone ( $e->id , 1 ) ; # OK!
}
|
361 |
|
362 |
|
##################################################################
##
## Here we make the changes.
## Get the page, replace content and upload again.
##
##################################################################
/**
 * Performs an 'unlink' or 'replace' event on an ordinary wiki page.
 *
 * Loads the latest revision, lets the matcher rewrite the text and saves the
 * result. Every early exit marks the event as skipped (done=2) with a note:
 * no API, page not found, no latest revision, text unchanged, or a change that
 * is empty or differs by more than max_text_diff bytes. A successful save
 * marks the event done (done=1) and records the id of the revision that was edited.
 *
 * @param object $e event row; action, id, wiki, page, file, comment and (for 'replace') replace_with_file are read
 */
function performEditText ( $e ) {
	$this->d->debug( "performEditText $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
	$api = $this->getAPI ( $e->wiki ) ;
	if ( $api === false ) {
		$this->setDone ( $e->id , 2 , "Could not connect to API" ) ;
		return ;
	}
	$services = new \Mediawiki\Api\MediawikiFactory( $api );
	try {
		$page = $services->newPageGetter()->getFromTitle( $e->page );
	} catch (Exception $ex) {
		$this->setDone ( $e->id , 2 , "Page not found" ) ;
		$this->d->debug("Page '$e->page' not found ($ex), id='$e->id' wiki=$e->wiki file=$e->file action=$e->action");
		return ;
	}
	$revision = $page->getRevisions()->getLatest();

	if ( !$revision ) {
		$this->setDone ( $e->id , 2 , "Latest revision not found" ) ;
		return ;
	}

	$rev_id = $revision->getId() ;
	$text = $revision->getContent()->getData() ;

	$file = $e->file ;
	$pattern = $this->matcher->matcher_prepare_pattern( $file );

	$new_text = $text ;
	$new_file = '' ; # Only filled for 'replace'; initialised so the log line further down never reads an undefined variable

	if ( $e->action == 'unlink' ) {
		## remove image references in text; see ./matcher.inc
		$this->d->info("Try to unlink '$pattern' in $e->wiki: $e->page");
		$new_text = $this->matcher->matcher_do_unlink( $new_text, $pattern );

	} else if ( $e->action == 'replace' ) {
		## replace image with new_file in text; see ./matcher.inc
		$new_file = $e->replace_with_file;
		$this->d->info("Try to replace '$pattern' with '$new_file' in $e->wiki: $e->page");
		$new_text = $this->matcher->matcher_do_replacement( $new_text, $pattern, $new_file );
	}

	if ( $text == $new_text ) { # No change
		$this->setDone ( $e->id , 2 , 'File link not found in page' ) ;
		$this->d->info( "Article unchanged. id=$e->id; $e->wiki: $e->page" );
		return ;
	}

	# Safety net: never blank a page and never save an unexpectedly large change
	if ( strlen(trim($new_text)) == 0 or abs(strlen($text)-strlen($new_text)) > $this->max_text_diff ) {
		$this->setDone ( $e->id , 2 , 'Text change too big' ) ;
		$this->d->warn( "Article text change is too big. id=$e->id; $e->wiki: $e->page" );
		return ;
	}

	if ( !isset($e->comment) ) $e->comment = '' ;
	$e->comment = (string)$e->comment ;

	$this->d->info( "Editing $e->wiki: $e->page to $e->action $e->file (w/ $new_file) AS $e->comment") ;

	$params = array (
		'title' => $e->page ,
		'text' => trim($new_text) ,
		'summary' => $e->comment ,
		'bot' => 1
	) ;

	$x = $this->editWiki ( $e->wiki , 'edit' , $params ) ;
	# A response without edit/result counts as a failure instead of triggering an undefined-index access
	if ( $x and isset ( $x['edit']['result'] ) and $x['edit']['result'] == 'Success' ) {
		$this->setDone ( $e->id , 1 , array('revision'=>$rev_id) ) ;
	} else {
		$this->d->error( "Cannot edit wiki ($e->wiki: $e->page): " . $this->last_exception );
		$this->setDone ( $e->id , 2 , $this->last_exception ) ;
	}

}
|
444 |
|
/**
 * Executes a 'replace' event, choosing between the Wikidata claim handling
 * (wikidatawiki, namespace 0) and the ordinary text edit.
 *
 * Does nothing further when a local file exists (the sanity check marks the
 * event as skipped) or when the event has no namespace set.
 *
 * @param object $e event row
 */
function performEditReplace ( $e ) {
	$may_edit = $this->fileExistenceSanityCheck ( $e , false ) ;
	if ( !$may_edit ) return ; # Nothing to do
	if ( !isset($e->namespace) ) return ; # Paranoia

	$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
	if ( !$is_wikidata_item ) { # "Normal" edit
		$this->performEditText ( $e ) ;
		return ;
	}
	$this->performEditReplaceWikidata ( $e ) ;
}
|
454 |
|
/**
 * Executes an 'unlink' event, choosing between the Wikidata claim handling
 * (wikidatawiki, namespace 0) and the ordinary text edit.
 *
 * Does nothing further when the file exists locally or on Commons (the sanity
 * check marks the event as skipped).
 *
 * @param object $e event row
 */
function performEditUnlink ( $e ) {
	$may_edit = $this->fileExistenceSanityCheck ( $e , true ) ;
	if ( !$may_edit ) return ; # Nothing to do

	$is_wikidata_item = ( $e->wiki == 'wikidatawiki' && $e->namespace == 0 ) ;
	if ( $is_wikidata_item ) {
		$this->performEditUnlinkWikidata ( $e ) ;
		return ;
	}

	# "Normal" edit
	$this->d->debug( "performEditUnlink $e->action on id=$e->id wiki=$e->wiki page=$e->page." );
	$this->performEditText ( $e ) ;
}
|
464 |
|
/**
 * Dispatches one event to the handler for its action.
 *
 * An action other than 'unlink' or 'replace' is logged, the event is dumped
 * and the script is terminated.
 *
 * @param object $e event row; action, id, wiki and page are read here
 */
function performEdit ( $e ) {
	$this->d->debug( "performEdit $e->action on id=$e->id wiki=$e->wiki page=$e->page." );

	switch ( $e->action ) {
		case 'unlink' :
			$this->performEditUnlink ( $e ) ;
			break ;
		case 'replace' :
			$this->performEditReplace ( $e ) ;
			break ;
		default :
			$this->d->error( "PerformEdit got unknown action $e->action" );
			print_r ( $e ) ;
			die ( "Unknown action " . $e->action ) ;
	}
}
|
475 |
|
/**
 * Re-queues events that were skipped for reasons named in their note:
 * 'rate limit' and 'edit conflict'.
 *
 * Event states: 0=pending, 1=done, 2=skipped. Matching rows go from 2 back to
 * 0 and their note is cleared.
 *
 * @param object $db open tool database connection, passed on to runQuery()
 */
function clearBogusIssues ( $db ) {
	foreach ( array ( 'rate limit' , 'edit conflict' ) AS $issue ) {
		$sql = "update `event` set done=0,note='' where note like '%" . $issue . "%' and done=2" ;
		$this->d->debug("Set done=0 (pending) on '" . $issue . "' events where done=2(skipped)");
		$this->runQuery ( $db , $sql ) ;
	}
}
|
488 |
|
/**
 * Works through all pending events (done=0), oldest first, and afterwards
 * re-queues the events that were skipped because of rate limits or edit conflicts.
 *
 * The event list is read completely and the connection closed before any edit
 * is made. Two consecutive edits on the same wiki are separated by a 5 second
 * pause. An exception thrown by a single edit is printed and does not stop the loop.
 */
function performEdits () {
	$this->d->debug("Connecting to DB");
	$db = $this->getToolDB() ;
	$this->d->debug("Get work events (pending events)");
	$rows = $this->runQuery ( $db , "SELECT * FROM `event` WHERE done=0 ORDER BY timestamp ASC,log_timestamp ASC" ) ;
	$pending = array() ;
	while ( $row = $rows->fetch_object() ) $pending[] = $row ;
	$db->close() ;

	$previous_wiki = '' ;
	foreach ( $pending AS $event ) {
		if ( $previous_wiki == $event->wiki ) sleep ( 5 ) ; // Edit rate limiter
		$this->d->debug("Perform an edit in $event->wiki");
		try {
			$this->performEdit ( $event ) ;
		} catch (Exception $failure) {
			echo 'Caught exception: ', $failure->getMessage(), "\n";
		}
		$previous_wiki = $event->wiki ;
	}

	## Re-queueing is slow, so it is done at the end instead of before the edits
	$this->d->debug("Connecting to DB");
	$db = $this->getToolDB() ;
	$this->d->debug("Clear bogus issues (reactivate rate limit/edit conflict skipped issues)");
	$this->clearBogusIssues ( $db ) ;
	$db->close() ;
}
|
522 |
|
/**
 * Turns the {{universal replace|old|new|reason=...[|user=...]}} commands on
 * User:CommonsDelinker/commands (Commons) into pending 'replace' events.
 *
 * Per command line:
 *  - replacement file missing on Commons, non-SVG to SVG replacement, or too
 *    many usages: the line stays on the page, prefixed with the reason
 *  - otherwise: one event per usage of the old file is queued and the line is
 *    removed from the page
 * Lines that are not commands are kept as they are. Nothing happens while the
 * page cannot be read or contains {{stop}}.
 */
function addReplaceEvents () {
	$cmd_page = 'User:CommonsDelinker/commands' ;
	$this->d->trace("getText from User:CommonsDelinker/commands");
	$t = $this->getTextFromWiki ( 'commonswiki' , $cmd_page ) ;
	if ( $t === false ) {
		$this->d->error( "Could not open commands page") ;
		return ;
	}

	if ( preg_match ( '/\{\{[Ss]top\}\}/' , $t ) ) return ; // STOP

	$sqls = array() ;

	# The plain pattern's lazy reason group also matches lines that carry "|user=...",
	# swallowing the user into the reason. The user form is therefore tried first.
	$re_with_user = '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*(.+?)\s*\|\s*user\s*=\s*(\S.*?)\s*\}\}/' ;
	$re_plain = '/^\s*\{\{\s*[Uu]niversal[ _]replace\s*\|\s*(.+?)\s*\|\s*(.+?)\s*\|\s*reason\s*=\s*(\S.*?)\s*\}\}/' ;

	$this->d->trace("Processing page content...");
	$ts = date ( 'YmdHis' ) ;
	$t = explode ( "\n" , $t ) ;
	$nt = array() ;
	foreach ( $t AS $l ) {
		if ( !preg_match ( $re_with_user , $l , $m ) and !preg_match ( $re_plain , $l , $m ) ) {
			$nt[] = $l ; # Not a command, keep the line
			continue ;
		}
		$old_file = ucfirst(str_replace(' ','_',trim($m[1]))) ;
		$new_file = ucfirst(str_replace(' ','_',trim($m[2]))) ;
		$this->d->trace("Process line; old:$old_file new:$new_file.");

		$comment = trim($m[3]) ;
		$user = '' ;
		if ( isset($m[4]) ) {
			$user = str_replace(' ','_',trim($m[4])) ;
			# Reduce "[[User:Name|...]]" to "Name"; the colon after the namespace is consumed, not captured
			$user = preg_replace ( '/^\s*\[\[[^:]+:(.+?)\s*(\||\]\]).*$/' , '$1' , $user ) ;
		}

		if ( !$this->hasLocalFile ( 'commonswiki' , $new_file ) ) {
			$nt[] = "No such replacement file: " . $l ;
			continue ;
		}

		if ( !preg_match('/\.svg$/i',$old_file) and preg_match('/\.svg$/i',$new_file) ) {
			$nt[] = "Non-SVG to SVG replacement: " . $l ;
			continue ;
		}

		$this->d->trace("Get file usage for $old_file");
		$usages = $this->getFileUsage ( $old_file ) ;
		# The usage list may be empty, so the sentinel is only looked at when a first element exists
		if ( isset ( $usages[0] ) and $usages[0] === "*OVERFLOW*" ) {
			$nt[] = "File is used on too many pages: " . $l;
			$this->d->info("Skipping file $old_file; high usage (>$this->maximum_file_usage_limit)!");
			continue;
		}

		$this->d->trace("Generate replaces...");
		foreach ( $usages AS $usage ) {
			$page = $usage->gil_page_title ;
			if ( $usage->gil_page_namespace != '' ) $page = $usage->gil_page_namespace . ':' . $page ;
			$params = array (
				'action' => 'replace' ,
				'file' => $old_file ,
				'wiki' => $usage->gil_wiki ,
				'page' => $page ,
				'namespace' => $usage->gil_page_namespace_id ,
				'timestamp' => $ts ,
				'comment' => $comment ,
				'log_id' => -1 ,
				'log_timestamp' => $ts ,
				'user' => $user ,
				'done' => 0 ,
				'replace_with_file' => $new_file
			) ;
			# The raw reason is replaced by the complete, localized edit summary
			$params['comment'] = $this->constructReplaceComment ( $params ) ;

			$s1 = array() ;
			$s2 = array() ;
			foreach ( $params AS $k => $v ) {
				$s1[] = $k ;
				$s2[] = "'" . $this->getDBsafe($v) . "'" ;
			}

			$this->d->trace("Add SQL to sqls[] array, len=" . count($sqls));
			$sql = "INSERT IGNORE INTO event (" . implode ( ',' , $s1 ) . ") VALUES (" . implode ( "," , $s2 ) . ")" ;
			$sqls[] = $sql ;
			$this->d->trace("SQL:$sql");
		}
	}

	$t = implode ( "\n" , $t ) ;
	$nt = implode ( "\n" , $nt ) ;
	if ( $t == $nt ) return ; // No change

	# Run SQL
	$db = $this->getToolDB() ;
	foreach ( $sqls AS $sql ) $this->runQuery ( $db , $sql ) ;
	$db->close() ;

	# Save new text to Wiki
	$params = array (
		'title' => $cmd_page ,
		'text' => trim($nt) ,
		'summary' => 'Removing replace commands, will be executed soon' ,
		'bot' => 1
	) ;

	$this->d->info( "Editing $cmd_page...") ;
	$x = $this->editWiki ( 'commonswiki' , 'edit' , $params ) ;
	# NOTE(review): a falsy result is treated as failure elsewhere in this class; confirm against editWiki()
	if ( !$x ) $this->d->error( "Could not save $cmd_page" ) ;
	$this->d->debug( "Editing $cmd_page done.") ;
}
|
641 |
|
/**
 * Cleans up the event queue before edits are made.
 *
 * Deletes events with an empty action and file. Then marks as skipped (done=2)
 * all events of any file/wiki/namespace combination that has more than
 * min_faux_template_icon pending events, noting them as likely template icons.
 */
function fixFauxTemplateReplacements () {
	$tool_db = $this->getToolDB() ;

	$this->runQuery ( $tool_db , "DELETE FROM event WHERE action='' and file=''" ) ;

	# Collect the UPDATE statements first, run them after the result set has been read
	$groups = $this->runQuery ( $tool_db , 'select file,wiki, count(*) as cnt,namespace from event where done=0 group by file,wiki,namespace having cnt>' . $this->min_faux_template_icon ) ;
	$updates = array() ;
	while ( $group = $groups->fetch_object() ) {
		$safe_file = $this->getDBsafe ( $group->file ) ;
		$safe_wiki = $this->getDBsafe ( $group->wiki ) ;
		$updates[] = "UPDATE event SET done=2,note='Likely template icon, skipping' WHERE file='$safe_file' AND wiki='$safe_wiki' AND namespace=" . $group->namespace ;
	}

	foreach ( $updates AS $update ) $this->runQuery ( $tool_db , $update ) ;

	$tool_db->close() ;
}
|
659 |
|
/**
 * One full cycle of the bot: queue unlink events for recently deleted files,
 * queue replace events from the commands page, weed out likely template
 * icons, then perform the queued edits.
 */
function run () {
	$log = $this->d ;

	$log->debug("Get last timestamp");
	$since = $this->getLastTimestamp() ;

	$log->debug("Get recent deleted files");
	$deleted = $this->getRecentDeletedFiles ( $since ) ;

	$log->debug("Add unlink events for recently deleted files");
	$this->addUnlinkEvents ( $deleted ) ;

	$log->debug("Add replace events");
	$this->addReplaceEvents () ;

	$log->debug("Fix bogus template replacements");
	$this->fixFauxTemplateReplacements() ;

	$log->debug("Perform the queued edits");
	$this->performEdits() ;
}
|
675 |
|
/**
 * Test helper: runs only the "add replace events" step.
 */
function debug_run0() {
	$log = $this->d ;
	$log->debug("Add replace events");
	$this->addReplaceEvents () ;
}
|
681 |
|
682 } |
|
683 |
|
echo "Bot is starting.\n" ;
$demon = new CommonsDelinquentDemon() ;

# Single steps, for manual runs:
//$demon->addReplaceEvents () ;
//$demon->performEdits() ;
//$demon->fixFauxTemplateReplacements() ;

$demon->set_debug_mode ( 8 ) ;

# Test entry point:
# $demon->debug_run0();

# Work off whatever is still queued, then poll forever with a 30 second pause between cycles
$demon->debug ( "Performing edits..." ) ;
$demon->performEdits() ;
for ( ;; ) {
	$demon->debug ( "Calling run loop..." ) ;
	$demon->run() ;
	$demon->debug ( "Sleeping 30..." ) ;
	sleep ( 30 ) ;
}
|
705 |
|
706 ?> |