Sat, 17 Feb 2024 15:36:09 +0100
demon.php: ease some db pressure by not querying old entries (7 days).
- replaced some reject workarounds with exclusions
- some more debug messages
#!/usr/bin/perl
#$Id$
#
# cleanup after delinker
#
# (c)Peter [[grin]] Gervai, 2022
# cc-by-sa-4.0-int / gplv3+
#
# What this script does:
#   - selects rows of the `event` table with action='replace' and done=127
#     whose timestamp lies inside a fixed window;
#   - for every row asks the wiki for the last revision of the page that was
#     NOT made by CommonsDelinker;
#   - if that revision is the one recorded in the row, the page is reverted
#     to it and the row is set to done=0;
#   - otherwise the row is set to done=42 (page was changed meanwhile);
#   - if a re-login attempt fails the row is set to done=43.
#

use strict;
use warnings;
use utf8;

use File::Basename;
use lib dirname(__FILE__);
use BotSecrets;
use MediaWiki::Bot qw(:constants);
use DBI;

binmode( STDOUT, ':utf8' );
$| = 1;

my ($db_name, $db_host, $db_port) = ("s52421__commonsdelinquent_p", "tools.db.svc.wikimedia.cloud", 3306);
## my ($db_user, $db_pw) = ("xxxxxxxxx", "xxxxxxxxxx");
# NOTE(review): $db_user/$db_pw (and $bu/$bp further down) are not declared in
# this file; presumably they are exported by BotSecrets -- confirm.

d("Start");

## connect db
my $dsn = "DBI:mysql:database=$db_name;host=$db_host;port=$db_port";
# RaiseError is off, so a failed connect has to be caught here; otherwise the
# next statement would autovivify $dbh into a plain hash and the script would
# die later with a misleading message.
my $dbh = DBI->connect( $dsn, $db_user, $db_pw, { mysql_enable_utf8=>1, RaiseError=>0, AutoCommit=>0 } )
    or die "Cannot connect to database: " . ( defined $DBI::errstr ? $DBI::errstr : 'unknown error' );
$dbh->{mysql_enable_utf8} = 1;

## prepare sql
#my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND file=? AND done=? AND timestamp BETWEEN ? AND ?" );
my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND done=? AND timestamp BETWEEN ? AND ?" )
    or die "Cannot prepare select: " . $dbh->errstr;
my $sth_update = $dbh->prepare( "UPDATE event SET done=? WHERE id=?" )
    or die "Cannot prepare update: " . $dbh->errstr;

## connect bot (enwp)
#my ($bu,$bp) = ('xxxxxxxxxxxxx', 'xxxxxxxxxxxxxx' );
our $bot = MediaWiki::Bot->new({
    host       => 'en.wikipedia.org',
    login_data => { username => $bu, password=> $bp },
    do_sul     => 1,
    operator   => 'grin',
    protocol   => 'https',
    debug      => 2,
    assert     => 'user',
});
die "Cannot login" unless $bot;

# Database name of the wiki the bot is currently pointed at.  The bot starts
# on en.wikipedia.org, and the value is refreshed after every set_wiki() so
# that a switch happens exactly when the wiki of the row differs.
my $last_wiki = 'enwiki';

## rev
my $revert_message = "Undoing CommonsDelinker bad replace, will be retried later.";

## prepare search
my $action = 'replace';
#my $file = 'S-3A_MAD_DN-SC-87-05743.JPEG';
my $done = 127;
# NOTE(review): looks like YYYYMMDDhhmmss timestamps -- confirm against the table.
my ($ts_from, $ts_to) = ('20220502000000', '20220506130000');

#my $res = $sth->execute( $action, $file, $done, $ts_from, $ts_to );
my $res = $sth->execute( $action, $done, $ts_from, $ts_to );
if( !defined $res ) {
    die "Error doing SQL: " . $sth->errstr;
}

print $sth->rows . " rows found.\n";

## results
while( my $row = $sth->fetchrow_hashref ) {
    # dump the row; NULL columns are printed as empty strings
    for my $key (sort keys %$row) {
        my $value = defined $row->{$key} ? $row->{$key} : '';
        print "$key=" . $value . " ";
    }
    print "\n";

    my $where = "$row->{wiki}:$row->{page}";

    # check data: point the bot at the wiki of this row if it is not there yet
    if( $last_wiki ne $row->{wiki} ) {
        my $wikidata = get_wikidata( $row->{wiki} );
        next unless $wikidata;
        $bot->set_wiki( $wikidata );
        $last_wiki = $row->{wiki};
    }

    # last revision of the page _not_ made by CommonsDelinker
    my $revid = $bot->get_last( $row->{page}, 'CommonsDelinker' );
    if( !defined( $revid ) ) {
        d("Revid is missing!! skipping $where!!");
        #error("missing revid");
        next;
    }

    if( $revid == $row->{revision} ) {
        d(" Page unchanged, undo possible! REVERTING $where");

        # revert
        if( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
            d( "Success. Updating.");
            mark_reverted( $row );
        } else {
            d( " Revert #1 failed, try to login.");
            if( $bot->login( { username=>$bu, password=>$bp } ) ) {
                if( $bot->revert( $row->{page}, $row->{revision}, $revert_message ) ) {
                    d( "Success^2. Updating.");
                    mark_reverted( $row );
                } else {
                    # the row stays at its current `done` value
                    d( " Revert #2 failed for $where; leaving the entry untouched.");
                }
            } else {
                d( "Login failed into $row->{wiki}. Skipping");
                error("login failed");
                $sth_update->execute( 43, $row->{id} )
                    or d( "*** Error updating db for login-failed $where id=$row->{id}!!");
            }
        }
    } else {
        d( " Page changed, oldid $row->{revision} - newid $revid; skipping (update db).");
        $sth_update->execute( 42, $row->{id} )
            or d( "*** Error updating db for non-changed $where id=$row->{id}!!");
    }
    $dbh->commit;
}

d( "Commit.");
$dbh->commit;
$dbh->disconnect;
exit;


## debug message with timestamp and pid to stdout
sub d {
    my ($s) = @_;
    print scalar(localtime) . " [$$] $s\n";
    return;
}

## set the event of a successfully reverted page to done=0;
## when the update fails: log, commit, disconnect and stop with exit status 1
sub mark_reverted {
    my ($row) = @_;
    return 1 if $sth_update->execute( 0, $row->{id} );

    d( "*** Error updating db for $row->{wiki}:$row->{page} id=$row->{id}!!");
    $dbh->commit;
    $dbh->disconnect;
    exit 1;
}

## report the last bot error; switched off on purpose by the early return
sub error {
    return;     # doesn't work

    my ($s) = @_;
    print "error: $s; " . $bot->{error}->{code} . "; " . $bot->{error}->{details} . "\n";
    #use Data::Dumper;
    #die Dumper($bot);
    #exit;
}

## decypher short wikinames
##   in:  database name of a wiki (e.g. 'huwiki')
##   out: { host => 'domain' } for set_wiki(), or nothing when the name
##        cannot be resolved (the caller then skips the row)
sub get_wikidata {
    my ($name) = @_;
    d("Decode $name");

    # Ask the bot first.  When it cannot resolve the name, do NOT hand an
    # undefined host to set_wiki() -- that would leave the bot on the
    # previous wiki -- but try the pattern fallbacks below.
    my $host = $bot->db_to_domain( $name );
    return { host => $host } if defined $host && length $host;

    if( $name =~ /^(.{2,3}|simple)wiki$/ ) {
        return { host => "$1.wikipedia.org" };
    }

    # for these projects the db suffix equals the domain name
    if( $name =~ /^(.{2,3})(wikivoyage|wikisource|wikiquote|wikibooks|wiktionary)$/ ) {
        return { host => "$1.$2.org" };
    }

    if( $name eq 'wikidatawiki' ) {
        # wikidata probably not fucked up
        return;
    }

    d("*** $name not implemented yet!!! ***");
    return;
    #die "decode '$name' isn't implemented yet.";
}

exit;

## Everything below is never executed (the script has already exited above);
## it is kept as API experiments / notes.

my $article_name = 'a';
my $options = { revid => 13849803 };
my $txt = $bot->get_text($article_name, $options);
die "error something" unless defined $options->{pageid};
warn "page doesn't exist" if $options->{pageid} == MediaWiki::Bot::PAGE_NONEXISTENT;
print "Page length is ". length($txt) . "!\n";

my $pageid = $bot->get_id($article_name);
die "error something else" unless defined $pageid;
printf "Page id is %s\n", $pageid;

# last _not_ by user
my $revid = $bot->get_last($article_name,'no such user');
printf "Last revid is %s\n", $revid;

$revid = $bot->get_last($article_name,'FoBe');
printf "Last revid-2 is %s\n", $revid;

$options = { oldid=> 20300641, revid=>20300648 };
my $diff = $bot->diff($options);
print "Diff: $diff\n";

## commons
$bot->set_wiki({ host => 'commons.wikimedia.org' });
die "Cannot login to commons" unless $bot;
print "Logged over commons.\n";

$options = { revid=> 568734018, oldid=>628016329 };
$diff = $bot->diff($options);
print "Diff: $diff\n";

## the bug: the last word of the previous line + \n got into the replacement
## fix:
##  - if this is the last edit
##    - revert

__END__

<tr>
 <td class="diff-marker" data-marker="???"></td>
 <td class="diff-deletedline diff-side-deleted"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian, <del class="diffchange diffchange-inline">1935</del></div></td>
 <td class="diff-marker" data-marker="+"></td>
 <td class="diff-addedline diff-side-added"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian,<ins class="diffchange diffchange-inline">|{{c|Georgy</ins> <ins class="diffchange diffchange-inline">Malenkov}} and Beria, 1940</ins></div></td>
</tr>
<tr>
 <td class="diff-marker" data-marker="???"></td>
 <td class="diff-deletedline diff-side-deleted"><div>File:?????????? ?? ????????.jpg|{{c|Georgy Malenkov}} and Beria, 1940</div></td>
 <td colspan="2" class="diff-empty diff-side-added"></td>
</tr>

## generic revert
##  - from the database, whatever has done=42
##    - revision
##    - if that is the last one then:
##      - revert
##      - done=0 (pending)
##    - if not then
##      - done=666 (fixx it felix)
##