delinker_fixer/defixer.pl

changeset 0
3b714bbb1347
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delinker_fixer/defixer.pl	Mon Jan 23 21:37:02 2023 +0100
@@ -0,0 +1,262 @@
+#!/usr/bin/perl
+#$Id$
+#
+# cleanup after delinker
+#
+# (c)Peter [[grin]] Gervai, 2022
+# cc-by-sa-4.0-int / gplv3+
+#
+
+use strict;
+use warnings;
+use utf8;
+
+use File::Basename;
+use lib dirname(__FILE__);
+use BotSecrets;
+
+use MediaWiki::Bot qw(:constants);
+use DBI;
+
+binmode( STDOUT, ':utf8' );
+
+# Unbuffered STDOUT, so progress lines show up immediately.
+$|=1;
+
+my ($db_name, $db_host, $db_port) = ("s52421__commonsdelinquent_p", "tools.db.svc.wikimedia.cloud", 3306);
+# NOTE(review): $db_user/$db_pw (and $bu/$bp further down) are not declared in
+# this file; presumably BotSecrets exports them -- confirm.
+## my ($db_user, $db_pw) = ("xxxxxxxxx", "xxxxxxxxxx");
+
+d("Start");
+
+## connect db
+# RaiseError is off, so every DBI call has to be checked by hand. A failed
+# connect returns undef; stop here with a clear message instead of crashing
+# on the attribute assignment below.
+my $dsn = "DBI:mysql:database=$db_name;host=$db_host;port=$db_port";
+my $dbh = DBI->connect( $dsn, $db_user, $db_pw, { mysql_enable_utf8=>1, RaiseError=>0, AutoCommit=>0 } )
+    or die "Cannot connect to database: " . $DBI::errstr;
+$dbh->{mysql_enable_utf8} = 1;
+
+## prepare sql
+#my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND file=? AND done=? AND timestamp BETWEEN ? AND ?" );
+my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND done=? AND timestamp BETWEEN ? AND ?" )
+    or die "Cannot prepare select: " . $dbh->errstr;
+
+my $sth_update = $dbh->prepare( "UPDATE event SET done=? WHERE id=?" )
+    or die "Cannot prepare update: " . $dbh->errstr;
+
+## connect bot (enwp)
+#my ($bu,$bp) = ('xxxxxxxxxxxxx', 'xxxxxxxxxxxxxx' );
+our $bot = MediaWiki::Bot->new({
+    host => 'en.wikipedia.org',
+    login_data => { username => $bu, password=> $bp },
+    do_sul => 1,
+    operator => 'grin',
+    protocol => 'https',
+    debug => 2,
+    assert => 'user',
+});
+die "Cannot login" unless $bot;
+
+# Database name of the wiki the bot is currently pointed at. It starts empty
+# so that the first row always selects its wiki explicitly, and it is updated
+# after every switch (see the loop below).
+my $last_wiki = '';
+
+## rev
+my $revert_message = "Undoing CommonsDelinker bad replace, will be retried later.";
+
+## prepare search
+my $action = 'replace';
+#my $file = 'S-3A_MAD_DN-SC-87-05743.JPEG';
+my $done = 127;
+my ($ts_from, $ts_to) = ('20220502000000', '20220506130000');
+#my $res = $sth->execute( $action, $file, $done, $ts_from, $ts_to );
+$sth->execute( $action, $done, $ts_from, $ts_to )
+    or die "Error doing SQL: " . $sth->errstr;
+print $sth->rows . " rows found.\n";
+
+## results
+# For every selected event: if the page's last revision returned by get_last()
+# still equals the revision stored with the event, revert the page to it and
+# mark the event done=0; otherwise mark it done=42. A failed re-login is
+# recorded as done=43.
+while( my $row = $sth->fetchrow_hashref ) {
+    # Dump the row; NULL columns are printed as empty strings.
+    for my $key (sort keys %$row) {
+        print "$key=" . ( $row->{$key} // '' ) . " ";
+    }
+    print "\n";
+
+    my ($id, $wiki, $page, $revision) = @{$row}{qw(id wiki page revision)};
+
+    # Point the bot at the wiki this event belongs to. Remember the switch,
+    # otherwise later rows of a previously seen wiki would be processed on
+    # whatever wiki happened to be selected last.
+    if( $last_wiki ne $wiki ) {
+        my $wikidata = get_wikidata( $wiki );
+        next unless $wikidata && defined $wikidata->{host};
+        $bot->set_wiki( $wikidata );
+        $last_wiki = $wiki;
+    }
+
+    my $revid = $bot->get_last( $page, 'CommonsDelinker' );
+    if( !defined( $revid ) ) {
+        d("Revid is missing!! skipping $wiki:$page!!");
+        #error("missing revid");
+        next;
+    }
+
+    if( $revid == $revision ) {
+        d(" Page unchanged, undo possible! REVERTING $wiki:$page");
+
+        # revert
+        if( $bot->revert( $page, $revision, $revert_message ) ) {
+            d( "Success. Updating.");
+            $sth_update->execute( 0, $id );
+            if( $dbh->err ) {
+                d( "*** Error updating db for $wiki:$page id=$id!!");
+                $dbh->commit;
+                exit;
+            }
+
+        } else {
+            d( " Revert #1 failed, try to login.");
+            if( $bot->login( { username=>$bu, password=>$bp } ) ) {
+                if( $bot->revert( $page, $revision, $revert_message ) ) {
+                    d( "Success^2. Updating.");
+                    $sth_update->execute( 0, $id );
+                    if( $dbh->err ) {
+                        d( "*** Error updating db for $wiki:$page id=$id!!");
+                        $dbh->commit;
+                        exit;
+                    }
+                } else {
+                    # The event keeps its current 'done' value, so a later
+                    # run picks it up again; just make the failure visible.
+                    d( " Revert #2 failed for $wiki:$page id=$id. Skipping");
+                }
+
+            } else {
+                d( "Login failed into $wiki. Skipping");
+                error("login failed");
+                $sth_update->execute( 43, $id );
+                if( $dbh->err ) {
+                    d( "*** Error updating db for login-failed $wiki:$page id=$id!!");
+                }
+            }
+        }
+
+    } else {
+        d( " Page changed, oldid $revision - newid $revid; skipping (update db).");
+        $sth_update->execute( 42, $id );
+        if( $dbh->err ) {
+            d( "*** Error updating db for non-changed $wiki:$page id=$id!!");
+        }
+    }
+
+    # Commit per event so that finished work survives a later crash.
+    $dbh->commit;
+}
+d( "Commit.");
+$dbh->commit;
+# AutoCommit is off: disconnect explicitly after the final commit so the
+# handle is not torn down implicitly at exit.
+$dbh->disconnect;
+
+exit;
+
+# Log helper: prints one line to STDOUT made of the local time, the process
+# id in square brackets and the given message.
+sub d {
+    my $message = shift;
+    my $stamp   = localtime;    # scalar assignment gives the string form of the time
+    print $stamp . " [$$] $message\n";
+}
+
+
+# Print a failure message together with the code and details stored in the
+# bot's {error} slot. The report is switched off on purpose: the sub returns
+# before doing anything, so calling it is currently a no-op.
+sub error {
+    return; # doesn't work
+
+    my $message = shift;
+    print "error: $message; "
+        . $bot->{error}->{code} . "; "
+        . $bot->{error}->{details} . "\n";
+
+    # Debugging aid left by the author:
+    #use Data::Dumper;
+    #die Dumper($bot);
+    #exit;
+}
+
+## decipher short wikinames
+#
+# Turn a wiki database name (e.g. 'huwiki') into the hashref that
+# $bot->set_wiki() accepts.
+#
+#   $name   - database name of the wiki, as stored in the event table
+#
+# Returns { host => <domain> } on success. Returns nothing (undef in scalar
+# context) when the bot cannot map the name to a domain, so callers can skip
+# the row with a plain truth test.
+#
+# The lookup is delegated to MediaWiki::Bot's db_to_domain(); the hand-written
+# per-project patterns that used to follow the return were unreachable and
+# have been removed.
+sub get_wikidata {
+    my ($name) = @_;
+
+    d("Decode $name");
+
+    my $host = $bot->db_to_domain( $name );
+
+    # Without this guard an unknown name produced { host => undef }, which is
+    # a true value and therefore slipped past the caller's check.
+    if( !defined $host || $host eq '' ) {
+        d("*** $name: cannot resolve wiki host, skipping ***");
+        return;
+    }
+
+    return { host => $host };
+}
+
+# Everything from here to __END__ is scratch code: the exit below stops the
+# script before any of the following statements can run. It is a sequence of
+# one-off MediaWiki::Bot calls (get_text, get_id, get_last, diff, set_wiki)
+# with hard-coded page names and revision ids.
+exit;
+
+my $article_name = 'a';
+
+# Fetch the text of a fixed revision; the pageid is read back from $options
+# after the call.
+my $options = { revid => 13849803 };
+my $txt = $bot->get_text($article_name, $options);
+die "error something" unless defined $options->{pageid};
+warn "page doesn't exist" if $options->{pageid} == MediaWiki::Bot::PAGE_NONEXISTENT;
+print "Page length is ". length($txt) . "!\n";
+
+my $pageid = $bot->get_id($article_name);
+die "error something else" unless defined $pageid;
+printf "Page id is %s\n", $pageid;
+
+# last _not_ by user
+my $revid = $bot->get_last($article_name,'no such user');
+printf "Last revid is %s\n", $revid;
+
+$revid = $bot->get_last($article_name,'FoBe');
+printf "Last revid-2 is %s\n", $revid;
+
+# Diff between two fixed revisions on the current wiki.
+$options = { oldid=> 20300641, revid=>20300648 };
+my $diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## commons
+$bot->set_wiki({
+    host => 'commons.wikimedia.org'
+});
+# NOTE(review): this tests $bot itself, not the result of set_wiki().
+die "Cannot login to commons" unless $bot;
+print "Logged over commons.\n";
+
+$options = { revid=> 568734018, oldid=>628016329 };
+$diff = $bot->diff($options);
+print "Diff: $diff\n";
+
+## the bug: the last word of the previous line + \n got into the replacement
+## fix:
+##  - if this is the last edit
+##  - revert
+
+__END__
+<tr>
+    <td class="diff-marker" data-marker="−"></td>
+    <td class="diff-deletedline diff-side-deleted"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian, <del class="diffchange diffchange-inline">1935</del></div></td>
+    <td class="diff-marker" data-marker="+"></td>
+    <td class="diff-addedline diff-side-added"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian,<ins class="diffchange diffchange-inline">|{{c|Georgy</ins> <ins class="diffchange diffchange-inline">Malenkov}} and Beria, 1940</ins></div></td>
+</tr>
+<tr>
+    <td class="diff-marker" data-marker="−"></td>
+    <td class="diff-deletedline diff-side-deleted"><div>File:Берия в суде.jpg|{{c|Georgy Malenkov}} and Beria, 1940</div></td>
+    <td colspan="2" class="diff-empty diff-side-added"></td>
+</tr>
+
+
+## generic revert
+## - from the database: events with done=42
+## - revision
+## - if that is the last one, then:
+##  - revert
+##  - done=0 (pending)
+## - if not, then:
+##  - done=666 (fixx it felix)
+##

mercurial