delinker_fixer/defixer.pl

Sat, 17 Feb 2024 15:34:18 +0100

author
Peter Gervai <grin@grin.hu>
date
Sat, 17 Feb 2024 15:34:18 +0100
changeset 5
ff125aa259db
parent 0
3b714bbb1347
permissions
-rwxr-xr-x

Clip note to the DB field size (varchar64). Added more exclusions:
- wikisources (requested)
- azwikimedia (suggested)
- gurwiki (broken SUL)
- gomwiktionary (broken SUL)

#!/usr/bin/perl
#$Id$
#
# cleanup after delinker
#
# (c)Peter [[grin]] Gervai, 2022
# cc-by-sa-4.0-int / gplv3+
#

use strict;
use warnings;
use utf8;

use File::Basename;
use lib dirname(__FILE__);
use BotSecrets;

use MediaWiki::Bot qw(:constants);
use DBI;

binmode( STDOUT, ':utf8' );

# Unbuffered STDOUT, so progress lines appear as soon as they are printed.
$|=1;

my ($db_name, $db_host, $db_port) = ("s52421__commonsdelinquent_p", "tools.db.svc.wikimedia.cloud", 3306);
# NOTE(review): $db_user/$db_pw (and $bu/$bp below) are not declared in this
# file; presumably they are exported by BotSecrets - confirm.
## my ($db_user, $db_pw) = ("xxxxxxxxx", "xxxxxxxxxx");

&d("Start");

## connect db
# RaiseError is off, so every DBI call has to be checked by hand. AutoCommit
# is off as well: changes only become permanent on an explicit commit.
my $dsn = "DBI:mysql:database=$db_name;host=$db_host;port=$db_port";
my $dbh = DBI->connect( $dsn, $db_user, $db_pw, { mysql_enable_utf8=>1, RaiseError=>0, AutoCommit=>0 } )
    or die "Cannot connect to database: " . ( defined $DBI::errstr ? $DBI::errstr : 'unknown error' );
$dbh->{mysql_enable_utf8} = 1;

## prepare sql
# Selects the delinker events to examine. The commented-out variant
# additionally restricts the search to a single file.
#my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND file=? AND done=? AND timestamp BETWEEN ? AND ?" );
my $sth = $dbh->prepare( "SELECT id,action,file,wiki,page,revision,done,note,replace_with_file FROM event WHERE action=? AND done=? AND timestamp BETWEEN ? AND ?" )
    or die "Cannot prepare event select: " . $dbh->errstr;

# Sets the 'done' state of a single event row.
my $sth_update = $dbh->prepare( "UPDATE event SET done=? WHERE id=?" )
    or die "Cannot prepare event update: " . $dbh->errstr;

## connect bot (enwp)
#my ($bu,$bp) = ('xxxxxxxxxxxxx', 'xxxxxxxxxxxxxx' );
# $bot is a package variable because the helper subs further down use it.
our $bot = MediaWiki::Bot->new({
    host => 'en.wikipedia.org',
    login_data => { username => $bu, password=> $bp },
    do_sul => 1,
    operator => 'grin',
    protocol => 'https',
    debug => 2,
    assert => 'user',
});

die "Cannot login" unless $bot;

# Database name of the wiki the bot was last switched to. It starts empty so
# that the first event always triggers an explicit set_wiki; it used to start
# as 'huwiki' although the bot is logged in to en.wikipedia.org, which made a
# first huwiki row run against the wrong wiki.
my $last_wiki='';

## rev
# Edit summary attached to every revert this script performs.
my $revert_message = "Undoing CommonsDelinker bad replace, will be retried later.";

## prepare search
# Bind values for the prepared event query: the action type, the 'done'
# state to look for and the timestamp window (bounds as written in the SQL
# BETWEEN clause).
my $action = 'replace';
#my $file = 'S-3A_MAD_DN-SC-87-05743.JPEG';
my $done = 127;
my $ts_from = '20220502000000';
my $ts_to   = '20220506130000';

# The commented-out call belongs to the single-file variant of the query.
#my $res = $sth->execute( $action, $file, $done, $ts_from, $ts_to );
my $res = $sth->execute( $action, $done, $ts_from, $ts_to );

# RaiseError is disabled on the handle, so the error state is inspected here.
die "Error doing SQL: " . $dbh->errstr if $dbh->err;

printf "%s rows found.\n", $sth->rows;

## results
#
# Walks over every selected event and tries to undo the CommonsDelinker edit:
#  - switch the bot to the wiki of the event (only when it differs from the
#    wiki used for the previous event),
#  - ask for the last revision of the page that was not made by
#    CommonsDelinker; when it equals the stored revision the page is reverted
#    to that revision and the event is set back to done=0,
#  - otherwise the event is marked done=42 (page changed, not reverted),
#  - done=43 marks events where the fallback login failed.
# The transaction is committed after each processed event.

# Sets an event back to done=0 after its page has been reverted. If that
# update fails the script commits what it has and stops, as the database no
# longer reflects what was done on the wiki.
my $requeue_event = sub {
    my ($event) = @_;

    $sth_update->execute( 0, $event->{id} );
    return unless $dbh->err;

    &d( "*** Error updating db for $event->{wiki}:$event->{page} id=$event->{id}!!");
    $dbh->commit;
    exit;
};

# The row variable is deliberately not called $a: that name is special to sort.
while( my $event = $sth->fetchrow_hashref ) {
    # Dump the whole row for the log; NULL columns are printed as empty.
    for my $key (sort keys %$event) {
        my $value = defined $event->{$key} ? $event->{$key} : '';
        print "$key=" . $value . " ";
    }
    print "\n";

    # check data
    if( $last_wiki ne $event->{wiki} ) {
        my $wikidata = &get_wikidata( $event->{wiki} );
        next unless $wikidata;
        $bot->set_wiki( $wikidata );
        # Remember where the bot is now. Without this the comparison above
        # was made against a stale name, so a later event for that stale
        # name was processed on whatever wiki the bot happened to be on.
        $last_wiki = $event->{wiki};
    }

    my $revid = $bot->get_last( $event->{page}, 'CommonsDelinker' );
    if( !defined( $revid ) ) {
        &d("Revid is missing!! skipping $event->{wiki}:$event->{page}!!");
        #&error("missing revid");
        next;
    }

    if( $revid == $event->{revision} ) {
        &d(" Page unchanged, undo possible! REVERTING $event->{wiki}:$event->{page}");

        # revert
        if( $bot->revert( $event->{page}, $event->{revision}, $revert_message ) ) {
            &d( "Success. Updating.");
            $requeue_event->( $event );

        } else {
            # The first attempt failed; log in again and retry once.
            &d( " Revert #1 failed, try to login.");
            if( $bot->login( { username=>$bu, password=>$bp } ) ) {
                if( $bot->revert( $event->{page}, $event->{revision}, $revert_message ) ) {
                    &d( "Success^2. Updating.");
                    $requeue_event->( $event );
                } else {
                    # Previously silent. The row keeps its current state, so
                    # a later run with the same query will pick it up again.
                    &d( " Revert #2 failed as well, leaving $event->{wiki}:$event->{page} id=$event->{id} as is.");
                }

            } else {
                &d( "Login failed into $event->{wiki}. Skipping");
                &error("login failed");
                $sth_update->execute( 43, $event->{id} );
                if( $dbh->err ) {
                    &d( "*** Error updating db for login-failed $event->{wiki}:$event->{page} id=$event->{id}!!");
                }
            }
        }

    } else {
        &d( " Page changed, oldid $event->{revision} - newid $revid; skipping (update db).");
        $sth_update->execute( 42, $event->{id} );
        if( $dbh->err ) {
            &d( "*** Error updating db for non-changed $event->{wiki}:$event->{page} id=$event->{id}!!");
        }
    }

    $dbh->commit;
}
&d( "Commit.");
$dbh->commit;

exit;

# Prints one log line to STDOUT: local time, process id in brackets, message.
#   $message - the text to log
sub d {
    my ($message) = @_;
    my $stamp = localtime;    # scalar assignment yields the readable time string
    print $stamp . " [$$] $message\n";
}


# Meant to print the given message together with the error code and details
# stored in the bot object. The reporting is switched off: the sub returns
# immediately, so callers get nothing back and nothing is printed.
#   $msg - short description of what failed
sub error {
    return; # doesn't work

    my $msg = $_[0];
    my $code    = $bot->{error}->{code};
    my $details = $bot->{error}->{details};
    print "error: $msg; " . $code . "; " . $details . "\n";

    #use Data::Dumper;
    #die Dumper($bot);
    #exit;
}

## decipher short wikinames
#
# Translates a wiki database name (as stored in the event table, e.g.
# 'huwiki') into the argument hash expected by $bot->set_wiki.
#
#   $name   - database name of the wiki
#   returns - { host => 'xx.project.org' } on success, undef when the name
#             cannot be resolved (the caller skips the event in that case)
#
# The bot's own db_to_domain lookup is tried first. It used to be returned
# unconditionally, which handed back a (true) hashref with an undefined host
# for unknown names, so the caller's "skip if unresolved" check never fired
# and the pattern table below was dead code. The table is now the fallback
# for names the lookup cannot resolve.
sub get_wikidata {
    my ($name) = @_;

    &d("Decode $name");

    my $host = $bot->db_to_domain( $name );
    if( defined $host && length $host ) {
        return { host => $host };
    }

    # Fallback: derive the host from the usual "<lang><project>" naming.
    # Each entry is [ pattern capturing the language code, project domain ].
    my @families = (
        [ qr/^(.{2,3}|simple)wiki$/, 'wikipedia.org'  ],
        [ qr/^(.{2,3})wikivoyage$/,  'wikivoyage.org' ],
        [ qr/^(.{2,3})wikisource$/,  'wikisource.org' ],
        [ qr/^(.{2,3})wikiquote$/,   'wikiquote.org'  ],
        [ qr/^(.{2,3})wikibooks$/,   'wikibooks.org'  ],
        [ qr/^(.{2,3})wiktionary$/,  'wiktionary.org' ],
    );

    for my $family (@families) {
        my ($pattern, $domain) = @$family;
        if( $name =~ $pattern ) {
            return { host => "$1.$domain" };
        }
    }

    if( $name eq 'wikidatawiki' ) {
        # wikidata is probably not affected, so it is skipped silently
        return undef;
    }

    &d("*** $name not implemented yet!!! ***");
    return undef;
    #die "decode '$name' isn't implemented yet.";
}

# Nothing below this exit is ever executed (the main script has already
# exited further up as well). These are leftover experiments with the bot
# API, using fixed page names and revision ids.
exit;

my $article_name = 'a';

# Fetch the text of one fixed revision; pageid is read back from the options
# hash after the call.
my $options = { revid => 13849803 };
my $txt = $bot->get_text( $article_name, $options );
unless( defined $options->{pageid} ) {
    die "error something";
}
if( $options->{pageid} == MediaWiki::Bot::PAGE_NONEXISTENT ) {
    warn "page doesn't exist";
}
print "Page length is " . length($txt) . "!\n";

my $pageid = $bot->get_id( $article_name );
defined $pageid or die "error something else";
print "Page id is $pageid\n";

# last _not_ by user
my $revid = $bot->get_last( $article_name, 'no such user' );
print "Last revid is $revid\n";

$revid = $bot->get_last( $article_name, 'FoBe' );
print "Last revid-2 is $revid\n";

# Diff between two fixed revisions on the current wiki.
$options = { oldid => 20300641, revid => 20300648 };
my $diff = $bot->diff( $options );
print "Diff: " . $diff . "\n";

## commons
$bot->set_wiki( { host => 'commons.wikimedia.org' } );
$bot or die "Cannot login to commons";
print "Logged over commons.\n";

# The same diff experiment on commons.
$options = { revid => 568734018, oldid => 628016329 };
$diff = $bot->diff( $options );
print "Diff: " . $diff . "\n";

## the bug: the last word of the previous line + \n ended up in the replacement
## the fix:
##  - if this is the last edit
##  - revert

__END__
<tr>
    <td class="diff-marker" data-marker="−"></td>
    <td class="diff-deletedline diff-side-deleted"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian, <del class="diffchange diffchange-inline">1935</del></div></td>
    <td class="diff-marker" data-marker="+"></td>
    <td class="diff-addedline diff-side-added"><div>File:Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian.jpg|Nestor Lakoba, Nikita Khrushchev, Lavrenti Beria and Aghasi Khanjian,<ins class="diffchange diffchange-inline">|{{c|Georgy</ins> <ins class="diffchange diffchange-inline">Malenkov}} and Beria, 1940</ins></div></td>
</tr>
<tr>
    <td class="diff-marker" data-marker="−"></td>
    <td class="diff-deletedline diff-side-deleted"><div>File:?????????? ?? ????????.jpg|{{c|Georgy Malenkov}} and Beria, 1940</div></td>
    <td colspan="2" class="diff-empty diff-side-added"></td>
</tr>


## generic revert
## - from the database, the rows with done=42
## - revision
## - if that is the last one then:
##  - revert
##  - done=0 (pending)
## - if not then
##  - done=666 (fix it felix)
##

mercurial