split_mariadb_dump_into_tables.pl
author Peter Gervai <grin@grin.hu>
Wed, 12 Apr 2023 20:16:17 +0200
changeset 12 5301f455fc7f
permissions -rwxr-xr-x
Add split_mariadb_dump_into_tables.pl
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
12
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     1
#!/usr/bin/perl
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     2
# 
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     3
# take apart a mysql dump.gz to into compressed table.zstd files
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     4
#
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     5
# cc-by_sa / GPLv3+ by Peter 'grin' Gervai, 2023
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     6
#
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     7
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     8
use warnings;
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
     9
use strict;
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    10
use IO::File;
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    11
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    12
# the dump file
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    13
my $fn = "db_dump.sql.gz";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    14
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    15
# skip these tables
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    16
my %skip = ( 'stupid_table' => 1, 'dont_need_either' => 1 );
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    17
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    18
# target dir
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    19
my $dir = "d_split_tables";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    20
`mkdir -p ${dir}`			# that's the laziest way, highly unportable, yadda-yadda
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    21
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    22
# cat a compressed file
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    23
my $gzip_cat = "pigz -dcf";		# or 'gz' if you wish single threaded
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    24
my $zstd_cat = "zstd -T0 -dcf";		# or 'gz -dcf' or whatever
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    25
# store it compressed
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    26
my $zstd_store = "zstd -T0 - -o";	# or 'gz - -o' or even 'cat >'
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    27
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    28
# globals are okay
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    29
my $table='';	# actually processing table name (convenience)
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    30
my $skip=0;	# are we skipping a table now?
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    31
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    32
# write prelude into this file
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    33
my $fout = IO::File->new( "| ${zstd_store} prelude.sql" ) or die "cannot create prelude.sql: $!";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    34
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    35
my $cnt=0; 	# count tables for fun
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    36
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    37
# read the compressed dump (usually .gz)
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    38
$f = IO::File->new( "${gzip_cat} $fn |" ) or die "cannot read pipe: $!";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    39
while( <$f> ) {
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    40
    if( $skip ) {			# we are actively skipping this table
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    41
        if( /^UNLOCK TABLES;/ ) {	# that's the end of a table dump (for me, if it isn't for you... tough luck)
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    42
            $skip = 0;			# end of skip mode
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    43
        }
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    44
        next;				# skip, get next line
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    45
    }
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    46
    
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    47
    if( /^DROP TABLE.+`([^`]+)`;/ ) {	# start of a new table (again, for me)
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    48
        $table = $1;			# better readability: get table name
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    49
        if( $skip{$table} ) {		# this could be rewritten as a regex, for example
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    50
            print "SKIP $table!\n";	# skip stuff in %skip hash
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    51
            $skip = 1;			# entering skip mode
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    52
            next;
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    53
        }
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    54
        $fout->close;			# let's start a new file for a new table
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    55
        $fout = IO::File->new( "| ${zstd_store} ${dir}/table_${table}.sql.zst" ) or die "Cannot pipe for table_${table}.sql: $!";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    56
        $cnt++;
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    57
        print "Created table_${table} as #${cnt}...         \r";
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    58
    }
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    59
    
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    60
    print $fout $_;			# no skip, we have a file already open, so write the line there
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    61
}
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    62
5301f455fc7f Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff changeset
    63
print "\n\nDone. ($cnt)\n\n";		# sayonara.