author | Peter Gervai <grin@grin.hu> |
Wed, 12 Apr 2023 20:16:17 +0200 | |
changeset 12 | 5301f455fc7f |
permissions | -rwxr-xr-x |
12
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
1 |
#!/usr/bin/perl |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
2 |
# |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
3 |
# take apart a mysql dump.gz to into compressed table.zstd files |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
4 |
# |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
5 |
# cc-by_sa / GPLv3+ by Peter 'grin' Gervai, 2023 |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
6 |
# |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
7 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
8 |
use warnings; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
9 |
use strict; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
10 |
use IO::File; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
11 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
12 |
# the dump file |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
13 |
my $fn = "db_dump.sql.gz"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
14 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
15 |
# skip these tables |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
16 |
my %skip = ( 'stupid_table' => 1, 'dont_need_either' => 1 ); |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
17 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
18 |
# target dir |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
19 |
my $dir = "d_split_tables"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
20 |
`mkdir -p ${dir}` # that's the laziest way, highly unportable, yadda-yadda |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
21 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
22 |
# cat a compressed file |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
23 |
my $gzip_cat = "pigz -dcf"; # or 'gz' if you wish single threaded |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
24 |
my $zstd_cat = "zstd -T0 -dcf"; # or 'gz -dcf' or whatever |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
25 |
# store it compressed |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
26 |
my $zstd_store = "zstd -T0 - -o"; # or 'gz - -o' or even 'cat >' |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
27 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
28 |
# globals are okay |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
29 |
my $table=''; # actually processing table name (convenience) |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
30 |
my $skip=0; # are we skipping a table now? |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
31 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
32 |
# write prelude into this file |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
33 |
my $fout = IO::File->new( "| ${zstd_store} prelude.sql" ) or die "cannot create prelude.sql: $!"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
34 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
35 |
my $cnt=0; # count tables for fun |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
36 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
37 |
# read the compressed dump (usually .gz) |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
38 |
$f = IO::File->new( "${gzip_cat} $fn |" ) or die "cannot read pipe: $!"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
39 |
while( <$f> ) { |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
40 |
if( $skip ) { # we are actively skipping this table |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
41 |
if( /^UNLOCK TABLES;/ ) { # that's the end of a table dump (for me, if it isn't for you... tough luck) |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
42 |
$skip = 0; # end of skip mode |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
43 |
} |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
44 |
next; # skip, get next line |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
45 |
} |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
46 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
47 |
if( /^DROP TABLE.+`([^`]+)`;/ ) { # start of a new table (again, for me) |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
48 |
$table = $1; # better readability: get table name |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
49 |
if( $skip{$table} ) { # this could be rewritten as a regex, for example |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
50 |
print "SKIP $table!\n"; # skip stuff in %skip hash |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
51 |
$skip = 1; # entering skip mode |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
52 |
next; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
53 |
} |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
54 |
$fout->close; # let's start a new file for a new table |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
55 |
$fout = IO::File->new( "| ${zstd_store} ${dir}/table_${table}.sql.zst" ) or die "Cannot pipe for table_${table}.sql: $!"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
56 |
$cnt++; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
57 |
print "Created table_${table} as #${cnt}... \r"; |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
58 |
} |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
59 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
60 |
print $fout $_; # no skip, we have a file already open, so write the line there |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
61 |
} |
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
62 |
|
5301f455fc7f
Add split_mariadb_dump_into_tables.pl
Peter Gervai <grin@grin.hu>
parents:
diff
changeset
|
63 |
print "\n\nDone. ($cnt)\n\n"; # sayonara. |