split_mariadb_dump_into_tables.pl
changeset 12 5301f455fc7f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/split_mariadb_dump_into_tables.pl	Wed Apr 12 20:16:17 2023 +0200
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+# 
+# take apart a mysql dump.gz to into compressed table.zstd files
+#
+# cc-by_sa / GPLv3+ by Peter 'grin' Gervai, 2023
+#
+
+use warnings;
+use strict;
+use IO::File;
+
+# the dump file
+my $fn = "db_dump.sql.gz";
+
+# skip these tables
+my %skip = ( 'stupid_table' => 1, 'dont_need_either' => 1 );
+
+# target dir
+my $dir = "d_split_tables";
+`mkdir -p ${dir}`			# that's the laziest way, highly unportable, yadda-yadda
+
+# cat a compressed file
+my $gzip_cat = "pigz -dcf";		# or 'gz' if you wish single threaded
+my $zstd_cat = "zstd -T0 -dcf";		# or 'gz -dcf' or whatever
+# store it compressed
+my $zstd_store = "zstd -T0 - -o";	# or 'gz - -o' or even 'cat >'
+
+# globals are okay
+my $table='';	# actually processing table name (convenience)
+my $skip=0;	# are we skipping a table now?
+
+# write prelude into this file
+my $fout = IO::File->new( "| ${zstd_store} prelude.sql" ) or die "cannot create prelude.sql: $!";
+
+my $cnt=0; 	# count tables for fun
+
+# read the compressed dump (usually .gz)
+$f = IO::File->new( "${gzip_cat} $fn |" ) or die "cannot read pipe: $!";
+while( <$f> ) {
+    if( $skip ) {			# we are actively skipping this table
+        if( /^UNLOCK TABLES;/ ) {	# that's the end of a table dump (for me, if it isn't for you... tough luck)
+            $skip = 0;			# end of skip mode
+        }
+        next;				# skip, get next line
+    }
+    
+    if( /^DROP TABLE.+`([^`]+)`;/ ) {	# start of a new table (again, for me)
+        $table = $1;			# better readability: get table name
+        if( $skip{$table} ) {		# this could be rewritten as a regex, for example
+            print "SKIP $table!\n";	# skip stuff in %skip hash
+            $skip = 1;			# entering skip mode
+            next;
+        }
+        $fout->close;			# let's start a new file for a new table
+        $fout = IO::File->new( "| ${zstd_store} ${dir}/table_${table}.sql.zst" ) or die "Cannot pipe for table_${table}.sql: $!";
+        $cnt++;
+        print "Created table_${table} as #${cnt}...         \r";
+    }
+    
+    print $fout $_;			# no skip, we have a file already open, so write the line there
+}
+
+print "\n\nDone. ($cnt)\n\n";		# sayonara.