Skipping {{nobots}} pages

Thu, 22 Feb 2024 20:21:22 +0100

author
Peter Gervai <grin@grin.hu>
date
Thu, 22 Feb 2024 20:21:22 +0100
changeset 8
38415be9f910
parent 7
0d6c71adcee9
child 9
43902b1af1b9

Skipping {{nobots}} pages

delinquent_files/demon.php file | annotate | diff | comparison | revisions
--- a/delinquent_files/demon.php	Sat Feb 17 15:36:33 2024 +0100
+++ b/delinquent_files/demon.php	Thu Feb 22 20:21:22 2024 +0100
@@ -80,6 +80,12 @@
 		}
 		return false ;
 	}
+
+	## page content forbidding bots, like {{nobots}} or {{bots|deny=..us..}}; returns true when the page must be skipped
+	function isForbiddenPage ( $content ) {
+		# PCRE needs delimiters: the bare pattern made preg_match() fail, so nothing was ever skipped; /i ignores template-name case
+		return preg_match( '/\{\{nobots\}\}|\{\{bots\s*\|\s*deny=([^}]*CommonsDelinker|all)/i', $content ) === 1;
+	}
 	
 	function getRecentDeletedFiles ( $max_ts ) {
 		# Open Commons database replica
@@ -402,6 +408,12 @@
 		$rev_id = $revision->getId() ;
 		$text = $revision->getContent()->getData() ;
 		
+		if ( $this->isForbiddenPage( $text ) ) {
+			$this->d->info("Skipping forbidden (nobots) page $e->wiki: $e->page");
+			$this->setDone( $e->id, 2, "Change forbidden (nobots)" );
+			return;
+		}
+
 		$file = $e->file ;
 		$pattern = $this->matcher->matcher_prepare_pattern( $file );
 	

mercurial