Load the configuration through the boz-mw config wizard

includes/autoload.php switches to the boz-mw "autoload-with-laser-cannon.php"
loader and calls config_wizard() on ../config.php, so orphanize.php drops its
own file_exists()/require of config.php.

Review amendments to orphanize.php, written as extra -/+ lines so that the
reviewed code stays visible next to each fix:
  - stop as soon as $edits reaches the 'cooldown' value (was one edit too late);
  - pass the '/' delimiter to preg_quote() for the "See also" title;
  - collect every linking page ID only once;
  - do not die when the last editor of the list comes without 'groups';
  - skip a page on EditConflictException, as already done for protected pages.
The "+1,525" line count of that hunk includes these 9 extra lines; the blob
hashes on the "index" lines still describe the change before the amendments.

NOTE(review): in both hunks a lone "." line stands where the beginning of the
file is expected (presumably the opening tag and a header comment) - confirm
against the repository before applying.

diff --git a/includes/autoload.php b/includes/autoload.php
index fd744e6..7cc79a1 100644
--- a/includes/autoload.php
+++ b/includes/autoload.php
@@ -1,24 +1,31 @@
 .

 // load some common functions
 require __DIR__ . '/functions.php';

 // autoload boz-mw classes
-require __DIR__ . '/boz-mw/autoload.php';
+require __DIR__ . '/boz-mw/autoload-with-laser-cannon.php';

 // load the dummy config class
 require __DIR__ . '/class-Config.php';
+/**
+ * Require a configuration file or create it
+ *
+ * https://gitpull.it/source/boz-mw/browse/master/include/functions.php
+ * https://gitpull.it/source/boz-mw/browse/master/include/class-cli%5CConfigWizard.php
+ */
+config_wizard( __DIR__ . '/../config.php' );
diff --git a/orphanize.php b/orphanize.php
index ed7054c..d6cabb5 100755
--- a/orphanize.php
+++ b/orphanize.php
@@ -1,522 +1,525 @@
 #!/usr/bin/php
 .

 namespace orphanizerbot;

 // die on whatever error
 set_error_handler( function( $errno, $errstr, $errfile, $errline ) {
 	if( error_reporting() !== 0 ) {
 		throw new \ErrorException( $errstr, 0, $errno, $errfile, $errline );
 	}
 } );

 // do not expose from web
 isset( $argv[ 0 ] ) or exit( 1 );

 // autoload classes
 require __DIR__ . '/includes/autoload.php';

-// require config file
-file_exists( $config_path = __DIR__ . '/config.php' )
-	or die( "Please provide the configuration file at $config_path\n" );
-
-require $config_path;
-
 // how much titles at time requesting - this is a MediaWiki limit
 define( 'MAX_TRANCHE_TITLES', 50 );

 // classes used
 use \cli\Input;
 use \cli\Opts;
 use \cli\ParamFlag;
 use \cli\ParamValued;
 use \cli\Log;
 use \web\MediaWikis;
 use \mw\Wikilink;
 use \mw\Ns;
 use \mw\API;
 use \mw\API\ProtectedPageException;
 use \mw\API\EditConflictException;
 use \regex\Generic as Regex;

 // register available options
 $opts = Opts::instance()->register( [
 	// register arguments with a value
 	new ParamValued( 'wiki', null, 'Specify a wiki from its UID' ),
 	new ParamValued( 'cfg', null, 'Title of an on-wiki configuration page with JSON content model' ),
 	new ParamValued( 'list', null, 'Specify a pagename that should contain the wikilinks to be orphanized' ),
 	new ParamValued( 'summary', null, 'Edit summary' ),
 	new ParamValued( 'list-summary', null, 'Edit summary for editing the list' ),
 	new ParamValued( 'done-text', null, 'Replacement for the wikilink in the list' ),
 	new ParamValued( 'ns', null, 'Namespace whitelist (values separated by pipe)' ),
 	new ParamValued( 'delay', null, 'Additional delay between each edit' ),
 	new ParamValued( 'warmup', null, 'Start only if the last edit on the list was done at least $warmup seconds ago' ),
 	new ParamValued( 'cooldown', null, 'End early when reaching this number of edits' ),
 	new ParamValued( 'turbofresa', null, 'If the list is older than this number of seconds a turbofresa will be spawned to clean the list' ),
 	new ParamValued( 'turbofresa-text', null, 'Text that will be saved to clean an old list' ),
 	new ParamValued( 'turbofresa-summary', null, 'Edit summary to be used when cleaning an old list' ),
 	new ParamValued( 'seealso', null, 'Title of your local "See also" section' ),

 	// register arguments without a value
 	new ParamFlag( 'skip-permissions', null, 'Execute the bot even if the list was last edited by a non-sysop (or by the bot itself)' ),
 	new ParamFlag( 'debug', null, 'Increase verbosity' ),
 	new ParamFlag( 'help', 'h', 'Show this message and quit' ),
 	new ParamFlag( 'no-interaction', null, 'Do not confirm every change' ),
 ] );

 // show help screen
 if( $opts->getArg( 'help' ) ) {
 	show_help();
 }

 // cli-only parameters
 $NO_INTERACTION = $opts->getArg( 'no-interaction' );
 $TITLE_SOURCE = $opts->getArg( 'list', 'Utente:OrfanizzaBot/Wikilink da orfanizzare' );

 Log::info( "start" );

 // increase verbosity
 if( $opts->getArg( 'debug' ) ) {
 	Log::$DEBUG = true;
 }

 // wiki uid (from command line or from configuration file)
 $wiki_uid = Config::instance()->get( 'wiki' );
 $wiki_uid = $opts->getArg( 'wiki', $wiki_uid );
 if( ! $wiki_uid ) {
 	Log::error( "please choose the wiki! exit" );
 	exit( 1 );
 }

 // wiki instance
 $wiki = Mediawikis::findFromUid( $wiki_uid );

 // try to load the wiki config
 try {
 	wiki_config();
 } catch( \Exception $e ) {
 	// I don't have any clue about this but sometime happen
 	Log::error( sprintf(
 		"failed reading wiki configuration: %s",
 		$e->getMessage()
 	) );
 	exit( 1 );
 }

 // parameters available both from cli and on-wiki
 $SUMMARY = option( 'summary', "Bot: pages orphanization" );
 $LIST_SUMMARY = option( 'list-summary', "Bot: orphanization list update" );
 $DONE_TEXT = option( 'done-text', "* [[Special:WhatLinksHere/$1]] - {{done}}" );
 $NS = option( 'ns' );
 $WARMUP = option( 'warmup', -1 );
 $COOLDOWN = option( 'cooldown', 1000 );
 $DELAY = option( 'delay', 0 );
 $SEEALSO = option( 'seealso', "See also" );
 $TURBOFRESA = option( 'turbofresa', 86400 );
 $TURBOFRESA_TEXT = option( 'turbofresa-text', "== List ==\n* ..." );
 $TURBOFRESA_SUMMARY = option( 'turbofresa-summary', "Bot: list clean" );
 $SKIP_PERMISSIONS = option( 'skip-permissions' );

 // hardcoded values (@TODO: consider an option)
 $GROUP = 'sysop';

 // my username (well, it's not so important, just used to have a friendlier log message)
 $ME = explode( '@', API::$DEFAULT_USERNAME, 2 )[ 0 ];

 // query titles to be orphanized alongside the last revision of the list
 $responses = $wiki->createQuery( [
 	'action' => 'query',
 	'titles' => $TITLE_SOURCE,
 	'prop' => [
 		'links',
 		'revisions',
 	],
 	'rvslots' => 'main',
 	'rvprop' => [
 		'comment', // the edit summary is used to detect if the list was already cleaned
 		'user', // the username is used to detect if the last user is allowed
 		'timestamp', // the timestamp is used to check the age of the last edit
 		'content', // page content
 	],
 ] );

 // remember this to avoid edit conflicts
 $list_timestamp = null;
 $list_content = null;

 // collect links and take the last edit timestamp
 $titles_to_be_orphanized = [];
 Log::info( "reading $TITLE_SOURCE" );
 foreach( $responses as $response ) {
 	foreach( $response->query->pages as $page ) {

 		// check if list is unexisting
 		if( isset( $page->missing ) ) {
 			Log::error( "missing list $TITLE_SOURCE" );
 			exit( 1 );
 		}

 		if( isset( $page->revisions ) ) {

 			// there is just one revision
 			$revision = reset( $page->revisions );

 			// save list content
 			$list_content = $revision->slots->main->{ '*' };

 			// check warmup
 			$list_timestamp = $revision->timestamp;
 			$timestamp_datetime = \DateTime::createFromFormat( \DateTime::ISO8601, $list_timestamp );
 			$seconds = time() - $timestamp_datetime->getTimestamp();
 			if( $seconds < $WARMUP ) {
 				Log::info( "list edited just $seconds seconds ago: quit until warmup $WARMUP" );
 				exit( 1 );
 			}

 			// eventually clear list
 			if( $seconds > $TURBOFRESA ) {
 				if( $revision->comment === $TURBOFRESA_SUMMARY ) {
 					Log::info( "list edited $seconds seconds ago. already cleared. quit" );
 				} else {
 					Log::info( "list edited $seconds seconds ago. spawning a turbofresa to clear the list. quit" );

 					// TODO: dedicated customizable summary
 					// TODO: customizable content
 					$wiki->login()->edit( [
 						'title' => $TITLE_SOURCE,
 						'summary' => $TURBOFRESA_SUMMARY,
 						'text' => $TURBOFRESA_TEXT,
 						'basetimestamp' => $list_timestamp,
 						'bot' => 1,
 					] );
 				}
 				exit( 0 );
 			}

 			// check user
 			$lastuser = $revision->user;
 			$rights = $wiki->fetch( [
 				'action' => 'query',
 				'list' => 'users',
 				'usprop' => 'groups',
 				'ususers' => $lastuser,
 			] );

 			// warn about that above user and eventually quit
 			$lastuser_was = "$lastuser was the last editor: ";
-			$groups = reset( $rights->query->users )->groups;
+			// the API may describe the last editor without any 'groups' (e.g. an IP address):
+			// consider it outside of every group instead of dying on an undefined property
+			$lastuser_info = reset( $rights->query->users );
+			$groups = isset( $lastuser_info->groups ) ? $lastuser_info->groups : [];
 			if( in_array( $GROUP, $groups, true ) ) {
 				Log::info( $lastuser_was . "a $GROUP. OK" );
 			} else {
 				// show a friendly message if it's just me
 				$its_me = $wiki->isLogged() && $lastuser === $wiki->getUsername() || $lastuser === $ME;
 				if( $its_me ) {
 					Log::info( $lastuser_was . "It's-a me, Mario! quit" );
 				} else {
 					Log::error( $lastuser_was . "not a $GROUP. quit" );
 				}

 				if( $SKIP_PERMISSIONS ) {
 					Log::warn( "skip list permission failure because of 'skip-permissions' option enabled" );
 				} else {
 					// it's me? exit normally.
 					exit( $its_me ? 0 : 1 );
 				}
 			}
 		}

 		// collect links (if any)
 		if( isset( $page->links ) ) {
 			foreach( $page->links as $link ) {
 				$titles_to_be_orphanized[] = $link->title;
 			}
 		}
 	}
 }

 // keep a copy
 $involved_pagetitles = $titles_to_be_orphanized;

 // log titles
 if( $titles_to_be_orphanized ) {
 	Log::info( 'found ' . count( $titles_to_be_orphanized ) . ' pages to be orphanized:' );
 	foreach( $titles_to_be_orphanized as $title ) {
 		Log::info( " $title" );
 	}
 }

-// associative array of page IDs as key and a boolean as value containg pages to be orphanized
+// list of the page IDs of the pages linking to the titles to be orphanized
 $involved_pageids = [];

 // note that the API accepts a maximum trance of titles
 while( $less_titles_to_be_orphanized = array_splice( $titles_to_be_orphanized, 0, MAX_TRANCHE_TITLES ) ) {

 	// API arguments for the linkshere query
 	$linksto_args = [
 		'action' => 'query',
 		'titles' => $less_titles_to_be_orphanized,
 		'prop' => 'linkshere',
 		'lhprop' => [
 			'pageid',
 			'title',
 		],
 		'lhshow' => '!redirect',
 		'lhlimit' => 300,
 	];

 	// limit to certain namespaces from command line
 	if( $NS !== null ) {
 		$linksto_args[ 'lhnamespace' ] = $NS;
 	}

 	// cumulate the linkshere page ids
 	Log::info( "requesting linkshere..." );
 	$linksto = $wiki->createQuery( $linksto_args );
 	foreach( $linksto as $response ) {
 		foreach( $response->query->pages as $page ) {
 			if( isset( $page->linkshere ) ) {
 				foreach( $page->linkshere as $linkingpage ) {
 					if( $linkingpage->title !== $TITLE_SOURCE ) {
 						$involved_pageids[] = (int) $linkingpage->pageid;
 					}
 				}
 			}
 		}
 	}
 }

+// a page linking to several listed titles was collected once for each of them
+$involved_pageids = array_values( array_unique( $involved_pageids ) );
+
 // count of involved pages
 if( $involved_pagetitles ) {
 	Log::info( sprintf(
 		"found %d pages containing the %d involved wlinks",
 		count( $involved_pageids ),
 		count( $involved_pagetitles )
 	) );
 }

 // number of edited pages
 $edits = 0;

 // note that the API accepts a maximum tranche of IDs
 while( $less_involved_pageids = array_splice( $involved_pageids, 0, MAX_TRANCHE_TITLES ) ) {

 	// query last revision
 	$responses = $wiki->createQuery( [
 		'action' => 'query',
 		'pageids' => $less_involved_pageids,
 		'prop' => 'revisions',
 		'rvslots' => 'main',
 		'rvprop' => [
 			'content',
 			'timestamp',
 		],
 	] );

 	// for each response
 	foreach( $responses as $response ) {

 		// for each page
 		foreach( $response->query->pages as $page ) {

 			// avoid too many edits
-			if( $edits > $COOLDOWN ) {
+			if( $edits >= $COOLDOWN ) {
 				Log::info( "reached cooldown: stop" );
 				exit( 0 );
 			}

 			// page ID to be edited
 			$pageid = $page->pageid;

 			// does it have a revision?
 			if( !isset( $page->revisions[ 0 ] ) ) {
 				continue;
 			}

 			// the first revision
 			$revision = $page->revisions[ 0 ];

 			// timestamp of the revision useful to avoid edit conflicts
 			$timestamp = $revision->timestamp;

 			// wikitext from the main slot of this revision
 			$wikitext_raw = $revision->slots->main->{ '*' };

 			// create a Wikitext object
 			$wikitext = $wiki->createWikitext( $wikitext_raw );

 			// for each of the titles to be orphanized
 			foreach( $involved_pagetitles as $involved_pagetitle ) {

 				// parse the title being orphanized
 				$title = $wiki->createTitleParsing( $involved_pagetitle );

 				// if it's a category, remove it
 				if( $title->getNs()->getID() === 14 ) {
 					$wikitext->removeCategory( $title->getTitle() );
 				}

 				// a wikilink with and without alias
 				$wikilink_simple = $wiki->createWikilink( $title, Wikilink::NO_ALIAS );
 				$wikilink_alias = $wiki->createWikilink( $title, Wikilink::WHATEVER_ALIAS );

 				// replace simple links e.g. [[Hello]]
 				$wikilink_regex_simple = $wikilink_simple->getRegex( [
 					'title-group-name' => 'title',
 				] );

 				// replace links with alias e.g. [[Hello|whatever]]
 				$wikilink_regex_alias = $wikilink_alias->getRegex( [
 					'alias-group-name' => 'alias',
 				] );

 				// replace entry from "See also" section
 				$wikilink_regex_clean = $wikilink_simple->getRegex();
 				$wikilink_regex_clean = Regex::spaceBurger( $wikilink_regex_clean );
-				$seealso = preg_quote( $SEEALSO );
+				// the pattern below is delimited by '/': that character must be quoted as well
+				$seealso = preg_quote( $SEEALSO, '/' );
 				$seealso_regex = '/' .
 					Regex::groupNamed( "\\n== *$seealso *== *((?!=).*\\n)*", 'keep' ) .
 					Regex::groupNamed( "[ \\t]*\*[ \\t]*{$wikilink_regex_clean}.*\\n", 'wlink' ) .
 					'/';

 				Log::debug( "regex simple wikilink:" );
 				Log::debug( $wikilink_regex_simple );

 				Log::debug( "regex wikilink aliased:" );
 				Log::debug( $wikilink_regex_alias );

 				Log::debug( "regex see also:" );
 				Log::debug( $seealso_regex );

 				// strip out the entry from «See also» section
 				$wikitext->pregReplaceCallback( $seealso_regex, function ( $matches ) {
 					return $matches[ 'keep' ];
 				} );

 				// convert '[[Hello]]' to 'Hello'
 				$wikitext->pregReplaceCallback( "/$wikilink_regex_simple/", function ( $matches ) {
 					// fix unwanted indentations
 					$title = ltrim( $matches[ 'title' ], ':' );
 					return trim( $title );
 				} );

 				// convert '[[Hello|world]]' to 'world'
 				$wikitext->pregReplaceCallback( "/$wikilink_regex_alias/", function ( $matches ) {
 					// fix unwanted indentations
 					return trim( $matches[ 'alias' ] );
 				} );

 			} // end loop titles to be orphanized

 			// check for changes and save
 			if( $wikitext->isChanged() ) {
 				Log::info( "changes on page $pageid:" );
 				foreach( $wikitext->getHumanUniqueSobstitutions() as $substitution ) {
 					Log::info( "\t $substitution" );
 				}

 				if( $NO_INTERACTION || 'n' !== Input::yesNoQuestion( "confirm changes" ) ) {
 					try {
 						// the entire world absolutely needs this shitty ASCII animation - trust me
 						if( $edits && $DELAY ) {
 							Log::info( "delay $DELAY seconds", [ 'newline' => false ] );
 							for( $i = 0; $i < $DELAY; $i++ ) {
 								sleep( 1 );
 								echo '.';
 							}
 							echo "\n";
 						}

 						// eventually login and save
 						$wiki->login()->edit( [
 							'pageid' => $pageid,
 							'text' => $wikitext->getWikitext(),
 							'summary' => $SUMMARY,
 							'basetimestamp' => $timestamp,
 							'minor' => 1,
 							'bot' => 1,
 						] );

 						$edits++;
 					} catch( ProtectedPageException $e ) {
 						Log::warn( "skip protected page $pageid" );
+					} catch( EditConflictException $e ) {
+						Log::warn( "skip page $pageid because of an edit conflict" );
 					}
 				} // end confirmation

 			} // end save

 		} // end loop pages

 	} // end loop responses

 } // end loop involved page IDs

 // content of the list
 $wikitext = $wiki->createWikitext( $list_content );

 // remove each entry from the list
 foreach( $involved_pagetitles as $title_raw ) {
 	$wlink = $wiki->createTitleParsing( $title_raw )
 		->createWikilink( Wikilink::WHATEVER_ALIAS )
 		->getRegex();

 	// strip out the whole related line and replace with something else
 	$from = "/.*$wlink.*/";
 	// @todo In case done-text contains the full link to a page, and it has already been
 	// replaced in a previous run, don't replace it again.
 	$to = str_replace( '$1', $title_raw, $DONE_TEXT );
 	$wikitext->pregReplace( $from, $to );
 }

 // update list
 if( $wikitext->isChanged() ) {
 	Log::info( "removing orphanized pages from list" );
 	try {
 		$wiki->login()->edit( [
 			'title' => $TITLE_SOURCE,
 			'text' => $wikitext->getWikitext(),
 			'summary' => $LIST_SUMMARY,
 			'basetimestamp' => $list_timestamp,
 			'bot' => 1,
 		] );
 	} catch( ProtectedPageException $e ) {
 		Log::warn( "can't update list because of protection" );
 	} catch( EditConflictException $e ) {
 		Log::warn( "ARGHHHH! Is someone editing my list? MY PRECIOUSss LIST!?!? WHAAT?? I will find you, and I will rewrite your edit. Damn human beings... asd." );
 	}
 } else {
 	Log::info( "nothing to be done" );
 }

 Log::info( "end" );