#!/usr/bin/env php
// NOTE(review): this script reached review as a whitespace-collapsed patch
// whose markup had been stripped by extraction. The file header (opening PHP
// tag, licence notice) and the XML tags inside the string literals below are
// not visible here: restore them from the upstream file before running this.

/**
 * Export the revisions of some pages of a wiki into a file.
 *
 * Usage:
 *   mega-export-xml.php --wiki=WIKI --file=export.xml [OPTIONS] Page_title
 *
 * The revisions are requested from the MediaWiki API in batches of --limit
 * revisions and the output file is written one API response at a time.
 *
 * Exit status: 0 on success or when --help is requested, 1 on bad arguments,
 * on a missing page, or when the output file cannot be opened or written.
 */

// exit if not CLI
$argv or exit( 1 );

// load boz-mw
require __DIR__ . '/../autoload-with-laser-cannon.php';

// the number '500' gives too much
$DEFAULT_LIMIT = 100;

// load configuration
config_wizard( 'config.php' );

use \web\MediaWikis;
use \cli\Log;

// all the available wiki UIDs
$mediawiki_uids = implode( ', ', MediaWikis::allUIDs() );

// register all CLI parameters
$opts = cli_options()
	->addValued( 'wiki',  null, "Available wikis: $mediawiki_uids" )
	->addValued( 'limit', null, "Number of revisions for each request", $DEFAULT_LIMIT )
	->addValued( 'file',  null, "Output filename", 'export.xml' )
	->addFlag(   'help',  'h',  "Show this help and quit" );

// problems found in the command line arguments
$messages = [];

// chosen wiki
$wiki_uid = $opts->get( 'wiki' );
if( !$wiki_uid ) {
	$messages[] = "Please specify --wiki=WIKI";
}

// page titles
$page_titles = $opts::unnamedArguments();
if( !$page_titles ) {
	$messages[] = "Please specify some page titles";
}

// output filename
$filename = $opts->get( 'file' );
if( !$filename ) {
	$messages[] = "Please specify a filename";
}

// revisions for each request: a non-numeric value is cast to 0, so refuse
// everything that is not a positive number instead of sending it to the API
$limit = (int) $opts->get( 'limit' );
if( $limit < 1 ) {
	$messages[] = "Please specify a positive --limit";
}

// an explicit --help wins over the argument errors
$help_requested = (bool) $opts->get( 'help' );
if( $help_requested ) {
	$messages = [];
}

// show the help, also when something is wrong in the arguments
if( $help_requested || $messages ) {
	echo "Usage:\n {$argv[ 0 ]} --wiki=WIKI --file=export.xml [OPTIONS] Page_title\n";
	echo "Allowed OPTIONS:\n";
	$opts->printParams();
	foreach( $messages as $msg ) {
		echo "\nError: $msg";
	}
	echo "\n";
	exit( $help_requested ? 0 : 1 );
}

/**
 * Read a property from an API object and escape it for an XML document.
 *
 * htmlentities() is not suitable here: it emits HTML named entities
 * (e.g. "&eacute;") that are undefined in XML. With ENT_XML1 only the
 * characters that are special in XML are escaped.
 *
 * A property that is not set yields an empty string instead of a PHP warning.
 *
 * @param  object $object   Object decoded from the API response
 * @param  string $property Name of the property to read
 * @return string           Escaped value
 */
$xml_prop = function ( $object, $property ) {
	$value = isset( $object->{$property} ) ? $object->{$property} : '';
	return htmlspecialchars( (string) $value, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8' );
};

// try to open the file
$file = fopen( $filename, 'w' );
if( !$file ) {
	Log::error( "Can't open file '$filename'" );
	exit( 1 );
}

// pick the wiki and login
// (the wiki object is kept apart from the login() call, so that $wiki does
//  not depend on what login() returns)
$wiki = wiki( $wiki_uid );

$wiki->login();

// build the MediaWiki API query
$requests = $wiki->createQuery( [
	'action'  => 'query',
	'titles'  => $page_titles,
	'prop'    => 'revisions',
	'rvprop'  => [
		'ids',
		'flags',
		'timestamp',
		'user',
		'userid',
		'size',
		'slotsize',
		'sha1',
		'comment',
		'content',
	],
	'rvslots' => 'main',
	'rvlimit' => $limit,
] );

// total number of revisions
$total = 0;

// number of the request in progress
$n_requests = 1;

// output waiting to be written to the file
// NOTE(review): the markup of this literal, and of the literals below, looks
// stripped: compare with the upstream file
$out = '' . "\n";

foreach( $requests as $request ) {

	// show a kind of progress
	Log::info( sprintf(
		"processing request %d (continuing from %d revisions)",
		$n_requests++,
		$total
	) );

	// the warning about short responses is shown at most once per request
	$response_warning_shown = false;

	foreach( $request->query->pages as $page ) {

		if( isset( $page->missing ) ) {
			Log::error( "Page '{$page->title}' is missing" );
			exit( 1 );
		}

		// a page without revisions in this response has nothing to export
		if( !isset( $page->revisions ) ) {
			continue;
		}

		$count = count( $page->revisions );

		foreach( $page->revisions as $revision ) {

			// a revision without comment in a response that is shorter than
			// requested: warn the user, just once per request
			if( empty( $revision->comment ) && $count !== $limit && !$response_warning_shown ) {
				Log::warn( "response with $count revisions instead of $limit: consider to lower your limit (ignore if you see this just once)" );
				$response_warning_shown = true;
			}

			$total++;

			foreach( $revision->slots as $slot ) {

				// avoid nonsense slots
				if( empty( $slot->contentmodel ) ) {
					continue;
				}

				$safe_user    = $xml_prop( $revision, 'user' );
				$safe_userid  = $xml_prop( $revision, 'userid' );
				$safe_comment = $xml_prop( $revision, 'comment' );
				$safe_model   = $xml_prop( $slot, 'contentmodel' );
				$safe_format  = $xml_prop( $slot, 'contentformat' );
				$safe_text    = $xml_prop( $slot, '*' );

				$out .= "\n";
				$out .= "\t{$revision->revid}\n";
				$out .= "\t{$revision->parentid}\n";
				$out .= "\t{$revision->timestamp}\n";
				$out .= "\t\n";
				$out .= "\t\t$safe_user\n";
				$out .= "\t\t$safe_userid\n";
				$out .= "\t\n";
				$out .= "\t$safe_comment ?>";
				$out .= "\t$safe_model\n";
				$out .= "\t$safe_format\n";
				$out .= "\tsize}\">$safe_text\n";
				$out .= "\t{$revision->sha1}\n";
				$out .= "\n";
			}
		}
	}

	// write the file in chunks, to do not keep the whole export in memory
	if( fwrite( $file, $out ) === false ) {
		Log::error( "Can't write to file '$filename'" );
		exit( 1 );
	}
	$out = '';
}

// finish the file before telling that everything is done
if( fwrite( $file, "\n" ) === false ) {
	Log::error( "Can't write to file '$filename'" );
	exit( 1 );
}
fclose( $file );

Log::info( sprintf(
	"you mega-exported %d revisions! nice shot! See %s",
	$total,
	$filename
) );