diff --git a/includes/boz-mw b/includes/boz-mw index 23d9a38..caab1b1 160000 --- a/includes/boz-mw +++ b/includes/boz-mw @@ -1 +1 @@ -Subproject commit 23d9a382c4ca72d2fd950934a94042bc59969a89 +Subproject commit caab1b1216efbe9ff4ada330710d4b4c8d8485eb diff --git a/includes/class-PDCs.php b/includes/class-PDCs.php index 709b406..e1e3ceb 100644 --- a/includes/class-PDCs.php +++ b/includes/class-PDCs.php @@ -1,167 +1,167 @@ . namespace itwikidelbot; use DateTime; use mw\API\PageMatcher; /** * Handle some PDCs */ class PDCs { /** * Sort some PDCs by creation date * * The array is received by reference and sorted in place. * * NOTE(review): the comparator returns a bool rather than an integer; * consider the spaceship operator instead. * * @param $pdcs array */ public static function sortByCreationDate( & $pdcs ) { // sort PDCs by creation date usort( $pdcs, function ( $a, $b ) { return $a->getCreationDate() > $b->getCreationDate(); } ); } /** * Index some PDCs by their type * * Every known type gets an entry, even when no PDC has that type. * * @param $pdcs array * @return array */ public static function indexByType( $pdcs ) { $pdcs_by_type = []; foreach( CategoryYearMonthDayTypes::all() as $Type ) { $pdcs_by_type[ $Type::PDC_TYPE ] = []; } foreach( $pdcs as $pdc ) { $type = $pdc->getType(); $pdcs_by_type[ $type ][] = $pdc; } return $pdcs_by_type; } /** * Discard all the PDCs whose start date is not on a certain day * * @param $pdcs array * @param $date DateTime * @return array */ public static function filterByDate( $pdcs, DateTime $date ) { $y_m_d = $date->format( 'Y-m-d' ); return array_filter( $pdcs, function ( $pdc ) use ( $y_m_d ) { return $pdc->getStartDate()->format( 'Y-m-d' ) === $y_m_d; } ); } /** * Keep only the PDCs that are not multiple * * @param $pdcs array * @return array */ public static function filterNotMultiple( $pdcs ) { return array_filter( $pdcs, function ( $pdc ) { return ! 
$pdc->isMultiple(); } ); } /** * Get all the title subjects of the specified PDCs * * @param $pdcs array * @return array */ public static function titleSubjects( $pdcs ) { return array_map( function ( $pdc ) { return $pdc->getTitleSubject(); }, $pdcs ); } /** * Populate the specified PDCs that are missing information * * This method is intended to make as few API requests as possible. * * @param $pdcs array * @return void */ public static function populateMissingInformations( $pdcs ) { self::populateMissingInformationsFromLastRevision( $pdcs ); } /** * Populate the specified PDCs with the missing information that can * be obtained by fetching the last revision. * * This method is intended to make as few API requests as possible. * * @param $pdcs array */ private static function populateMissingInformationsFromLastRevision( $pdcs ) { if( ! $pdcs ) { return; } /* * fetch the last revision of all the PDCs * * @see https://it.wikipedia.org/w/api.php?action=help&modules=query%2Brevisions */ $query = Page::api()->createQuery( [ 'action' => 'query', 'titles' => self::titleSubjects( $pdcs ), 'prop' => 'revisions', 'rvprop' => [ 'content', 'timestamp', ], 'rvslots' => 'main', 'rvsection' => '0', ] ); // callback fired for every match between response pages and PDCs $matching_callback = function ( $page, $pdc ) { if( isset( $page->revisions[ 0 ] ) ) { // this is the only one $revision = $page->revisions[ 0 ]; // populate the lastedit date $lastedit_date = Page::createDateTimeFromString( $revision->timestamp ); $pdc->setLasteditDate( $lastedit_date ); // populate the subject themes if( isset( $revision->slots->main ) ) { $pdc->setSubjectThemesScrapingSubjectWikitext( $revision->slots->main->{ '*' } ); } } }; // callback that returns the PDC page title $pdc_page_title_callback = function ( $pdc ) { return $pdc->getTitleSubject(); }; // query continuation foreach( $query->getGenerator() as $response ) { // match page results and PDCs by title - ( new PageMatcher( $response->query, $pdcs ) ) + ( new 
PageMatcher( $response, $pdcs ) ) ->matchByTitle( $matching_callback, $pdc_page_title_callback ); } } } diff --git a/includes/class-Page.php b/includes/class-Page.php index a10a758..9118931 100644 --- a/includes/class-Page.php +++ b/includes/class-Page.php @@ -1,216 +1,216 @@ . namespace itwikidelbot; use DateTime; use DateTimeZone; use cli\Input; use cli\Log; use wm\WikipediaIt; use mw\Tokens; /** * Handle a page */ class Page { /** * Time zone of the Italian Wikipedia community. * * @var string */ const COMMUNITY_TIMEZONE = 'Europe/Rome'; /** * Enable this flag to be asked for confirmation before saving every change * * @var bool */ public static $ASK_BEFORE_SAVING = false; /** * @var string Page title with prefix */ private $title; /** * Cached existence status of this page (null until it is known) * * @var bool|null */ private $exists; /** * Construct a Page * * @param $title string Page title with its prefix */ public function __construct( $title ) { $this->title = $title; } /** * Get the page title with its prefix * * @return string */ public function getTitle() { return $this->title; } /** * Save this page * * @param $content string Page content * @param $summary string Edit summary * @return mixed API response, or false when the save is not confirmed */ public function saveByContentSummary( $content, $summary ) { $title = $this->getTitle(); $api = self::api(); $args = [ 'action' => 'edit', 'title' => $title, 'text' => $content, 'summary' => $summary, 'token' => $api->login()->getToken( Tokens::CSRF ), 'bot' => 1, ]; if( self::$ASK_BEFORE_SAVING ) { print_r( $args ); if( 'y' !== Input::yesNoQuestion( "Save?" ) ) { return false; } } Log::info( "writing [[$title]]" ); return $api->post( $args ); } /** * Save this page if it does not exist * * @param $content string Page content * @param $summary string Edit summary * @return bool|mixed False if not created */ public function saveByContentSummaryIfNotExists( $content, $summary ) { if( ! 
$this->exists() ) { return $this->saveByContentSummary( $content, $summary ); } return false; } /** * Set internally whether this page exists * * @param $exists bool * @return void */ public function setIfExists( $exists ) { $this->exists = $exists; } /** * Check if this page exists (the result is cached) * * @return bool */ public function exists() { if( null === $this->exists ) { $result = self::api()->fetch( [ 'action' => 'query', 'prop' => 'info', 'titles' => $this->getTitle(), ] ); foreach( $result->query->pages as $pageid => $page ) { $this->exists = ! isset( $page->missing ); } } return $this->exists; } /** * Fetch the first revision date by direction * * @param $direction string Value of the `rvdir` API parameter (`newer` or `older`) * @return DateTime * @throws \Exception When the response contains no revision */ public function fetchFirstRevisionDateByDirection( $direction ) { $response = self::api()->fetch( [ 'action' => 'query', 'titles' => $this->getTitle(), 'prop' => 'revisions', 'rvprop' => [ 'timestamp' ], 'rvlimit' => 1, 'rvdir' => $direction, ] ); foreach( $response->query->pages as $page ) { if( isset( $page->revisions ) ) { foreach( $page->revisions as $revision ) { return self::createDateTimeFromString( $revision->timestamp ); } } } throw new \Exception( 'unable to fetch the creation date' ); } /** * Fetch the creation date of this page * * @return DateTime */ public function fetchCreationDate() { return $this->fetchFirstRevisionDateByDirection( 'newer' ); } /** * Fetch the last edit date of this page * * @return DateTime */ public function fetchLasteditDate() { return $this->fetchFirstRevisionDateByDirection( 'older' ); } /** * Get the API related to this page * * @return mw\API */ public static function api() { - return WikipediaIt::getInstance(); + return WikipediaIt::instance(); } /** * Create a DateTime object from a MediaWiki formatted date * * MediaWiki dates are formatted following the ISO8601 standard * and the returned object is converted to the community timezone. * * NOTE(review): a string that does not match the format is not handled here. 
* * @param $datetime string * @return DateTime */ public static function createDateTimeFromString( $datetime ) { return DateTime::createFromFormat( DateTime::ISO8601, $datetime ) ->setTimezone( new DateTimeZone( self::COMMUNITY_TIMEZONE ) ); } } diff --git a/includes/class-Pages.php b/includes/class-Pages.php index ee5d748..8aa139e 100644 --- a/includes/class-Pages.php +++ b/includes/class-Pages.php @@ -1,74 +1,74 @@ . namespace itwikidelbot; use mw\API\PageMatcher; /** * Handle multiple pages */ class Pages { /** * Get page titles from pages * * @param $pages array * @return array */ public static function titles( $pages ) { return array_map( function ( $page ) { return $page->getTitle(); }, $pages ); } /** * Check which of these pages exist, storing the result in each page * * This method is intended to make as few API requests as possible. * * @param $pages array */ public static function populateWheneverTheyExist( $pages ) { // API query to check if these pages exist $query = Page::api()->createQuery( [ 'action' => 'query', 'prop' => 'info', 'titles' => self::titles( $pages ), ] ); // callback to retrieve the page title $page_title_callback = function ( $page ) { return $page->getTitle(); }; // callback fired for every match between response pages and my pages $matching_callback = function ( $response_page, $my_page ) { $my_page->setIfExists( ! isset( $response_page->missing ) ); }; // query continuation foreach( $query->getGenerator() as $response ) { // match response pages with my pages - ( new PageMatcher( $response->query, $pages ) ) + ( new PageMatcher( $response, $pages ) ) ->matchByTitle( $matching_callback, $page_title_callback ); } } }