diff --git a/2021-02-legavolley/README.md b/2021-02-legavolley/README.md new file mode 100644 index 0000000..808eee3 --- /dev/null +++ b/2021-02-legavolley/README.md @@ -0,0 +1,24 @@ +# 2021 LegaVolley bot + +This directory contains some tools useful during the 2021 LegaVolley sprint in Wikimedia Commons and Wikidata. + +## Consensus + +- [X] [consensus on it.wiki](https://it.wikipedia.org/wiki/Progetto:Sport/Pallavolo/Legavolley) +- [X] [consensus on Wikimedia Commons](https://commons.wikimedia.org/wiki/Commons:Bots/Requests/Valerio_Bozzolan_bot) (previous year) +- [X] [consensus on Wikimedia Commons](https://commons.wikimedia.org/wiki/Commons:Bots/Requests/Valerio_Bozzolan_bot_(5)) + +## Usage + + ./list-files-in-cat.php + ./verify-personal-cats-deeper.php + +## License + +Copyright (C) 2019 Valerio Bozzolan + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/2021-02-legavolley/autoload.php b/2021-02-legavolley/autoload.php new file mode 100644 index 0000000..a62f0fa --- /dev/null +++ b/2021-02-legavolley/autoload.php @@ -0,0 +1,8 @@ +. // autoload framework -require __DIR__ . 
'/../includes/boz-mw/autoload.php';
+require 'autoload.php';
 $MAIN_CAT = 'Category:Files from Legavolley stream';
 $wiki = \wm\Commons::instance();
+// collect first-class categories
 $queries = $wiki->createQuery( [
 	'action' => 'query',
 	'list' => 'categorymembers',
 	'cmtitle' => $MAIN_CAT,
 	'cmtype' => 'subcat',
 ] );
-$sub_categories = [];
+// query all sub-categories
+$volleyball_categories = [];
 foreach( $queries as $query ) {
 	foreach( $query->query->categorymembers as $page ) {
-		$sub_categories[] = $page->title;
+		$volleyball_categories[] = $page->title;
 	}
 }
-$files = [];
+$players = [];
 // now for each sthat we have the correct categories
-foreach( $sub_categories as $sub_category ) {
+foreach( $volleyball_categories as $sub_category ) {
+	// prepare to query each file with their categories
 	$queries = $wiki->createQuery( [
-		'action' => 'query',
-		'list' => 'categorymembers',
-		'cmtitle' => $sub_category,
-		'cmtype' => 'file',
+		// query files
+		'action' => 'query',
+		'generator' => 'categorymembers',
+		'gcmtitle' => $sub_category,
+		'gcmtype' => 'file',
+		'gcmlimit' => 500,
+
+		// for each file get its categories
+		'prop' => 'categories',
+		'clshow' => '!hidden',
 	] );
 	// for each query
 	foreach( $queries as $query ) {
-		$members = $query->query->categorymembers ?? [];
-		foreach( $members as $page ) {
-
-			$title = $page->title;
-			$files[] = $title;
+		// for each file
+		$pages = $query->query->pages ?? [];
+		foreach( $pages as $page ) {
+			$title = $page->title;
+			$pageid = $page->pageid;
+
+			// print the title only after it has been read from the current
+			// page, otherwise the previous (or an undefined) title is shown
 			echo "$title\n";
+
+			// get or create
+			$players[ $pageid ] = $players[ $pageid ] ?? new VolleyballPlayerFile();
+			$player = $players[ $pageid ];
+			$player->file = $title;
+
+			// eventually loop the categories
+			$categories = $page->categories ?? [];
+			foreach( $categories as $category ) {
+
+				// read the category title first, then print it
+				$category_title = $category->title;
+				echo "$category_title\n";
+
+				$player->cats[] = $category_title;
+			}
 		}
 	}
 }
+
+$data = serialize( $players );
+
+file_put_contents( 'data/players.serialized', $data );
diff --git a/2021-02-legavolley/verify-personal-cats-deeper.php b/2021-02-legavolley/verify-personal-cats-deeper.php
new file mode 100755
index 0000000..658a277
--- /dev/null
+++ b/2021-02-legavolley/verify-personal-cats-deeper.php
@@ -0,0 +1,65 @@
+#!/usr/bin/php
+.
+
+// autoload framework
+require 'autoload.php';
+
+$commons = wiki( 'commonswiki' );
+
+$data_raw = file_get_contents( 'data/players.serialized' );
+$players = unserialize( $data_raw );
+
+foreach( $players as $player ) {
+
+	$file = $commons->createTitleParsing( $player->file );
+	$filename = $file->getTitle()->get();
+
+	$filename_words = explode( ' ', $filename );
+
+	$first_word = $filename_words[ 0 ];
+
+	if( $player->cats ) {
+
+		foreach( $player->cats as $cat_raw ) {
+
+			$cat = $commons->createTitleParsing( $cat_raw );
+			$cat_name = $cat->getTitle()->get();
+
+			if( strpos( $cat_name, $first_word ) !== false ) {
+				$player->cat = $cat_raw;
+				break;
+			}
+		}
+	}
+}
+
+$fp = fopen( 'data/players.csv', 'w' );
+
+fputcsv( $fp, [
+	"Filename",
+	"Personal category",
+] );
+
+foreach( $players as $player ) {
+
+	fputcsv( $fp, [
+		$player->file,
+		$player->cat ?? '',
+	] );
+
+}
+fclose( $fp );
diff --git a/2021-02-legavolley/verify-personal-cats.php b/2021-02-legavolley/verify-personal-cats.php
old mode 100644
new mode 100755