diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md index c499e42..bd6572e 100644 --- a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md @@ -1,27 +1,49 @@ # Importer of multimedia files from https://iconoteca.arc.usi.ch/ ## Description Welcome in the importer of multimedia files for https://iconoteca.arc.usi.ch/. For more information about the consensus: https://it.wikipedia.org/wiki/Wikipedia:Raduni/Biblioteca_dell%27Accademia_di_Mendrisio_4_ottobre_2020 ## Installation From this directory: ``` git clone https://github.com/phpquery/phpquery ``` +## Usage ## + +First download locally one of their collections: + +``` +wget https://iconoteca..../collection-asd.html +``` + +Then you can examine that HTML page and bulk-download the available images from it: + +``` +./parse-html-and-import.php collection-asd.html +``` + +Then you can bulk-upload your files by selecting the directory with the images/metadata and a template: + +``` +./upload.php images/ template/collezione-biblioteca.php +``` + +Happy hacking! + ## License Copyright (C) 2020 Valerio Bozzolan This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . 
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php new file mode 100644 index 0000000..887b837 --- /dev/null +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php @@ -0,0 +1,96 @@ +. + +// require the shared helper functions and the two metadata classes +require 'include/functions.php'; +require 'include/class-Metadata.php'; +require 'include/class-MetadataValue.php'; + +// base URL to be scraped +define( 'BASE_URL', 'https://iconoteca.arc.usi.ch' ); + +// inventory prefix to be stripped out to read the image ID (note the double slash! asd) +define( 'INVENTORY_PREFIX_TO_STRIP', BASE_URL . '//thumb.php?inventario=' ); + +// URL to the single photo from the image ID (DOI) +define( 'INVENTORY_URL_FORMAT', BASE_URL . '/it/inventario/%d' ); + +// URL of the high quality image +define( 'HIGH_QUALITY_IMAGE_URL', BASE_URL . '/image-viewer.php?inventario=%d' ); + +// URL of the low quality image +define( 'LOW_QUALITY_IMAGE_URL', BASE_URL . 
'/image_permission_show.php?inventario=%d' ); + +// image download name (with image ID) +define( 'IMAGE_DOWNLOAD_NAME', 'images/%d.jpg' ); + +// array of metadatas displayed in the body in the '.metadati' selector +// basically they are the labels displayed in the body on every image like this one: +// https://iconoteca.arc.usi.ch/it/inventario/51630 +$METADATA_BODY = [ + new Metadata( 'Luogo rappresentato' ), + new Metadata( 'Tipologia di risorsa' ), + new Metadata( 'Creatore' ), + new Metadata( 'Data' ), + new Metadata( 'DOI', function ( $p ) { + + // the DOI is a link, so just extract the URL + + // text displayed after the label (manually stripping the label) + return $p->find( 'a' )->attr( 'href' ); + } ), + new Metadata( 'ID immagine' ), + new Metadata( 'Licenza', function( $p ) { + + // the License is a link, so just extract the URL + + // text displayed after the label (manually stripping the label) + return $p->find( 'a' )->attr( 'href' ); + } ), +]; + +// array of metadatas displayed in the footer in the '.metadati_completi' selector +// basically they are the labels displayed in the footer on every image like this one: +// https://iconoteca.arc.usi.ch/it/inventario/51630 +$METADATA_FOOTER = [ + new Metadata( 'Titolo opera' ), + new Metadata( 'Titolo originale' ), + new Metadata( 'Iscrizione' ), + new Metadata( 'Collezione' ), + new Metadata( 'Data creazione' ), + new Metadata( 'Luogo creazione' ), + new Metadata( 'Nome creatore' ), + new Metadata( 'Descrittori Sbt' ), + new Metadata( 'Descrittori Getty AAT' ), + new Metadata( 'Luogo rappresentato', function( $p ) { + + // take just the text inside the link + return $p->find( 'a' )->text(); + } ), + new Metadata( 'Classificazione' ), + new Metadata( 'Tipo materiale' ), + new Metadata( 'Designazione specifica del materiale' ), + new Metadata( 'Supporto originale' ), + new Metadata( 'Materiale del supporto' ), + new Metadata( 'Nome oggetto culturale' ), + new Metadata( 'Colore' ), + new Metadata( 'Polarità' 
), + new Metadata( 'Tipo supporto' ), + new Metadata( 'Processo e tecnica' ), + new Metadata( 'Montaggio' ), + new Metadata( 'Orientamento e forma' ), + new Metadata( 'Dimensioni' ), +]; diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php new file mode 100644 index 0000000..26111c7 --- /dev/null +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php @@ -0,0 +1,112 @@ +. + +/** + * Metadata + * + * Basically the Titolo opera, Titolo originale etc. from: + * https://iconoteca.arc.usi.ch/it/inventario/51630 + */ +class Metadata { + + public $label; + + public $valueAdapter; + + /** + * Constructor + * + * @param string $label Metadata label e.g. 'Titolo opera' + * @param function $value_adapter Optional callable + */ + public function __construct( $label, $value_adapter = null ) { + $this->label = $label; + $this->valueAdapter = $value_adapter; + } + + /** + * Get the text of the label + * + * Basically from 'foo' its 'foo:' + * + * @return string + */ + public function getLabel() { + return $this->label . ':'; + } + + /** + * Check if a label matches the one of this metadata + * + * @return bool + */ + public function matchesLabel( $label ) { + return $this->getLabel() === $label; + } + + /** + * Create a MetadataValue object from a value + * + * Note that the value will be adapted. 
+ * + * @param mixed $value + * @return MetadataValue + */ + public function createValue( $value ) { + + // eventually apply the custom value adapter + if( $this->valueAdapter ) { + $user_adapter = $this->valueAdapter; + $value = $user_adapter( $value ); + } else { + // otherwise apply the default value adapter + $value = self::defaultValueAdapter( $value ); + } + + // eventually fix HTML links + $value = html_link_2_wikitext( $value ); + + return new MetadataValue( $this, $value ); + } + + /** + * Default value adapter + * + * Note: as default the value is the paragraph selector. So we strip the label and get the clean data. + * + * @param object $img_metadata_p Paragraph element wrapper (html() and find() are called on it) + * @return string Paragraph HTML without its label element, trimmed + */ + private static function defaultValueAdapter( $img_metadata_p ) { + + // whole HTML of the paragraph (label included, stripped below) + $img_metadata_p_text = $img_metadata_p->html(); + + // label + // it contains 'Titolo originale:' + $img_metadata_p_label = $img_metadata_p->find( 'label' ); + + // label text + // e.g. 'Titolo originale:' + $img_metadata_p_label_html = $img_metadata_p_label->html(); + + // complete text of the paragraph stripping its label (an empty search string would strip nothing) + $img_metadata_p_text = trim( str_replace( "<label>$img_metadata_p_label_html</label>", '', $img_metadata_p_text ) ); + + return $img_metadata_p_text; + + } +} diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php new file mode 100644 index 0000000..134c2c4 --- /dev/null +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php @@ -0,0 +1,37 @@ +. 
+ +/** + * A Metadata related to a value + */ +class MetadataValue { + + public $metadata; + + public $value; + + public function __construct( Metadata $metadata, $value ) { + $this->metadata = $metadata; + $this->value = $value; + } + + public function getData() { + return [ + $this->metadata->label, + $this->value, + ]; + } +} diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php new file mode 100644 index 0000000..92f4420 --- /dev/null +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php @@ -0,0 +1,101 @@ +. + +/** + * Find a matching metadata from a label and return a MetadataValue + * + * @param array $metadatas Array of known metadatas + * @param string $label Original label like 'Titolo originale:' + * @param string $value Value related to the matching metadata + * @return MetadataValue|false Matching metadata or false if not found + */ +function find_matching_metadatavalue_from_label( $metadatas, $label, $value ) { + + // find the matching metadata + foreach( $metadatas as $metadata ) { + if( $metadata->matchesLabel( $label ) ) { + return $metadata->createValue( $value ); + } + } + + // no metadata no party + return false; +} + +/** + * Print a message + */ +function message( $message ) { + printf( + "[%s] %s\n", + date( 'Y-m-d H:i:s' ), + $message + ); +} + +/** + * Convert an HTML link to a wikitext one + */ +function html_link_2_wikitext( $txt ) { + + return preg_replace_callback( '@<a href="(.+?)">(.+?)</a>@', function( $matches ) { + + // eventually make the URL absolute + $url = $matches[1]; + if( $url[0] === '/' ) { + $url = BASE_URL . $url; + } + + return sprintf( + '[%s %s]', + $url, + $matches[2] + ); + }, $txt ); + + // e.g. <a href="/foo">Sculture</a> becomes [BASE_URL/foo Sculture] +} + +/** + * Require a certain page from the template directory + * + * It will eventually echo something. 
+ * + * @param $name string page name + * @param $args mixed arguments to be passed to the page scope + */ +function template( $template, $template_args = [] ) { + extract( $template_args, EXTR_SKIP ); + return require $template; +} + +/** + * Get the template output + * + * It will echo nothing. + * + * @param $name string page name (to be sanitized) + * @param $args mixed arguments to be passed to the page scope + * @see template() + * @return string The template output + */ +function template_content( $name, $args = [] ) { + ob_start(); + template( $name, $args ); + $text = ob_get_contents(); + ob_end_clean(); + return $text; +} diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php index d8ae553..e881cf4 100755 --- a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php @@ -1,400 +1,219 @@ #!/usr/bin/php find( 'a' )->attr( 'href' ); - } ), - new Metadata( 'ID immagine' ), - new Metadata( 'Licenza', function( $p ) { - - // the License is a link, so just extract the URL - - // text displayed after the label (manually stripping the label) - return $p->find( 'a' )->attr( 'href' ); - } ), -]; - -// array of metadatas displayed in the footer in the '.metadati_completi' selector -// basically they are the labels displayed in the footer on every image like this one: -// https://iconoteca.arc.usi.ch/it/inventario/51630 -$METADATA_FOOTER = [ - new Metadata( 'Titolo opera' ), - new Metadata( 'Titolo originale' ), - new Metadata( 'Iscrizione' ), - new Metadata( 'Collezione' ), - new Metadata( 'Data creazione' ), - new Metadata( 'Luogo creazione' ), - new Metadata( 'Nome creatore' ), - new Metadata( 'Descrittori Sbt' ), - new Metadata( 'Descrittori Getty AAT' ), - new Metadata( 'Luogo rappresentato', function( $p ) { - - // take just the text inside the link - 
return $p->find( 'a' )->text(); - } ), - new Metadata( 'Classificazione' ), - new Metadata( 'Tipo materiale' ), - new Metadata( 'Designazione specifica del materiale' ), - new Metadata( 'Supporto originale' ), - new Metadata( 'Materiale del supporto' ), - new Metadata( 'Nome oggetto culturale' ), - new Metadata( 'Colore' ), - new Metadata( 'Polarità' ), - new Metadata( 'Tipo supporto' ), - new Metadata( 'Processo e tecnica' ), - new Metadata( 'Montaggio' ), - new Metadata( 'Orientamento e forma' ), - new Metadata( 'Dimensioni' ), -]; +# Copyright (C) 2020 Valerio Bozzolan +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +// load common files +require 'bootstrap.php'; // metadata by selector $METADATA_BY_SELECTOR = [ '.metadati' => $METADATA_BODY, '.metadati_completi' => $METADATA_FOOTER, ]; // phpQuery1 // https://github.com/phpquery/phpquery $PHPQUERY = __DIR__ . '/phpquery/phpQuery/phpQuery.php'; // no phpQuery no party if( !file_exists( $PHPQUERY ) ) { echo "Please read the README\n"; exit( 5 ); } // load phpQuery require $PHPQUERY; // no command line no party if( !$argv ) { echo "Not in command line?\n"; exit( 1 ); } // no first argument no party $page = $argv[1] ?? 
null; if( !$page ) { echo "Usage:\n {$argv[0]} FILE.html\n"; exit( 2 ); } // no file no party if( !file_exists( $page ) ) { echo "Unexisting file $page\n"; exit( 3 ); } // read the file content $content = file_get_contents( $page ); // no content no party if( !$content ) { echo "No content no party\n"; exit( 4 ); } // parse the document $document = phpQuery::newDocument( $content ); // enter in page content $content = pq( $document )->find( '.page-content' ); // traverse the DOM tree foreach( $content->find( '.row' ) as $row ) { foreach( pq( $row )->find( '.col-md-4' ) as $col ) { // image element $img = pq( $col )->find( 'img' ); // image relative path in the URL $img_path = $img->attr( 'src' ); // no URL no party (wrong elements) if( !$img_path ) { continue; } // absolute image URL $img_url = BASE_URL . '/' . $img_path; // image identifier $img_id = str_replace( INVENTORY_PREFIX_TO_STRIP, '', $img_url ); // it's an integer $img_id = (int) $img_id; // image permalink $img_page_url = sprintf( INVENTORY_URL_FORMAT, $img_id ); // image permalink HTMl content message( "Sucking $img_page_url..." ); $img_page_content = file_get_contents( $img_page_url ); if( !$img_page_content ) { message( "Skip failed download $img_page_url" ); continue; } // parse image permalink page $img_page = pq( phpQuery::newDocument( $img_page_content ) ); // image data read $img_metadata_values = []; // loop all the possible metadatas finding them from the right selector foreach( $METADATA_BY_SELECTOR as $metadata_selector => $possible_metadatas ) { // parse image body metadatas section foreach( $img_page->find( $metadata_selector ) as $img_metadata ) { // traverse all the paragraphs containing metadatas and try to parse foreach( pq( $img_metadata )->find( 'p' ) as $img_metadata_p_raw ) { // paragraph element $img_metadata_p = pq( $img_metadata_p_raw ); // label // it contains 'Titolo originale:' $img_metadata_p_label = $img_metadata_p->find( 'label' ); // label text // e.g. 
'Titolo originale:' $img_metadata_p_label_txt = $img_metadata_p_label->text(); // metadata matching this label $img_metadata_value = find_matching_metadatavalue_from_label( $possible_metadatas, $img_metadata_p_label_txt, $img_metadata_p ); // gotcha? if( $img_metadata_value ) { $img_metadata_values[] = $img_metadata_value; } else { message( "Unknown metadata '$img_metadata_p_label_txt' not found in $metadata_selector" ); } } } } // main image $img_main = $img_page->find( '.zoomviewer img' ); - // low quality image URL + // hight quality image URL $img_hq_url = sprintf( HIGH_QUALITY_IMAGE_URL, $img_id ); + // low quality image URL + $img_lq_url = sprintf( + LOW_QUALITY_IMAGE_URL, + $img_id + ); + // image pathname $img_path = sprintf( IMAGE_DOWNLOAD_NAME, $img_id ); // build a metadata file $img_path_json = "$img_path.json"; $img_data_json = []; foreach( $img_metadata_values as $img_metadata_value ) { message( " $key: $value" ); list( $key, $value ) = $img_metadata_value->getData(); $img_data_json[ $key ] = $value; } // no json write no party if( !file_put_contents( $img_path_json, json_encode( $img_data_json, JSON_PRETTY_PRINT ) ) ) { message( "cannot write $img_path_json" ); } - // eventually download the image and save - if( !file_exists( $img_path ) ) { + foreach( [ $img_hq_url, $img_lq_url ] as $img_url ) { - message( "Fetching $img_hq_url in $img_path..." ); - - // download the image - $img_hq_bin = file_get_contents( $img_hq_url ); - - // save the HQ image or write an error - if( !file_put_contents( $img_path, $img_hq_bin ) ) { - message( "cannot write $img_path" ); - } - } + // eventually download the image and save + if( !file_exists( $img_path ) ) { - // all right - message( "completed $img_id" ); - } + message( "Fetching $img_url in $img_path..." ); -} + // download the image + $img_bin = file_get_contents( $img_url ); -/** - * Metadata - * - * Basically the Titolo opera, Titolo originale etc. 
from: - * https://iconoteca.arc.usi.ch/it/inventario/51630 - */ -class Metadata { - - public $label; - - public $valueAdapter; - - /** - * Constructor - * - * @param string $label Metadata label e.g. 'Titolo opera' - * @param function $value_adapter Optional callable - */ - public function __construct( $label, $value_adapter = null ) { - $this->label = $label; - $this->valueAdapter = $value_adapter; - } + // sometime this is not an image but is a shitty text + // «ERRORE: il livello d'accesso impostato al file non consente di scaricare questa immagine» ASD + if( strlen( $img_bin ) > 1000 ) { - /** - * Get the text of the label - * - * Basically from 'foo' its 'foo:' - * - * @return string - */ - public function getLabel() { - return $this->label . ':'; - } + // save the HQ image or write an error + if( !file_put_contents( $img_path, $img_bin ) ) { + message( "cannot write $img_path" ); + } + } else { - /** - * Check if a label matches the one of this metadata - * - * @return bool - */ - public function matchesLabel( $label ) { - return $this->getLabel() === $label; - } + // WHAAT THE FUUUUUCK IS THIS SHIT + message( "invalid image" ); + } + } - /** - * Create a MetadataValue object from a value - * - * Note that the value will be adapted. - * - * @param mixed $value - * @return Metadatavalue - */ - public function createValue( $value ) { - - // eventually apply the custom value adapter - if( $this->valueAdapter ) { - $user_adapter = $this->valueAdapter; - $value = $user_adapter( $value ); - } else { - // otherwise apply the default value adapter - $value = self::defaultValueAdapter( $value ); } - return new MetadataValue( $this, $value ); - } - - /** - * Default value adapter - * - * Note: as default the value is the paragraph selector. So we strip the label and get the clean data. 
- * - * @param string $img_metadata_p - * @return string - */ - private static function defaultValueAdapter( $img_metadata_p ) { - - // text displayed after the label (manually stripping the label) - $img_metadata_p_text = $img_metadata_p->html(); - - // label - // it contains 'Titolo originale:' - $img_metadata_p_label = $img_metadata_p->find( 'label' ); - - // label text - // e.g. 'Titolo originale:' - $img_metadata_p_label_html = $img_metadata_p_label->html(); - - // complete text of the paragraph stripping its label - $img_metadata_p_text = trim( str_replace( "", '', $img_metadata_p_text ) ); - - return $img_metadata_p_text; - - } -} - -/** - * A Metadata related to a value - */ -class MetadataValue { - - public $metadata; - - public $value; - - public function __construct( Metadata $metadata, $value ) { - $this->metadata = $metadata; - $this->value = $value; - } - - public function getData() { - return [ - $this->metadata->label, - $this->value, - ]; - } -} - -/** - * Find a matching metadata from a label and return a MetadataValue - * - * @param array $metadatas Array of known metadatas - * @param string $label Original label like 'Titolo originale:' - * @param string $value Value related to the matching metadata - * @return MetadataValue|false Matching metadata or false if not found - */ -function find_matching_metadatavalue_from_label( $metadatas, $label, $value ) { - - // find the matching metadata - foreach( $metadatas as $metadata ) { - if( $metadata->matchesLabel( $label ) ) { - return $metadata->createValue( $value ); - } + // all right + message( "completed $img_id" ); } - // no metadata no party - return false; -} - -function message( $message ) { - printf( - "[%s] %s\n", - date( 'Y-m-d H:i:s' ), - $message - ); } diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php new file mode 100644 index 0000000..e6c0f16 --- /dev/null 
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php @@ -0,0 +1,56 @@ +. + +// this is a template to build a generic Commons file description + +?> +=={{int:filedesc}}== +{{Information +|description= + +|date= + +|source= + +|author= + +|permission= +|other versions= +}} + +=={{int:license-header}}== + + +== {{int:metadata}} == +{| class="wikitable" + $value ) { + + // line row + if( !$first ) { + echo "|-\n"; + } + + echo "! $key\n"; + echo "| $value\n"; + + $first = false; + } +?> +|} + +[[Category:Collezione Biblioteca - Iconoteca dell'architettura in Mendrisio, Switzerland]] diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php new file mode 100755 index 0000000..7779101 --- /dev/null +++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php @@ -0,0 +1,139 @@ +#!/usr/bin/php +. + +// autoload framework +require __DIR__ . '/../includes/boz-mw/autoload.php'; + +// require some values +require 'bootstrap.php'; + +// load configuration file or create one +cli\ConfigWizard::requireOrCreate( __DIR__ . '/../config.php' ); + +// load Wikimedia Commons +$commons = \wm\Commons::instance(); + +// login in Commons +$commons->login(); + +// input directory +$dir = $argv[1] ?? null; +$template = $argv[2] ?? null; + +// no dir no party +if( !$dir || !$template ) { + printf( "Usage:\n %s DIRECTORY TEMPLATE\n", $argv[0] ); + exit( 1 ); +} + +// scan the directory +foreach( glob( "$dir/*.jpg" ) as $file ) { + + // no data no party + $file_data = @file_get_contents( "$file.json" ); + $file_data = json_decode( $file_data ); + if( !$file_data ) { + echo "skip $file missing data\n"; + continue; + } + + // check available metadata + $img_id = $file_data->{"ID immagine"}; + $title = $file_data->{"Titolo opera"}; + $title_orig = $file_data->{"Titolo originale"} ?? null; + $collection = $file_data->{"Collezione"} ?? null; + $license = $file_data->Licenza ?? 
null; + $type = $file_data->{"Tipologia di risorsa"} ?? "Fotografia"; + $size = $file_data->{"Dimensioni"} ?? ''; + $material = $file_data->{"Materiale del supporto"} ?? ''; + $author_name = $file_data->{"Nome creatore"} ?? ''; + $author = $file_data->{"Creatore"} ?? ( $author_name ?: "ignoto" ); + $date = $file_data->{"Data"} ?? $file_data->{"Data creazione"} ?? null; + + // check license + $license_templates = ''; + if( $license === 'https://creativecommons.org/licenses/by-sa/4.0/deed.it' ) { + $license_templates .= "{{Cc-by-sa-4.0}}"; + } else { + throw new Exception( "unknown license $license" ); + } + + // source URL + $source_url = null; + if( $img_id ) { + $source_url = sprintf( INVENTORY_URL_FORMAT, $img_id ); + } + + // build a smart description + $description = []; +// if( $type ) { +// if( $material ) { +// $description[] = "$type ($material)"; +// } else { +// $description[] = $type; +// } +// } + $description[] = $title ?? $title_orig; + $description = implode( '. ', $description ); + $description = "{{it|$description}}"; + + // build the page content + $page_content = template_content( $template, [ + 'DESCRIPTION' => $description, + 'LICENSE_TEMPLATES' => $license_templates, + 'DATE' => $date, + 'AUTHOR' => $author, + 'METADATA' => $file_data, + 'SOURCE' => $source_url, + ] ); + + // print a message + $filename = "$title.jpg"; + printf( + "try to upload https://commons.wikimedia.org/wiki/File:%s (%s)\n", + rawurlencode( $filename ), + $img_id + ); + + // upload the image under the same filename announced above + try { + + $response = $commons->upload( [ + 'comment' => "Bot: import related to [[w:it:Wikipedia:Raduni/Biblioteca dell'Accademia di Mendrisio 4 ottobre 2020]]", + 'text' => $page_content, + 'filename' => $filename, + \network\ContentDisposition::createFromNameURLType( 'file', $file, 'image/jpeg' ), + ] ); + + if( $response->upload->result === 'Success' ) { + echo "Done.\n"; + } else { + // what the fuuck? 
+ print_r( $response ); + } + + } catch( Exception $e ) { + printf( "%s: %s\n", get_class( $e ), $e->getMessage() ); + file_put_contents( 'log.out.err', $e->getMessage() . "\n", FILE_APPEND ); + } + + // append the attempted upload to the log (also reached after a caught exception) + file_put_contents( 'log.out', "$img_id;$filename\n", FILE_APPEND ); + + // wait to do not use the bot flag + sleep( 60 ); +} diff --git a/includes/boz-mw b/includes/boz-mw index 229f61d..2a1aa59 160000 --- a/includes/boz-mw +++ b/includes/boz-mw @@ -1 +1 @@ -Subproject commit 229f61d15c93bf2889363235f6a9b2909f39aeca +Subproject commit 2a1aa59906856ca06f860c50a42544e81c253029