diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md
index c499e42..bd6572e 100644
--- a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/README.md
@@ -1,27 +1,49 @@
# Importer of multimedia files from https://iconoteca.arc.usi.ch/
## Description
Welcome to the importer of multimedia files from https://iconoteca.arc.usi.ch/.
For more information about the consensus:
https://it.wikipedia.org/wiki/Wikipedia:Raduni/Biblioteca_dell%27Accademia_di_Mendrisio_4_ottobre_2020
## Installation
From this directory:
```
git clone https://github.com/phpquery/phpquery
```
+## Usage ##
+
+First download locally one of their collections:
+
+```
+wget https://iconoteca..../collection-asd.html
+```
+
+Then you can examine that HTML page and bulk-download the available images from it:
+
+```
+./parse-html-and-import.php collection-asd.html
+```
+
+Then you can bulk-upload your files by selecting the directory with the images/metadata and a template:
+
+```
+./upload.php images/ template/collezione-biblioteca.php
+```
+
+Happy hacking!
+
## License
Copyright (C) 2020 Valerio Bozzolan
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php
new file mode 100644
index 0000000..887b837
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/bootstrap.php
@@ -0,0 +1,96 @@
+.
+
+// require two dummy classes
+require 'include/functions.php';
+require 'include/class-Metadata.php';
+require 'include/class-MetadataValue.php';
+
+// base URL to be scraped (no trailing slash: every other constant appends its own path)
+define( 'BASE_URL', 'https://iconoteca.arc.usi.ch' );
+
+// inventory prefix to be stripped out to read the image ID (note the double slash! asd)
+// the scraper builds the thumbnail URL as BASE_URL . '/' . <img src>, hence the '//'
+define( 'INVENTORY_PREFIX_TO_STRIP', BASE_URL . '//thumb.php?inventario=' );
+
+// URL to the single photo from the image ID (DOI)
+// sprintf() format: %d is the numeric image ID
+define( 'INVENTORY_URL_FORMAT', BASE_URL . '/it/inventario/%d' );
+
+// URL of the high quality image
+// sprintf() format: %d is the numeric image ID
+define( 'HIGH_QUALITY_IMAGE_URL', BASE_URL . '/image-viewer.php?inventario=%d' );
+
+// URL of the low quality image
+// sprintf() format: %d is the numeric image ID
+define( 'LOW_QUALITY_IMAGE_URL', BASE_URL . '/image_permission_show.php?inventario=%d' );
+
+// image download name (with image ID)
+// relative pathname; the metadata is saved next to it with an additional '.json' suffix
+define( 'IMAGE_DOWNLOAD_NAME', 'images/%d.jpg' );
+
+// array of metadatas displayed in the body in the '.metadati' selector
+// basically they are the labels displayed in the body on every image like this one:
+// https://iconoteca.arc.usi.ch/it/inventario/51630
+//
+// A Metadata built without a second argument uses the default value adapter of the
+// Metadata class; the optional closure is a custom adapter that receives
+// the phpQuery paragraph of the metadata and returns the value to be stored.
+$METADATA_BODY = [
+ new Metadata( 'Luogo rappresentato' ),
+ new Metadata( 'Tipologia di risorsa' ),
+ new Metadata( 'Creatore' ),
+ new Metadata( 'Data' ),
+ new Metadata( 'DOI', function ( $p ) {
+
+ // the DOI is a link, so just extract the URL
+
+ // take the 'href' attribute of the link inside the paragraph
+ return $p->find( 'a' )->attr( 'href' );
+ } ),
+ new Metadata( 'ID immagine' ),
+ new Metadata( 'Licenza', function( $p ) {
+
+ // the License is a link, so just extract the URL
+
+ // take the 'href' attribute of the link inside the paragraph
+ return $p->find( 'a' )->attr( 'href' );
+ } ),
+];
+
+// array of metadatas displayed in the footer in the '.metadati_completi' selector
+// basically they are the labels displayed in the footer on every image like this one:
+// https://iconoteca.arc.usi.ch/it/inventario/51630
+//
+// Labels are written without the trailing colon: Metadata::getLabel() appends it
+// before the comparison with the label scraped from the page.
+$METADATA_FOOTER = [
+ new Metadata( 'Titolo opera' ),
+ new Metadata( 'Titolo originale' ),
+ new Metadata( 'Iscrizione' ),
+ new Metadata( 'Collezione' ),
+ new Metadata( 'Data creazione' ),
+ new Metadata( 'Luogo creazione' ),
+ new Metadata( 'Nome creatore' ),
+ new Metadata( 'Descrittori Sbt' ),
+ new Metadata( 'Descrittori Getty AAT' ),
+ // NOTE: this label is also listed in $METADATA_BODY, where the default adapter is used
+ new Metadata( 'Luogo rappresentato', function( $p ) {
+
+ // take just the text inside the link
+ return $p->find( 'a' )->text();
+ } ),
+ new Metadata( 'Classificazione' ),
+ new Metadata( 'Tipo materiale' ),
+ new Metadata( 'Designazione specifica del materiale' ),
+ new Metadata( 'Supporto originale' ),
+ new Metadata( 'Materiale del supporto' ),
+ new Metadata( 'Nome oggetto culturale' ),
+ new Metadata( 'Colore' ),
+ new Metadata( 'Polarità' ),
+ new Metadata( 'Tipo supporto' ),
+ new Metadata( 'Processo e tecnica' ),
+ new Metadata( 'Montaggio' ),
+ new Metadata( 'Orientamento e forma' ),
+ new Metadata( 'Dimensioni' ),
+];
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php
new file mode 100644
index 0000000..26111c7
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-Metadata.php
@@ -0,0 +1,112 @@
+.
+
+/**
+ * Metadata
+ *
+ * Basically the Titolo opera, Titolo originale etc. from:
+ *   https://iconoteca.arc.usi.ch/it/inventario/51630
+ *
+ * A Metadata knows its label and how to turn the scraped paragraph
+ * into a clean value (see createValue()).
+ */
+class Metadata {
+
+	/**
+	 * Label without the trailing colon, e.g. 'Titolo opera'
+	 *
+	 * @var string
+	 */
+	public $label;
+
+	/**
+	 * Optional custom adapter: receives the raw value and returns the adapted one
+	 *
+	 * @var callable|null
+	 */
+	public $valueAdapter;
+
+	/**
+	 * Constructor
+	 *
+	 * @param string        $label         Metadata label e.g. 'Titolo opera'
+	 * @param callable|null $value_adapter Optional callable replacing the default value adapter
+	 */
+	public function __construct( $label, $value_adapter = null ) {
+		$this->label        = $label;
+		$this->valueAdapter = $value_adapter;
+	}
+
+	/**
+	 * Get the text of the label as it is displayed in the page
+	 *
+	 * Basically from 'foo' its 'foo:'
+	 *
+	 * @return string
+	 */
+	public function getLabel() {
+		return $this->label . ':';
+	}
+
+	/**
+	 * Check if a label matches the one of this metadata
+	 *
+	 * @param  string $label Label as displayed in the page, e.g. 'Titolo originale:'
+	 * @return bool
+	 */
+	public function matchesLabel( $label ) {
+		return $this->getLabel() === $label;
+	}
+
+	/**
+	 * Create a MetadataValue object from a value
+	 *
+	 * Note that the value will be adapted: first by the custom adapter (or by the
+	 * default one when no custom adapter was given), then HTML links are
+	 * converted to wikitext links.
+	 *
+	 * @param  mixed $value Raw value (the callers in this project pass the phpQuery paragraph)
+	 * @return MetadataValue
+	 */
+	public function createValue( $value ) {
+
+		// eventually apply the custom value adapter
+		if( $this->valueAdapter ) {
+			$user_adapter = $this->valueAdapter;
+			$value = $user_adapter( $value );
+		} else {
+			// otherwise apply the default value adapter
+			$value = self::defaultValueAdapter( $value );
+		}
+
+		// an adapter can return null (e.g. when the expected link is missing):
+		// normalize to a string since the link converter works with a regex
+		$value = (string) $value;
+
+		// eventually fix HTML links
+		$value = html_link_2_wikitext( $value );
+
+		return new MetadataValue( $this, $value );
+	}
+
+	/**
+	 * Default value adapter
+	 *
+	 * Note: as default the value is the paragraph selector. So we strip the label and get the clean data.
+	 *
+	 * @param  object $img_metadata_p Paragraph element exposing an html() method (phpQuery object)
+	 * @return string Inner HTML of the paragraph without its <label> element, trimmed
+	 */
+	private static function defaultValueAdapter( $img_metadata_p ) {
+
+		// inner HTML of the paragraph
+		// e.g. '<label>Titolo originale:</label> Something'
+		$img_metadata_p_html = (string) $img_metadata_p->html();
+
+		// strip the whole <label> element (whatever its attributes are)
+		// so that only the text displayed after the label remains
+		$img_metadata_p_text = preg_replace( '@<label\b[^>]*>.*?</label>@s', '', $img_metadata_p_html );
+
+		// preg_replace() returns null only on a regex engine error: keep the raw HTML in that case
+		if( $img_metadata_p_text === null ) {
+			$img_metadata_p_text = $img_metadata_p_html;
+		}
+
+		return trim( $img_metadata_p_text );
+	}
+}
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php
new file mode 100644
index 0000000..134c2c4
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/class-MetadataValue.php
@@ -0,0 +1,37 @@
+.
+
+/**
+ * Couple made of a Metadata and the value scraped for it
+ */
+class MetadataValue {
+
+	/**
+	 * The metadata this value belongs to
+	 *
+	 * @var Metadata
+	 */
+	public $metadata;
+
+	/**
+	 * The (already adapted) value
+	 *
+	 * @var mixed
+	 */
+	public $value;
+
+	/**
+	 * Constructor
+	 *
+	 * @param Metadata $metadata Metadata describing the value
+	 * @param mixed    $value    Value related to the metadata
+	 */
+	public function __construct( Metadata $metadata, $value ) {
+		$this->value    = $value;
+		$this->metadata = $metadata;
+	}
+
+	/**
+	 * Get the label (without colon) and the value, in this order
+	 *
+	 * @return array Two elements: label and value
+	 */
+	public function getData() {
+		$label = $this->metadata->label;
+		$value = $this->value;
+		return [ $label, $value ];
+	}
+}
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php
new file mode 100644
index 0000000..92f4420
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/include/functions.php
@@ -0,0 +1,101 @@
+.
+
+/**
+ * Look for the known metadata having a certain label and wrap the value with it
+ *
+ * The first metadata matching the label wins.
+ *
+ * @param array  $metadatas Array of known metadatas
+ * @param string $label     Original label like 'Titolo originale:'
+ * @param mixed  $value     Value related to the matching metadata (it's passed to Metadata#createValue())
+ * @return MetadataValue|false Value created by the matching metadata or false if no metadata matches
+ */
+function find_matching_metadatavalue_from_label( $metadatas, $label, $value ) {
+
+	foreach( $metadatas as $candidate ) {
+
+		// not the one we are looking for
+		if( !$candidate->matchesLabel( $label ) ) {
+			continue;
+		}
+
+		// gotcha
+		return $candidate->createValue( $value );
+	}
+
+	// no metadata no party
+	return false;
+}
+
+/**
+ * Print a message in standard output, prefixed by the current date and time
+ *
+ * The line has the format "[Y-m-d H:i:s] message" and ends with a newline.
+ *
+ * @param string $message
+ */
+function message( $message ) {
+	$now = date( 'Y-m-d H:i:s' );
+	echo "[$now] $message\n";
+}
+
+/**
+ * Convert every HTML link of a text to a wikitext external link
+ *
+ * For example, from this:
+ *   <a href="/it/foo">Sculture</a>
+ * to this:
+ *   [https://iconoteca.arc.usi.ch/it/foo Sculture]
+ *
+ * URLs starting with a slash are made absolute prepending BASE_URL.
+ * The text without links is returned unchanged.
+ *
+ * @param  string $txt Text, eventually with HTML links
+ * @return string Text with wikitext links
+ */
+function html_link_2_wikitext( $txt ) {
+
+	// 1st group: the content of the 'href' attribute
+	// 2nd group: the content of the link
+	$pattern = '@<a\b[^>]*?\bhref="([^"]*)"[^>]*>(.*?)</a>@s';
+
+	return preg_replace_callback( $pattern, function( $matches ) {
+
+		// eventually make the URL absolute
+		// (strpos() is also safe with an empty href, differently from $url[0])
+		$url = $matches[1];
+		if( strpos( $url, '/' ) === 0 ) {
+			$url = BASE_URL . $url;
+		}
+
+		return sprintf(
+			'[%s %s]',
+			$url,
+			$matches[2]
+		);
+	}, (string) $txt );
+}
+
+/**
+ * Require a template file, exposing some arguments as local variables
+ *
+ * It will eventually echo something.
+ *
+ * Every key of $template_args becomes a variable in the scope of the required
+ * file. Because of EXTR_SKIP, a key does not overwrite a variable that already
+ * exists in this scope (so the keys 'template' and 'template_args' are ignored).
+ *
+ * @param string $template      Pathname of the PHP file to be required (used as-is)
+ * @param array  $template_args Arguments to be passed to the template scope
+ * @return mixed Whatever the `require` of the file returns
+ */
+function template( $template, $template_args = [] ) {
+ extract( $template_args, EXTR_SKIP );
+ return require $template;
+}
+
+/**
+ * Get the template output
+ *
+ * It will echo nothing: the output of the template is captured
+ * with an output buffer and returned.
+ *
+ * @param string $name Pathname of the template (passed as-is to template())
+ * @param array  $args Arguments to be passed to the template scope
+ * @see template()
+ * @return string The template output
+ */
+function template_content( $name, $args = [] ) {
+
+	ob_start();
+
+	try {
+		template( $name, $args );
+
+		// the returned value is read before the 'finally' discards the buffer
+		return ob_get_contents();
+	} finally {
+		// always close the buffer opened above, also when the template throws,
+		// otherwise every following output would be silently captured
+		ob_end_clean();
+	}
+}
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php
index d8ae553..e881cf4 100755
--- a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/parse-html-and-import.php
@@ -1,400 +1,219 @@
#!/usr/bin/php
find( 'a' )->attr( 'href' );
- } ),
- new Metadata( 'ID immagine' ),
- new Metadata( 'Licenza', function( $p ) {
-
- // the License is a link, so just extract the URL
-
- // text displayed after the label (manually stripping the label)
- return $p->find( 'a' )->attr( 'href' );
- } ),
-];
-
-// array of metadatas displayed in the footer in the '.metadati_completi' selector
-// basically they are the labels displayed in the footer on every image like this one:
-// https://iconoteca.arc.usi.ch/it/inventario/51630
-$METADATA_FOOTER = [
- new Metadata( 'Titolo opera' ),
- new Metadata( 'Titolo originale' ),
- new Metadata( 'Iscrizione' ),
- new Metadata( 'Collezione' ),
- new Metadata( 'Data creazione' ),
- new Metadata( 'Luogo creazione' ),
- new Metadata( 'Nome creatore' ),
- new Metadata( 'Descrittori Sbt' ),
- new Metadata( 'Descrittori Getty AAT' ),
- new Metadata( 'Luogo rappresentato', function( $p ) {
-
- // take just the text inside the link
- return $p->find( 'a' )->text();
- } ),
- new Metadata( 'Classificazione' ),
- new Metadata( 'Tipo materiale' ),
- new Metadata( 'Designazione specifica del materiale' ),
- new Metadata( 'Supporto originale' ),
- new Metadata( 'Materiale del supporto' ),
- new Metadata( 'Nome oggetto culturale' ),
- new Metadata( 'Colore' ),
- new Metadata( 'Polarità' ),
- new Metadata( 'Tipo supporto' ),
- new Metadata( 'Processo e tecnica' ),
- new Metadata( 'Montaggio' ),
- new Metadata( 'Orientamento e forma' ),
- new Metadata( 'Dimensioni' ),
-];
+# Copyright (C) 2020 Valerio Bozzolan
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+// load common files
+require 'bootstrap.php';
// metadata by selector
$METADATA_BY_SELECTOR = [
'.metadati' => $METADATA_BODY,
'.metadati_completi' => $METADATA_FOOTER,
];
// phpQuery1
// https://github.com/phpquery/phpquery
$PHPQUERY = __DIR__ . '/phpquery/phpQuery/phpQuery.php';
// no phpQuery no party
if( !file_exists( $PHPQUERY ) ) {
echo "Please read the README\n";
exit( 5 );
}
// load phpQuery
require $PHPQUERY;
// no command line no party
if( !$argv ) {
echo "Not in command line?\n";
exit( 1 );
}
// no first argument no party
$page = $argv[1] ?? null;
if( !$page ) {
echo "Usage:\n {$argv[0]} FILE.html\n";
exit( 2 );
}
// no file no party
if( !file_exists( $page ) ) {
echo "Unexisting file $page\n";
exit( 3 );
}
// read the file content
$content = file_get_contents( $page );
// no content no party
if( !$content ) {
echo "No content no party\n";
exit( 4 );
}
// parse the document
$document = phpQuery::newDocument( $content );
// enter in page content
$content = pq( $document )->find( '.page-content' );
// traverse the DOM tree
foreach( $content->find( '.row' ) as $row ) {
foreach( pq( $row )->find( '.col-md-4' ) as $col ) {
// image element
$img = pq( $col )->find( 'img' );
// image relative path in the URL
$img_path = $img->attr( 'src' );
// no URL no party (wrong elements)
if( !$img_path ) {
continue;
}
// absolute image URL
$img_url = BASE_URL . '/' . $img_path;
// image identifier
$img_id = str_replace( INVENTORY_PREFIX_TO_STRIP, '', $img_url );
// it's an integer
$img_id = (int) $img_id;
// image permalink
$img_page_url = sprintf(
INVENTORY_URL_FORMAT,
$img_id
);
// image permalink HTMl content
message( "Sucking $img_page_url..." );
$img_page_content = file_get_contents( $img_page_url );
if( !$img_page_content ) {
message( "Skip failed download $img_page_url" );
continue;
}
// parse image permalink page
$img_page = pq( phpQuery::newDocument( $img_page_content ) );
// image data read
$img_metadata_values = [];
// loop all the possible metadatas finding them from the right selector
foreach( $METADATA_BY_SELECTOR as $metadata_selector => $possible_metadatas ) {
// parse image body metadatas section
foreach( $img_page->find( $metadata_selector ) as $img_metadata ) {
// traverse all the paragraphs containing metadatas and try to parse
foreach( pq( $img_metadata )->find( 'p' ) as $img_metadata_p_raw ) {
// paragraph element
$img_metadata_p = pq( $img_metadata_p_raw );
// label
// it contains 'Titolo originale:'
$img_metadata_p_label = $img_metadata_p->find( 'label' );
// label text
// e.g. 'Titolo originale:'
$img_metadata_p_label_txt = $img_metadata_p_label->text();
// metadata matching this label
$img_metadata_value = find_matching_metadatavalue_from_label( $possible_metadatas, $img_metadata_p_label_txt, $img_metadata_p );
// gotcha?
if( $img_metadata_value ) {
$img_metadata_values[] = $img_metadata_value;
} else {
message( "Unknown metadata '$img_metadata_p_label_txt' not found in $metadata_selector" );
}
}
}
}
// main image
$img_main = $img_page->find( '.zoomviewer img' );
- // low quality image URL
+ // high quality image URL
$img_hq_url = sprintf(
HIGH_QUALITY_IMAGE_URL,
$img_id
);
+ // low quality image URL
+ $img_lq_url = sprintf(
+ LOW_QUALITY_IMAGE_URL,
+ $img_id
+ );
+
// image pathname
$img_path = sprintf(
IMAGE_DOWNLOAD_NAME,
$img_id
);
// build a metadata file
$img_path_json = "$img_path.json";
$img_data_json = [];
foreach( $img_metadata_values as $img_metadata_value ) {

	// read the label and its value BEFORE printing them
	// (printing first would show undefined variables for the first metadata
	// and the previous pair for every following metadata)
	list( $key, $value ) = $img_metadata_value->getData();
	message( " $key: $value" );

	// the JSON is indexed by label
	$img_data_json[ $key ] = $value;
}
// no json write no party
if( !file_put_contents( $img_path_json, json_encode( $img_data_json, JSON_PRETTY_PRINT ) ) ) {
message( "cannot write $img_path_json" );
}
- // eventually download the image and save
- if( !file_exists( $img_path ) ) {
+ foreach( [ $img_hq_url, $img_lq_url ] as $img_url ) {
- message( "Fetching $img_hq_url in $img_path..." );
-
- // download the image
- $img_hq_bin = file_get_contents( $img_hq_url );
-
- // save the HQ image or write an error
- if( !file_put_contents( $img_path, $img_hq_bin ) ) {
- message( "cannot write $img_path" );
- }
- }
+ // eventually download the image and save
+ if( !file_exists( $img_path ) ) {
- // all right
- message( "completed $img_id" );
- }
+ message( "Fetching $img_url in $img_path..." );
-}
+ // download the image
+ $img_bin = file_get_contents( $img_url );
-/**
- * Metadata
- *
- * Basically the Titolo opera, Titolo originale etc. from:
- * https://iconoteca.arc.usi.ch/it/inventario/51630
- */
-class Metadata {
-
- public $label;
-
- public $valueAdapter;
-
- /**
- * Constructor
- *
- * @param string $label Metadata label e.g. 'Titolo opera'
- * @param function $value_adapter Optional callable
- */
- public function __construct( $label, $value_adapter = null ) {
- $this->label = $label;
- $this->valueAdapter = $value_adapter;
- }
+ // sometimes the response is not an image but just a short error text like:
+ // «ERRORE: il livello d'accesso impostato al file non consente di scaricare questa immagine» ASD
+ if( strlen( $img_bin ) > 1000 ) {
- /**
- * Get the text of the label
- *
- * Basically from 'foo' its 'foo:'
- *
- * @return string
- */
- public function getLabel() {
- return $this->label . ':';
- }
+ // save the HQ image or write an error
+ if( !file_put_contents( $img_path, $img_bin ) ) {
+ message( "cannot write $img_path" );
+ }
+ } else {
- /**
- * Check if a label matches the one of this metadata
- *
- * @return bool
- */
- public function matchesLabel( $label ) {
- return $this->getLabel() === $label;
- }
+ // the response is too small to be a real image: do not save it
+ message( "invalid image" );
+ }
+ }
- /**
- * Create a MetadataValue object from a value
- *
- * Note that the value will be adapted.
- *
- * @param mixed $value
- * @return Metadatavalue
- */
- public function createValue( $value ) {
-
- // eventually apply the custom value adapter
- if( $this->valueAdapter ) {
- $user_adapter = $this->valueAdapter;
- $value = $user_adapter( $value );
- } else {
- // otherwise apply the default value adapter
- $value = self::defaultValueAdapter( $value );
}
- return new MetadataValue( $this, $value );
- }
-
- /**
- * Default value adapter
- *
- * Note: as default the value is the paragraph selector. So we strip the label and get the clean data.
- *
- * @param string $img_metadata_p
- * @return string
- */
- private static function defaultValueAdapter( $img_metadata_p ) {
-
- // text displayed after the label (manually stripping the label)
- $img_metadata_p_text = $img_metadata_p->html();
-
- // label
- // it contains 'Titolo originale:'
- $img_metadata_p_label = $img_metadata_p->find( 'label' );
-
- // label text
- // e.g. 'Titolo originale:'
- $img_metadata_p_label_html = $img_metadata_p_label->html();
-
- // complete text of the paragraph stripping its label
- $img_metadata_p_text = trim( str_replace( "", '', $img_metadata_p_text ) );
-
- return $img_metadata_p_text;
-
- }
-}
-
-/**
- * A Metadata related to a value
- */
-class MetadataValue {
-
- public $metadata;
-
- public $value;
-
- public function __construct( Metadata $metadata, $value ) {
- $this->metadata = $metadata;
- $this->value = $value;
- }
-
- public function getData() {
- return [
- $this->metadata->label,
- $this->value,
- ];
- }
-}
-
-/**
- * Find a matching metadata from a label and return a MetadataValue
- *
- * @param array $metadatas Array of known metadatas
- * @param string $label Original label like 'Titolo originale:'
- * @param string $value Value related to the matching metadata
- * @return MetadataValue|false Matching metadata or false if not found
- */
-function find_matching_metadatavalue_from_label( $metadatas, $label, $value ) {
-
- // find the matching metadata
- foreach( $metadatas as $metadata ) {
- if( $metadata->matchesLabel( $label ) ) {
- return $metadata->createValue( $value );
- }
+ // all right
+ message( "completed $img_id" );
}
- // no metadata no party
- return false;
-}
-
-function message( $message ) {
- printf(
- "[%s] %s\n",
- date( 'Y-m-d H:i:s' ),
- $message
- );
}
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php
new file mode 100644
index 0000000..e6c0f16
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/template/collezione-biblioteca.php
@@ -0,0 +1,56 @@
+.
+
+// this is a template to build a generic Commons file description
+
+?>
+=={{int:filedesc}}==
+{{Information
+|description=<?= $DESCRIPTION ?>
+
+|date=<?= $DATE ?>
+
+|source=<?= $SOURCE ?>
+
+|author=<?= $AUTHOR ?>
+
+|permission=
+|other versions=
+}}
+
+=={{int:license-header}}==
+<?= $LICENSE_TEMPLATES ?>
+
+== {{int:metadata}} ==
+{| class="wikitable"
+<?php
+	// NOTE: PHP drops the single newline that directly follows a closing tag,
+	// that's why every echo tag above is followed by an empty line
+
+	// one table row for each metadata, as header cell (label) and data cell (value)
+	$first = true;
+	foreach( $METADATA as $key => $value ) {
+
+		// line row (the separator is needed just between two rows)
+		if( !$first ) {
+			echo "|-\n";
+		}
+
+		echo "! $key\n";
+		echo "| $value\n";
+
+		$first = false;
+	}
+?>
+|}
+
+[[Category:Collezione Biblioteca - Iconoteca dell'architettura in Mendrisio, Switzerland]]
diff --git a/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php
new file mode 100755
index 0000000..7779101
--- /dev/null
+++ b/2020-10-04-mendrisio-sucker-iconoteca.arc.usi.ch/upload.php
@@ -0,0 +1,139 @@
+#!/usr/bin/php
+.
+
+// autoload framework
+require __DIR__ . '/../includes/boz-mw/autoload.php';
+
+// require some values
+require 'bootstrap.php';
+
+// input directory and page template
+$dir      = $argv[1] ?? null;
+$template = $argv[2] ?? null;
+
+// no dir no party
+// (checked before touching configuration and network: a wrong invocation must not require a login)
+if( !$dir || !$template ) {
+	printf( "Usage:\n %s DIRECTORY TEMPLATE\n", $argv[0] );
+	exit( 1 );
+}
+
+// no existing directory no party
+if( !is_dir( $dir ) ) {
+	echo "Unexisting directory $dir\n";
+	exit( 2 );
+}
+
+// no existing template no party (it will be required for every file)
+if( !is_file( $template ) ) {
+	echo "Unexisting file $template\n";
+	exit( 3 );
+}
+
+// load configuration file or create one
+cli\ConfigWizard::requireOrCreate( __DIR__ . '/../config.php' );
+
+// load Wikimedia Commons
+$commons = \wm\Commons::instance();
+
+// login in Commons
+$commons->login();
+
+// scan the directory
+// every image is expected to have its metadata in a sibling file with an additional '.json' suffix
+// (glob() returns false on error: in that case there is nothing to upload)
+foreach( glob( "$dir/*.jpg" ) ?: [] as $file ) {
+
+	// no data no party
+	$file_json = "$file.json";
+	$file_data = is_readable( $file_json )
+		? json_decode( file_get_contents( $file_json ) )
+		: null;
+	if( !$file_data ) {
+		echo "skip $file missing data\n";
+		continue;
+	}
+
+	// check available metadata
+	// (every metadata is optional in the JSON, so always provide a default)
+	$img_id     = $file_data->{"ID immagine"}            ?? null;
+	$title_orig = $file_data->{"Titolo originale"}       ?? null;
+	$title      = $file_data->{"Titolo opera"}           ?? $title_orig;
+	$license    = $file_data->Licenza                    ?? null;
+	$type       = $file_data->{"Tipologia di risorsa"}   ?? "Fotografia";
+	$material   = $file_data->{"Materiale del supporto"} ?? '';
+	$author     = $file_data->{"Creatore"}               ?? $file_data->{"Nome creatore"} ?? "ignoto";
+	$date       = $file_data->{"Data"}                   ?? $file_data->{"Data creazione"} ?? null;
+
+	// no title no party: the title is both the description and the name of the uploaded file
+	if( !$title ) {
+		echo "skip $file missing title\n";
+		continue;
+	}
+
+	// check license
+	$license_templates = '';
+	if( $license === 'https://creativecommons.org/licenses/by-sa/4.0/deed.it' ) {
+		$license_templates .= "{{Cc-by-sa-4.0}}";
+	} else {
+		throw new Exception( "unknown license $license" );
+	}
+
+	// source URL
+	$source_url = null;
+	if( $img_id ) {
+		$source_url = sprintf( INVENTORY_URL_FORMAT, $img_id );
+	}
+
+	// build a smart description
+	// ($type and $material are read above just for this disabled part)
+	$description = [];
+//	if( $type ) {
+//		if( $material ) {
+//			$description[] = "$type ($material)";
+//		} else {
+//			$description[] = $type;
+//		}
+//	}
+	$description[] = $title;
+	$description = implode( '. ', $description );
+	$description = "{{it|$description}}";
+
+	// build the page content
+	$page_content = template_content( $template, [
+		'DESCRIPTION'       => $description,
+		'LICENSE_TEMPLATES' => $license_templates,
+		'DATE'              => $date,
+		'AUTHOR'            => $author,
+		'METADATA'          => $file_data,
+		'SOURCE'            => $source_url,
+	] );
+
+	// print a message
+	$filename = "$title.jpg";
+	printf(
+		"try to upload https://commons.wikimedia.org/wiki/File:%s (%s)\n",
+		rawurlencode( $filename ),
+		$img_id
+	);
+
+	// upload this image
+	try {
+
+		$response = $commons->upload( [
+			'comment'  => "Bot: import related to [[w:it:Wikipedia:Raduni/Biblioteca dell'Accademia di Mendrisio 4 ottobre 2020]]",
+			'text'     => $page_content,
+			'filename' => $filename,
+			// 'image/jpeg' is the registered media type of JPEG files ('image/jpg' does not exist)
+			\network\ContentDisposition::createFromNameURLType( 'file', $file, 'image/jpeg' ),
+		] );
+
+		if( $response->upload->result === 'Success' ) {
+			echo "Done.\n";
+		} else {
+			// unexpected response: show it to the operator
+			print_r( $response );
+		}
+
+	} catch( Exception $e ) {
+
+		// a failed upload must not stop the whole import: report it and go on
+		// (one line for each error, otherwise the appended messages run together)
+		printf( "%s: %s\n", get_class( $e ), $e->getMessage() );
+		file_put_contents( 'log.out.err', $e->getMessage() . "\n", FILE_APPEND );
+	}
+
+	// remember every attempted file in the log
+	file_put_contents( 'log.out', "$img_id;$filename\n", FILE_APPEND );
+
+	// wait to do not use the bot flag
+	sleep( 60 );
+}
diff --git a/includes/boz-mw b/includes/boz-mw
index 229f61d..2a1aa59 160000
--- a/includes/boz-mw
+++ b/includes/boz-mw
@@ -1 +1 @@
-Subproject commit 229f61d15c93bf2889363235f6a9b2909f39aeca
+Subproject commit 2a1aa59906856ca06f860c50a42544e81c253029