Page MenuHomeGitPull.it

No OneTemporary

Size
10 KB
Referenced Files
None
Subscribers
None
diff --git a/2018-02-italian-parks/includes/functions.php b/2018-02-italian-parks/includes/functions.php
index c4e32f1..f29c800 100644
--- a/2018-02-italian-parks/includes/functions.php
+++ b/2018-02-italian-parks/includes/functions.php
@@ -1,37 +1,96 @@
<?php
# Copyright (C) 2018 Valerio Bozzolan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+/**
+ * @param $plate plate codes e.g. 'TO/MI'
+ * @return string Wikidata entity Q-IDs e.g. [ 'Q495', 'Q490' ]
+ */
+function plate_2_wikidataIDs( $plates ) {
+ $plates = explode( '/', $plates );
+ array_walk( $plates, function ( & $value ) {
+ $value = plate_2_wikidataID( $value );
+ } );
+ return $plates;
+}
+
+/**
+ * @param $plate plate code e.g. 'TO'
+ * @return array Wikidata entity Q-ID e.g. 'Q495'
+ */
function plate_2_wikidataID( $plate ) {
- static $plates;
+ // read the plate codes once
+ static $plates;
if( ! $plates ) {
$data = file_get_contents( 'data/italian-license-plate-codes.csv' );
$rows = explode( "\n", $data );
array_shift( $rows ); // skip head
-
$plates = [];
foreach( $rows as $row ) {
- $row = explode( ',', $row, 3 );
- $plates[ $row[0] ] = str_replace( 'http://www.wikidata.org/entity/', '', $row[1] );
+ $data = explode( ',', $row, 3 );
+ if( 3 === count( $data ) ) {
+ list( $code, $wikidata_url, $label ) = $data;
+ $plates[ $code ] = str_replace( 'http://www.wikidata.org/entity/', '', $wikidata_url );
+ }
}
}
if( ! isset( $plates[ $plate ] ) ) {
throw new Exception('missing plate');
}
return $plates[ $plate ];
}
+
+/**
+ * @param $coordinates string e.g. '46.15876°N 9.893705°E'
+ * @return array north/est pair
+ */
+function filter_coordinates( $coordinates ) {
+ $coordinates = str_replace( [ 'N', 'E', '°'], '' , $coordinates ); // damn syntax
+ $coordinates = str_replace( ',', '.', $coordinates ); // damn commma
+ $coordinates = preg_replace( '/ +/', ' ', $coordinates ); // damn syntax
+ $coordinates = explode( ' ', $coordinates, 2 );
+ if( 2 !== count( $coordinates ) ) {
+ throw new Exception( 'wrong coordinates' );
+ }
+ return [
+ (float) $coordinates[ 0 ],
+ (float) $coordinates[ 1 ]
+ ];
+}
+
+/**
+ * @param $label string
+ */
+function filter_label( $label ) {
+ $label = trim( $label, "][ \n" );
+ $label = explode( '|', $label, 2 );
+ if( count( $label ) === 2 ) {
+ return $label[1];
+ }
+ return $label[0];
+}
+
+/**
+ * Read from the STDIN
+ * @param $default string
+ * @return string
+ */
+function read( $default = '' ) {
+ $v = trim( fgets( STDIN ) );
+ return $v ? $v : $default;
+}
diff --git a/2018-02-italian-parks/italian-parks-uniformer.php b/2018-02-italian-parks/italian-parks-uniformer.php
index f0b5a96..39f257a 100755
--- a/2018-02-italian-parks/italian-parks-uniformer.php
+++ b/2018-02-italian-parks/italian-parks-uniformer.php
@@ -1,171 +1,243 @@
#!/usr/bin/php
<?php
# Copyright (C) 2018 Valerio Bozzolan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# boz-mw
require '../includes/boz-mw/autoload.php';
# bot credentials
require '../config.php';
# local functions
require 'includes/functions.php';
# classes shortcut
+use wm\Wikidata;
use wb\DataModel;
+use wb\Label;
+use wb\StatementItem;
+use wb\StatementString;
+use wb\StatementQuantity;
+use wb\StatementGlobeCoordinate;
+use wb\StatementTime;
+use wb\DataValueTime;
use wb\Snaks;
use wb\SnakItem;
-use wb\StatementItem;
+
+# login and fetch the CSRF token
+define( 'CSRF',
+ Wikidata::getInstance()->login()->fetch( [
+ 'action' => 'query',
+ 'meta' => 'tokens',
+ 'type' => 'csrf'
+ ] )->query->tokens->csrftoken
+);
# data
$handle = fopen('data/italian-parks-data.csv', 'r') or die('asd');
// Wikidata references
$REFERENCES = [ [
'snaks' => (
new Snaks( [
// stated in: Ministry of the Environment
new SnakItem( 'P248', 'Q3858479' )
] )
)->getAll()
] ];
$row = 0;
while( ( $data = fgetcsv( $handle, 1000, ',' ) ) !== false ) {
// skip headers
if( $row++ < 3 ) {
continue;
}
// null empty values
foreach( $data as & $value ) {
$value = trim( $value );
+ $value = str_replace( "\n", '', $value ); // the dataset is a bit dirty
if( ! $value ) {
$value = null;
}
}
// Add missing columns to don't make list() errors
$n = count( $data );
for( $i = $n; $i < 20; $i++ ) {
$data[] = null;
}
$P1435 = [];
list(
$wikidata_ID,
$P131, // located in the administrative territorial entity
// string(2)
$P625, // coordinate location
// string coordinates
$P4800, // EUAP ID
// string
$P809, // WDPA ID
// string
$P627, // string Q-ID
$P3425, // Natura 2000 site ID
// string ID
$label, // string [[something]]
$P2046, // area
// float
+ // various heritage designation
$P1435['Q796174'],
$P1435['Q1191622'],
$P1435['Q2463705'],
$P1435['Q46169'],
$P1435['Q48078443'],
$P1435['Q3936952'],
$P1435['Q3936950'],
$P1435['Q23790'],
- $P126, // string
- $P571 // int
+ $P126, // maintained by
+ // string
+ $P571 // inception
+ // int
) = $data;
- $new_data = new DataModel();
+ // fetch existing Wikidata entity
+ $existing = null;
+ if( $wikidata_ID = 'Q4115189' ) {
+ $existing = DataModel::createFromObject(
+ Wikidata::getInstance()->fetch( [
+ 'action' => 'wbgetentities',
+ 'ids' => $wikidata_ID,
+ 'props' => 'info|sitelinks|aliases|labels|claims|datatype'
+ ] )->entities->{ $wikidata_ID }
+ );
+ }
+
+ $new = new DataModel();
+ $statements = [];
+ $summary = 'test before [[Wd:Requests for permissions/Bot/Valerio Bozzolan bot 4|importing italian parks]]';
+
+ // label
+ if( $label ) {
+ if( ! $existing || ! $existing->hasLabelsInLanguage('it') ) {
+ $label = filter_label( $label );
+ $new->setLabel( new Label( 'it', $label ) );
+ $summary .= ' +label[it]';
+ }
+ }
// instance of: nature reserve
- $new_data->addClaim(
- ( new StatementItem( 'P31', 'Q179049' ) )
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementItem( 'P31', 'Q179049' );
// located in the administrative territorial entity
if( $P131 ) {
// can specify multiple cities
$P131_city_IDs = plate_2_wikidataIDs( $P131 );
foreach( $P131_city_IDs as $P131_city_ID ) {
- $new_data->addClaim(
- ( new StatementItem( 'P131', $P131_city_ID ) )
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementItem( 'P131', $P131_city_ID );
}
}
// coordinate location
if( $P625 ) {
list( $lat, $lng ) = filter_coordinates( $P625 );
- $new_data->addClaim(
- ( new StatementGlobeCoordinate( 'P625', $lat, $lng ) )
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementGlobeCoordinate( 'P625', $lat, $lng, 0.01 ); // last: precision
}
// EUAP ID
if( $P4800 ) {
// can specify multiple values
foreach( explode( '/', $P4800 ) as $P4800_value ) {
- $new_data->addClaim(
- ( new StatementString( 'P4800', trim( $P4800_value ) ) )
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementString( 'P4800', trim( $P4800_value ), 0.01 );
}
}
// WDPA ID
if( $P809 ) {
foreach( explode( '/', $P809 ) as $P809_value ) {
- $new_data->addClaim(
+ $new->addClaim(
( new StatementString( 'P809', trim( $P809_value ) ) )
->setReferences( $REFERENCES )
);
}
}
// Natura 2000 site ID
if( $P3425 ) {
- $new_data->addClaim(
- ( new StatementString( 'P3425', trim( $P3425 ) ) )
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementString( 'P3425', trim( $P3425 ) );
}
// area
if( $P2046 ) {
$P2046 = (float) str_replace( ',', '.', $P2046 );
- $new_data->addClaim(
- ( new StatementQuantity( 'P2046', $P2046, 'Q35852' ) ) // 'Q35852' = hectare
- ->setReferences( $REFERENCES )
- );
+ $statements[] = new StatementQuantity( 'P2046', $P2046, 'Q35852' ); // 'Q35852' = hectare
+ }
+
+ // heritage designation
+ foreach( $P1435 as $P1435_item => $selected ) {
+ if( ! empty( trim( $selected ) ) ) {
+ $statements[] = new StatementItem( 'P1435', $P1435_item );
+ }
+ }
+
+ /* TODO
+ if( $P126 ) {
+ $statements[] =
}
+ */
- if( $wikidata_ID ) {
- // Edit
- } else {
- // Create
+ // inception
+ if( $P571 ) {
+ $P571 = (int) $P571;
+ $statements[] = new StatementTime( 'P571', "+$P571-00-00T00:00:00Z", DataValueTime::PRECISION_YEARS );
+
+ }
+
+ // append statements without duplicating
+ foreach( $statements as $statement ) {
+ $property = $statement->getMainsnak()->getProperty();
+ if( ! $existing || ! $existing->hasClaimsInProperty( $property ) ) {
+ $new->addClaim(
+ $statement->setReferences( $REFERENCES )
+ );
+ $summary .= " +[[P:$property]]";
+ }
+ }
+
+ // check and save
+ echo $new->getJSON( JSON_PRETTY_PRINT ) . "\n";
+ echo $summary . "\n";
+ echo "Save? ";
+ if( read('y') === 'y' ) {
+ echo "Saving\n";
+
+ $args = [
+ 'action' => 'wbeditentity',
+ 'token' => CSRF,
+ 'bot' => 1,
+ 'data' => $new->getJSON(),
+ 'summary' => $summary,
+ ];
+
+ if( $wikidata_ID ) {
+ // save existing
+ $args['id'] = $wikidata_ID;
+ }
+ Wikidata::getInstance()->post( $args );
}
}
fclose($handle);

File Metadata

Mime Type
text/x-diff
Expires
Wed, Apr 22, 15:01 (1 d, 21 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1862826
Default Alt Text
(10 KB)

Event Timeline