diff --git a/cli/import-mise-functions.php b/cli/import-mise-functions.php index fcaf66a..c531001 100755 --- a/cli/import-mise-functions.php +++ b/cli/import-mise-functions.php @@ -1,234 +1,255 @@ . */ function indexed_array($rows, $property_index, $property_return) { $indexed = []; foreach($rows as $row) { $indexed[ $row->{$property_index} ] = $row->{$property_return}; } return $indexed; } function get_station_ID($station_miseID, $station_name = null, $station_type = null, $station_address = null, $station_lat = null, $station_lon = null, $comune_ID = null, $stationowner_ID = null, $fuelprovider_ID = null) { global $stations; if( isset( $stations[ $station_miseID ] ) ) { return $stations[ $station_miseID ]; } if( $station_name === null ) { - throw new Exception( _("La stazione miseID %d doveva essere già stata inserita"), $station_miseID ); + throw new Exception( __("La stazione miseID %d doveva essere già stata inserita"), $station_miseID ); } if($station_type === 'Altro') { $station_type = 'ALTRO'; } elseif($station_type === 'Strada Statale') { $station_type = 'STRADA_STATALE'; } elseif($station_type === 'Autostradale') { $station_type = 'AUTOSTRADALE'; } else { - throw new Exception( sprintf( "Tipo di stazione non prevista: %s", esc_html( $station_type ) ) ); + throw new Exception( sprintf( "Tipo di stazione non prevista: %s", $station_type ) ); } insert_row('station', [ new DBCol('station_miseID', $station_miseID, 'd'), new DBCol('station_name', $station_name, 's'), new DBCol('station_type', $station_type, 's'), new DBCol('station_address', $station_address, 's'), new DBCol('station_lat', $station_lat, 'f'), new DBCol('station_lon', $station_lon, 'f'), new DBCol('comune_ID', $comune_ID, 'd'), new DBCol('stationowner_ID', $stationowner_ID, 'd'), new DBCol('fuelprovider_ID', $fuelprovider_ID, 'd') ] ); $station = query_row( sprintf( "SELECT station_ID, station_miseID " . "FROM {$GLOBALS[T]('station')} " . "WHERE station_ID = %d", last_inserted_ID() ), 'Station' ); return $stations[ $station->station_miseID ] = $station->station_ID; } function get_comune_ID($comune_uid, $comune_name, $provincia_name) { global $comuni; if( isset( $comuni[ $comune_uid ] ) ) { return $comuni[ $comune_uid ]; } insert_row('comune', [ new DBCol('comune_uid', $comune_uid, 's'), new DBCol('comune_name', $comune_name, 's') ] ); $comune = query_row( sprintf( "SELECT comune_ID, comune_uid " . "FROM {$GLOBALS[T]('comune')} " . "WHERE comune_ID = %d", last_inserted_ID() ), 'Comune' ); insert_row('rel_provincia_comune', [ new DBCol( 'provincia_ID', get_provincia_ID( generate_slug($provincia_name), $provincia_name ), 'd' ), new DBCol('comune_ID', $comune->comune_ID, 'd') ] ); return $comuni[ $comune->comune_uid ] = $comune->comune_ID; } function get_fuelprovider_ID($fuelprovider_uid, $fuelprovider_name) { global $fuelproviders; if( isset( $fuelproviders[ $fuelprovider_uid ] ) ) { return $fuelproviders[ $fuelprovider_uid ]; } insert_row('fuelprovider', [ new DBCol('fuelprovider_uid', $fuelprovider_uid, 's'), new DBCol('fuelprovider_name', $fuelprovider_name, 's') ] ); $fuelprovider = query_row( sprintf( "SELECT fuelprovider_ID, fuelprovider_uid " . "FROM {$GLOBALS[T]('fuelprovider')} " . "WHERE fuelprovider_ID = %d", last_inserted_ID() ), 'Fuelprovider' ); return $fuelproviders[ $fuelprovider->fuelprovider_uid ] = $fuelprovider->fuelprovider_ID; } function get_stationowner_ID($stationowner_uid, $stationowner_name) { global $stationowners; if( isset( $stationowners[ $stationowner_uid ] ) ) { return $stationowners[ $stationowner_uid ]; } // Lol if( ($pos = strpos($stationowner_name, ' IL REGISTRO IMPRESE NON GARANTISCE') ) !== false ) { $stationowner_name = substr($stationowner_name, 0, $pos); $stationowner_uid = generate_slug( $stationowner_name ); $stationowner_note = substr($stationowner_name, $pos); } else { $stationowner_note = null; } + if( strlen( $stationowner_uid ) > 255 ) { + throw new Exception( sprintf( "what: %s", $stationowner_uid ) ); + } + // Another check if( isset( $stationowners[ $stationowner_uid ] ) ) { return $stationowners[ $stationowner_uid ]; } insert_row('stationowner', [ new DBCol('stationowner_uid', $stationowner_uid, 's'), new DBCol('stationowner_name', $stationowner_name, 's'), new DBCol('stationowner_note', $stationowner_note, 'snull') ] ); $stationowner = query_row( sprintf( "SELECT stationowner_ID, stationowner_uid " . "FROM {$GLOBALS[T]('stationowner')} " . "WHERE stationowner_ID = %d", last_inserted_ID() ), 'Stationowner' ); return $stationowners[ $stationowner->stationowner_uid ] = $stationowner->stationowner_ID;; } function get_fuel_ID($fuel_uid, $fuel_name) { global $fuels; if( isset( $fuels[ $fuel_uid ] ) ) { return $fuels[ $fuel_uid ]; } insert_row('fuel', [ new DBCol('fuel_uid', $fuel_uid, 's'), new DBCol('fuel_name', $fuel_name, 's') ] ); $fuel = query_row( sprintf( "SELECT fuel_ID, fuel_uid " . "FROM {$GLOBALS[T]('fuel')} " . "WHERE fuel_ID = %d", last_inserted_ID() ), 'Fuel' ); return $fuels[ $fuel->fuel_uid ] = $fuel->fuel_ID; } function get_provincia_ID($provincia_uid, $provincia_name) { global $provincie; if( isset( $provincie[ $provincia_uid ] ) ) { return $provincie[ $provincia_uid ]; } insert_row('provincia', [ new DBCol('provincia_uid', $provincia_uid, 's'), new DBCol('provincia_name', $provincia_name, 's') ] ); $provincia = query_row( sprintf( "SELECT provincia_ID, provincia_uid " . "FROM {$GLOBALS[T]('provincia')} " . "WHERE provincia_ID = %d", last_inserted_ID() ), 'Provincia' ); return $provincie[ $provincia->provincia_uid ] = $provincia->provincia_ID; } /** * 11/12/2015 20:30 => 2015-12-11 20:30:00 * * @return string MySQL/MariaDB datetime */ function itdate2datetime($time) { $dateTime = DateTime::createFromFormat('d/m/Y H:i:s', $time); return $dateTime->format('Y-m-d H:i:s'); } + +function normalize_shitty_mise_dataset( $file ) { + + $contents = file_get_contents( $file ); + $contents = str_replace( "\n\n+", "\n", $contents ); + file_put_contents( $file, $contents ); + +} + +function clean_shitty_mise_csv_values( & $data ) { + + // for now just trim shit + foreach( $data as $k => &$v ) { + $v = trim( $v ); + } + +} diff --git a/cli/import-mise.php b/cli/import-mise.php index f458f1b..53a4e77 100755 --- a/cli/import-mise.php +++ b/cli/import-mise.php @@ -1,146 +1,181 @@ +#!/usr/bin/php . */ /* * Manually download data from Ministero dello Sviluppo Economico * http://www.sviluppoeconomico.gov.it/index.php/it/open-data/elenco-dataset/2032336-carburanti-prezzi-praticati-e-anagrafica-degli-impianti * * Files: * http://www.sviluppoeconomico.gov.it/images/exportCSV/prezzo_alle_8.csv * http://www.sviluppoeconomico.gov.it/images/exportCSV/anagrafica_impianti_attivi.csv * * The files are licensed under the terms of the Italian Open Data License v2.0 * http://www.dati.gov.it/iodl/2.0/ */ require __DIR__ . '/../load.php'; require __DIR__ . '/import-mise-functions.php'; +// https://gitpull.it/T755 +$CSV_SEPARATOR_BUGFIX_MISE = ';'; // asd +$CSV_ENCLOSURE_BUGFIX_MISE = '~'; // asd + +// 05/03/2022 01:30:05 +$MISE_DATETIME_FORMAT = 'd/m/Y H:i:s'; + +$MYSQL_DATE_FORMAT = 'Y-m-d H:i:s'; + //query("TRUNCATE {$T('rel_provincia_comune')}"); //query("TRUNCATE {$T('provincia')}"); //query("TRUNCATE {$T('comune')}"); //query("TRUNCATE {$T('station')}"); //query("TRUNCATE {$T('stationowner')}"); //query("TRUNCATE {$T('fuelprovider')}"); //query("TRUNCATE {$T('fuel')}"); query("TRUNCATE {$T('price')}"); $comuni = query_results("SELECT comune_ID, comune_uid FROM {$T('comune')}", 'Comune'); $comuni = indexed_array($comuni, 'comune_uid', 'comune_ID'); $provincie = query_results("SELECT provincia_ID, provincia_uid FROM {$T('provincia')}", 'Provincia'); $provincie = indexed_array($provincie, 'provincia_uid', 'provincia_ID'); $fuels = query_results("SELECT fuel_ID, fuel_uid FROM {$T('fuel')}", 'Fuel'); $fuels = indexed_array($fuels, 'fuel_uid', 'fuel_ID'); $stations = query_results("SELECT station_ID, station_miseID FROM {$T('station')}", 'Station'); $stations = indexed_array($stations, 'station_miseID', 'station_ID'); $stationowners = query_results("SELECT stationowner_ID, stationowner_uid FROM {$T('stationowner')}", 'Stationowner'); $stationowners = indexed_array($stationowners, 'stationowner_uid', 'stationowner_ID'); $fuelproviders = query_results("SELECT fuelprovider_ID, fuelprovider_uid FROM {$T('fuelprovider')}", 'Fuelprovider'); $fuelproviders = indexed_array($fuelproviders, 'fuelprovider_uid', 'fuelprovider_ID'); try { - if( ! isset( $argv[1], $argv[2] ) ) { - throw new Exception( - sprintf( - _("Utilizzo: %s FILE_STAZIONI.csv PREZZI_ALLE_8.csv"), - esc_html( $argv[0] ) - ), - 1 - ); + if( !isset( $argv[1], $argv[2] ) ) { + throw new Exception( sprintf( + __("Utilizzo: %s FILE_STAZIONI.csv PREZZI_ALLE_8.csv"), + esc_html( $argv[0] ) + ) ); } - define('FILENAME_STATIONS', $argv[1]); - define('FILENAME_PRICES', $argv[2]); + define( 'FILENAME_STATIONS', $argv[1] ); + define( 'FILENAME_PRICES', $argv[2] ); - if( ! file_exists(FILENAME_STATIONS) ) { - throw new Exception( _("File delle stazioni non trovato"), 2 ); + if( !file_exists(FILENAME_STATIONS) ) { + throw new Exception( __("File delle stazioni non trovato"), 2 ); } - if( ! $handle = fopen(FILENAME_STATIONS, 'r') ) { - throw new Exception( _("Impossibile aprire il file delle stazioni"), 3 ); + // normalize shitty things + normalize_shitty_mise_dataset( FILENAME_STATIONS ); + + if( !$handle = fopen(FILENAME_STATIONS, 'r') ) { + throw new Exception( __("Impossibile aprire il file delle stazioni"), 3 ); } // Waste first 2 lines - fgetcsv($handle); - fgetcsv($handle); - while( $data = fgetcsv($handle, 512, ';') ) { + fgetcsv( $handle ); + fgetcsv( $handle ); + while( $data = fgetcsv( $handle, 1000, $CSV_SEPARATOR_BUGFIX_MISE, $CSV_ENCLOSURE_BUGFIX_MISE ) ) { + + clean_shitty_mise_csv_values( $data ); + get_station_ID( (int) $data[0] /*$station_miseID*/, $data[4] /*$station_name*/, $data[3] /*$station_type*/, $data[5] /*$station_address*/, (float) $data[8] /*$station_lat*/, (float) $data[9] /*$station_lon*/, get_comune_ID( generate_slug( $data[6] ) /*$comune_uid*/, $data[6] /*$comune_name*/, $data[7] /*$provincia_name*/ ), get_stationowner_ID( generate_slug( $data[1] ) /*$stationowner_uid*/, $data[1] /*$stationowner_name*/ ), get_fuelprovider_ID( generate_slug( $data[2] ) /*$fuelprovider_uid*/, $data[2] /*$fuelprovider_name*/ ) ); } fclose($handle); // Prices - if( ! file_exists(FILENAME_PRICES) ) { - throw new Exception( _("File dei prezzi non trovato"), 4 ); + if( !file_exists(FILENAME_PRICES) ) { + throw new Exception( __("File dei prezzi non trovato"), 4 ); } + normalize_shitty_mise_dataset( FILENAME_PRICES ); + if( ! $handle = fopen(FILENAME_PRICES, 'r') ) { - throw new Exception( _("Impossibile aprire il file dei prezzi"), 5 ); + throw new Exception( __("Impossibile aprire il file dei prezzi"), 5 ); } // Waste first 2 lines fgetcsv($handle); fgetcsv($handle); - while( $data = fgetcsv($handle, 255, ';') ) { - insert_row('price', [ - new DBCol('price_value', (float) $data[2], 'f'), - new DBCol('price_self', (int) $data[3], 'd'), - new DBCol('price_date', itdate2datetime($data[4]), 's'), - new DBCol( - 'fuel_ID', - get_fuel_ID( - generate_slug($data[1]) /*$fuel_uid*/, - $data[1] /*$fuel_name*/ + while( $data = fgetcsv( $handle, 255, $CSV_SEPARATOR_BUGFIX_MISE, $CSV_ENCLOSURE_BUGFIX_MISE ) ) { + + clean_shitty_mise_csv_values( $data ); + + $stat_id = $data[0]; + $fuel_uid = $data[1]; + $price = $data[2]; + $self = $data[3]; + $date_raw = $data[4]; + + // the date is expressed in this way + // 05/03/2022 01:30:05 + $date = DateTime::createFromFormat( $MISE_DATETIME_FORMAT, $date_raw ); + + if( $date ) { + $date_mysql = $date->format( $MYSQL_DATE_FORMAT ); + + insert_row('price', [ + new DBCol('price_value', (float) $price, 'f'), + new DBCol('price_self', (int) $self, 'd'), + new DBCol('price_date', $date_mysql, 's'), + new DBCol( + 'fuel_ID', + get_fuel_ID( + generate_slug( $fuel_uid ), + $fuel_uid, // fuel_name + ), + 'd' ), - 'd' - ), - new DBCol('station_ID', get_station_ID( (int) $data[0] ), 'd') - ] ); + new DBCol('station_ID', get_station_ID( (int) $stat_id ), 'd') + ] ); + } else { + echo "Warning: nonsense date '$date' related to stat ID '$stat_id' \n"; + } } fclose($handle); } catch(Exception $e) { printf( _("Errore: %s."), $e->getMessage() ); echo "\n"; + echo $e->getTraceAsString() . "\n"; exit( $e->getCode() ); }