Changeset View
Changeset View
Standalone View
Standalone View
src/it/reyboz/bustorino/backend/gtfs/GtfsDataParser.java
- This file was added.
package it.reyboz.bustorino.backend.gtfs; | |||||
import android.content.Context; | |||||
import android.util.Log; | |||||
import androidx.annotation.NonNull; | |||||
import it.reyboz.bustorino.backend.Fetcher; | |||||
import it.reyboz.bustorino.backend.networkTools; | |||||
import it.reyboz.bustorino.data.gtfs.CsvTableInserter; | |||||
import org.jsoup.Jsoup; | |||||
import org.jsoup.nodes.Attributes; | |||||
import org.jsoup.nodes.Document; | |||||
import org.jsoup.nodes.Element; | |||||
import org.jsoup.select.Elements; | |||||
import java.io.BufferedReader; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.net.HttpURLConnection; | |||||
import java.net.URL; | |||||
import java.text.ParseException; | |||||
import java.text.SimpleDateFormat; | |||||
import java.util.*; | |||||
import java.util.concurrent.atomic.AtomicReference; | |||||
import java.util.zip.ZipEntry; | |||||
import java.util.zip.ZipInputStream; | |||||
abstract public class GtfsDataParser { | |||||
public static final String GTFS_ADDRESS="https://www.gtt.to.it/open_data/gtt_gtfs.zip"; | |||||
public static final String GTFS_PAGE_ADDRESS="http://aperto.comune.torino.it/dataset/feed-gtfs-trasporti-gtt"; | |||||
private static final String DEBUG_TAG = "BusTO-GTFSDataParser"; | |||||
/** | |||||
* First trial for a function to download the zip | |||||
* @param res Fetcher.result | |||||
* @return the list of files inside the ziè | |||||
*/ | |||||
public static ArrayList<String> readFilesList(AtomicReference<Fetcher.Result> res){ | |||||
HttpURLConnection urlConnection; | |||||
InputStream in; | |||||
ArrayList<String> result = new ArrayList<>(); | |||||
try { | |||||
final URL gtfsUrl = new URL(GTFS_ADDRESS); | |||||
urlConnection = (HttpURLConnection) gtfsUrl.openConnection(); | |||||
} catch(IOException e) { | |||||
//e.printStackTrace(); | |||||
res.set(Fetcher.Result.SERVER_ERROR); // even when offline, urlConnection works fine. WHY. | |||||
return null; | |||||
} | |||||
urlConnection.setConnectTimeout(4000); | |||||
urlConnection.setReadTimeout(50*1000); | |||||
try { | |||||
in = urlConnection.getInputStream(); | |||||
} catch (Exception e) { | |||||
try { | |||||
if(urlConnection.getResponseCode()==404) | |||||
res.set(Fetcher.Result.SERVER_ERROR_404); | |||||
} catch (IOException e2) { | |||||
e2.printStackTrace(); | |||||
} | |||||
return null; | |||||
} | |||||
try (ZipInputStream stream = new ZipInputStream(in)) { | |||||
// now iterate through each item in the stream. The get next | |||||
// entry call will return a ZipEntry for each file in the | |||||
// stream | |||||
ZipEntry entry; | |||||
while ((entry = stream.getNextEntry()) != null) { | |||||
String s = String.format(Locale.ENGLISH, "Entry: %s len %d added", | |||||
entry.getName(), | |||||
entry.getSize() | |||||
); | |||||
System.out.println(s); | |||||
// Once we get the entry from the stream, the stream is | |||||
// positioned read to read the raw data, and we keep | |||||
// reading until read returns 0 or less. | |||||
result.add(entry.getName()); | |||||
} | |||||
} catch (IOException e) { | |||||
e.printStackTrace(); | |||||
} | |||||
// we must always close the zip file. | |||||
return result; | |||||
} | |||||
public static Date getLastGTFSUpdateDate(AtomicReference<Fetcher.Result> res) { | |||||
URL theURL; | |||||
final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ENGLISH); | |||||
//final Date baseDate = dateFormat.parse("1970-00-00T00:00:00+0000"); | |||||
final Date nullDate = new Date(0); | |||||
try{ | |||||
theURL = new URL(GTFS_PAGE_ADDRESS); | |||||
} catch (IOException ex){ | |||||
Log.e(DEBUG_TAG, "Fixed URL is null, this is a real issue"); | |||||
return nullDate; | |||||
} | |||||
res.set(Fetcher.Result.OK); | |||||
final String fullPageDOM = networkTools.getDOM(theURL, res); | |||||
if(fullPageDOM== null){ | |||||
//Something wrong happend | |||||
Log.e(DEBUG_TAG, "Cannot get URL"); | |||||
return nullDate; | |||||
} | |||||
res.set(Fetcher.Result.OK); | |||||
Document doc = Jsoup.parse(fullPageDOM); | |||||
Elements sections = doc.select("section.additional-info"); | |||||
Date finalDate = new Date(0); | |||||
for (Element sec: sections){ | |||||
Element head = sec.select("h3").first(); | |||||
String headTitle = head.text(); | |||||
if(!headTitle.trim().toLowerCase(Locale.ITALIAN).equals("informazioni supplementari")) | |||||
continue; | |||||
for (Element row: sec.select("tr")){ | |||||
if(!row.selectFirst("th").text().trim() | |||||
.toLowerCase(Locale.ITALIAN).equals("ultimo aggiornamento")) | |||||
continue; | |||||
Attributes spanAttributes = row.selectFirst("td > span").attributes(); | |||||
String dateAsString = spanAttributes.get("data-datetime"); | |||||
try { | |||||
finalDate = dateFormat.parse(dateAsString); | |||||
return finalDate; | |||||
}catch (ParseException ex){ | |||||
Log.e(DEBUG_TAG, "Wrong date for the last update of GTFS Data: "+dateAsString); | |||||
res.set(Fetcher.Result.PARSER_ERROR); | |||||
ex.printStackTrace(); | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
res.set(Fetcher.Result.PARSER_ERROR); | |||||
return finalDate; | |||||
} | |||||
public static void readCSVWithColumns(BufferedReader reader, String tableName, Context con) throws IOException { | |||||
String[] elements; | |||||
String line; | |||||
final String header = reader.readLine(); | |||||
if (header == null){ | |||||
throw new IOException(); | |||||
} | |||||
elements = header.split("\n")[0].split(","); | |||||
System.out.println(Arrays.toString(elements)); | |||||
final HashMap<Integer,String> columnMap = new HashMap<>(); | |||||
final CsvTableInserter inserter = new CsvTableInserter(tableName,con); | |||||
for (int i=0; i<elements.length; i++){ | |||||
// if(tableColumns.contains(elements[i].trim())){ | |||||
columnMap.put(i, elements[i].trim()); | |||||
} | |||||
while((line = reader.readLine())!=null){ | |||||
//there is a line of data | |||||
elements = line.split("\n")[0].split(","); | |||||
final Map<String,String> rowsMap = getColumnsAsString(elements, columnMap); | |||||
inserter.addElement(rowsMap); | |||||
//GtfsTable newEle = T.fromLine(elements,selectColumns); | |||||
} | |||||
//commit data | |||||
inserter.updateTable(); | |||||
} | |||||
@NonNull | |||||
private static Map<String,String> getColumnsAsString(@NonNull String[] lineElements, Map<Integer,String> colsIndices){ | |||||
final HashMap<String,String> theMap = new HashMap<>(); | |||||
for(int l=0; l<lineElements.length; l++){ | |||||
if(!colsIndices.containsKey(l)) | |||||
continue; | |||||
theMap.put(colsIndices.get(l), lineElements[l].trim()); | |||||
} | |||||
return theMap; | |||||
} | |||||
} |
Public contents are in Creative Commons Attribution-ShareAlike 4.0 (CC-BY-SA) or GNU Free Documentation License (at your option) unless otherwise noted. · Contact / Register