Changeset View
Changeset View
Standalone View
Standalone View
src/it/reyboz/bustorino/backend/gtfs/GtfsDataParser.java
- This file was added.
package it.reyboz.bustorino.backend.gtfs; | |||||
import android.util.Log; | |||||
import it.reyboz.bustorino.backend.Fetcher; | |||||
import it.reyboz.bustorino.backend.networkTools; | |||||
import org.jsoup.Jsoup; | |||||
import org.jsoup.nodes.Attributes; | |||||
import org.jsoup.nodes.Document; | |||||
import org.jsoup.nodes.Element; | |||||
import org.jsoup.select.Elements; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.net.HttpURLConnection; | |||||
import java.net.URI; | |||||
import java.net.URL; | |||||
import java.text.ParseException; | |||||
import java.text.SimpleDateFormat; | |||||
import java.time.format.DateTimeFormatter; | |||||
import java.time.format.DateTimeFormatterBuilder; | |||||
import java.util.ArrayList; | |||||
import java.util.Date; | |||||
import java.util.Locale; | |||||
import java.util.concurrent.atomic.AtomicReference; | |||||
import java.util.zip.ZipEntry; | |||||
import java.util.zip.ZipInputStream; | |||||
abstract public class GtfsDataParser { | |||||
public static final String GTFS_ADDRESS="https://www.gtt.to.it/open_data/gtt_gtfs.zip"; | |||||
public static final String GTFS_PAGE_ADDRESS="http://aperto.comune.torino.it/dataset/feed-gtfs-trasporti-gtt"; | |||||
private static final String DEBUG_TAG = "BusTO-GTFSDataParser"; | |||||
/** | |||||
* First trial for a function to download the zip | |||||
* @param res Fetcher.result | |||||
* @return the list of files inside the ziè | |||||
*/ | |||||
public static ArrayList<String> readFilesList(AtomicReference<Fetcher.Result> res){ | |||||
HttpURLConnection urlConnection; | |||||
InputStream in; | |||||
ArrayList<String> result = new ArrayList<>(); | |||||
try { | |||||
final URL gtfsUrl = new URL(GTFS_ADDRESS); | |||||
urlConnection = (HttpURLConnection) gtfsUrl.openConnection(); | |||||
} catch(IOException e) { | |||||
//e.printStackTrace(); | |||||
res.set(Fetcher.Result.SERVER_ERROR); // even when offline, urlConnection works fine. WHY. | |||||
return null; | |||||
} | |||||
urlConnection.setConnectTimeout(4000); | |||||
urlConnection.setReadTimeout(50*1000); | |||||
try { | |||||
in = urlConnection.getInputStream(); | |||||
} catch (Exception e) { | |||||
try { | |||||
if(urlConnection.getResponseCode()==404) | |||||
res.set(Fetcher.Result.SERVER_ERROR_404); | |||||
} catch (IOException e2) { | |||||
e2.printStackTrace(); | |||||
} | |||||
return null; | |||||
} | |||||
try (ZipInputStream stream = new ZipInputStream(in)) { | |||||
// now iterate through each item in the stream. The get next | |||||
// entry call will return a ZipEntry for each file in the | |||||
// stream | |||||
ZipEntry entry; | |||||
while ((entry = stream.getNextEntry()) != null) { | |||||
String s = String.format(Locale.ENGLISH, "Entry: %s len %d added", | |||||
entry.getName(), | |||||
entry.getSize() | |||||
); | |||||
System.out.println(s); | |||||
// Once we get the entry from the stream, the stream is | |||||
// positioned read to read the raw data, and we keep | |||||
// reading until read returns 0 or less. | |||||
result.add(entry.getName()); | |||||
} | |||||
} catch (IOException e) { | |||||
e.printStackTrace(); | |||||
} | |||||
// we must always close the zip file. | |||||
return result; | |||||
} | |||||
public static Date getLastGTFSUpdateDate(AtomicReference<Fetcher.Result> res) { | |||||
URL theURL; | |||||
final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); | |||||
//final Date baseDate = dateFormat.parse("1970-00-00T00:00:00+0000"); | |||||
final Date nullDate = new Date(0); | |||||
try{ | |||||
theURL = new URL(GTFS_PAGE_ADDRESS); | |||||
} catch (IOException ex){ | |||||
Log.e(DEBUG_TAG, "Fixed URL is null, this is a real issue"); | |||||
return nullDate; | |||||
} | |||||
res.set(Fetcher.Result.OK); | |||||
final String fullPageDOM = networkTools.getDOM(theURL, res); | |||||
if(fullPageDOM== null){ | |||||
//Something wrong happend | |||||
Log.e(DEBUG_TAG, "Cannot get URL"); | |||||
return nullDate; | |||||
} | |||||
res.set(Fetcher.Result.OK); | |||||
Document doc = Jsoup.parse(fullPageDOM); | |||||
Elements sections = doc.select("section.additional-info"); | |||||
Date finalDate = new Date(0); | |||||
for (Element sec: sections){ | |||||
Element head = sec.select("h3").first(); | |||||
String headTitle = head.text(); | |||||
if(!headTitle.trim().toLowerCase(Locale.ITALIAN).equals("informazioni supplementari")) | |||||
continue; | |||||
for (Element row: sec.select("tr")){ | |||||
if(!row.selectFirst("th").text().trim() | |||||
.toLowerCase(Locale.ITALIAN).equals("ultimo aggiornamento")) | |||||
continue; | |||||
Attributes spanAttributes = row.selectFirst("td > span").attributes(); | |||||
String dateAsString = spanAttributes.get("data-datetime"); | |||||
try { | |||||
finalDate = dateFormat.parse(dateAsString); | |||||
return finalDate; | |||||
}catch (ParseException ex){ | |||||
Log.e(DEBUG_TAG, "Wrong date for the last update of GTFS Data: "+dateAsString); | |||||
res.set(Fetcher.Result.PARSER_ERROR); | |||||
ex.printStackTrace(); | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
res.set(Fetcher.Result.PARSER_ERROR); | |||||
return finalDate; | |||||
} | |||||
} |
Public contents are in Creative Commons Attribution-ShareAlike 4.0 (CC-BY-SA) or GNU Free Documentation License (at your option) unless otherwise noted. · Contact / Register