diff --git a/5T-GTT-TimetableParser b/5T-GTT-TimetableParser new file mode 100755 index 0000000..c7defbf Binary files /dev/null and b/5T-GTT-TimetableParser differ diff --git a/READ ME b/READ ME new file mode 100644 index 0000000..f1e479a --- /dev/null +++ b/READ ME @@ -0,0 +1,13 @@ +== 5T/GTT Timetable Parser == + +To compile or run you must have installed these packages: + * libcurlpp-dev + $ sudo apt install libcurlpp-dev + * libcurl4-openssl-dev + $ sudo apt install libcurl4-openssl-dev + +To compile: + $ g++ -o 5T-GTT-TimetableParser TimetableParser.cpp -lcurl -lcurlpp + +To run: + $ ./5T-GTT-TimetableParser diff --git a/TimetableParser.cpp b/TimetableParser.cpp new file mode 100644 index 0000000..db9a21f --- /dev/null +++ b/TimetableParser.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include + +using namespace std; +using namespace curlpp; + +bool isInteger(string str) { + // check if the string is a number + for (int i = 0; i < str.length(); i++) + if (isdigit(str[i]) == false) + return false; + return true; +} + +int main() { + + string id (""); + + while (id.compare("q") != 0) { + + // input of the stop id + do { + + cout << "stop id (\"q\" to quit) > "; + getline (cin, id); + + if (!isInteger(id) && id.compare("q") != 0) + cout << "\"" << id << "\" is not a valid stop id" << endl << endl; + + } while (!isInteger(id) && id.compare("q") != 0); + + // RAII cleanup + Cleanup myCleanup; + + // send request and get a result. + // here I use a shortcut to get it in a string + ostringstream os; + os << options::Url(string("http://www.5t.torino.it/5t/trasporto/arrival-times-byline.jsp?action=getTransitsByLine&shortName=" + id)); + string htmlSource = os.str(); + + if (id.compare("q") == 0) + break; + else if (htmlSource.find("

Nessun passaggio previsto all'ora indicata.

") != string::npos) + cout << "stop not found" << endl; + else { + // I erase all spaces and newlines (which give problems) from the HTML source code + htmlSource.erase(remove_if(htmlSource.begin(), htmlSource.end(), ::isspace), htmlSource.end()); + + // split the HTML source code every link tag (which contains the bus line numbers) because sucks + vector htmlSegments; + size_t pos = 0; + while ((pos = htmlSource.find("::iterator it = htmlSegments.begin() ; it != htmlSegments.end(); ++it) { + string htmlSegment = *it; + + smatch m; + regex e ("href=\"linea-dettaglio.jsp\\?codlinea=\\d+\">(\\w+)"); // matches the bus line number + + while (regex_search (htmlSegment, m, e)) { + cout << m.str(1) + "\t"; + htmlSegment = m.suffix().str(); + } + + e = "(\\d{2}:\\d{2})(?:(\\*))?"; // matches the arrival times of the bus line + + auto words_begin = sregex_iterator(htmlSegment.begin(), htmlSegment.end(), e); + auto words_end = sregex_iterator(); + + for (sregex_iterator i = words_begin; i != words_end; ++i) { + m = *i; + if (m.size() == 3) + cout << m.str(1) + "*\t"; + else + cout << m.str(1) + "\t"; + } + + cout << endl; + } + } + + cout << endl; + } + + return 0; +} +