Handling of "feed_info" and "transfers" files
This commit is contained in:
parent
2c520910a2
commit
a8abf9c0fa
|
@ -81,4 +81,16 @@ CREATE TABLE
|
||||||
CREATE INDEX stop_times__by_trip_id
|
CREATE INDEX stop_times__by_trip_id
|
||||||
ON stop_times (trip_id);
|
ON stop_times (trip_id);
|
||||||
CREATE INDEX stop_times__by_stop_id
|
CREATE INDEX stop_times__by_stop_id
|
||||||
ON stop_times (stop_id);
|
ON stop_times (stop_id);
|
||||||
|
|
||||||
|
CREATE TABLE
|
||||||
|
feed_info(
|
||||||
|
feed_id INT NOT NULL,
|
||||||
|
feed_publisher_name TEXT NOT NULL,
|
||||||
|
feed_publisher_url TEXT NOT NULL,
|
||||||
|
feed_lang TEXT NOT NULL,
|
||||||
|
feed_start_date TEXT NOT NULL,
|
||||||
|
feed_end_date TEXT NOT NULL,
|
||||||
|
feed_version TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (feed_id)
|
||||||
|
);
|
|
@ -8,7 +8,7 @@ import math
|
||||||
import contextlib as ctx
|
import contextlib as ctx
|
||||||
import http.client as http
|
import http.client as http
|
||||||
|
|
||||||
from typing import Callable, Any, Optional
|
from typing import Callable, Any
|
||||||
|
|
||||||
|
|
||||||
# GTFS reference:
|
# GTFS reference:
|
||||||
|
@ -60,7 +60,12 @@ def optional(f: Mapper) -> Mapper:
|
||||||
return map
|
return map
|
||||||
|
|
||||||
|
|
||||||
CSV_FIELDS: list[tuple[str, Optional[list[tuple[str, Optional[Mapper]]]]]] = [
|
def discarded(v: str):
|
||||||
|
# No-op mapper for field intentionally not included in the schema
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
CSV_FIELDS: list[tuple[str, list[tuple[str, Mapper | None]]]] = [
|
||||||
(
|
(
|
||||||
"agency.txt",
|
"agency.txt",
|
||||||
[
|
[
|
||||||
|
@ -133,8 +138,33 @@ CSV_FIELDS: list[tuple[str, Optional[list[tuple[str, Optional[Mapper]]]]]] = [
|
||||||
("shape_dist_traveled", None),
|
("shape_dist_traveled", None),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
("feed_info.txt", None),
|
(
|
||||||
("transfers.txt", None),
|
"feed_info.txt",
|
||||||
|
[
|
||||||
|
("feed_id", int),
|
||||||
|
("feed_publisher_name", str),
|
||||||
|
("feed_publisher_url", str),
|
||||||
|
("feed_lang", str),
|
||||||
|
("feed_start_date", str),
|
||||||
|
("feed_end_date", str),
|
||||||
|
("feed_version", str),
|
||||||
|
("conv_rev", discarded), # proprietary, undocumented
|
||||||
|
("plan_rev", discarded), # proprietary, undocumented
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"transfers.txt",
|
||||||
|
[
|
||||||
|
# We expect that file to have no records.
|
||||||
|
# All None mappers will fail if it is not the case
|
||||||
|
('from_stop_id', None),
|
||||||
|
('to_stop_id', None),
|
||||||
|
('transfer_type', None),
|
||||||
|
('min_transfer_time', None),
|
||||||
|
('from_route_id', None),
|
||||||
|
('to_route_id', None),
|
||||||
|
]
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -184,8 +214,6 @@ def _create_schema(db: sqlite3.Connection, schema_file: str):
|
||||||
|
|
||||||
def _load_data(zip_in: zipfile.ZipFile, db: sqlite3.Connection):
|
def _load_data(zip_in: zipfile.ZipFile, db: sqlite3.Connection):
|
||||||
for csv_file, fields in CSV_FIELDS:
|
for csv_file, fields in CSV_FIELDS:
|
||||||
if fields is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
table = csv_file[:-4]
|
table = csv_file[:-4]
|
||||||
print(f"Loading table {table!r}")
|
print(f"Loading table {table!r}")
|
||||||
|
@ -200,22 +228,38 @@ def _load_data(zip_in: zipfile.ZipFile, db: sqlite3.Connection):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
_check_csv_headers(next(reader), fields)
|
headers = next(reader)
|
||||||
|
_check_csv_headers(headers, fields)
|
||||||
|
|
||||||
place_holders = ",".join(
|
place_holders = ",".join(
|
||||||
"?" for _field, mapper in fields if mapper is not None
|
"?" for _field, mapper in fields if mapper not in (None, discarded)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not place_holders:
|
||||||
|
try:
|
||||||
|
first_row = next(reader)
|
||||||
|
except StopIteration:
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise NotImplementedError(list(zip(headers, first_row)))
|
||||||
|
|
||||||
|
def map_row(row: list[str]):
|
||||||
|
assert all(
|
||||||
|
not value
|
||||||
|
for (_field, mapper), value in zip(fields, row)
|
||||||
|
if mapper is None
|
||||||
|
)
|
||||||
|
|
||||||
|
return [
|
||||||
|
mapper(value)
|
||||||
|
for (_field, mapper), value in zip(fields, row)
|
||||||
|
if mapper not in (None, discarded)
|
||||||
|
]
|
||||||
|
|
||||||
with db:
|
with db:
|
||||||
db.executemany(
|
db.executemany(
|
||||||
f"INSERT INTO {table} VALUES ({place_holders})",
|
f"INSERT INTO {table} VALUES ({place_holders})",
|
||||||
(
|
(map_row(row) for row in reader),
|
||||||
[
|
|
||||||
mapper(value)
|
|
||||||
for (_field, mapper), value in zip(fields, row)
|
|
||||||
if mapper is not None
|
|
||||||
]
|
|
||||||
for row in reader
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user