#!/usr/bin/env python3
"""
Fetch all indicators tagged with transport topics via the topic filter,
to catch anything missed by keyword search.

Every topic is paged through completely (not just the first page), and the
results are written as two JSON files under RESULTS_DIR:

* transport-by-topic-filter.json    - indicators grouped by topic id
* transport-topic-filtered-all.json - de-duplicated list across all topics
"""

import json
import requests
from pathlib import Path

BASE = "https://data360api.worldbank.org/data360"
RESULTS_DIR = Path(__file__).parent.parent / "results"

# Page size used for each search request.
PAGE_SIZE = 250

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would hang the script forever.
REQUEST_TIMEOUT = 30

# Topic id -> human-readable topic name.
TRANSPORT_TOPICS = {
    "P4_000003": "Transport",
    "P4_000023": "Air Transport",
    "P4_000025": "Green and Inclusive Mobility",
    "P4_000026": "Highway Asset Management and Rural Access",
    "P4_000027": "Maritime Transport and Logistics",
    "P4_000029": "Railways",
    "P4_000030": "Regional Connectivity and Transport Corridors",
    "P4_000031": "Road Safety",
    "P4_000032": "Transport Economics",
    "P4_000033": "Urban Mobility",
}

# Fields requested for every hit.
_SELECT_FIELDS = (
    "series_description/idno",
    "series_description/name",
    "series_description/database_id",
    "series_description/database_name",
    "series_description/definition_short",
    "series_description/topics",
    "series_description/measurement_unit",
    "series_description/periodicity",
)


def search_by_topic(topic_id, top=250, skip=0, timeout=REQUEST_TIMEOUT):
    """Request one page of indicators tagged with *topic_id*.

    Args:
        topic_id: Topic identifier matched against
            ``series_description/topics``.
        top: Maximum number of hits to request for this page.
        skip: Number of hits to skip (offset of the page).
        timeout: Seconds passed to ``requests.post`` as the timeout.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    resp = requests.post(
        f"{BASE}/searchv2",
        json={
            "search": "*",
            "filter": f"series_description/topics/any(t: t/id eq '{topic_id}')",
            "select": ",".join(_SELECT_FIELDS),
            "top": top,
            "skip": skip,
            "count": True,
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()


def _fetch_all_for_topic(topic_id, page_size=PAGE_SIZE):
    """Page through every hit for *topic_id*.

    Returns:
        A ``(count, hits)`` tuple: the total reported by the first page
        (0 when the response carries no count) and the list of all hits
        collected across pages.
    """
    hits = []
    count = 0
    skip = 0
    while True:
        page = search_by_topic(topic_id, top=page_size, skip=skip)
        if skip == 0:
            count = page.get("@odata.count") or 0
        batch = page.get("value") or []
        hits.extend(batch)
        skip += len(batch)
        # Stop on an empty page (guards against looping forever if the
        # reported count is never reached) or once everything is fetched.
        # A missing count leaves count == 0, so only the first page is read.
        if not batch or skip >= count:
            break
    return count, hits


def _to_indicator(sd):
    """Project a ``series_description`` dict onto the fields we store."""
    return {
        "idno": sd.get("idno"),
        "name": sd.get("name"),
        "database_id": sd.get("database_id"),
        "database_name": sd.get("database_name"),
        "definition_short": sd.get("definition_short"),
        "measurement_unit": sd.get("measurement_unit"),
        "periodicity": sd.get("periodicity"),
        "topics": sd.get("topics", []),
    }


def main():
    """Fetch every transport topic and write the two result files."""
    all_by_topic = {}
    all_indicators = {}  # idno -> indicator, first occurrence wins

    for topic_id, topic_name in TRANSPORT_TOPICS.items():
        print(f"\n=== {topic_id}: {topic_name} ===")
        count, values = _fetch_all_for_topic(topic_id)
        print(f" Count: {count}, Fetched: {len(values)}")

        topic_indicators = []
        for v in values:
            # The key may be present with a null value, so fall back with
            # `or` rather than relying on the .get() default alone.
            sd = v.get("series_description") or {}
            ind = _to_indicator(sd)
            idno = ind["idno"]
            topic_indicators.append(ind)
            if idno and idno not in all_indicators:
                all_indicators[idno] = ind
                # A null name would make slicing fail; show "?" instead.
                print(f" {idno}: {(sd.get('name') or '?')[:80]}")

        all_by_topic[topic_id] = {
            "topic_name": topic_name,
            "count": count,
            "indicators": topic_indicators,
        }

    # Save. Create the output directory first so a fresh checkout works.
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    with open(
        RESULTS_DIR / "transport-by-topic-filter.json", "w", encoding="utf-8"
    ) as f:
        json.dump(all_by_topic, f, indent=2)

    with open(
        RESULTS_DIR / "transport-topic-filtered-all.json", "w", encoding="utf-8"
    ) as f:
        json.dump(list(all_indicators.values()), f, indent=2)

    print(f"\n\nTotal unique indicators by topic filter: {len(all_indicators)}")
    print("Saved: transport-by-topic-filter.json")
    print("Saved: transport-topic-filtered-all.json")


if __name__ == "__main__":
    main()