#!/usr/bin/env python3
"""
Deep-dive into specific secondary categories to find the best
transport-adjacent indicators.

Searches targeted terms and inspects results to pick the most useful ones
per category.
"""

import json
from pathlib import Path

import requests

BASE = "https://data360api.worldbank.org/data360"
RESULTS_DIR = Path(__file__).parent.parent / "results"

# Seconds to wait for the search endpoint. `requests` applies no timeout by
# default, so without this a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30

# Width of the "=====" rule printed above and below each category heading.
BANNER_WIDTH = 70

# Fields requested for every hit, sent as one comma-separated "select" value.
SELECT_FIELDS = ",".join([
    "series_description/idno",
    "series_description/name",
    "series_description/database_id",
    "series_description/database_name",
    "series_description/definition_short",
    "series_description/measurement_unit",
    "series_description/periodicity",
    "series_description/topics",
])

# One entry per category: (heading, hits requested per query, search terms).
# Categories and terms run in exactly the order listed here.
SEARCH_PLAN = (
    (
        "CLIMATE & EMISSIONS - transport-specific",
        15,
        (
            "CO2 emissions transport",
            "greenhouse gas transport sector",
            "fossil fuel subsidy transport",
            "carbon tax fuel",
            "air pollution PM2.5",
            "climate vulnerability infrastructure",
            "flood risk infrastructure",
            "sea level rise coastal",
        ),
    ),
    (
        "ENERGY & FUEL - transport-relevant",
        15,
        (
            "fuel price oil petroleum",
            "fuel import export",
            "electric vehicle charging",
            "electricity access rural",
            "energy consumption per capita",
            "renewable energy capacity",
            "pump price gasoline diesel",
        ),
    ),
    (
        "CONFLICT & FRAGILITY",
        10,
        (
            "political stability violence",
            "conflict intensity",
            "internally displaced refugee",
            "government effectiveness regulatory quality",
            "fragile conflict affected state",
            "battle deaths armed conflict",
        ),
    ),
    (
        "LAND USE & URBANIZATION",
        10,
        (
            "urban population growth city",
            "population density",
            "slum population",
            "land area surface",
            "rural population access",
            "urban agglomeration",
            "deforestation land use change",
            "elevation sea level low coastal",
        ),
    ),
    (
        "WATER & WATERWAYS",
        10,
        (
            "inland waterway navigation",
            "river basin transboundary",
            "dam capacity reservoir",
            "coastal flood",
            "ocean shipping maritime",
            "fishery port coastal economy",
        ),
    ),
    (
        "TRADE & ECONOMIC CORRIDORS",
        10,
        (
            "trade GDP merchandise",
            "customs clearance border time",
            "foreign direct investment",
            "transport services exports imports",
            "tariff trade barrier",
            "special economic zone",
            "trade in value added",
        ),
    ),
    (
        "GOVERNMENT SPENDING - infrastructure/transport",
        15,
        (
            "government expenditure transport",
            "government expenditure economic affairs",
            "public investment infrastructure",
            "budget allocation transport road",
            # NOTE(review): this term is also searched under ENERGY & FUEL;
            # kept to preserve the existing output - confirm it is intended.
            "pump price gasoline diesel",
        ),
    ),
    (
        "POPULATION & DEMOGRAPHICS",
        10,
        (
            "population total growth rate",
            "GDP per capita PPP",
            "labor force employment",
            "remittance migration",
            "poverty headcount",
        ),
    ),
)


def search(query, top=50, filter_expr=None, timeout=REQUEST_TIMEOUT):
    """POST a search request to ``{BASE}/searchv2`` and return the decoded JSON.

    Args:
        query: Search text, sent as the ``search`` field of the request body.
        top: Value of the ``top`` field (number of hits requested).
        filter_expr: Optional value for the ``filter`` field; the field is
            omitted when this is falsy.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.

    Returns:
        The response body parsed by ``Response.json()``.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
            (via ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    body = {
        "search": query,
        "select": SELECT_FIELDS,
        "top": top,
        "count": True,
    }
    if filter_expr:
        body["filter"] = filter_expr
    resp = requests.post(f"{BASE}/searchv2", json=body, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def print_results(data, label, max_show=20):
    """Print a one-line summary followed by up to ``max_show`` hits.

    Args:
        data: Mapping read for ``"@odata.count"`` (total hits, 0 if absent)
            and ``"value"`` (list of hits, empty if absent).
        label: Text shown in brackets on the summary line.
        max_show: Maximum number of hits to print.
    """
    count = data.get("@odata.count", 0)
    values = data.get("value") or []
    print(f"\n [{label}] ({count} total, showing {min(len(values), max_show)}):")
    for hit in values[:max_show]:
        # `or` rather than a .get() default: a key that is present but null
        # would otherwise crash (series_description) or print "None".
        sd = hit.get("series_description") or {}
        idno = sd.get("idno") or ""
        name = (sd.get("name") or "")[:80]  # keep each hit on one short line
        db = sd.get("database_id") or ""
        unit = sd.get("measurement_unit") or ""
        print(f" {idno} [{db}] {name} ({unit})")


def _run_category(heading, top, queries):
    """Print a category banner, then search and print each query in order."""
    print("\n" + "=" * BANNER_WIDTH)
    print(heading)
    print("=" * BANNER_WIDTH)
    for q in queries:
        print_results(search(q, top=top), q)


def main():
    """Run every category in ``SEARCH_PLAN`` and print its search results."""
    for heading, top, queries in SEARCH_PLAN:
        _run_category(heading, top, queries)


if __name__ == "__main__":
    main()