#!/usr/bin/env python3
"""
Curate the most transport-relevant secondary indicators from the API exploration.

These are the indicators you'd use *alongside* core transport data for
contextual analysis: climate, conflict, land use, waterways, trade, spending,
demographics.

Running the script reads any cached ``secondary-*.json`` metadata files from
the results directory, enriches the curated list below with that metadata, and
writes ``secondary-highlights.json`` into the same directory.
"""

import json
import sys
from collections import defaultdict
from pathlib import Path

RESULTS_DIR = Path(__file__).parent.parent / "results"

# Verified indicator IDs grouped by secondary category and sub-category.
# Each entry: (idno, database_id, name, transport_relevance)
HIGHLIGHTS = {
    "Climate & Emissions": {
        "Transport GHG & CO2": [
            ("WRI_CLIMATEWATCH_ALL_GHG_TRANSPORT", "WRI_CLIMATEWATCH",
             "GHG emissions from transportation sector", "Direct transport emissions"),
            ("WB_WDI_EN_GHG_CH4_TR_MT_CE_AR5", "WB_WDI",
             "Methane (CH4) emissions from Transport (Energy)",
             "Transport methane specifically"),
            ("OWID_CB_CO2", "OWID_CB",
             "Annual CO2 emissions (total, excl. LULUCF)", "Context for transport share"),
            ("OWID_CB_CO2_PER_CAPITA", "OWID_CB",
             "Annual CO2 emissions per capita", "Per-capita emissions context"),
            ("WB_WDI_EN_ATM_CO2E_KT", "WB_WDI",
             "CO2 emissions (kt)", "National total for transport share calc"),
            ("WB_WDI_EN_ATM_CO2E_PC", "WB_WDI",
             "CO2 emissions per capita (metric tons)", "Per-capita context"),
            ("OWID_CB_OIL_CO2", "OWID_CB",
             "Annual CO2 emissions from oil", "Transport is major oil consumer"),
            ("WB_CSC_EN_ATM_GHGT_GT_CE", "WB_CSC",
             "Total GHG emissions (MtCO2eq/year)", "National GHG totals"),
        ],
        "Air Quality & Pollution": [
            ("WB_WDI_EN_ATM_PM25_MC_M3", "WB_WDI",
             "PM2.5 air pollution, mean annual exposure (ug/m3)",
             "Transport is major PM2.5 source"),
            ("WB_WDI_EN_ATM_PM25_MC_ZS", "WB_WDI",
             "PM2.5 pollution, pop exposed above WHO guideline",
             "Health impact of transport pollution"),
            ("WB_CSC_EN_ATM_HAZA", "WB_CSC",
             "% people exposed to hazardous air quality", "Transport pollution exposure"),
        ],
        "Climate Risk & Disasters": [
            ("UNDRR_SFM_A1", "UNDRR_SFM",
             "Deaths and missing persons from disasters",
             "Transport infrastructure vulnerability"),
            ("UNDRR_SFM_D1", "UNDRR_SFM",
             "Damage to critical infrastructure from disasters",
             "Direct infrastructure damage"),
            ("UNDRR_SFM_C1", "UNDRR_SFM",
             "Direct economic loss from disasters (USD)", "Economic cost of infra damage"),
            ("WB_WDI_EN_CLC_MDAT_ZS", "WB_WDI",
             "Droughts, floods, extreme temps (% pop affected)",
             "Climate risk to transport infra"),
            ("WB_THINK_HAZARD_FL_LEVEL", "WB_THINK_HAZARD",
             "Hazard level for Floods (1-4)", "Flood risk to roads/rail"),
            ("WB_THINK_HAZARD_CF_LEVEL", "WB_THINK_HAZARD",
             "Hazard level for Coastal Floods (1-4)", "Coastal transport infra risk"),
            ("IMF_CDIR_ECFRMH", "IMF_CDIR",
             "Climate-driven hazard & exposure index", "Climate risk composite"),
            ("IMF_CDIR_ECFRMR", "IMF_CDIR",
             "Climate-driven risk indicator (INFORM)", "Overall climate risk"),
        ],
        "Fossil Fuel Subsidies (transport-specific)": [
            ("IMF_FFS_ECGFT", "IMF_FFS",
             "Fossil Fuel Subsidies - Total (% GDP)", "Total subsidy context"),
            ("IMF_FFS_ECGFTIG", "IMF_FFS",
             "Implicit Fossil Fuel Subsidies - Congestion (% GDP)",
             "Congestion cost not priced into transport"),
            ("IMF_FFS_ECGFTIR", "IMF_FFS",
             "Implicit Fossil Fuel Subsidies - Road damage (% GDP)",
             "Road damage cost not priced in"),
            ("IMF_FFS_ECGFTIA", "IMF_FFS",
             "Implicit Fossil Fuel Subsidies - Accidents (% GDP)",
             "Accident cost not priced in"),
            ("IMF_FFS_ECGFTIP", "IMF_FFS",
             "Implicit Fossil Fuel Subsidies - Petroleum (% GDP)",
             "Petroleum underpricing"),
            ("IMF_FFS_ECGFTEP", "IMF_FFS",
             "Explicit Fossil Fuel Subsidies - Petroleum (% GDP)",
             "Direct petroleum subsidies"),
            ("IMF_FFS_ECGFTIL", "IMF_FFS",
             "Implicit Fossil Fuel Subsidies - Local Air Pollution (% GDP)",
             "Transport air pollution cost"),
        ],
    },
    "Energy & Fuel": {
        "Fuel Prices & Consumption": [
            ("WEF_TTDI_FUELPRICE", "WEF_TTDI",
             "Fuel price levels, US$/litre", "Direct transport cost"),
            ("WB_WDI_EG_USE_PCAP_KG_OE", "WB_WDI",
             "Energy use per capita (kg oil equivalent)", "Energy intensity context"),
            ("WB_WDI_EG_GDP_PUSE_KO_PP", "WB_WDI",
             "GDP per unit of energy use (PPP$/kg oil eq)", "Energy efficiency"),
            ("WB_WDI_EG_USE_COMM_FO_ZS", "WB_WDI",
             "Fossil fuel energy consumption (% of total)", "Fossil fuel dependence"),
            ("WB_WDI_TX_VAL_FUEL_ZS_UN", "WB_WDI",
             "Fuel exports (% of merchandise exports)", "Fuel trade dependence"),
            ("WB_WDI_TM_VAL_FUEL_ZS_UN", "WB_WDI",
             "Fuel imports (% of merchandise imports)", "Fuel import dependence"),
        ],
        "Electricity & Electrification": [
            ("WB_WDI_EG_ELC_ACCS_ZS", "WB_WDI",
             "Access to electricity (% population)", "EV/electrification potential"),
            ("WB_WDI_EG_ELC_ACCS_RU_ZS", "WB_WDI",
             "Access to electricity, rural (% rural pop)",
             "Rural electrification for e-mobility"),
            ("WB_WDI_EG_ELC_LOSS_ZS", "WB_WDI",
             "Electric power T&D losses (% output)", "Grid quality for EV charging"),
            ("WB_WDI_EG_FEC_RNEW_ZS", "WB_WDI",
             "Renewable energy consumption (% total)", "Green transport potential"),
            ("WB_SE4ALL_EG_EGEN_RNEW", "WB_SE4ALL",
             "Installed renewable electricity capacity (W/capita)",
             "Renewable infra capacity"),
        ],
    },
    "Conflict & Fragility": {
        "Political Stability": [
            ("WB_WGI_PV_EST", "WB_WGI",
             "Political Stability & Absence of Violence: Estimate",
             "Stability for infra investment"),
            ("WB_WGI_PV_PER_RNK", "WB_WGI",
             "Political Stability: Percentile Rank", "Comparative stability"),
            ("BS_BTI_Q13_3", "BS_BTI",
             "Conflict intensity (BTI, 0-10)", "Conflict impact on transport"),
            ("WB_WDI_VC_BTL_DETH", "WB_WDI",
             "Battle-related deaths", "Active conflict indicator"),
            ("WEF_TTDI_ORGVIOLENCE", "WEF_TTDI",
             "Organized violence deaths per 100k pop",
             "Violence affecting transport safety"),
        ],
        "Displacement & Migration": [
            ("WB_WDI_SM_POP_FDIP", "WB_WDI",
             "Forcibly displaced people", "Displacement creates transport demand"),
            ("WB_WDI_SM_POP_IDPC", "WB_WDI",
             "Internally displaced persons (IDPs)",
             "Internal displacement / transport need"),
            ("WB_WDI_VC_IDP_NWDS", "WB_WDI",
             "New displacement from disasters", "Disaster displacement"),
            ("WB_SSGD_NET_MIGRATION", "WB_SSGD",
             "Net migration", "Migration flows affect transport"),
            ("WB_KNOMAD_MRI", "WB_KNOMAD",
             "Remittance inflows (US$ million)", "Diaspora economic linkages"),
        ],
        "Governance Quality": [
            ("WB_WGI_GE_EST", "WB_WGI",
             "Government Effectiveness: Estimate", "Capacity to deliver transport infra"),
            ("WB_WGI_RQ_EST", "WB_WGI",
             "Regulatory Quality: Estimate", "Transport regulation quality"),
            ("WB_WGI_CC_EST", "WB_WGI",
             "Control of Corruption: Estimate", "Corruption in infrastructure"),
            ("WB_WGI_RL_EST", "WB_WGI",
             "Rule of Law: Estimate", "Contract enforcement for PPPs"),
        ],
    },
    "Land Use & Urbanization": {
        "Urbanization": [
            ("WB_WDI_SP_URB_TOTL", "WB_WDI",
             "Urban population (total)", "Urban transport demand"),
            ("WB_WDI_SP_URB_TOTL_IN_ZS", "WB_WDI",
             "Urban population (% of total)", "Urbanization rate"),
            ("WB_WDI_SP_URB_GROW", "WB_WDI",
             "Urban population growth (annual %)", "Urban transport demand growth"),
            ("WB_WDI_SP_RUR_TOTL_ZS", "WB_WDI",
             "Rural population (% of total)", "Rural transport need"),
            ("WB_WDI_EN_URB_MCTY_TL_ZS", "WB_WDI",
             "Pop in agglomerations >1M (%)", "Megacity transport challenge"),
            ("WB_WDI_EN_POP_SLUM_UR_ZS", "WB_WDI",
             "Slum population (% urban)", "Informal transport access need"),
        ],
        "Land Area & Geography": [
            ("WB_WDI_AG_LND_TOTL_K2", "WB_WDI",
             "Land area (sq km)", "Transport network scale context"),
            ("WB_WDI_AG_SRF_TOTL_K2", "WB_WDI",
             "Surface area (sq km)", "Country size context"),
            ("WB_WDI_EN_POP_DNST", "WB_WDI",
             "Population density (per sq km)", "Network density demand"),
            ("WB_WDI_AG_LND_TOTL_UR_K2", "WB_WDI",
             "Urban land area (sq km)", "Urban transport footprint"),
            ("WB_WDI_AG_LND_EL5M_ZS", "WB_WDI",
             "Land below 5m elevation (% total)", "Sea level rise transport risk"),
            ("WB_WDI_AG_LND_EL5M_UR_K2", "WB_WDI",
             "Urban land below 5m elevation (sq km)", "Coastal urban infra at risk"),
            ("WB_ESG_AG_LND_AGRI_ZS", "WB_ESG",
             "Agricultural land (% of land area)", "Land competition with transport"),
            ("WB_WDI_AG_LND_FRST_ZS", "WB_WDI",
             "Forest area (% of land area)", "Deforestation / road building"),
        ],
    },
    "Water & Waterways": {
        "Inland Water & Flood Risk": [
            ("WB_WDI_ER_H2O_INTR_K3", "WB_WDI",
             "Renewable freshwater resources (billion m3)", "Inland waterway potential"),
            ("FAO_AS_4197", "FAO_AS",
             "Total dam capacity (km3)", "Dam infrastructure / navigation"),
            ("WB_CLEAR_AQUASTAT_4471", "WB_CLEAR",
             "Dam capacity per capita (m3/person)", "Water infra capacity"),
            ("FAO_AS_4193", "FAO_AS",
             "Exploitable renewable surface water", "Surface water / river transport"),
            ("FAO_AS_4543", "FAO_AS",
             "Flood occurrence (WRI)", "Flood risk to transport"),
            ("WB_CLEAR_RFR_WRI", "WB_CLEAR",
             "Riverine flood risk (score)", "River flood risk to transport"),
        ],
        "Coastal & Maritime Context": [
            ("OHI_OHI_GIS", "OHI_OHI",
             "Ocean Health Index", "Marine environment for shipping"),
            ("WB_ESG_EN_CLC_CSTP_ZS", "WB_ESG",
             "Coastal protection (1=Low to 4=High)", "Coastal infra protection"),
            ("UNEP_OPH_MAR_CST_HAB", "UNEP_OPH",
             "Marine and Coastal Habitats (sq km)", "Coastal environment context"),
            ("WB_CLEAR_ILO_WISE_PT", "WB_CLEAR",
             "Water-dependent employment (% employment)",
             "Blue economy / port dependence"),
        ],
    },
    "Trade & Economic Corridors": {
        "Trade Volumes & Facilitation": [
            ("WB_WDI_NE_TRD_GNFS_ZS", "WB_WDI",
             "Trade (% of GDP)", "Trade openness / transport demand"),
            ("WB_WDI_TG_VAL_TOTL_GD_ZS", "WB_WDI",
             "Merchandise trade (% of GDP)", "Goods trade intensity"),
            ("WB_WDI_IC_CUS_DURS_EX", "WB_WDI",
             "Time to export, border compliance (hours)", "Export facilitation"),
            ("WB_WDI_IC_CUS_DURS_IM", "WB_WDI",
             "Time to import, border compliance (hours)", "Import facilitation"),
            ("WB_WDI_LP_IMP_DURS_MD", "WB_WDI",
             "Lead time to import, median (days)", "Import logistics speed"),
            ("WB_WDI_LP_EXP_DURS_MD", "WB_WDI",
             "Lead time to export, median (days)", "Export logistics speed"),
            ("WEF_GCI_CLEAREFF", "WEF_GCI",
             "Border clearance efficiency (1-5)", "Customs efficiency score"),
            ("WB_WDI_TM_TAX_MRCH_SM_AR_ZS", "WB_WDI",
             "Applied tariff rate, simple mean, all products (%)", "Trade barriers"),
        ],
        "Transport Services Trade": [
            ("WB_WDI_TX_VAL_TRAN_ZS_WT", "WB_WDI",
             "Transport services (% commercial service exports)",
             "Transport services export share"),
            ("WB_WDI_TM_VAL_TRAN_ZS_WT", "WB_WDI",
             "Transport services (% commercial service imports)",
             "Transport services import share"),
            ("WB_WDI_BX_GSR_TRAN_ZS", "WB_WDI",
             "Transport services (% service exports, BoP)",
             "Transport in services balance"),
            ("WB_WDI_BM_GSR_TRAN_ZS", "WB_WDI",
             "Transport services (% service imports, BoP)",
             "Transport in services balance"),
            ("WB_WDI_BG_GSR_NFSV_GD_ZS", "WB_WDI",
             "Trade in services (% GDP)", "Services trade intensity"),
        ],
        "FDI & Investment": [
            ("WB_WDI_BX_KLT_DINV_WD_GD_ZS", "WB_WDI",
             "FDI net inflows (% GDP)", "Investment climate for transport"),
            ("WB_WDI_BX_KLT_DINV_CD_WD", "WB_WDI",
             "FDI net inflows (current US$)", "FDI scale"),
        ],
    },
    "Government Spending & Investment": {
        "Transport Spending": [
            ("IMF_COFOG_GEAT_GF0405", "IMF_GFSCOFOG",
             "Government expenditure on transport (% GDP)", "Direct transport spending"),
            ("IMF_COFOG_GEA_GF04", "IMF_GFSCOFOG",
             "Government expenditure on economic affairs (% GDP)",
             "Broader economic spending"),
            ("IMF_ET_ECGTEN", "IMF_ET",
             "Taxes on Energy (including fuel for transport)",
             "Transport taxation (energy)"),
            ("IMF_ET_ECGTET", "IMF_ET",
             "Taxes on Transport (excluding fuel)", "Transport taxation (direct)"),
        ],
        "Infrastructure Investment": [
            ("WB_WDI_IE_PPI_TRAN_CD", "WB_WDI",
             "Investment in transport with private participation (US$)",
             "Private transport investment"),
            ("WB_WDI_IE_PPN_TRAN_CD", "WB_WDI",
             "PPP investment in transport (US$)", "PPP transport investment"),
            ("WB_PPI_TOT_INV", "WB_PPI",
             "Private Participation in Infrastructure - Total (US$ million)",
             "Total PPI context"),
            ("WB_WDI_IE_PPI_ENGY_CD", "WB_WDI",
             "Investment in energy with private participation (US$)",
             "Energy infra investment (comparison)"),
            ("IDB_INFRALATAM_3", "IDB_INFRALATAM",
             "Public investment in economic infrastructure (% GDP)",
             "Public infra investment benchmark"),
        ],
        "Budget Transparency": [
            ("IBP_OBS_OBI", "IBP_OBS",
             "Open Budget Index Score (0-100)", "Budget transparency for infra spend"),
        ],
    },
    "Population & Demographics": {
        "Demographics": [
            ("WB_WDI_SP_POP_TOTL", "WB_WDI",
             "Population, total", "Transport demand denominator"),
            ("WB_WDI_SP_POP_GROW", "WB_WDI",
             "Population growth (annual %)", "Transport demand growth"),
            ("WB_WDI_NY_GDP_PCAP_PP_CD", "WB_WDI",
             "GDP per capita, PPP (current international $)", "Affordability context"),
            ("WB_WDI_NY_GDP_MKTP_CD", "WB_WDI",
             "GDP (current US$)", "Economic scale"),
            ("WB_WDI_SI_POV_DDAY", "WB_WDI",
             "Poverty headcount ratio at $2.15/day (%)",
             "Transport affordability context"),
        ],
        "Labour": [
            ("WB_WDI_SL_TLF_TOTL_IN", "WB_WDI",
             "Labor force, total", "Commuting demand"),
            ("WB_WDI_SL_UEM_TOTL_ZS", "WB_WDI",
             "Unemployment (% total labor force)", "Economic context"),
        ],
    },
}


def _load_cached_metadata(results_dir):
    """Return ``{idno: metadata_dict}`` from the ``secondary-*.json`` cache files.

    Only files whose top-level JSON value is a list are used; this also means
    the script's own output (``secondary-highlights.json``, a dict) is ignored
    on re-runs. Files are read in sorted order so that, when the same ``idno``
    appears in several files, the winner is deterministic (last file wins).
    Unreadable or malformed files are reported on stderr and skipped.
    """
    all_meta = {}
    if not results_dir.is_dir():
        return all_meta

    for cache_file in sorted(results_dir.glob("secondary-*.json")):
        try:
            with open(cache_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(
                f"Warning: skipping unreadable cache file {cache_file.name}: {exc}",
                file=sys.stderr,
            )
            continue

        if not isinstance(data, list):
            continue
        for item in data:
            # Skip malformed entries individually instead of dropping the file.
            if not isinstance(item, dict):
                continue
            idno = item.get("idno")
            if isinstance(idno, str) and idno:
                all_meta[idno] = item
    return all_meta


def _build_output(highlights, all_meta):
    """Enrich the curated indicators with cached metadata.

    Returns a ``(output, total, missing)`` tuple: the per-category output
    mapping, the total number of indicators, and the list of indicator IDs for
    which no cached metadata was found.
    """
    output = {}
    total = 0
    missing = []
    for category, subcats in highlights.items():
        cat_data = {"subcategories": {}, "total": 0}
        for subcat, indicators in subcats.items():
            subcat_data = []
            for idno, db, name, relevance in indicators:
                meta = all_meta.get(idno, {})
                if not meta:
                    missing.append(idno)
                subcat_data.append({
                    "idno": idno,
                    "database_id": db,
                    # Prefer the cached name; fall back to the curated one.
                    "name": meta.get("name") or name,
                    "definition_short": meta.get("definition_short"),
                    "measurement_unit": meta.get("measurement_unit"),
                    "periodicity": meta.get("periodicity"),
                    "transport_relevance": relevance,
                })
            cat_data["subcategories"][subcat] = subcat_data
            cat_data["total"] += len(subcat_data)
            total += len(subcat_data)
        output[category] = cat_data
    return output, total, missing


def main(results_dir=None, highlights=None):
    """Build ``secondary-highlights.json`` and print a per-category summary.

    Args:
        results_dir: Directory holding the cached ``secondary-*.json`` files
            and receiving the output file. Defaults to ``RESULTS_DIR``. It is
            created if it does not exist.
        highlights: Mapping of ``category -> sub-category -> list of
            (idno, database_id, name, transport_relevance)`` tuples. Defaults
            to ``HIGHLIGHTS``.

    Returns:
        The summary dict that was written to disk, with keys
        ``total_secondary_indicators`` and ``categories``.
    """
    results_dir = RESULTS_DIR if results_dir is None else Path(results_dir)
    if highlights is None:
        highlights = HIGHLIGHTS

    # Load all available indicator metadata for enrichment
    all_meta = _load_cached_metadata(results_dir)

    # Build the output
    output, total, missing = _build_output(highlights, all_meta)
    summary = {"total_secondary_indicators": total, "categories": output}

    results_dir.mkdir(parents=True, exist_ok=True)
    with open(results_dir / "secondary-highlights.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    # Print
    print(f"Total secondary indicators: {total}")
    if missing:
        print(f"Not found in cached results (will still work via API): {len(missing)}")
        for m in missing:
            print(f"  - {m}")
    print()
    for cat, data in highlights.items():
        cat_total = sum(len(inds) for inds in data.values())
        print(f"{cat}: {cat_total} indicators")
        for subcat, inds in data.items():
            print(f"  {subcat}: {len(inds)}")
    print("\nSaved: secondary-highlights.json")
    return summary


if __name__ == "__main__":
    main()