Screenshots
Example Usage
data/
master_countries.sql # 281 canonical entities
countries.sql # 9,536 country-year records
categories.sql # 83,682 category records
field_name_mappings.sql # 1,132 field name standardization rules
fields/
country_fields_1990.sql.gz # Split by year (36 gzipped files)
...
country_fields_2025.sql.gz
schema/
create_tables.sql # DDL for all 5 tables
create_field_values.sql # DDL for FieldValues (structured sub-values)
etl/
build_archive.py # HTML parser (2000-2020)
load_gutenberg_years.py # Text parser (1990-2001)
reload_json_years.py # JSON loader (2021-2025)
build_field_mappings.py # Field name standardization
classify_entities.py # Entity type classification
repair_1996_truncated.py # Parses the original CIA text to repair 7 truncated 1996 country entries
validate_integrity.py # Data quality checks
export_to_sqlite.py # SQL Server -> SQLite export (with FTS5)
structured_parsing/
parse_field_values.py # Decompose text blobs into typed sub-values (55 parsers)
validate_field_values.py # Validation: spot checks, coverage, numeric stats
export_field_value