61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def load_world_from_hdf5(input_file, config_file="configs/2021/serialization_config.yaml", slim=False):
    """
    Load a World object from an HDF5 file created by export_to_hdf5.

    Geography is mandatory; population, venues, relationships and the
    aggregate statistics are loaded on a best-effort basis: a failure in any
    of them is logged as a warning and the World is built without that part.

    Args:
        input_file: Path to input HDF5 file
        config_file: Path to serialization YAML config
            (default: configs/2021/serialization_config.yaml)
        slim: When True, ``slim=True`` is forwarded to the population and
            venue loaders, aggregate statistics are computed from the file
            and attached to the world as ``_slim_statistics``, and
            relationship loading is skipped.

    Returns:
        World: Reconstructed World object. ``population`` and/or ``venues``
        are None when the corresponding group is absent or failed to load.

    Raises:
        OSError: If the file has no 'geography' group.
        Exception: Any error raised while loading geography is logged and
            re-raised unchanged.

    Example:
        >>> from may.serialization import load_world_from_hdf5
        >>> world = load_world_from_hdf5("world_state.h5")
        >>> print(world)
        <World: 1000 units, 95,231 people, 36,443 venues (36,443 households, 0 other)>
    """
    # Imported inside the function, as in the original code, rather than at module level.
    from may.world import World

    def _optional_strings(group, key):
        """Return dataset *key* of *group* as a str array, or None when absent."""
        if group is None or key not in group:
            return None
        return group[key][:].astype(str)

    logger.info("")
    logger.info("=" * 60)
    logger.info("LOADING WORLD FROM HDF5" + (" (slim mode)" if slim else ""))
    logger.info("=" * 60)
    logger.info(f"Input file: {input_file}")

    config = SerializationConfig(config_file)

    with h5py.File(input_file, 'r') as f:
        # Read metadata (only used for the log summary below)
        logger.info("Reading metadata...")
        num_people = f.attrs.get('num_people', 0)
        num_venues = f.attrs.get('num_venues', 0)
        num_geo_units = f.attrs.get('num_geo_units', 0)
        logger.info(f" Geography units: {num_geo_units:,}")
        logger.info(f" People: {num_people:,}")
        logger.info(f" Venues: {num_venues:,}")

        # Pre-load names and registries from metadata group (new format).
        # Every table is optional; a missing one is passed on as None.
        meta = f['metadata'] if 'metadata' in f else None
        names = meta['names'] if meta is not None and 'names' in meta else None
        registries = meta['registries'] if meta is not None and 'registries' in meta else None
        geo_names = _optional_strings(names, 'geography')
        venue_names = _optional_strings(names, 'venues')
        subset_names_arr = _optional_strings(names, 'subsets')
        level_registry = _optional_strings(registries, 'geo_levels')
        type_registry = _optional_strings(registries, 'venue_types')

        # Load Geography (mandatory: everything else is attached to it)
        if 'geography' not in f:
            logger.error("No geography data found in HDF5 file")
            # Carry the reason in the exception itself, not only in the log.
            raise OSError("No geography data found in HDF5 file")
        logger.info("Loading geography...")
        try:
            geography = _load_geography(f['geography'], config, geo_names, level_registry)
        except Exception as e:
            logger.error(f"Failed to load geography: {e}")
            raise

        # Load Population (best effort)
        laptime = time.perf_counter()
        population = None
        if 'population' in f:
            logger.info("Loading population...")
            try:
                population = _load_population(f['population'], geography, config, slim=slim)
            except Exception as e:
                logger.warning(f"Failed to load population: {e}")
                logger.warning("World will be created without population")
        else:
            logger.warning("No population data found in HDF5 file")
            logger.warning("World will be created without population")
        # Logged on every path, including when the population failed to load.
        logger.info(f"Population created in {time.perf_counter() - laptime:.2f} seconds")

        # Load Venues (best effort)
        venue_manager = None
        if 'venues' in f:
            logger.info("Loading venues...")
            try:
                venue_manager = _load_venues(
                    f['venues'], geography, config,
                    venue_names, type_registry, subset_names_arr, slim=slim,
                )
            except Exception as e:
                logger.warning(f"Failed to load venues: {e}")
                logger.warning("World will be created without venues")
        else:
            logger.warning("No venue data found in HDF5 file")
            logger.warning("World will be created without venues")

        # Load Relationships (activity_map)
        # Group was renamed from 'relationships' to 'activity_mappings' in new format
        activity_group_name = 'activity_mappings' if 'activity_mappings' in f else 'relationships'

        # Compute aggregate statistics from HDF5 data before closing the file
        slim_statistics = None
        unit_statistics = None
        if slim:
            logger.info("Computing slim statistics from HDF5...")
            try:
                slim_statistics = _compute_slim_statistics(f)
                logger.info("Slim statistics computed.")
            except Exception as e:
                logger.warning(f"Failed to compute slim statistics: {e}")

        # NOTE(review): assumed to run in both slim and full mode - confirm
        # this block is not meant to be nested under `if slim:`.
        if geography:
            logger.info("Computing per-unit statistics from HDF5...")
            try:
                unit_statistics = _compute_unit_statistics(f, geography)
                logger.info(f"Per-unit statistics computed for {len(unit_statistics)} units.")
            except Exception as e:
                logger.warning(f"Failed to compute unit statistics: {e}")

        if slim:
            logger.info("Slim mode: skipping relationship loading (member counts already injected).")
        elif activity_group_name in f and config.should_include_activity_map():
            logger.info("Loading relationships...")
            try:
                if population and venue_manager:
                    _load_relationships(f[activity_group_name], population, venue_manager)
                else:
                    logger.warning("Cannot load relationships: population or venues missing")
            except Exception as e:
                logger.warning(f"Failed to load relationships: {e}")
                logger.warning("World will be created without relationships")
        elif activity_group_name not in f:
            logger.info("No relationship data found in HDF5 file")

    # Create World object (nothing below reads from the file)
    world = World(geography=geography, population=population, venues=venue_manager)
    if slim_statistics is not None:
        world._slim_statistics = slim_statistics
    if unit_statistics is not None:
        world._unit_statistics = unit_statistics

    logger.info("")
    logger.info("Load complete")
    logger.info(f" {world}")
    logger.info("-" * 50)
    return world
|