Skip to content

World loader

World loader for importing from HDF5 format.

Loads world state (geography, population, venues) from an HDF5 file created by WorldSerializer.export_to_hdf5().

load_world_from_hdf5(input_file, config_file='configs/2021/serialization_config.yaml', slim=False)

Load a World object from an HDF5 file created by export_to_hdf5.

This method deserializes the complete world state (geography, population, venues, and relationships) from an HDF5 file.

Parameters:

Name Type Description Default
input_file

Path to input HDF5 file

required
config_file

Path to serialization YAML config (default: configs/2021/serialization_config.yaml)

'configs/2021/serialization_config.yaml'

Returns:

Name Type Description
World

Reconstructed World object with geography, population, venues, and relationships

Example

from may.serialization import load_world_from_hdf5
world = load_world_from_hdf5("world_state.h5")
print(world)

Source code in may/serialization/world_loader.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def load_world_from_hdf5(input_file, config_file="configs/2021/serialization_config.yaml", slim=False):
    """
    Load a World object from an HDF5 file created by export_to_hdf5.

    This method deserializes the complete world state (geography, population,
    venues, and relationships) from an HDF5 file.

    Geography is mandatory: a missing or unreadable geography aborts the load.
    Population, venues, and relationships are best-effort: a failure in any of
    them is logged as a warning and the World is built without that part.

    Args:
        input_file: Path to input HDF5 file
        config_file: Path to serialization YAML config
            (default: configs/2021/serialization_config.yaml)
        slim: If True, load in slim mode. The flag is forwarded to the
            population and venue loaders, relationship loading is skipped,
            and aggregate and per-unit statistics are computed from the file
            and attached to the returned World as ``_slim_statistics`` and
            ``_unit_statistics`` (each only when its computation succeeded).

    Returns:
        World: Reconstructed World object with geography, population, venues, and relationships

    Raises:
        OSError: If the file contains no 'geography' group. Opening the file
            with h5py may presumably raise OSError as well (e.g. missing file).
        Exception: Any error raised while loading geography is logged and
            re-raised unchanged.

    Example:
        >>> from may.serialization import load_world_from_hdf5
        >>> world = load_world_from_hdf5("world_state.h5")
        >>> print(world)
        <World: 1000 units, 95,231 people, 36,443 venues (36,443 households, 0 other)>
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import between may.world and may.serialization).
    from may.world import World

    logger.info("")
    logger.info("=" * 60)
    logger.info("LOADING WORLD FROM HDF5" + (" (slim mode)" if slim else ""))
    logger.info("=" * 60)
    logger.info(f"Input file: {input_file}")

    config = SerializationConfig(config_file)

    with h5py.File(input_file, 'r') as f:
        # Read metadata
        logger.info("Reading metadata...")
        num_people = f.attrs.get('num_people', 0)
        num_venues = f.attrs.get('num_venues', 0)
        num_geo_units = f.attrs.get('num_geo_units', 0)

        logger.info(f"  Geography units: {num_geo_units:,}")
        logger.info(f"  People: {num_people:,}")
        logger.info(f"  Venues: {num_venues:,}")

        # Pre-load names and registries from metadata group (new format).
        # Each stays None when the file does not provide it, and the loaders
        # receive None in that case.
        geo_names = None
        level_registry = None
        venue_names = None
        type_registry = None
        subset_names_arr = None

        if 'metadata' in f:
            meta = f['metadata']
            if 'names' in meta:
                names = meta['names']
                if 'geography' in names:
                    geo_names = names['geography'][:].astype(str)
                if 'venues' in names:
                    venue_names = names['venues'][:].astype(str)
                if 'subsets' in names:
                    subset_names_arr = names['subsets'][:].astype(str)
            if 'registries' in meta:
                registries = meta['registries']
                if 'geo_levels' in registries:
                    level_registry = registries['geo_levels'][:].astype(str)
                if 'venue_types' in registries:
                    type_registry = registries['venue_types'][:].astype(str)

        # Load Geography (mandatory: everything else is attached to it)
        if 'geography' not in f:
            logger.error("No geography data found in HDF5 file")
            # Carry the reason in the exception itself so callers that do not
            # see the log still get a meaningful error.
            raise OSError(f"No geography data found in HDF5 file: {input_file}")

        logger.info("Loading geography...")
        try:
            geography = _load_geography(f['geography'], config, geo_names, level_registry)
        except Exception as e:
            logger.error(f"Failed to load geography: {e}")
            raise

        # Load Population (best-effort)
        laptime = time.perf_counter()
        population = None
        if 'population' in f:
            logger.info("Loading population...")
            try:
                population = _load_population(f['population'], geography, config, slim=slim)
            except Exception as e:
                logger.warning(f"Failed to load population: {e}")
                logger.warning("World will be created without population")
        else:
            logger.warning("No population data found in HDF5 file")
            logger.warning("World will be created without population")
        # Only report a creation time when a population was actually created;
        # otherwise the message would contradict the warnings above.
        if population is not None:
            logger.info(f"Population created in {time.perf_counter() - laptime:.2f} seconds")

        # Load Venues (best-effort)
        venue_manager = None
        if 'venues' in f:
            logger.info("Loading venues...")
            try:
                venue_manager = _load_venues(f['venues'], geography, config, venue_names, type_registry, subset_names_arr, slim=slim)
            except Exception as e:
                logger.warning(f"Failed to load venues: {e}")
                logger.warning("World will be created without venues")
        else:
            logger.warning("No venue data found in HDF5 file")
            logger.warning("World will be created without venues")

        # Compute aggregate statistics from HDF5 data before closing the file
        slim_statistics = None
        unit_statistics = None
        if slim:
            logger.info("Computing slim statistics from HDF5...")
            try:
                slim_statistics = _compute_slim_statistics(f)
                logger.info("Slim statistics computed.")
            except Exception as e:
                logger.warning(f"Failed to compute slim statistics: {e}")
            if geography:
                logger.info("Computing per-unit statistics from HDF5...")
                try:
                    unit_statistics = _compute_unit_statistics(f, geography)
                    logger.info(f"Per-unit statistics computed for {len(unit_statistics)} units.")
                except Exception as e:
                    logger.warning(f"Failed to compute unit statistics: {e}")

        # Load Relationships (activity_map)
        # Group was renamed from 'relationships' to 'activity_mappings' in new format
        activity_group_name = 'activity_mappings' if 'activity_mappings' in f else 'relationships'

        if slim:
            logger.info("Slim mode: skipping relationship loading (member counts already injected).")
        elif activity_group_name in f and config.should_include_activity_map():
            logger.info("Loading relationships...")
            try:
                if population and venue_manager:
                    _load_relationships(f[activity_group_name], population, venue_manager)
                else:
                    logger.warning("Cannot load relationships: population or venues missing")
            except Exception as e:
                logger.warning(f"Failed to load relationships: {e}")
                logger.warning("World will be created without relationships")
        elif activity_group_name not in f:
            logger.info("No relationship data found in HDF5 file")
        else:
            # Group is present but the serialization config excludes it; say
            # so instead of skipping silently.
            logger.info("Relationship loading disabled by serialization config")

    # Create World object (the HDF5 file is closed at this point)
    world = World(geography=geography, population=population, venues=venue_manager)

    if slim_statistics is not None:
        world._slim_statistics = slim_statistics
    if unit_statistics is not None:
        world._unit_statistics = unit_statistics

    logger.info("")
    logger.info("Load complete")
    logger.info(f"  {world}")
    logger.info("-" * 50)

    return world