Skip to content

Serialization config

Configuration loader for world serialization.

Loads YAML configuration specifying which properties and attributes to include when exporting world state to HDF5.

SerializationConfig

Loads and validates serialization configuration from YAML.

The config file specifies which properties to include for:

- Population (person.properties)
- Geography (geographical_unit.properties)
- Venues (venue.properties, per-type)
- Subsets (subset properties)
- Relationships (activity_map, hierarchies)

Source code in may/serialization/serialization_config.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class SerializationConfig:
    """
    Loads and validates serialization configuration from YAML.

    The config file specifies which properties to include for:
    - Population (person.properties)
    - Geography (geographical_unit.properties)
    - Venues (venue.properties, per-type)
    - Subsets (subset properties)
    - Relationships (activity_map, hierarchies)

    A section (or a ``properties`` entry) that is present in the YAML but
    left empty is parsed by YAML as ``None``; it is treated exactly like a
    missing one. A section holding anything other than a mapping is
    rejected with ``ValueError`` while loading.
    """

    def __init__(self, config_file):
        """
        Initialize SerializationConfig.

        Sets every parsed attribute to its default and then loads the file,
        so construction raises whatever ``_load_config`` raises.

        Args:
            config_file: Path to YAML configuration file
        """
        self.config_file = pr.resolve(config_file)
        self.config = None

        # Parsed configuration sections
        self.population_properties = []
        self.geography_include_coordinates = True
        self.geography_properties = []
        self.venue_global_settings = {}
        self.venue_type_properties = {}
        self.subset_properties = []
        self.relationships = {}
        self.output_settings = {}

        self._load_config()

    @staticmethod
    def _as_mapping(value, label):
        """
        Normalise a config node that is expected to be a mapping.

        Args:
            value: Node read from the parsed YAML (may be None)
            label: Dotted name of the node, used in the error message

        Returns:
            ``{}`` when value is None, otherwise value itself

        Raises:
            ValueError: If value is neither None nor a dict
        """
        if value is None:
            return {}
        if not isinstance(value, dict):
            raise ValueError(
                f"Serialization config entry '{label}' must be a mapping, "
                f"got {type(value).__name__}"
            )
        return value

    @staticmethod
    def _as_list(value):
        """Return a new empty list when value is None, otherwise value unchanged."""
        return [] if value is None else value

    def _section(self, name):
        """Return top-level section ``name`` as a dict (empty if missing or null)."""
        return self._as_mapping(self.config.get(name), name)

    def _load_config(self):
        """
        Load and parse the YAML configuration file.

        Raises:
            FileNotFoundError: If the config file does not exist
            ValueError: If the file is empty, its top level is not a
                mapping, or a section is not a mapping
        """
        if not os.path.exists(self.config_file):
            raise FileNotFoundError(f"Serialization config not found: {self.config_file}")

        logger.info(f"Loading serialization config from {self.config_file}")

        # Explicit encoding: do not depend on the platform's default locale.
        with open(self.config_file, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)

        if not self.config:
            raise ValueError(f"Empty serialization config: {self.config_file}")
        if not isinstance(self.config, dict):
            # e.g. a top-level YAML list or scalar; every parser below needs .get()
            raise ValueError(
                f"Serialization config must be a mapping at the top level: {self.config_file}"
            )

        # Parse each section
        self._parse_population()
        self._parse_geography()
        self._parse_venues()
        self._parse_subsets()
        self._parse_relationships()
        self._parse_output()

    def _parse_population(self):
        """Parse population configuration section."""
        pop_config = self._section('population')
        self.population_properties = self._as_list(pop_config.get('properties'))

        logger.info(f"Population: {len(self.population_properties)} additional properties to serialize")
        if self.population_properties:
            logger.info(f"  Properties: {self.population_properties}")

    def _parse_geography(self):
        """Parse geography configuration section."""
        geo_config = self._section('geography')
        self.geography_include_coordinates = geo_config.get('include_coordinates', True)
        self.geography_properties = self._as_list(geo_config.get('properties'))

        logger.info(f"Geography: coordinates={self.geography_include_coordinates}, "
                   f"{len(self.geography_properties)} additional properties")

    def _parse_venues(self):
        """Parse venues configuration section (global settings and per-type properties)."""
        venues_config = self._section('venues')

        # Global settings
        self.venue_global_settings = self._as_mapping(
            venues_config.get('global'), 'venues.global'
        )

        # Per-type properties; rebuilt from scratch so nothing stale survives a reload
        types_config = self._as_mapping(venues_config.get('types'), 'venues.types')
        venue_type_properties = {}
        for venue_type, type_config in types_config.items():
            # A bare "venue_type:" key with no body parses to None.
            type_config = self._as_mapping(type_config, f'venues.types.{venue_type}')
            properties = self._as_list(type_config.get('properties'))
            venue_type_properties[venue_type] = properties

            if properties:
                logger.info(f"Venue '{venue_type}': {len(properties)} properties to serialize")
                logger.info(f"  Properties: {properties}")
            else:
                logger.debug(f"Venue '{venue_type}': minimal serialization (core attributes only)")
        self.venue_type_properties = venue_type_properties

    def _parse_subsets(self):
        """Parse subsets configuration section."""
        subsets_config = self._section('subsets')
        self.subset_properties = self._as_list(subsets_config.get('properties'))

    def _parse_relationships(self):
        """Parse relationships configuration section."""
        self.relationships = self._section('relationships')

    def _parse_output(self):
        """Parse output settings section."""
        self.output_settings = self._section('output')

    def get_person_properties(self):
        """
        Get list of person properties to serialize.

        Returns:
            List of property names from person.properties dict
        """
        return self.population_properties

    def get_geography_settings(self):
        """
        Get geography serialization settings.

        Returns:
            Dict with 'include_coordinates' and 'properties' keys
        """
        return {
            'include_coordinates': self.geography_include_coordinates,
            'properties': self.geography_properties
        }

    def get_venue_properties(self, venue_type):
        """
        Get list of properties to serialize for a specific venue type.

        Args:
            venue_type: Type of venue (e.g., "school", "household")

        Returns:
            List of property names to include, or [] if not configured
        """
        return self.venue_type_properties.get(venue_type, [])

    def get_venue_global_settings(self):
        """
        Get global venue serialization settings.

        Returns:
            Dict with global venue settings
        """
        return self.venue_global_settings

    def should_include_activity_map(self):
        """Check if activity_map should be serialized (defaults to True)."""
        return self.relationships.get('include_activity_map', True)

    def should_include_venue_hierarchy(self):
        """Check if venue parent-child hierarchy should be serialized (defaults to True)."""
        return self.relationships.get('include_venue_hierarchy', True)

    def should_include_geography_hierarchy(self):
        """Check if geography parent-child hierarchy should be serialized (defaults to True)."""
        return self.relationships.get('include_geography_hierarchy', True)

    def get_compression_settings(self):
        """
        Get HDF5 compression settings.

        Returns:
            Dict with 'compression' (default 'gzip') and
            'compression_level' (default 4) keys
        """
        return {
            'compression': self.output_settings.get('compression', 'gzip'),
            'compression_level': self.output_settings.get('compression_level', 4)
        }

    def get_metadata_settings(self):
        """
        Get metadata settings.

        Returns:
            Dict with 'include' (default True) and 'fields' (default [])
            keys
        """
        return {
            'include': self.output_settings.get('include_metadata', True),
            'fields': self.output_settings.get('metadata', [])
        }

__init__(config_file)

Initialize SerializationConfig.

Parameters:

Name Type Description Default
config_file

Path to YAML configuration file

required
Source code in may/serialization/serialization_config.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def __init__(self, config_file):
    """
    Prepare default settings and load the YAML configuration.

    Args:
        config_file: Path to YAML configuration file
    """
    # The path is stored after being passed through pr.resolve().
    resolved_path = pr.resolve(config_file)
    self.config_file = resolved_path
    # Raw parsed YAML document; remains None until _load_config() fills it.
    self.config = None

    # Defaults for each parsed section, overwritten while loading.
    self.population_properties = list()
    self.geography_include_coordinates = True
    self.geography_properties = list()
    self.venue_global_settings = dict()
    self.venue_type_properties = dict()
    self.subset_properties = list()
    self.relationships = dict()
    self.output_settings = dict()

    self._load_config()

get_compression_settings()

Get HDF5 compression settings.

Returns:

Type Description

Dict with 'compression' and 'compression_level' keys

Source code in may/serialization/serialization_config.py
175
176
177
178
179
180
181
182
183
184
185
def get_compression_settings(self):
    """
    Report the HDF5 compression options taken from the output settings.

    Returns:
        Dict with 'compression' (falls back to 'gzip') and
        'compression_level' (falls back to 4) keys
    """
    output = self.output_settings
    algorithm = output.get('compression', 'gzip')
    level = output.get('compression_level', 4)
    return {'compression': algorithm, 'compression_level': level}

get_geography_settings()

Get geography serialization settings.

Returns:

Type Description

Dict with 'include_coordinates' and 'properties' keys

Source code in may/serialization/serialization_config.py
130
131
132
133
134
135
136
137
138
139
140
def get_geography_settings(self):
    """
    Bundle the geography serialization options into one dict.

    Returns:
        Dict with 'include_coordinates' and 'properties' keys
    """
    settings = {}
    settings['include_coordinates'] = self.geography_include_coordinates
    settings['properties'] = self.geography_properties
    return settings

get_metadata_settings()

Get metadata settings.

Returns:

Type Description

Dict with 'include' and 'fields' keys

Source code in may/serialization/serialization_config.py
187
188
189
190
191
192
193
194
195
196
197
def get_metadata_settings(self):
    """
    Report the metadata options taken from the output settings.

    Returns:
        Dict with 'include' (falls back to True) and 'fields'
        (falls back to []) keys
    """
    output = self.output_settings
    include_flag = output.get('include_metadata', True)
    field_names = output.get('metadata', [])
    return {'include': include_flag, 'fields': field_names}

get_person_properties()

Get list of person properties to serialize.

Returns:

Type Description

List of property names from person.properties dict

Source code in may/serialization/serialization_config.py
121
122
123
124
125
126
127
128
def get_person_properties(self):
    """
    Return the configured person properties to serialize.

    Returns:
        List of property names from person.properties dict
    """
    person_properties = self.population_properties
    return person_properties

get_venue_global_settings()

Get global venue serialization settings.

Returns:

Type Description

Dict with global venue settings

Source code in may/serialization/serialization_config.py
154
155
156
157
158
159
160
161
def get_venue_global_settings(self):
    """
    Return the venue settings that apply to all venue types.

    Returns:
        Dict with global venue settings
    """
    global_settings = self.venue_global_settings
    return global_settings

get_venue_properties(venue_type)

Get list of properties to serialize for a specific venue type.

Parameters:

Name Type Description Default
venue_type

Type of venue (e.g., "school", "household")

required

Returns:

Type Description

List of property names to include, or [] if not configured

Source code in may/serialization/serialization_config.py
142
143
144
145
146
147
148
149
150
151
152
def get_venue_properties(self, venue_type):
    """
    Look up the properties to serialize for one venue type.

    Args:
        venue_type: Type of venue (e.g., "school", "household")

    Returns:
        List of property names to include, or [] if not configured
    """
    per_type = self.venue_type_properties
    fallback = []
    return per_type.get(venue_type, fallback)

should_include_activity_map()

Check if activity_map should be serialized.

Source code in may/serialization/serialization_config.py
163
164
165
def should_include_activity_map(self):
    """Tell whether activity_map should be serialized (True unless configured otherwise)."""
    relationship_flags = self.relationships
    return relationship_flags.get('include_activity_map', True)

should_include_geography_hierarchy()

Check if geography parent-child hierarchy should be serialized.

Source code in may/serialization/serialization_config.py
171
172
173
def should_include_geography_hierarchy(self):
    """Tell whether the geography parent-child hierarchy should be serialized (True unless configured otherwise)."""
    relationship_flags = self.relationships
    return relationship_flags.get('include_geography_hierarchy', True)

should_include_venue_hierarchy()

Check if venue parent-child hierarchy should be serialized.

Source code in may/serialization/serialization_config.py
167
168
169
def should_include_venue_hierarchy(self):
    """Tell whether the venue parent-child hierarchy should be serialized (True unless configured otherwise)."""
    relationship_flags = self.relationships
    return relationship_flags.get('include_venue_hierarchy', True)