Skip to content

Strategies

AssignmentStrategy

Base class for assignment strategies.

Strategies are simplified - they don't evaluate complex conditions, just perform straightforward assignments based on context.

Source code in may/attribute_assignment/strategies.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
class AssignmentStrategy:
    """
    Common parent for all assignment strategies.

    A strategy deliberately stays simple: it does not evaluate complex
    conditions, it only performs a straightforward assignment using the
    supplied context.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """
        Keep hold of the rule configuration and the data manager.

        Args:
            config: Strategy configuration from YAML
            data_manager: DataSourceManager instance for data lookups
        """
        self.config, self.data_manager = config, data_manager
        # The rule's 'strategy' key is cached because error messages quote it.
        self.strategy_type = config.get('strategy')

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Produce the attribute value for one person.

        The base class has no behaviour of its own here; concrete
        strategies override this method.

        Args:
            person: Person object to assign to
            household: Household (venue) object
            context: Assignment context with state information

        Returns:
            Assigned attribute value

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement assign()")

    def _record_fallback(self, context: Dict[str, Any], reason: str):
        """Store the fallback reason under context['fallback_reason'] and log it."""
        context['fallback_reason'] = reason
        message = f"      ! Fallback: {reason}"
        logger.debug(message)

    def _fallback(self, person, household, context: Dict[str, Any], reason: str) -> Any:
        """
        Run the `fallback:` rule declared in the YAML configuration.

        The reason is always recorded in the context first. A rule without
        a `fallback:` entry is treated as an error rather than papered over
        with a default, so data and config problems surface immediately;
        an acceptable miss has to be declared explicitly in the config.

        Args:
            person: Person the primary strategy failed for
            household: Household (venue) object
            context: Assignment context; receives 'fallback_reason'
            reason: Short description of why the primary strategy failed

        Returns:
            The constant 'value', or whatever the probabilistic fallback
            strategy assigns.

        Raises:
            RuntimeError: No `fallback:` is configured on the rule.
            ValueError: The fallback is 'constant' without a 'value', or
                names a strategy other than 'probabilistic'/'constant'.
        """
        self._record_fallback(context, reason)

        fallback_rule = self.config.get('fallback')
        if not fallback_rule:
            raise RuntimeError(
                f"Strategy '{self.strategy_type}' failed for person {person.id} "
                f"({reason}) and the rule has no `fallback:` configured. "
                "Add an explicit fallback to the rule or fix the data/config."
            )

        kind = fallback_rule.get('strategy')

        if kind == 'constant':
            if 'value' in fallback_rule:
                return fallback_rule['value']
            raise ValueError(
                f"Constant fallback for strategy '{self.strategy_type}' has no "
                "'value' — other keys (e.g. 'data_source') are not read."
            )

        if kind == 'probabilistic':
            delegate = ProbabilisticStrategy(fallback_rule, self.data_manager)
            return delegate.assign(person, household, context)

        raise ValueError(
            f"Unsupported fallback strategy '{kind}' for "
            f"'{self.strategy_type}' — only 'probabilistic' and 'constant' exist."
        )

    def _get_person_by_role(self, context: Dict[str, Any], role_name: str):
        """
        Look up the person stored in the context for a role.

        The context key is the role name with a "_person" suffix.

        Args:
            context: Assignment context
            role_name: Role name (e.g., "primary_adult")

        Returns:
            Person object or None
        """
        return context.get(f"{role_name}_person")

    def _get_attribute_value(self, person, attribute_name: str) -> Any:
        """
        Read an attribute from a person.

        This is a thin wrapper around the shared get_person_attribute
        utility, which handles dot-notation, the properties dict, and the
        residence prefix.

        Args:
            person: Person object
            attribute_name: Name of attribute

        Returns:
            Attribute value or None
        """
        from may.utils.attribute_access import get_person_attribute as _read_attribute
        return _read_attribute(person, attribute_name)

__init__(config, data_manager)

Initialize strategy.

Parameters:

Name Type Description Default
config Dict[str, Any]

Strategy configuration from YAML

required
data_manager

DataSourceManager instance for data lookups

required
Source code in may/attribute_assignment/strategies.py
120
121
122
123
124
125
126
127
128
129
130
def __init__(self, config: Dict[str, Any], data_manager) -> None:
    """
    Initialize strategy.

    Stores both arguments on the instance and caches the rule's
    'strategy' entry as ``self.strategy_type``.

    Args:
        config: Strategy configuration from YAML
        data_manager: DataSourceManager instance for data lookups
    """
    self.config = config
    self.data_manager = data_manager
    # .get() is used, so no error is raised here if 'strategy' is absent.
    self.strategy_type = config.get('strategy')

assign(person, household, context)

Assign attribute value to a person.

Parameters:

Name Type Description Default
person

Person object to assign to

required
household

Household (venue) object

required
context Dict[str, Any]

Assignment context with state information

required

Returns:

Type Description
Any

Assigned attribute value

Source code in may/attribute_assignment/strategies.py
132
133
134
135
136
137
138
139
140
141
142
143
144
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Assign attribute value to a person.

    This base implementation does no work; it only raises.

    Args:
        person: Person object to assign to
        household: Household (venue) object
        context: Assignment context with state information

    Returns:
        Assigned attribute value

    Raises:
        NotImplementedError: Always; subclasses must implement assign().
    """
    raise NotImplementedError("Subclasses must implement assign()")

CategoricalSamplerStrategy

Bases: AssignmentStrategy

Samples ONE category from a probability distribution.

Works with MultiKeyLookupSource that returns {category: probability} dicts. Unlike ProbabilisticConditionsStrategy which samples multiple yes/no conditions, this samples exactly one mutually-exclusive category (e.g., one industry sector).

Supports batch assignment to reduce repeated lookups.

Source code in may/attribute_assignment/strategies.py
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
class CategoricalSamplerStrategy(AssignmentStrategy):
    """
    Samples ONE category from a probability distribution.

    Works with MultiKeyLookupSource that returns {category: probability} dicts.
    Unlike ProbabilisticConditionsStrategy which samples multiple yes/no conditions,
    this samples exactly one mutually-exclusive category (e.g., one industry sector).

    Sampling always draws an *index* and then returns the original category
    object, so category values keep their Python type instead of being
    coerced through a NumPy array.

    Supports batch assignment so that people sharing a distribution are
    sampled with a single NumPy call.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """
        Initialize categorical sampler strategy.

        Args:
            config: Strategy configuration from YAML; 'data_source' names the
                source that is looked up for each person.
            data_manager: DataSourceManager instance for data lookups
        """
        super().__init__(config, data_manager)
        self.data_source_name = config.get('data_source')

    def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
        """
        Batch assignment that samples each distinct distribution once.

        The data source is still queried once per person; people whose
        lookups returned the same {category: probability} items are then
        grouped and sampled together.

        Args:
            people_list: List of Person objects
            households_list: List of Household objects (parallel to people_list)
            contexts_list: List of context dicts (parallel to people_list)

        Returns:
            List of sampled category values (parallel to people_list).
            Entries stay None when the source is missing, the lookup is
            empty, or the probabilities are invalid.
        """
        from collections import defaultdict

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return [None] * len(people_list)

        # {distribution_key: [(index into people_list, probs dict), ...]}
        lookup_key_groups = defaultdict(list)

        for i, (person, household, context) in enumerate(zip(people_list, households_list, contexts_list)):
            probs = source.lookup(person, household, context)
            if probs:
                # frozenset is hashable and order-independent, and unlike
                # sorted() it does not need the category keys to be
                # mutually comparable (e.g. mixed int/str categories).
                lookup_key = frozenset(probs.items())
                lookup_key_groups[lookup_key].append((i, probs))

        results = [None] * len(people_list)

        for group_data in lookup_key_groups.values():
            indices = [idx for idx, _ in group_data]
            probs = group_data[0][1]  # Every member of the group has equal items

            categories, probabilities = self._sanitize_probabilities(probs)
            if categories is None:
                continue

            # BATCH SAMPLE: draw category positions for the whole group at once
            sampled_positions = np.random.choice(
                len(categories), size=len(indices), p=probabilities
            )

            for idx, position in zip(indices, sampled_positions):
                results[idx] = categories[position]

        return results

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Sample one category from probability distribution.

        Args:
            person: Person object
            household: Household object (optional)
            context: Assignment context

        Returns:
            Sampled category value (the original key object from the looked
            up distribution), or None when the source is missing, the lookup
            is empty, or the probabilities are invalid.
        """
        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return None

        probs = source.lookup(person, household, context)
        if not probs:
            logger.warning(f"No probabilities found for person {person.id}")
            return None

        categories, probabilities = self._sanitize_probabilities(probs, person.id)
        if categories is None:
            return None

        # Draw a position rather than passing `categories` to NumPy, which
        # would convert the list to an array and coerce mixed-type values.
        sampled = categories[np.random.choice(len(categories), p=probabilities)]

        logger.debug(f"Categorical: {sampled} for person {person.id}")
        return sampled

    @staticmethod
    def _sanitize_probabilities(probs, person_id=None):
        """
        Clamp negatives, normalize, and validate probabilities.

        Args:
            probs: Mapping of category -> probability (or weight)
            person_id: Identifier used in warning messages only

        Returns:
            (categories, probabilities) tuple, or (None, None) if invalid.
            Invalid means a NaN/infinite entry or a non-positive total
            after clamping.
        """
        import math

        categories = list(probs.keys())
        raw = list(probs.values())

        # NaN slips past both `p < 0` and `total <= 0`, and would otherwise
        # only fail later inside np.random.choice.
        if not all(math.isfinite(p) for p in raw):
            logger.warning(f"Invalid probabilities (non-finite value) for person {person_id}")
            return None, None

        # Clamp negatives to 0
        if any(p < 0 for p in raw):
            logger.warning(f"Negative probability clamped to 0 for person {person_id}")
        probabilities = [0.0 if p < 0 else p for p in raw]

        # Check total after clamping
        total = sum(probabilities)
        if total <= 0:
            logger.warning(f"Invalid probabilities (sum={total}) for person {person_id}")
            return None, None

        # Always normalize to avoid numpy tolerance issues
        return categories, [p / total for p in probabilities]

__init__(config, data_manager)

Initialize categorical sampler strategy.

Source code in may/attribute_assignment/strategies.py
1160
1161
1162
1163
def __init__(self, config: Dict[str, Any], data_manager) -> None:
    """
    Initialize categorical sampler strategy.

    Args:
        config: Strategy configuration; its 'data_source' entry is stored
            as ``self.data_source_name``.
        data_manager: Passed through to the parent initializer.
    """
    super().__init__(config, data_manager)
    # .get() is used, so a missing 'data_source' key does not raise here.
    self.data_source_name = config.get('data_source')

assign(person, household, context)

Sample one category from probability distribution.

Parameters:

Name Type Description Default
person

Person object

required
household

Household object (optional)

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

Sampled category value

Source code in may/attribute_assignment/strategies.py
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Sample one category from probability distribution.

    Args:
        person: Person object
        household: Household object (optional)
        context: Assignment context

    Returns:
        Sampled category value (the original key object from the looked up
        distribution), or None when the data source is missing, the lookup
        is empty, or the probabilities are invalid.
    """
    # Get data source
    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return None

    # Look up probability distribution
    probs = source.lookup(person, household, context)
    if not probs:
        logger.warning(f"No probabilities found for person {person.id}")
        return None

    categories, probabilities = self._sanitize_probabilities(probs, person.id)
    if categories is None:
        return None

    # Draw a position instead of handing `categories` to NumPy: converting
    # the list to an array coerces mixed-type categories (e.g. 1 -> '1')
    # and returns NumPy scalars rather than the original objects.
    position = np.random.choice(len(categories), p=probabilities)
    sampled = categories[position]

    logger.debug(f"Categorical: {sampled} for person {person.id}")
    return sampled

assign_batch(people_list, households_list, contexts_list)

Batch assignment to minimize repeated data lookups.

Groups people by their lookup keys and processes each group together.

Parameters:

Name Type Description Default
people_list List

List of Person objects

required
households_list List

List of Household objects (parallel to people_list)

required
contexts_list List[Dict[str, Any]]

List of context dicts (parallel to people_list)

required

Returns:

Type Description
List[Any]

List of sampled category values (parallel to people_list)

Source code in may/attribute_assignment/strategies.py
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
    """
    Batch assignment that samples each distinct distribution once.

    The data source is still queried once per person; people whose lookups
    returned the same {category: probability} items are then grouped and
    sampled together with a single NumPy call.

    Args:
        people_list: List of Person objects
        households_list: List of Household objects (parallel to people_list)
        contexts_list: List of context dicts (parallel to people_list)

    Returns:
        List of sampled category values (parallel to people_list). Entries
        stay None when the source is missing, the lookup is empty, or the
        probabilities are invalid.
    """
    from collections import defaultdict

    # Get data source
    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return [None] * len(people_list)

    # Group people by the distribution their lookup returned
    # lookup_key_groups: {distribution_key: [(index, probs), ...]}
    lookup_key_groups = defaultdict(list)

    for i, (person, household, context) in enumerate(zip(people_list, households_list, contexts_list)):
        # Look up probability distribution
        probs = source.lookup(person, household, context)
        if probs:
            # frozenset is hashable and order-independent, and unlike
            # sorted() it does not require the category keys to be
            # mutually comparable (e.g. mixed int/str categories).
            lookup_key = frozenset(probs.items())
            lookup_key_groups[lookup_key].append((i, probs))

    # Results array
    results = [None] * len(people_list)

    # Process each group
    for group_data in lookup_key_groups.values():
        indices = [idx for idx, _ in group_data]
        probs = group_data[0][1]  # Every member of the group has equal items

        categories, probabilities = self._sanitize_probabilities(probs)
        if categories is None:
            continue

        # BATCH SAMPLE: draw category positions for the whole group at once.
        # Sampling positions (not the categories themselves) keeps the
        # original category objects instead of NumPy-coerced copies.
        sampled_positions = np.random.choice(
            len(categories), size=len(indices), p=probabilities
        )

        # Assign results
        for idx, position in zip(indices, sampled_positions):
            results[idx] = categories[position]

    return results

CommutingLikelihoodStrategy

Bases: AssignmentStrategy

Assigns workplace location based on origin-destination commuting flows.

Samples from an origin-destination matrix weighted by likelihood. Can assign multiple attributes (e.g., workplace_location and work_mode).

Supports batch assignment to reduce repeated lookups.

Source code in may/attribute_assignment/strategies.py
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
class CommutingLikelihoodStrategy(AssignmentStrategy):
    """
    Assigns workplace location based on origin-destination commuting flows.

    Samples from an origin-destination matrix weighted by likelihood.
    Can assign multiple attributes (e.g., workplace_location and work_mode).

    Likelihoods are normalized before sampling, so the data source may
    supply either probabilities or raw non-negative weights.

    Supports batch assignment to reduce repeated lookups.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """
        Initialize commuting likelihood strategy.

        Args:
            config: Strategy configuration from YAML. 'data_source' names the
                O-D source; 'outputs' maps each output attribute to either
                'destination' or a metadata key.
            data_manager: DataSourceManager instance for data lookups
        """
        super().__init__(config, data_manager)
        self.data_source_name = config.get('data_source')
        self.outputs = config.get('outputs', {})

    def _resolve_origin_code(self, person) -> Optional[str]:
        """
        Resolve person's origin geographical unit to the correct level for O-D matrix lookup.

        When the first key column of the source's first file config is an
        'ancestor_lookup', the person's geographical unit is walked up to the
        configured level and the configured property of that ancestor is
        returned. In every other configuration the unit's own name is used.

        Args:
            person: Person object

        Returns:
            Origin code string, or None if resolution fails (no geographical
            unit, no data source, or no ancestor at the requested level)
        """
        origin_geo_unit = getattr(person, 'geographical_unit', None)
        if not origin_geo_unit:
            return None

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            return None

        # Only sources exposing a non-empty `_file_configs` can request an
        # ancestor lookup; the first key column is treated as the origin.
        file_configs = getattr(source, '_file_configs', None)
        key_columns = file_configs[0].get('key_columns', {}) if file_configs else None
        first_key_config = next(iter(key_columns.values())) if key_columns else None

        if isinstance(first_key_config, dict) and first_key_config.get('type') == 'ancestor_lookup':
            level = first_key_config.get('level')
            property_name = first_key_config.get('property', 'name')

            ancestor = origin_geo_unit.get_ancestor_by_level(level)
            if not ancestor:
                return None
            return getattr(ancestor, property_name)

        return origin_geo_unit.name

    @staticmethod
    def _normalize_likelihoods(likelihoods):
        """
        Scale likelihoods so they sum to 1, as np.random.choice requires.

        Returns:
            List of probabilities, or None when the total is not a positive
            number (zero, negative, or NaN).
        """
        total = sum(likelihoods)
        if not total > 0:
            return None
        return [lik / total for lik in likelihoods]

    def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
        """
        Batch assignment to minimize repeated O-D matrix lookups.

        Groups people by origin code and processes each group together.
        Every failure path (missing source, unresolved origin, no usable
        destinations) goes through the configured fallback, matching what
        assign() does for a single person.

        Args:
            people_list: List of Person objects
            households_list: List of Household objects (parallel to people_list)
            contexts_list: List of context dicts (parallel to people_list)

        Returns:
            List of assigned values (parallel to people_list)
            - If single output: list of values
            - If multiple outputs: list of dicts
        """
        from collections import defaultdict

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return [self._get_fallback(person, household, context)
                    for person, household, context in zip(people_list, households_list, contexts_list)]

        results = [None] * len(people_list)

        # Group people by origin_code
        origin_groups = defaultdict(list)
        for i, person in enumerate(people_list):
            origin_code = self._resolve_origin_code(person)
            if origin_code:
                origin_groups[origin_code].append(i)
            else:
                # Same handling as assign(): an unresolved origin is a
                # fallback case, not a silent None.
                logger.warning(f"Could not resolve origin code for person {person.id}")
                results[i] = self._get_fallback(person, households_list[i], contexts_list[i])

        # Process each origin group
        for origin_code, indices in origin_groups.items():
            # destinations is List[(destination, metadata_dict, likelihood)]
            destinations = source.lookup(origin_code)

            probabilities = None
            if not destinations:
                logger.warning(f"No destinations found for origin {origin_code}")
            else:
                dest_codes = [dest for dest, meta, lik in destinations]
                metadata_list = [meta for dest, meta, lik in destinations]
                probabilities = self._normalize_likelihoods(
                    [lik for dest, meta, lik in destinations]
                )
                if probabilities is None:
                    logger.warning(
                        f"Likelihoods for origin {origin_code} do not sum to a positive value"
                    )

            if probabilities is None:
                # Fill with fallback for this group
                for idx in indices:
                    results[idx] = self._get_fallback(
                        people_list[idx], households_list[idx], contexts_list[idx]
                    )
                continue

            # BATCH SAMPLE: Sample destinations for all people in this origin group at once
            sampled_indices = np.random.choice(
                len(dest_codes), size=len(indices), p=probabilities
            )

            # Build outputs for each person
            for idx, sampled_idx in zip(indices, sampled_indices):
                results[idx] = self._build_output(
                    dest_codes[sampled_idx], metadata_list[sampled_idx]
                )

        return results

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Assign workplace location and work mode based on commuting flows.

        Args:
            person: Person object
            household: Household object (optional, may be None for person-level assignment)
            context: Assignment context

        Returns:
            If single output: returns the assigned value
            If multiple outputs: returns dict with all assigned values
            On any lookup failure: whatever the configured fallback returns
        """
        # Resolve person's origin to correct geographical level
        origin_code = self._resolve_origin_code(person)
        if not origin_code:
            logger.warning(f"Could not resolve origin code for person {person.id}")
            return self._get_fallback(person, household, context)

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return self._get_fallback(person, household, context)

        # destinations is List[(destination, metadata_dict, likelihood)]
        destinations = source.lookup(origin_code)
        if not destinations:
            logger.warning(f"No destinations found for origin {origin_code}")
            return self._get_fallback(person, household, context)

        dest_codes = [dest for dest, meta, lik in destinations]
        metadata_list = [meta for dest, meta, lik in destinations]
        probabilities = self._normalize_likelihoods(
            [lik for dest, meta, lik in destinations]
        )
        if probabilities is None:
            logger.warning(
                f"Likelihoods for origin {origin_code} do not sum to a positive value"
            )
            return self._get_fallback(person, household, context)

        # Sample one destination
        idx = np.random.choice(len(dest_codes), p=probabilities)

        # Build output based on configured outputs
        result = self._build_output(dest_codes[idx], metadata_list[idx])
        logger.debug(f"Commuting: {person.id} -> {result}")
        return result

    def _build_output(self, sampled_dest, sampled_metadata):
        """
        Build return value from sampled destination and metadata.

        Each configured output maps an attribute name to a source: the
        literal 'destination' selects the sampled destination, anything
        else is read from the sampled metadata dict.

        Returns:
            Single value (if one output configured) or dict (if multiple).
            With no outputs configured the result is an empty dict.

        Raises:
            ValueError: A configured output source is neither 'destination'
                nor a key of the sampled metadata.
        """
        if len(self.outputs) == 1:
            output_attr, output_source = list(self.outputs.items())[0]
            if output_source == 'destination':
                return sampled_dest
            if output_source in sampled_metadata:
                return sampled_metadata[output_source]
            raise ValueError(
                f"Output source '{output_source}' not found in metadata keys "
                f"{list(sampled_metadata.keys())}. Check outputs config."
            )

        result = {}
        for output_attr, output_source in self.outputs.items():
            if output_source == 'destination':
                result[output_attr] = sampled_dest
            elif output_source in sampled_metadata:
                result[output_attr] = sampled_metadata[output_source]
            else:
                raise ValueError(
                    f"Output source '{output_source}' for attribute '{output_attr}' "
                    f"not found in metadata keys {list(sampled_metadata.keys())}. "
                    f"Check outputs config."
                )
        return result

    def _get_fallback(self, person, household, context):
        """Standard fallback for commuting; records reason COMMUTING_DATA_MISSING."""
        return self._fallback(person, household, context, "COMMUTING_DATA_MISSING")

__init__(config, data_manager)

Initialize commuting likelihood strategy.

Source code in may/attribute_assignment/strategies.py
790
791
792
793
794
def __init__(self, config: Dict[str, Any], data_manager) -> None:
    """
    Initialize commuting likelihood strategy.

    Args:
        config: Strategy configuration; 'data_source' and 'outputs' are
            read from it.
        data_manager: Passed through to the parent initializer.
    """
    super().__init__(config, data_manager)
    self.data_source_name = config.get('data_source')
    # 'outputs' defaults to an empty dict when the key is absent.
    self.outputs = config.get('outputs', {})

assign(person, household, context)

Assign workplace location and work mode based on commuting flows.

Parameters:

Name Type Description Default
person

Person object

required
household

Household object (optional, may be None for person-level assignment)

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

If single output: returns the assigned value

Any

If multiple outputs: returns dict with all assigned values

Source code in may/attribute_assignment/strategies.py
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Assign workplace location and work mode based on commuting flows.

    Likelihoods returned by the data source are normalized before sampling,
    so they may be probabilities or raw non-negative weights.

    Args:
        person: Person object
        household: Household object (optional, may be None for person-level assignment)
        context: Assignment context

    Returns:
        If single output: returns the assigned value
        If multiple outputs: returns dict with all assigned values
        On any lookup failure: whatever the configured fallback returns
    """
    # Resolve person's origin to correct geographical level
    origin_code = self._resolve_origin_code(person)
    if not origin_code:
        logger.warning(f"Could not resolve origin code for person {person.id}")
        return self._get_fallback(person, household, context)

    # Get data source
    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return self._get_fallback(person, household, context)

    # Look up destinations from O-D matrix
    destinations = source.lookup(origin_code)
    if not destinations:
        logger.warning(f"No destinations found for origin {origin_code}")
        return self._get_fallback(person, household, context)

    # destinations is List[(destination, metadata_dict, likelihood)]
    dest_codes = [dest for dest, meta, lik in destinations]
    likelihoods = [lik for dest, meta, lik in destinations]
    metadata_list = [meta for dest, meta, lik in destinations]

    # np.random.choice rejects weights that do not sum to 1, so normalize
    # first; a non-positive (or NaN) total cannot be sampled at all.
    total = sum(likelihoods)
    if not total > 0:
        logger.warning(f"Likelihoods for origin {origin_code} do not sum to a positive value")
        return self._get_fallback(person, household, context)
    probabilities = [lik / total for lik in likelihoods]

    # Sample one destination
    idx = np.random.choice(len(dest_codes), p=probabilities)
    sampled_dest = dest_codes[idx]
    sampled_metadata = metadata_list[idx]

    # Build output based on configured outputs
    result = self._build_output(sampled_dest, sampled_metadata)
    logger.debug(f"Commuting: {person.id} -> {result}")
    return result

assign_batch(people_list, households_list, contexts_list)

Batch assignment to minimize repeated O-D matrix lookups.

Groups people by origin code and processes each group together.

Parameters:

Name Type Description Default
people_list List

List of Person objects

required
households_list List

List of Household objects (parallel to people_list)

required
contexts_list List[Dict[str, Any]]

List of context dicts (parallel to people_list)

required

Returns:

Type Description
List[Any]

List of assigned values (parallel to people_list)

  • If single output: list of values
  • If multiple outputs: list of dicts
Source code in may/attribute_assignment/strategies.py
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
    """
    Batch assignment to minimize repeated O-D matrix lookups.

    Groups people by origin code so that each origin is looked up once and
    everyone sharing it is sampled in a single ``np.random.choice`` call.

    The per-person fallback (``self._get_fallback``) is used whenever a value
    cannot be sampled: the data source is missing, the person's origin code
    cannot be resolved, or the origin has no destinations in the data source.

    Args:
        people_list: List of Person objects
        households_list: List of Household objects (parallel to people_list)
        contexts_list: List of context dicts (parallel to people_list)

    Returns:
        List of assigned values (parallel to people_list); each sampled entry
        is whatever ``self._build_output`` returns for the drawn destination:
        - If single output: list of values
        - If multiple outputs: list of dicts
    """
    from collections import defaultdict

    # Without a data source nothing can be sampled: everyone gets the fallback.
    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return [self._get_fallback(person, household, context)
                for person, household, context in zip(people_list, households_list, contexts_list)]

    # Results array (parallel to people_list)
    results = [None] * len(people_list)

    # Group positions in people_list by origin_code
    origin_groups = defaultdict(list)
    for i, person in enumerate(people_list):
        origin_code = self._resolve_origin_code(person)
        if origin_code:
            origin_groups[origin_code].append(i)
        else:
            # Previously these people were never grouped and silently kept
            # None; route them through the same fallback as the other
            # failure paths in this method.
            logger.debug(f"No origin code for person at index {i}; using fallback")
            results[i] = self._get_fallback(person, households_list[i], contexts_list[i])

    # Process each origin group
    for origin_code, indices in origin_groups.items():
        # Look up destinations from O-D matrix (once per origin)
        destinations = source.lookup(origin_code)
        if not destinations:
            logger.warning(f"No destinations found for origin {origin_code}")
            # Fill with fallback for this group
            for idx in indices:
                results[idx] = self._get_fallback(people_list[idx], households_list[idx], contexts_list[idx])
            continue

        # Prepare sampling arrays
        # destinations is List[(destination, metadata_dict, likelihood)]
        dest_codes = [dest for dest, meta, lik in destinations]
        likelihoods = [lik for dest, meta, lik in destinations]
        metadata_list = [meta for dest, meta, lik in destinations]

        # BATCH SAMPLE: one draw per person in this origin group.
        # Positions (not codes) are sampled so the matching metadata can be
        # picked up by index afterwards.
        sampled_indices = np.random.choice(len(dest_codes), size=len(indices), p=likelihoods)

        # Build outputs for each person
        for idx, sampled_idx in zip(indices, sampled_indices):
            results[idx] = self._build_output(dest_codes[sampled_idx], metadata_list[sampled_idx])

    return results

ConstantStrategy

Bases: AssignmentStrategy

Assigns a fixed, constant value.

This is useful for static attributes or default values that apply unconditionally to a given role or household structure.

Source code in may/attribute_assignment/strategies.py
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
class ConstantStrategy(AssignmentStrategy):
    """
    Strategy that hands the same preconfigured value to everyone.

    Intended for static attributes or default values that apply
    unconditionally to a given role or household structure. When no value is
    configured, assignment is delegated to the base-class fallback.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Read the constant from ``config['value']`` (``None`` when the key is absent)."""
        super().__init__(config, data_manager)
        self.value = config.get('value', None)

    def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
        """Return one entry per person: the constant, or the per-person fallback if unset."""
        constant = self.value
        if constant is not None:
            return [constant] * len(people_list)

        # No constant configured: go through assign() person by person so the
        # warning and fallback behave exactly as in the single-person path.
        assigned = []
        for p, h, c in zip(people_list, households_list, contexts_list):
            assigned.append(self.assign(p, h, c))
        return assigned

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """Return the configured constant, or the fallback value when none is configured."""
        if self.value is not None:
            return self.value

        logger.warning(f"ConstantStrategy: No value configured for assignment to person {person.id}")
        return self._fallback(person, household, context, "NO_CONSTANT_VALUE")

__init__(config, data_manager)

Initialize constant strategy.

Source code in may/attribute_assignment/strategies.py
1294
1295
1296
1297
def __init__(self, config: Dict[str, Any], data_manager):
    """
    Set up the constant strategy.

    Args:
        config: Strategy configuration; the optional 'value' key holds the constant.
        data_manager: Passed through unchanged to the base-class initializer.
    """
    super().__init__(config, data_manager)
    # An absent 'value' key leaves self.value as None.
    self.value = config.get('value', None)

assign(person, household, context)

Assign the constant value.

Source code in may/attribute_assignment/strategies.py
1306
1307
1308
1309
1310
1311
1312
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Return the configured constant for this person.

    When no constant is configured (``self.value`` is ``None``), a warning is
    logged and the result of ``self._fallback`` with reason
    "NO_CONSTANT_VALUE" is returned instead.
    """
    if self.value is not None:
        return self.value

    logger.warning(f"ConstantStrategy: No value configured for assignment to person {person.id}")
    return self._fallback(person, household, context, "NO_CONSTANT_VALUE")

assign_batch(people_list, households_list, contexts_list)

Batch assignment - all receive the same value.

Source code in may/attribute_assignment/strategies.py
1299
1300
1301
1302
1303
1304
def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
    """
    Batch assignment: one entry per person, all holding the same constant.

    When no constant is configured, each person is routed through
    ``self.assign`` so the single-person warning/fallback path is reused.
    """
    constant = self.value
    if constant is not None:
        return [constant] * len(people_list)

    # Unset constant: defer to assign() for every (person, household, context).
    assigned = []
    for p, h, c in zip(people_list, households_list, contexts_list):
        assigned.append(self.assign(p, h, c))
    return assigned

GUSamplerStrategy

Bases: AssignmentStrategy

Samples a geographical unit within a parent GU based on weighted distribution. Generic strategy that works with any geographical hierarchy level.

Supports batch assignment to reduce repeated lookups.

Source code in may/attribute_assignment/strategies.py
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
class GUSamplerStrategy(AssignmentStrategy):
    """
    Weighted sampler for geographical units.

    Draws one geographical unit from the distribution the configured data
    source returns for a person's ``workplace_location``. When that lookup is
    empty, the name of the 'LGU' ancestor of the person's own geographical
    unit is tried instead.

    Offers both a per-person and a batch entry point; the batch variant
    performs one lookup and one sampling call per group of people.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Store the name of the data source given under ``config['data_source']``."""
        super().__init__(config, data_manager)
        self.data_source_name = config.get('data_source', None)

    def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
        """
        Sample geographical units for many people with as few lookups as possible.

        People are bucketed by the pair (workplace parent GU, home parent GU).
        Each bucket is resolved once: the workplace key is looked up first and
        the home key is used only if the workplace lookup yields nothing.

        Args:
            people_list: List of Person objects
            households_list: List of Household objects (parallel to people_list)
            contexts_list: List of context dicts (parallel to people_list)

        Returns:
            List of sampled geographical unit codes (parallel to people_list).
            Entries stay None for people without a workplace_location and for
            buckets where neither lookup returned a distribution.
        """
        from collections import defaultdict

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return [None] * len(people_list)

        # Bucket positions by (workplace key, home key) so people sharing both
        # keys also share the same fallback decision.
        gu_groups = defaultdict(list)
        for position, person in enumerate(people_list):
            workplace_key = person.properties.get('workplace_location')

            # Home key: name of the person's 'LGU' ancestor, when there is one.
            home_key = None
            if person.geographical_unit:
                lgu = person.geographical_unit.get_ancestor_by_level('LGU')
                if lgu:
                    home_key = lgu.name

            if not workplace_key:
                # No workplace: this person keeps the default None result.
                continue
            gu_groups[(workplace_key, home_key)].append(position)

        results = [None] * len(people_list)

        for (workplace_parent_gu, home_parent_gu), indices in gu_groups.items():
            gu_probs = source.lookup(workplace_parent_gu)

            # Workplace lookup came back empty: retry with the home key.
            if not gu_probs and home_parent_gu:
                logger.debug(f"No GU distribution for workplace parent GU '{workplace_parent_gu}', "
                           f"falling back to home parent GU '{home_parent_gu}'")
                gu_probs = source.lookup(home_parent_gu)

            if not gu_probs:
                logger.warning(f"No GU distribution found for parent GU '{workplace_parent_gu}' "
                             f"or home parent GU '{home_parent_gu}'")
                continue

            # One vectorised draw covering every member of the bucket.
            sampled_gus = np.random.choice(
                list(gu_probs.keys()),
                size=len(indices),
                p=list(gu_probs.values()),
            )

            for idx, sampled_gu in zip(indices, sampled_gus):
                results[idx] = sampled_gu
                logger.debug(f"GU Sampler (batch): {sampled_gu} for person at index {idx} in parent GU {workplace_parent_gu}")

        return results

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Sample one geographical unit for a single person.

        The distribution for the person's ``workplace_location`` is used when
        available; otherwise the distribution for the person's home 'LGU'
        ancestor is tried.

        Args:
            person: Person object
            household: Household object (not read by this method)
            context: Assignment context (not read by this method)

        Returns:
            Sampled geographical unit code, or None when the person has no
            workplace_location, the data source is missing, or no
            distribution could be found.
        """
        workplace_parent_gu = person.properties.get('workplace_location')
        if not workplace_parent_gu:
            logger.warning(f"No workplace_location found for person {person.id}")
            return None

        source = self.data_manager.get_source(self.data_source_name)
        if not source:
            logger.warning(f"Data source '{self.data_source_name}' not found")
            return None

        gu_probs = source.lookup(workplace_parent_gu)

        if not gu_probs:
            # Nothing for the workplace key: work out the home key, logging
            # why it is unavailable when it cannot be determined.
            home_parent_gu = None
            if not person.geographical_unit:
                logger.debug(f"Person {person.id} has no geographical_unit set")
            else:
                home_parent_gu_obj = person.geographical_unit.get_ancestor_by_level('LGU')
                if not home_parent_gu_obj:
                    logger.debug(f"Person {person.id} GU '{person.geographical_unit.name}' has no LGU ancestor")
                else:
                    home_parent_gu = home_parent_gu_obj.name

            if home_parent_gu:
                logger.debug(f"No GU distribution for workplace parent GU '{workplace_parent_gu}', "
                           f"falling back to home parent GU '{home_parent_gu}'")
                gu_probs = source.lookup(home_parent_gu)

            if not gu_probs:
                logger.warning(f"No GU distribution found for parent GU '{workplace_parent_gu}' "
                             f"or home parent GU '{home_parent_gu}'")
                return None

        # Weighted draw over the distribution's keys.
        sampled_gu = np.random.choice(list(gu_probs.keys()), p=list(gu_probs.values()))

        logger.debug(f"GU Sampler: {sampled_gu} for person {person.id} in parent GU {workplace_parent_gu}")
        return sampled_gu

__init__(config, data_manager)

Initialize geographical unit sampler strategy.

Source code in may/attribute_assignment/strategies.py
1012
1013
1014
1015
def __init__(self, config: Dict[str, Any], data_manager):
    """
    Set up the geographical unit sampler.

    Args:
        config: Strategy configuration; 'data_source' names the source to sample from.
        data_manager: Passed through unchanged to the base-class initializer.
    """
    super().__init__(config, data_manager)
    # An absent 'data_source' key leaves the name as None.
    self.data_source_name = config.get('data_source', None)

assign(person, household, context)

Sample a geographical unit within person's parent GU. Falls back to home parent GU if workplace parent GU has no data.

Parameters:

Name Type Description Default
person

Person object

required
household

Household object (optional)

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

Sampled geographical unit code

Source code in may/attribute_assignment/strategies.py
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Sample one geographical unit for a single person.

    The distribution for the person's ``workplace_location`` is used when
    available; otherwise the distribution for the person's home 'LGU'
    ancestor is tried.

    Args:
        person: Person object
        household: Household object (not read by this method)
        context: Assignment context (not read by this method)

    Returns:
        Sampled geographical unit code, or None when the person has no
        workplace_location, the data source is missing, or no distribution
        could be found.
    """
    workplace_parent_gu = person.properties.get('workplace_location')
    if not workplace_parent_gu:
        logger.warning(f"No workplace_location found for person {person.id}")
        return None

    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return None

    gu_probs = source.lookup(workplace_parent_gu)

    if not gu_probs:
        # Nothing for the workplace key: work out the home key, logging why
        # it is unavailable when it cannot be determined.
        home_parent_gu = None
        if not person.geographical_unit:
            logger.debug(f"Person {person.id} has no geographical_unit set")
        else:
            home_parent_gu_obj = person.geographical_unit.get_ancestor_by_level('LGU')
            if not home_parent_gu_obj:
                logger.debug(f"Person {person.id} GU '{person.geographical_unit.name}' has no LGU ancestor")
            else:
                home_parent_gu = home_parent_gu_obj.name

        if home_parent_gu:
            logger.debug(f"No GU distribution for workplace parent GU '{workplace_parent_gu}', "
                       f"falling back to home parent GU '{home_parent_gu}'")
            gu_probs = source.lookup(home_parent_gu)

        if not gu_probs:
            logger.warning(f"No GU distribution found for parent GU '{workplace_parent_gu}' "
                         f"or home parent GU '{home_parent_gu}'")
            return None

    # Weighted draw over the distribution's keys.
    sampled_gu = np.random.choice(list(gu_probs.keys()), p=list(gu_probs.values()))

    logger.debug(f"GU Sampler: {sampled_gu} for person {person.id} in parent GU {workplace_parent_gu}")
    return sampled_gu

assign_batch(people_list, households_list, contexts_list)

Batch assignment to minimize repeated data lookups.

Groups people by (workplace_parent_gu, home_parent_gu) and processes each group together. Falls back from workplace to home GU if workplace has no data.

Parameters:

Name Type Description Default
people_list List

List of Person objects

required
households_list List

List of Household objects (parallel to people_list)

required
contexts_list List[Dict[str, Any]]

List of context dicts (parallel to people_list)

required

Returns:

Type Description
List[Any]

List of sampled geographical unit codes (parallel to people_list)

Source code in may/attribute_assignment/strategies.py
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
def assign_batch(self, people_list: List, households_list: List, contexts_list: List[Dict[str, Any]]) -> List[Any]:
    """
    Sample geographical units for many people with as few lookups as possible.

    People are bucketed by the pair (workplace parent GU, home parent GU).
    Each bucket is resolved once: the workplace key is looked up first and
    the home key is used only if the workplace lookup yields nothing.

    Args:
        people_list: List of Person objects
        households_list: List of Household objects (parallel to people_list)
        contexts_list: List of context dicts (parallel to people_list)

    Returns:
        List of sampled geographical unit codes (parallel to people_list).
        Entries stay None for people without a workplace_location and for
        buckets where neither lookup returned a distribution.
    """
    from collections import defaultdict

    source = self.data_manager.get_source(self.data_source_name)
    if not source:
        logger.warning(f"Data source '{self.data_source_name}' not found")
        return [None] * len(people_list)

    # Bucket positions by (workplace key, home key) so people sharing both
    # keys also share the same fallback decision.
    gu_groups = defaultdict(list)
    for position, person in enumerate(people_list):
        workplace_key = person.properties.get('workplace_location')

        # Home key: name of the person's 'LGU' ancestor, when there is one.
        home_key = None
        if person.geographical_unit:
            lgu = person.geographical_unit.get_ancestor_by_level('LGU')
            if lgu:
                home_key = lgu.name

        if not workplace_key:
            # No workplace: this person keeps the default None result.
            continue
        gu_groups[(workplace_key, home_key)].append(position)

    results = [None] * len(people_list)

    for (workplace_parent_gu, home_parent_gu), indices in gu_groups.items():
        gu_probs = source.lookup(workplace_parent_gu)

        # Workplace lookup came back empty: retry with the home key.
        if not gu_probs and home_parent_gu:
            logger.debug(f"No GU distribution for workplace parent GU '{workplace_parent_gu}', "
                       f"falling back to home parent GU '{home_parent_gu}'")
            gu_probs = source.lookup(home_parent_gu)

        if not gu_probs:
            logger.warning(f"No GU distribution found for parent GU '{workplace_parent_gu}' "
                         f"or home parent GU '{home_parent_gu}'")
            continue

        # One vectorised draw covering every member of the bucket.
        sampled_gus = np.random.choice(
            list(gu_probs.keys()),
            size=len(indices),
            p=list(gu_probs.values()),
        )

        for idx, sampled_gu in zip(indices, sampled_gus):
            results[idx] = sampled_gu
            logger.debug(f"GU Sampler (batch): {sampled_gu} for person at index {idx} in parent GU {workplace_parent_gu}")

    return results

InheritanceStrategy

Bases: AssignmentStrategy

Forward inheritance: Parent → Child.

Children inherit attribute values from parents based on combination rules. Logic is completely configurable via YAML logic blocks.

Example for ethnicity:

- Same + Same = Same (W+W=W, A+A=A, etc.)
- Different = Mixed (W+A=M, W+B=M, etc.)
- Mixed + Any = Mixed (M+X=M)

Simplified from V1 - no complex conditions, just straightforward logic.

Source code in may/attribute_assignment/strategies.py
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
class InheritanceStrategy(AssignmentStrategy):
    """
    Forward inheritance: Parent → Child.

    Children inherit attribute values from parents based on combination rules.
    Logic is completely configurable via YAML logic blocks.

    Example for ethnicity:
    - Same + Same = Same (W+W=W, A+A=A, etc.)
    - Different = Mixed (W+A=M, W+B=M, etc.)
    - Mixed + Any = Mixed (M+X=M)

    Simplified from V1 - no complex conditions, just straightforward logic.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """
        Initialize inheritance strategy - completely generic.

        Args:
            config: Strategy configuration; 'inherit_from' (dict, read for its
                'roles' list) and 'logic' (list of when/then blocks) are used.
            data_manager: Passed through unchanged to the base-class initializer.
        """
        super().__init__(config, data_manager)
        # Store the full config - we'll evaluate logic blocks
        self.inherit_config = config.get('inherit_from', {})
        self.logic_blocks = config.get('logic', [])

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Assign value based on inheritance from parent roles (completely generic).

        Collects the attribute named by ``context['attribute_name']`` from each
        configured parent role, then walks the logic blocks in order and
        returns the resolved 'then' value of the first block whose 'when'
        condition holds.

        Args:
            person: Person object
            household: Household object
            context: Assignment context

        Returns:
            Inherited attribute value, or the result of ``self._fallback``
            when there are no parent values, the matching block's 'then' is a
            nested strategy (dict), or no block matches.
        """
        attribute_name = context.get('attribute_name')

        # Get roles to inherit from
        parent_roles = self.inherit_config.get('roles', [])

        # Collect parent values (missing parents and None values are skipped)
        parent_values = []
        for role_name in parent_roles:
            parent = self._get_person_by_role(context, role_name)
            if parent:
                value = self._get_attribute_value(parent, attribute_name)
                if value is not None:
                    parent_values.append(value)

        if not parent_values:
            logger.warning("No parent values found for inheritance")
            return self._fallback(person, household, context, "NO_PARENT_VALUES")

        # Evaluate logic blocks
        unique_values = list(set(parent_values))

        # Names made available to the 'when' / 'then' expressions
        eval_context = {
            'values': parent_values,
            'unique_values': unique_values,
            'count': len
        }

        # Evaluate each logic block; first match wins
        for logic_block in self.logic_blocks:
            when_condition = logic_block.get('when')
            then_action = logic_block.get('then')

            try:
                # Evaluate the condition
                if self._evaluate_condition(when_condition, eval_context):
                    # Execute the 'then' action
                    if isinstance(then_action, str):
                        # Simple value like "M" or "values[0]"
                        result = self._resolve_value(then_action, eval_context)
                        logger.debug(f"Inheritance: {result} for {person.id}")
                        return result
                    elif isinstance(then_action, dict):
                        # Nested strategy - not implemented yet, fall back
                        logger.warning("Nested strategy in inheritance not yet supported")
                        return self._fallback(person, household, context, "NESTED_STRATEGY_UNSUPPORTED")
                    # NOTE(review): a matching block whose 'then' is neither str
                    # nor dict is skipped and evaluation moves on to the next
                    # block - confirm this is intended.
            except Exception as e:
                logger.warning(f"Error evaluating inheritance logic: {e}")
                continue

        # No logic matched - fallback
        return self._fallback(person, household, context, "LOGIC_NO_MATCH")

    def _evaluate_condition(self, condition: str, context: dict) -> bool:
        """
        Evaluate a when condition with fast-paths for common patterns.

        Args:
            condition: Expression text from a logic block's 'when' entry.
            context: Names visible to the expression.

        Returns:
            Result of the expression; False when it cannot be compiled or
            evaluated.
        """
        # FAST PATH: the two common condition strings are answered without eval
        if condition == "count(unique_values) == 1":
            return len(context['unique_values']) == 1
        if condition == "count(unique_values) > 1":
            return len(context['unique_values']) > 1

        try:
            # NOTE(security): eval() on configuration text. Builtins are
            # stripped, but this is not a sandbox - only load trusted configs.
            # _compile_expression is defined elsewhere in this module
            # (presumably caching compiled code objects).
            code = _compile_expression(condition, 'eval')
            return eval(code, {"__builtins__": {}}, context)
        except Exception as e:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt and
            # SystemExit. A broken condition is still treated as "no match".
            logger.debug(f"Inheritance condition {condition!r} could not be evaluated: {e}")
            return False

    def _resolve_value(self, value_expr: str, context: dict) -> Any:
        """
        Resolve a value expression with fast-paths for common patterns.

        Args:
            value_expr: Text from a logic block's 'then' entry.
            context: Names visible to the expression.

        Returns:
            The evaluated expression, or ``value_expr`` itself when it is a
            short literal code or cannot be evaluated.
        """
        # FAST PATH: Common resolutions like 'M' or 'values[0]'
        if value_expr == "values[0]":
            return context['values'][0] if context['values'] else None
        if len(value_expr) <= 2:  # Likely a literal code like "M", "W", etc.
            # If it's in context, it's a variable, but for letters it's usually literal
            if value_expr not in context:
                return value_expr

        try:
            # NOTE(security): eval() on configuration text - see
            # _evaluate_condition; only load trusted configs.
            code = _compile_expression(value_expr, 'eval')
            return eval(code, {"__builtins__": {}}, context)
        except Exception:
            # Was a bare `except:`. Anything that does not evaluate is kept
            # as a literal string.
            return value_expr

__init__(config, data_manager)

Initialize inheritance strategy - completely generic.

Source code in may/attribute_assignment/strategies.py
352
353
354
355
356
357
def __init__(self, config: Dict[str, Any], data_manager):
    """
    Set up the inheritance strategy from its configuration.

    Args:
        config: Strategy configuration; 'inherit_from' and 'logic' are read.
        data_manager: Passed through unchanged to the base-class initializer.
    """
    super().__init__(config, data_manager)
    # Ordered when/then blocks evaluated by assign(); empty list when absent.
    self.logic_blocks = config.get('logic', [])
    # Inheritance settings (assign() reads its 'roles' list); empty dict when absent.
    self.inherit_config = config.get('inherit_from', {})

assign(person, household, context)

Assign value based on inheritance from parent roles (completely generic).

Evaluates logic blocks defined in YAML configuration.

Parameters:

Name Type Description Default
person

Person object

required
household

Household object

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

Inherited attribute value

Source code in may/attribute_assignment/strategies.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Derive a person's attribute value from the values held by parent roles.

    The attribute named by ``context['attribute_name']`` is collected from
    every configured parent role, then the YAML logic blocks are walked in
    order; the first block whose 'when' condition holds decides the result.

    Args:
        person: Person object
        household: Household object
        context: Assignment context

    Returns:
        Inherited attribute value, or the result of ``self._fallback`` when
        there are no parent values, the matching block's 'then' is a dict,
        or no block matches.
    """
    attribute_name = context.get('attribute_name')

    # Gather the non-None attribute values of every parent role that exists.
    parent_values = []
    for role_name in self.inherit_config.get('roles', []):
        parent = self._get_person_by_role(context, role_name)
        if not parent:
            continue
        value = self._get_attribute_value(parent, attribute_name)
        if value is None:
            continue
        parent_values.append(value)

    if not parent_values:
        logger.warning(f"No parent values found for inheritance")
        return self._fallback(person, household, context, "NO_PARENT_VALUES")

    # Names made available to the 'when' / 'then' expressions.
    eval_context = {
        'values': parent_values,
        'unique_values': list(set(parent_values)),
        'count': lambda x: len(x)
    }

    for logic_block in self.logic_blocks:
        when_condition = logic_block.get('when')
        then_action = logic_block.get('then')

        try:
            if not self._evaluate_condition(when_condition, eval_context):
                continue

            if isinstance(then_action, str):
                # Plain value such as "M", or an expression such as "values[0]".
                result = self._resolve_value(then_action, eval_context)
                logger.debug(f"Inheritance: {result} for {person.id}")
                return result

            if isinstance(then_action, dict):
                # A nested strategy is not supported here; use the fallback.
                logger.warning(f"Nested strategy in inheritance not yet supported")
                return self._fallback(person, household, context, "NESTED_STRATEGY_UNSUPPORTED")
        except Exception as e:
            logger.warning(f"Error evaluating inheritance logic: {e}")
            continue

    # Every block was skipped or failed.
    return self._fallback(person, household, context, "LOGIC_NO_MATCH")

PartnershipStrategy

Bases: AssignmentStrategy

Partnership-based assignment using pair probabilities.

Given the first person's attribute value, samples the second person's value from conditional probability distribution. Used for couples and family secondary adults.

Replaces complex conditional strategies with role-based subset selection.

Source code in may/attribute_assignment/strategies.py
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
class PartnershipStrategy(AssignmentStrategy):
    """
    Assign a partner's attribute by sampling from pair probabilities.

    The value already held by the person in ``partner_role`` selects a
    conditional distribution from the configured data source, and the
    partner's value is drawn from it. Used for couples and family secondary
    adults.

    Replaces complex conditional strategies with role-based subset selection.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Read the partner role and data source name from ``config``."""
        super().__init__(config, data_manager)
        # Role of the person whose value conditions the sampling.
        self.partner_role = config.get('partner_role', 'primary_adult')
        # Name passed to data_manager.lookup() to fetch the pair probabilities.
        self.data_source_name = config.get('data_source', 'pair_probabilities')

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Draw this person's attribute value conditioned on their partner's value.

        Every unmet precondition (partner missing, partner value missing,
        household without a geographical unit, empty lookup) is delegated to
        ``self._fallback`` with a reason code.

        Args:
            person: Person object (the partner being assigned)
            household: Household object
            context: Assignment context (must contain partner_role person)

        Returns:
            Sampled attribute value, or whatever ``self._fallback`` returns
        """
        def bail(reason: str) -> Any:
            # Single exit point for all fallback cases.
            return self._fallback(person, household, context, reason)

        # The person whose value conditions the draw (primary_adult or primary_elder)
        anchor = self._get_person_by_role(context, self.partner_role)
        if not anchor:
            logger.warning(f"Partner role '{self.partner_role}' not found in context")
            return bail("PARTNER_ROLE_NOT_FOUND")

        attribute_name = context.get('attribute_name')
        anchor_value = self._get_attribute_value(anchor, attribute_name)
        if anchor_value is None:
            logger.warning(f"No {attribute_name} found for {self.partner_role}")
            return bail("PARTNER_VALUE_MISSING")

        if not (household and household.geographical_unit):
            logger.warning("No geographical unit found for household")
            return bail("GEO_UNIT_MISSING")

        area = household.geographical_unit.name

        # Conditional distribution keyed by (geo unit, partner's value)
        distribution = self.data_manager.lookup(self.data_source_name, area, anchor_value)
        if not distribution:
            logger.warning(f"No pair probabilities for {area}, {anchor_value}")
            return bail("DATA_SOURCE_MISSING")

        # Keys are the candidate values, values are their probabilities
        candidates = list(distribution.keys())
        weights = list(distribution.values())
        drawn = np.random.choice(candidates, p=weights)

        logger.debug(f"Partnership: {drawn} (partner of {anchor_value}) for {person.id}")
        return drawn

__init__(config, data_manager)

Initialize partnership strategy.

Source code in may/attribute_assignment/strategies.py
283
284
285
286
287
def __init__(self, config: Dict[str, Any], data_manager):
    """Read the partner role and data source name from ``config``."""
    super().__init__(config, data_manager)
    # Role of the person whose value conditions the sampling.
    self.partner_role = config.get('partner_role', 'primary_adult')
    # Name passed to data_manager.lookup() to fetch the pair probabilities.
    self.data_source_name = config.get('data_source', 'pair_probabilities')

assign(person, household, context)

Sample partner attribute value based on first person's attribute value.

Parameters:

Name Type Description Default
person

Person object (the partner being assigned)

required
household

Household object

required
context Dict[str, Any]

Assignment context (must contain partner_role person)

required

Returns:

Type Description
Any

Sampled attribute value

Source code in may/attribute_assignment/strategies.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Draw this person's attribute value conditioned on their partner's value.

    Every unmet precondition (partner missing, partner value missing,
    household without a geographical unit, empty lookup) is delegated to
    ``self._fallback`` with a reason code.

    Args:
        person: Person object (the partner being assigned)
        household: Household object
        context: Assignment context (must contain partner_role person)

    Returns:
        Sampled attribute value, or whatever ``self._fallback`` returns
    """
    def bail(reason: str) -> Any:
        # Single exit point for all fallback cases.
        return self._fallback(person, household, context, reason)

    # The person whose value conditions the draw (primary_adult or primary_elder)
    anchor = self._get_person_by_role(context, self.partner_role)
    if not anchor:
        logger.warning(f"Partner role '{self.partner_role}' not found in context")
        return bail("PARTNER_ROLE_NOT_FOUND")

    attribute_name = context.get('attribute_name')
    anchor_value = self._get_attribute_value(anchor, attribute_name)
    if anchor_value is None:
        logger.warning(f"No {attribute_name} found for {self.partner_role}")
        return bail("PARTNER_VALUE_MISSING")

    if not (household and household.geographical_unit):
        logger.warning("No geographical unit found for household")
        return bail("GEO_UNIT_MISSING")

    area = household.geographical_unit.name

    # Conditional distribution keyed by (geo unit, partner's value)
    distribution = self.data_manager.lookup(self.data_source_name, area, anchor_value)
    if not distribution:
        logger.warning(f"No pair probabilities for {area}, {anchor_value}")
        return bail("DATA_SOURCE_MISSING")

    # Keys are the candidate values, values are their probabilities
    candidates = list(distribution.keys())
    weights = list(distribution.values())
    drawn = np.random.choice(candidates, p=weights)

    logger.debug(f"Partnership: {drawn} (partner of {anchor_value}) for {person.id}")
    return drawn

ProbabilisticConditionsStrategy

Bases: AssignmentStrategy

Assigns multiple conditions independently based on probabilities.

Each condition is checked with a Bernoulli trial (independent sampling). A person can end up with 0, 1, or multiple conditions.

Source code in may/attribute_assignment/strategies.py
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
class ProbabilisticConditionsStrategy(AssignmentStrategy):
    """
    Assign zero or more conditions to a person, each decided independently.

    Every configured condition gets its own Bernoulli trial, so a person can
    end up with 0, 1, or multiple conditions.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Read the condition list and selection method from ``config``."""
        super().__init__(config, data_manager)
        self.selection_method = config.get('selection_method', 'independent_bernoulli')
        # List of condition entries; each is read with .get('name') when sampling.
        self.conditions = config.get('conditions', [])
        # Overrides whatever the base initializer read from config['strategy'].
        self.strategy_type = "probabilistic_conditions"

    def assign(self, person, household, context: Dict[str, Any]) -> List[str]:
        """
        Assign comorbidities to person.

        Returns an empty list (after logging a warning) when no data source is
        configured, the source cannot be found, the lookup yields nothing, or
        the selection method is not recognised.

        Args:
            person: Person object
            household: Household venue (optional)
            context: Assignment context

        Returns:
            List of condition names (e.g., ["cvd", "crd"])
        """
        source_name = self.config.get('data_source')
        if not source_name:
            logger.warning("No data_source specified for probabilistic_conditions strategy")
            return []

        source = self.data_manager.get_source(source_name)
        if not source:
            logger.warning(f"Data source '{source_name}' not found")
            return []

        # The source resolves the per-condition probabilities for this person
        probabilities = source.lookup(person, household, context)
        if not probabilities:
            logger.warning(f"No probabilities found for person {person.id}")
            return []

        if self.selection_method != 'independent_bernoulli':
            logger.warning(f"Unknown selection method: {self.selection_method}")
            return []

        return self._sample_independent_bernoulli(probabilities)

    def _sample_independent_bernoulli(self, probabilities: Dict[str, float]) -> List[str]:
        """
        Run one Bernoulli trial per configured condition.

        Conditions without a name are skipped (no random draw is made for
        them); conditions missing from ``probabilities`` use probability 0.0.

        Args:
            probabilities: Dict mapping condition names to probabilities

        Returns:
            List of condition names that were sampled, in configuration order
        """
        names = (entry.get('name') for entry in self.conditions)
        return [
            name
            for name in names
            if name and probabilities.get(name, 0.0) > np.random.random()
        ]

__init__(config, data_manager)

Initialize probabilistic conditions strategy.

Source code in may/attribute_assignment/strategies.py
707
708
709
710
711
712
def __init__(self, config: Dict[str, Any], data_manager):
    """Read the condition list and selection method from ``config``."""
    super().__init__(config, data_manager)
    self.selection_method = config.get('selection_method', 'independent_bernoulli')
    # List of condition entries taken from config['conditions'] (empty by default).
    self.conditions = config.get('conditions', [])
    # Set after the base initializer so this fixed value is the one that sticks.
    self.strategy_type = "probabilistic_conditions"

assign(person, household, context)

Assign comorbidities to person.

Parameters:

Name Type Description Default
person

Person object

required
household

Household venue (optional)

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
List[str]

List of condition names (e.g., ["cvd", "crd"])

Source code in may/attribute_assignment/strategies.py
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
def assign(self, person, household, context: Dict[str, Any]) -> List[str]:
    """
    Assign comorbidities to person.

    Returns an empty list (after logging a warning) when no data source is
    configured, the source cannot be found, the lookup yields nothing, or
    the selection method is not recognised.

    Args:
        person: Person object
        household: Household venue (optional)
        context: Assignment context

    Returns:
        List of condition names (e.g., ["cvd", "crd"])
    """
    source_name = self.config.get('data_source')
    if not source_name:
        logger.warning("No data_source specified for probabilistic_conditions strategy")
        return []

    source = self.data_manager.get_source(source_name)
    if not source:
        logger.warning(f"Data source '{source_name}' not found")
        return []

    # The source resolves the per-condition probabilities for this person
    probabilities = source.lookup(person, household, context)
    if not probabilities:
        logger.warning(f"No probabilities found for person {person.id}")
        return []

    if self.selection_method != 'independent_bernoulli':
        logger.warning(f"Unknown selection method: {self.selection_method}")
        return []

    return self._sample_independent_bernoulli(probabilities)

ProbabilisticStrategy

Bases: AssignmentStrategy

Probabilistic assignment based on geographical distribution.

Samples from attribute distribution for the household's geographical unit. This is the simplest strategy - no dependencies on other people.

Source code in may/attribute_assignment/strategies.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
class ProbabilisticStrategy(AssignmentStrategy):
    """
    Sample an attribute from the distribution of a geographical unit.

    The unit is taken from the household when available, otherwise from the
    person. This is the simplest strategy - no dependencies on other people.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Store the name of the distribution data source."""
        super().__init__(config, data_manager)
        source_name = config.get('data_source', 'geo_distribution')
        self.data_source_name = source_name

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Sample attribute value from geographical distribution.

        Args:
            person: Person object
            household: Household object (may be None)
            context: Assignment context (not read by this strategy)

        Returns:
            Sampled attribute value, or None when no geographical unit or no
            distribution could be found
        """
        geo_unit = None

        # 1. Prefer the residence venue's unit (household/CE)
        venue_unit = household.geographical_unit if household else None
        if venue_unit:
            geo_unit = household.geographical_unit.name
            logger.debug(f"  Geographical unit for person {person.id} sourced from venue: {geo_unit}")

        # 2. Otherwise use the person's own unit (useful for individuals not yet distributed or CE residents)
        if not geo_unit:
            if person.geographical_unit:
                geo_unit = person.geographical_unit.name
                logger.debug(f"  Geographical unit for person {person.id} sourced from person: {geo_unit}")

        if not geo_unit:
            logger.warning(f"No geographical unit found for person {person.id} (no residence venue and no person-level geo unit)")
            return None

        distribution = self.data_manager.lookup(self.data_source_name, geo_unit)
        if not distribution:
            logger.warning(f"No probabilities found for {self.data_source_name}({geo_unit})")
            return None

        # Keys are the candidate values, values are their probabilities
        candidates = list(distribution.keys())
        weights = list(distribution.values())
        drawn = np.random.choice(candidates, p=weights)

        logger.debug(f"Probabilistic: {drawn} for {person.id} in {geo_unit}")
        return drawn

__init__(config, data_manager)

Initialize and cache configuration values.

Source code in may/attribute_assignment/strategies.py
226
227
228
229
def __init__(self, config: Dict[str, Any], data_manager):
    """Store the name of the distribution data source."""
    super().__init__(config, data_manager)
    # Falls back to 'geo_distribution' when the config names no data source.
    source_name = config.get('data_source', 'geo_distribution')
    self.data_source_name = source_name

assign(person, household, context)

Sample attribute value from geographical distribution.

Parameters:

Name Type Description Default
person

Person object

required
household

Household object

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

Sampled attribute value

Source code in may/attribute_assignment/strategies.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Sample attribute value from geographical distribution.

    Args:
        person: Person object
        household: Household object (may be None)
        context: Assignment context (not read by this strategy)

    Returns:
        Sampled attribute value, or None when no geographical unit or no
        distribution could be found
    """
    geo_unit = None

    # 1. Prefer the residence venue's unit (household/CE)
    venue_unit = household.geographical_unit if household else None
    if venue_unit:
        geo_unit = household.geographical_unit.name
        logger.debug(f"  Geographical unit for person {person.id} sourced from venue: {geo_unit}")

    # 2. Otherwise use the person's own unit (useful for individuals not yet distributed or CE residents)
    if not geo_unit:
        if person.geographical_unit:
            geo_unit = person.geographical_unit.name
            logger.debug(f"  Geographical unit for person {person.id} sourced from person: {geo_unit}")

    if not geo_unit:
        logger.warning(f"No geographical unit found for person {person.id} (no residence venue and no person-level geo unit)")
        return None

    distribution = self.data_manager.lookup(self.data_source_name, geo_unit)
    if not distribution:
        logger.warning(f"No probabilities found for {self.data_source_name}({geo_unit})")
        return None

    # Keys are the candidate values, values are their probabilities
    candidates = list(distribution.keys())
    weights = list(distribution.values())
    drawn = np.random.choice(candidates, p=weights)

    logger.debug(f"Probabilistic: {drawn} for {person.id} in {geo_unit}")
    return drawn

ReverseInheritanceStrategy

Bases: AssignmentStrategy

Reverse inheritance: Child → Parent.

When children are assigned first, infer parent attribute values. Logic is completely configurable via YAML logic blocks.

Example for ethnicity: (1) if the child is W/A/B/O, both parents must be the same (both W, both A, etc.); (2) if the child is M, the parents must differ (sample two different values from the geo distribution).

Enables "kids first" assignment in certain household patterns.

Source code in may/attribute_assignment/strategies.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
class ReverseInheritanceStrategy(AssignmentStrategy):
    """
    Reverse inheritance: Child → Parent.

    When children are assigned first, infer parent attribute values.
    Logic is completely configurable via YAML logic blocks.

    Example for ethnicity:
    - Child is W/A/B/O → Both parents must be same (both W, both A, etc.)
    - Child is M → Parents must differ (sample two different values from geo distribution)

    Enables "kids first" assignment in certain household patterns.
    """

    def __init__(self, config: Dict[str, Any], data_manager):
        """Initialize reverse inheritance strategy - completely generic."""
        super().__init__(config, data_manager)
        # 'inherit_from' names the role to read the child value from;
        # 'logic' is the ordered list of {'when': ..., 'then': ...} blocks.
        self.inherit_config = config.get('inherit_from', {})
        self.logic_blocks = config.get('logic', [])

    def assign(self, person, household, context: Dict[str, Any]) -> Any:
        """
        Assign value based on reverse inheritance (generic - evaluates logic blocks).

        Logic blocks are tried in order; the first whose ``when`` condition
        holds decides the result. A string ``then`` is either a reference to
        the child's value (``"<role>.<attribute>"``) or a literal. A dict
        ``then`` describes a nested strategy; only ``'probabilistic'`` is
        supported, optionally with an ``exclude`` list. A ``then`` of any
        other type is ignored and the next block is tried. Errors raised
        while handling a block are logged and that block is skipped.

        Args:
            person: Person object (parent being assigned)
            household: Household object
            context: Assignment context

        Returns:
            Inferred parent value, or the result of ``self._fallback`` when
            prerequisites are missing or no block matches
        """
        attribute_name = context.get('attribute_name')

        # Get child role to inherit from
        child_role = self.inherit_config.get('role')
        if not child_role:
            logger.warning("No child role specified for reverse inheritance")
            return self._fallback(person, household, context, "NO_CHILD_ROLE")

        # Get child's attribute value
        child = self._get_person_by_role(context, child_role)
        if not child:
            logger.warning(f"Child role '{child_role}' not found")
            return self._fallback(person, household, context, "CHILD_NOT_FOUND")

        child_value = self._get_attribute_value(child, attribute_name)
        if child_value is None:
            logger.warning(f"No value found for child role '{child_role}'")
            return self._fallback(person, household, context, "CHILD_VALUE_MISSING")

        # Create evaluation context for logic blocks
        # Make child value accessible as "primary_adult.ethnicity" format
        eval_context = {child_role: type('obj', (object,), {attribute_name: child_value})()}

        # Evaluate each logic block
        for logic_block in self.logic_blocks:
            when_condition = logic_block.get('when')
            then_action = logic_block.get('then')

            try:
                # Evaluate the condition
                if self._evaluate_condition_with_context(when_condition, eval_context, child_role, attribute_name, child_value):
                    # Execute the 'then' action
                    if isinstance(then_action, str):
                        # Simple value - might be literal or reference to child value
                        if then_action == f"{child_role}.{attribute_name}":
                            result = child_value
                        else:
                            result = then_action
                        logger.debug(f"Reverse inheritance: {result} for {person.id}")
                        return result
                    elif isinstance(then_action, dict):
                        # Nested strategy - create and execute it
                        strategy_type = then_action.get('strategy')
                        if strategy_type == 'probabilistic':
                            # Check for exclude constraints (e.g., secondary_elder
                            # must differ from primary_elder when child is Mixed)
                            exclude_refs = then_action.get('exclude', [])
                            excluded_values = self._resolve_exclude_values(
                                exclude_refs, context, attribute_name
                            )

                            if excluded_values:
                                return self._sample_with_exclusion(
                                    person, household, context,
                                    then_action, excluded_values
                                )
                            else:
                                fallback_strategy = ProbabilisticStrategy(then_action, self.data_manager)
                                return fallback_strategy.assign(person, household, context)
                        else:
                            logger.warning(f"Unknown nested strategy: {strategy_type}")
                            return self._fallback(person, household, context, "NESTED_STRATEGY_UNSUPPORTED")
            except Exception as e:
                # Covers malformed blocks too (e.g. a missing 'when' makes the
                # condition None); the block is skipped, not fatal.
                logger.warning(f"Error evaluating reverse inheritance logic: {e}")
                continue

        # No logic matched - fallback
        return self._fallback(person, household, context, "LOGIC_NO_MATCH")

    def _resolve_exclude_values(self, exclude_refs: List[str],
                                context: Dict[str, Any],
                                attribute_name: str) -> set:
        """
        Resolve exclude references like ["primary_elder.ethnicity"] into
        concrete values by looking up the referenced role persons in context.

        A reference without a dot is treated as a bare role name and the
        current ``attribute_name`` is used. References whose role is absent
        or whose value is not assigned yet are skipped.

        Args:
            exclude_refs: List of "role.attribute" reference strings
            context: Assignment context containing role persons
            attribute_name: Current attribute being assigned

        Returns:
            Set of concrete values to exclude (may be empty)
        """
        excluded = set()
        for ref in exclude_refs:
            # Parse "role_name.attribute_name" format
            parts = ref.split('.', 1)
            if len(parts) == 2:
                role_name, attr_name = parts
            else:
                role_name = ref
                attr_name = attribute_name

            person = self._get_person_by_role(context, role_name)
            if person:
                value = self._get_attribute_value(person, attr_name)
                if value is not None:
                    excluded.add(value)
                    logger.debug(f"Exclude: resolved '{ref}' → '{value}'")
                else:
                    logger.debug(f"Exclude: '{ref}' has no value assigned yet, skipping")
            else:
                logger.debug(f"Exclude: role '{role_name}' not found in context, skipping")

        return excluded

    def _sample_with_exclusion(self, person, household, context: Dict[str, Any],
                                strategy_config: Dict[str, Any],
                                excluded_values: set) -> Any:
        """
        Sample from a probabilistic distribution while excluding specific values.

        Gets the full distribution, removes excluded values, re-normalizes,
        and samples. Falls back to sampling without exclusion if all values
        would be excluded.

        Args:
            person: Person being assigned
            household: Household venue
            context: Assignment context
            strategy_config: Probabilistic strategy config dict
            excluded_values: Set of values to exclude from sampling

        Returns:
            Sampled attribute value, or the result of ``self._fallback`` when
            no geo unit, no distribution, or no positive probability mass is
            available
        """
        data_source_name = strategy_config.get('data_source', 'geo_distribution')

        # Get the geo unit for lookup
        geo_unit = None
        if household and household.geographical_unit:
            geo_unit = household.geographical_unit.name
        elif person.geographical_unit:
            geo_unit = person.geographical_unit.name

        if not geo_unit:
            logger.warning(f"No geo unit for exclusion sampling, person {person.id}")
            return self._fallback(person, household, context, "GEO_UNIT_MISSING")

        # Look up full distribution
        probs = self.data_manager.lookup(data_source_name, geo_unit)
        if not probs:
            logger.warning(f"No distribution for {data_source_name}({geo_unit})")
            return self._fallback(person, household, context, "DATA_SOURCE_MISSING")

        # Remove excluded values
        filtered_probs = {k: v for k, v in probs.items() if k not in excluded_values}

        if not filtered_probs:
            # All values excluded — fall back to full distribution with warning
            logger.warning(
                f"All values excluded for person {person.id} "
                f"(excluded={excluded_values}, available={set(probs.keys())}). "
                f"Falling back to full distribution without exclusion."
            )
            filtered_probs = probs

        # Re-normalize so the remaining probabilities sum to 1
        total = sum(filtered_probs.values())
        if total <= 0:
            logger.warning(f"Zero total probability after exclusion for person {person.id}")
            return self._fallback(person, household, context, "ZERO_PROBABILITY")

        values = list(filtered_probs.keys())
        probabilities = [v / total for v in filtered_probs.values()]

        sampled = np.random.choice(values, p=probabilities)
        logger.debug(
            f"Reverse inheritance (with exclusion): {sampled} for {person.id} "
            f"(excluded={excluded_values})"
        )
        return sampled

    def _evaluate_condition_with_context(self, condition: str, eval_context: dict,
                                         child_role: str, attribute_name: str, child_value: Any) -> bool:
        """
        Evaluate a ``when`` condition against the child's value.

        A condition of the exact shape ``<role>.<attribute> == <literal>`` is
        answered by a string comparison without calling ``eval``. The fast
        path applies only when the right-hand side is a single quoted literal
        or a bare token; compound expressions such as
        ``role.attr == 'W' or role.attr == 'A'`` go to the general evaluator
        instead of being mistaken for one long literal.

        Args:
            condition: Condition expression from the logic block
            eval_context: Mapping holding the object exposed under ``child_role``
            child_role: Role name used as the variable in the condition
            attribute_name: Attribute being assigned
            child_value: Child's value for that attribute

        Returns:
            True if the condition holds; False if it does not or if it
            cannot be evaluated

        Raises:
            AttributeError: if ``condition`` is not a string (e.g. None)
        """
        # FAST PATH: Pattern like "primary_adult.ethnicity == 'W'"
        prefix = f"{child_role}.{attribute_name}"
        if condition.startswith(prefix):
            op_part = condition[len(prefix):].strip()
            if op_part.startswith("=="):
                rhs = op_part[2:].strip()
                # One literal wrapped in matching quotes, with no further
                # quote of that kind inside (which would signal a compound
                # expression like "'W' or x == 'A'").
                is_quoted = (
                    len(rhs) >= 2
                    and rhs[0] in "'\""
                    and rhs[-1] == rhs[0]
                    and rhs[0] not in rhs[1:-1]
                )
                if is_quoted:
                    return str(child_value) == rhs[1:-1]
                # Unquoted single token (kept for backward compatibility)
                if not any(ch.isspace() or ch in "'\"" for ch in rhs):
                    return str(child_value) == rhs

        try:
            # NOTE(security): eval() with emptied __builtins__ is not a real
            # sandbox; conditions must come from trusted configuration only.
            safe_context = {
                "__builtins__": {},
                child_role: eval_context[child_role]
            }
            return bool(eval(condition, safe_context, {}))
        except Exception as e:
            # An unevaluable condition counts as "no match" rather than aborting.
            logger.debug(f"Condition '{condition}' could not be evaluated: {e}")
            return False

    def _fallback_probabilistic(self, person, household, context: Dict[str, Any]) -> Any:
        """Fallback to geographical distribution."""
        fallback_strategy = ProbabilisticStrategy(
            {'strategy': 'probabilistic', 'data_source': 'geo_distribution'},
            self.data_manager
        )
        return fallback_strategy.assign(person, household, context)

__init__(config, data_manager)

Initialize reverse inheritance strategy - completely generic.

Source code in may/attribute_assignment/strategies.py
475
476
477
478
479
480
def __init__(self, config: Dict[str, Any], data_manager):
    """Set up the strategy from its 'logic' and 'inherit_from' config entries."""
    super().__init__(config, data_manager)
    # Logic blocks from config['logic']; empty list when absent.
    self.logic_blocks = config.get('logic', [])
    # Settings from config['inherit_from']; empty dict when absent.
    self.inherit_config = config.get('inherit_from', {})

assign(person, household, context)

Assign value based on reverse inheritance (generic - evaluates logic blocks).

Parameters:

Name Type Description Default
person

Person object (parent being assigned)

required
household

Household object

required
context Dict[str, Any]

Assignment context

required

Returns:

Type Description
Any

Inferred parent value

Source code in may/attribute_assignment/strategies.py
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
def assign(self, person, household, context: Dict[str, Any]) -> Any:
    """
    Assign value based on reverse inheritance (generic - evaluates logic blocks).

    The child's value for the attribute is looked up first. The configured
    logic blocks are then tried in order; the first block whose 'when'
    condition holds and whose 'then' action is a string or a mapping decides
    the result.

    Args:
        person: Person object (parent being assigned)
        household: Household object
        context: Assignment context; 'attribute_name' is read from it

    Returns:
        Inferred parent value, or the result of self._fallback() when the
        child role or child value is unavailable, a nested strategy other
        than 'probabilistic' is requested, or no logic block yields a value
    """
    attribute_name = context.get('attribute_name')

    # Get child role to inherit from
    child_role = self.inherit_config.get('role')
    if not child_role:
        logger.warning("No child role specified for reverse inheritance")
        return self._fallback(person, household, context, "NO_CHILD_ROLE")

    # Get child's attribute value
    child = self._get_person_by_role(context, child_role)
    if not child:
        logger.warning(f"Child role '{child_role}' not found")
        return self._fallback(person, household, context, "CHILD_NOT_FOUND")

    child_value = self._get_attribute_value(child, attribute_name)
    if child_value is None:
        logger.warning(f"No value found for child role '{child_role}'")
        return self._fallback(person, household, context, "CHILD_VALUE_MISSING")

    # Create evaluation context for logic blocks
    # Make child value accessible as "primary_adult.ethnicity" format
    eval_context = {child_role: type('obj', (object,), {attribute_name: child_value})()}

    # Evaluate each logic block
    for logic_block in self.logic_blocks:
        if not logic_block:
            # Empty entry (e.g. a bare `-` in YAML). It has no 'then' action
            # and so can never produce a value; skipping it also avoids
            # calling .get() on None.
            continue

        when_condition = logic_block.get('when')
        then_action = logic_block.get('then')

        try:
            # Evaluate the condition
            if not self._evaluate_condition_with_context(
                when_condition, eval_context, child_role, attribute_name, child_value
            ):
                continue

            # Execute the 'then' action
            if isinstance(then_action, str):
                # Simple value - might be literal or reference to child value
                if then_action == f"{child_role}.{attribute_name}":
                    result = child_value
                else:
                    result = then_action
                logger.debug(f"Reverse inheritance: {result} for {person.id}")
                return result

            if isinstance(then_action, dict):
                # Nested strategy - create and execute it
                strategy_type = then_action.get('strategy')
                if strategy_type != 'probabilistic':
                    logger.warning(f"Unknown nested strategy: {strategy_type}")
                    return self._fallback(person, household, context, "NESTED_STRATEGY_UNSUPPORTED")

                # Check for exclude constraints (e.g., secondary_elder
                # must differ from primary_elder when child is Mixed)
                exclude_refs = then_action.get('exclude', [])
                excluded_values = self._resolve_exclude_values(
                    exclude_refs, context, attribute_name
                )
                if excluded_values:
                    return self._sample_with_exclusion(
                        person, household, context,
                        then_action, excluded_values
                    )

                fallback_strategy = ProbabilisticStrategy(then_action, self.data_manager)
                return fallback_strategy.assign(person, household, context)

            # A 'then' action of any other type yields no value here; the
            # next logic block is tried.
        except Exception as e:
            # Best effort: a failing block is logged and the next one is tried.
            logger.warning(f"Error evaluating reverse inheritance logic: {e}")
            continue

    # No logic matched - fallback
    return self._fallback(person, household, context, "LOGIC_NO_MATCH")

StrategyFactory

Factory for creating strategy instances.

Maps strategy type strings to strategy classes.

Source code in may/attribute_assignment/strategies.py
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
class StrategyFactory:
    """
    Factory for creating strategy instances.

    Maps strategy type strings to strategy classes.
    """

    # Strategy name (the config's 'strategy' value) -> implementing class
    _strategy_map = {
        'probabilistic': ProbabilisticStrategy,
        'partnership': PartnershipStrategy,
        'inheritance': InheritanceStrategy,
        'reverse_inheritance': ReverseInheritanceStrategy,
        'probabilistic_conditions': ProbabilisticConditionsStrategy,
        'commuting_likelihood': CommutingLikelihoodStrategy,
        'geographical_unit_sampler': GUSamplerStrategy,
        'categorical_sampler': CategoricalSamplerStrategy,
        'constant': ConstantStrategy,
    }

    @classmethod
    def create_strategy(cls, config: Dict[str, Any], data_manager) -> AssignmentStrategy:
        """
        Create strategy instance from configuration.

        The configuration is validated first, then the class registered for
        its 'strategy' value is instantiated with (config, data_manager).

        Args:
            config: Strategy configuration dict
            data_manager: DataSourceManager instance

        Returns:
            Strategy instance

        Raises:
            ValueError: If validation fails or the strategy type is unknown
        """
        validate_assignment_config(config)

        strategy_type = config['strategy']
        strategy_class = cls._strategy_map.get(strategy_type)
        if strategy_class is None:
            # Validation is driven by STRATEGY_ALLOWED_KEYS, not by this map;
            # if the two ever diverge, fail with the documented ValueError
            # rather than "'NoneType' object is not callable".
            raise ValueError(
                f"unknown strategy '{strategy_type}' "
                f"(known: {sorted(cls._strategy_map)})"
            )
        return strategy_class(config, data_manager)

create_strategy(config, data_manager) classmethod

Create strategy instance from configuration.

Parameters:

Name Type Description Default
config Dict[str, Any]

Strategy configuration dict

required
data_manager

DataSourceManager instance

required

Returns:

Type Description
AssignmentStrategy

Strategy instance

Raises:

Type Description
ValueError

If strategy type is unknown

Source code in may/attribute_assignment/strategies.py
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
@classmethod
def create_strategy(cls, config: Dict[str, Any], data_manager) -> AssignmentStrategy:
    """
    Create strategy instance from configuration.

    The configuration is validated first, then the class registered for its
    'strategy' value is instantiated with (config, data_manager).

    Args:
        config: Strategy configuration dict
        data_manager: DataSourceManager instance

    Returns:
        Strategy instance

    Raises:
        ValueError: If validation fails or the strategy type is unknown
    """
    validate_assignment_config(config)

    strategy_type = config['strategy']
    strategy_class = cls._strategy_map.get(strategy_type)
    if strategy_class is None:
        # A validated strategy name may still be missing from the class map;
        # fail with the documented ValueError rather than
        # "'NoneType' object is not callable".
        raise ValueError(
            f"unknown strategy '{strategy_type}' "
            f"(known: {sorted(cls._strategy_map)})"
        )
    return strategy_class(config, data_manager)

validate_assignment_config(config, where='assignment')

Reject assignment config keys no strategy reads.

Raises:

Type Description
ValueError

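naming the unknown keys, the strategy, and the allowed set — so a stale key (e.g. `context`) breaks the build at load time instead of silently doing nothing.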

Source code in may/attribute_assignment/strategies.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def validate_assignment_config(config: Dict[str, Any], where: str = "assignment") -> None:
    """
    Check an assignment config for keys that no strategy reads.

    Checks run in this order: the top-level keys for the chosen strategy,
    the optional 'fallback' mapping (validated recursively), then every
    entry under 'logic' together with its 'then' mapping, if it has one.

    Args:
        config: Assignment configuration to check
        where: Location prefix used in every error message

    Raises:
        ValueError: naming the unknown keys, the strategy, and the allowed
        set — so a stale key (e.g. `context`) breaks the build at load time
        instead of silently doing nothing.
    """
    # --- top level -------------------------------------------------------
    if not isinstance(config, dict):
        raise ValueError(f"{where}: assignment must be a mapping, got {type(config).__name__}")

    strategy_type = config.get('strategy')
    if not strategy_type:
        raise ValueError(f"{where}: assignment has no 'strategy' field")

    allowed = STRATEGY_ALLOWED_KEYS.get(strategy_type)
    if allowed is None:
        raise ValueError(
            f"{where}: unknown strategy '{strategy_type}' "
            f"(known: {sorted(STRATEGY_ALLOWED_KEYS)})"
        )

    unknown = set(config) - allowed
    if unknown:
        raise ValueError(
            f"{where}: strategy '{strategy_type}' does not read key(s) "
            f"{sorted(unknown)} — allowed keys are {sorted(allowed)}. "
            f"Remove them (dead config) or fix the typo."
        )

    # --- fallback --------------------------------------------------------
    fallback = config.get('fallback')
    if fallback is not None and not isinstance(fallback, dict):
        raise ValueError(f"{where}.fallback: must be a mapping with a 'strategy'")
    if isinstance(fallback, dict):
        fb_strategy = fallback.get('strategy')
        if fb_strategy not in ('probabilistic', 'constant'):
            raise ValueError(
                f"{where}.fallback: only 'probabilistic' and 'constant' fallback "
                f"strategies are supported, got '{fb_strategy}'"
            )
        if fb_strategy == 'constant':
            if 'value' not in fallback:
                raise ValueError(
                    f"{where}.fallback: a constant fallback must define 'value' "
                    "(other keys, e.g. 'data_source', are not read for constants)"
                )
        # The fallback is itself an assignment config: apply the same rules.
        validate_assignment_config(fallback, where=f"{where}.fallback")

    # --- logic entries ---------------------------------------------------
    logic_entries = config.get('logic') or []
    for i, raw_entry in enumerate(logic_entries):
        # A null/empty entry is treated as an entry with no keys.
        entry = raw_entry if raw_entry else {}
        unknown = set(entry) - _LOGIC_ENTRY_KEYS
        if unknown:
            raise ValueError(
                f"{where}.logic[{i}]: unknown key(s) {sorted(unknown)} — "
                f"allowed: {sorted(_LOGIC_ENTRY_KEYS)}"
            )

        then = entry.get('then')
        if not isinstance(then, dict):
            # Only mapping-valued 'then' blocks have keys to check.
            continue
        unknown = set(then) - _THEN_BLOCK_KEYS
        if unknown:
            raise ValueError(
                f"{where}.logic[{i}].then: unknown key(s) {sorted(unknown)} — "
                f"allowed: {sorted(_THEN_BLOCK_KEYS)}"
            )