Household excess handler

Handler for allocating excess and overflow people to households.

This module contains logic for (1) allocating unassigned people to existing households (excess allocation), and (2) handling overflow allocation when excess allocation leaves people unassigned.

`HouseholdExcessHandler`

Handles excess and overflow people allocation to households.

Source code in may/residence/household_excess_handler.py

class HouseholdExcessHandler:
    """Handles excess and overflow people allocation to households."""

    def __init__(self, household_distributor):
        """
        Initialize the excess handler.

        Args:
            household_distributor: Reference to parent HouseholdDistributor
        """
        self.distributor = household_distributor

    def allocate_excess_to_households(self,
                                      target_patterns: List[str],
                                      add_category: str,
                                      constraints: Optional[List[Dict]] = None,
                                      max_per_household: Optional[int] = None,
                                      add_distribution: Optional[Dict] = None,
                                      refresh_pools: bool = False,
                                      round_name: Optional[str] = None,
                                      rule_name: Optional[str] = None):
        """
        Allocate excess people to existing households created in previous steps.

        This method allows you to add people to households that were created earlier,
        respecting flexible patterns and configurable constraints.

        Args:
            target_patterns: List of original patterns to target for adding people.
                           Only households created with these patterns will be modified.
                           Example: [">=2 >=0 2 0", "1 >=0 2 0"]
            add_category: Name of age category to add (e.g., "Young Adults", "Kids")
            constraints: List of constraint dicts defining limits.
                        Example: [{"category_sum": ["Kids", "Young Adults"], "max": 4}]
            max_per_household: Maximum number of people to add per household (None = no limit)
            add_distribution: Distribution config for how many to add per household.
                            Example: {"type": "weighted", "probabilities": {0: 0.3, 1: 0.5, 2: 0.2}}
                            Or: {"type": "poisson", "mean": 1.2}
                            Or: {"type": "normal", "mean": 1.5, "std": 0.7}
            refresh_pools: If True, refresh person pools to get latest unallocated people
            round_name: Optional name for this round (for logging)
            rule_name: Optional relationship rule name to validate people against existing household members

        Returns:
            dict: Statistics about this excess allocation
        """
        round_label = self.distributor._log_round_start(round_name, "Excess Allocation Round")
        logger.info(f"Target patterns: {target_patterns}")
        logger.info(f"Adding category: {add_category}")
        logger.info(f"Constraints: {constraints}")
        if rule_name:
            logger.info(f"Using relationship rule: '{rule_name}'")
        logger.info("")

        # Get rule if specified
        rule = None
        if rule_name:
            rule = self.distributor.relationship_rules.get_rule_by_name(rule_name)
            if not rule:
                logger.error(f"Unknown relationship rule '{rule_name}'")
                return {
                    'round_name': round_label,
                    'people_added': 0,
                    'households_modified': 0,
                    'error': f"Unknown relationship rule '{rule_name}'"
                }

        # Refresh pools if requested
        if refresh_pools:
            self.distributor._prepare_person_pools(refresh=True)

        # Find category index for the category to add
        add_cat_idx = self.distributor._validate_category_index(add_category)
        if add_cat_idx is None:
            return {
                'round_name': round_label,
                'people_added': 0,
                'households_modified': 0,
                'error': f"Unknown category '{add_category}'"
            }

        # Filter households by target patterns
        target_households = self.distributor._filter_households_by_patterns(target_patterns)
        logger.info(f"Found {len(target_households)} households matching target patterns")

        if not target_households:
            logger.warning("No households found matching target patterns")
            return {
                'round_name': round_label,
                'people_added': 0,
                'households_modified': 0
            }

        # Shuffle households for fairness
        np.random.shuffle(target_households)

        # Track statistics
        people_added = 0
        households_modified = 0

        # Progress tracking
        total_households = len(target_households)
        households_processed = 0
        progress_interval = max(1, total_households // 10)  # Update every 10%

        logger.info(f"Processing {total_households:,} target households...")

        # Iterate through target households and try to add people
        for household in target_households:
            households_processed += 1
            geo_unit_code = household.geographical_unit.name

            # Get person pool for this geo_unit
            if geo_unit_code not in self.distributor.person_pool_by_geo_unit:
                continue

            pools = self.distributor.person_pool_by_geo_unit[geo_unit_code]
            available_people = pools[add_cat_idx]

            if not available_people:
                continue

            # Determine target number to add for this household
            if add_distribution:
                target_to_add = self.distributor._sample_from_distribution(add_distribution)
            else:
                # Default: fill to max allowed
                target_to_add = max_per_household if max_per_household is not None else float('inf')

            # Apply max_per_household limit
            if max_per_household is not None:
                target_to_add = min(target_to_add, max_per_household)

            # Try to add the target number of people
            added_to_this_household = 0

            # Handle infinity case (fill to max) vs finite target
            if target_to_add == float('inf'):
                # Fill to capacity (until pool empty or constraints violated)
                while available_people:
                    # Check if adding this person would violate constraints
                    if constraints and not self.distributor._check_constraints_if_added(
                        household, add_category, constraints
                    ):
                        # Can't add more to this household due to constraints
                        break

                    # Select person (with or without relationship rule validation)
                    if rule:
                        # Use relationship rules to validate against existing household members
                        person = self._select_person_for_excess_with_rule(
                            household, available_people, add_category, rule
                        )
                        if not person:
                            # No valid person found for this household
                            break
                    else:
                        # No rule - take first available person
                        # available_people is now a dict, take first value
                        person = next(iter(available_people.values()))  # Always take first (already shuffled)

                    # Add the person
                    self.distributor._allocate_person_to_household(household, person, available_people)

                    added_to_this_household += 1
                    people_added += 1
            else:
                # Finite target - add up to target_to_add people
                for _ in range(int(target_to_add)):
                    # Check if we have people available
                    if not available_people:
                        break

                    # Check if adding this person would violate constraints
                    if constraints and not self.distributor._check_constraints_if_added(
                        household, add_category, constraints
                    ):
                        # Can't add more to this household due to constraints
                        break

                    # Select person (with or without relationship rule validation)
                    if rule:
                        # Use relationship rules to validate against existing household members
                        person = self._select_person_for_excess_with_rule(
                            household, available_people, add_category, rule
                        )
                        if not person:
                            # No valid person found for this household
                            break
                    else:
                        # No rule - take first available person
                        # available_people is now a dict, take first value
                        person = next(iter(available_people.values()))  # Always take first (already shuffled)

                    # Add the person
                    self.distributor._allocate_person_to_household(household, person, available_people)

                    added_to_this_household += 1
                    people_added += 1

            if added_to_this_household > 0:
                households_modified += 1
                logger.debug(f"Added {added_to_this_household} {add_category} to household {household.id}")

            # Log progress at intervals
            if households_processed % progress_interval == 0 or households_processed == total_households:
                percent_complete = (households_processed / total_households) * 100
                logger.info(f"  Progress: {households_processed}/{total_households} households processed ({percent_complete:.1f}%) - {households_modified} modified, {people_added} people added")

        # Statistics
        stats = {
            'round_name': round_label,
            'round_number': self.distributor.current_round,
            'people_added': people_added,
            'households_modified': households_modified,
            'target_households_count': len(target_households),
            'total_people_allocated': len(self.distributor.allocated_people),
            'total_people_remaining': len(self.distributor.population.get_all_people()) - len(self.distributor.allocated_people)
        }

        # Log summary
        logger.info("=" * 60)
        logger.info(f"{round_label} complete!")
        logger.info(f"  Target households: {len(target_households):,}")
        logger.info(f"  Households modified: {households_modified:,}")
        logger.info(f"  People added: {people_added:,}")
        logger.info(f"  Total people allocated: {len(self.distributor.allocated_people):,}")
        logger.info(f"  People remaining: {stats['total_people_remaining']:,}")
        logger.info("")

        # Show remaining by category
        remaining_by_category = self.distributor.get_available_people_by_category()
        logger.info("  Remaining by category:")
        for cat_name in [cat.name for cat in self.distributor.categories]:
            count = remaining_by_category.get(cat_name, 0)
            logger.info(f"    {cat_name}: {count:,}")
        logger.info("=" * 60)

        return stats

    def allocate_overflow_to_households(self,
                                       target_patterns: List[str],
                                       add_category: str,
                                       pattern_bias: Optional[Dict[str, float]] = None,
                                       refresh_pools: bool = False,
                                       round_name: Optional[str] = None):
        """
        Allocate ALL remaining people from a category to existing households,
        IGNORING max household size constraints (overflow mode).

        This is a "desperation round" that distributes remaining people balancedly
        across eligible households, optionally biasing certain patterns.

        Args:
            target_patterns: List of patterns to target for adding people.
                           Example: ["2 >=0 2 0", "0 >=0 0 0"]
            add_category: Name of age category to add (e.g., "Young Adults")
            pattern_bias: Dict mapping patterns to bias weights.
                         Higher weight = more likely to receive people.
                         Example: {"0 >=0 0 0": 2.0, "2 >=0 2 0": 1.0}
                         Households with pattern "0 >=0 0 0" get 2x allocation
            refresh_pools: If True, refresh person pools
            round_name: Optional name for this round (for logging)

        Returns:
            dict: Statistics about this overflow allocation
        """
        round_label = self.distributor._log_round_start(round_name, "Overflow Allocation Round")
        logger.info(f"Target patterns: {target_patterns}")
        logger.info(f"Adding category: {add_category}")
        logger.info(f"Pattern bias: {pattern_bias}")
        logger.info("WARNING: This step IGNORES max household size constraints!")
        logger.info("")

        # Refresh pools if requested
        if refresh_pools:
            self.distributor._prepare_person_pools(refresh=True)

        # Find category index
        add_cat_idx = self.distributor._validate_category_index(add_category)
        if add_cat_idx is None:
            return {
                'round_name': round_label,
                'people_added': 0,
                'households_modified': 0,
                'error': f"Unknown category '{add_category}'"
            }

        # Group households by geo_unit and pattern
        filtered_households = self.distributor._filter_households_by_patterns(target_patterns)
        households_by_geo_unit_pattern = {}
        for household in filtered_households:
            geo_unit_code = household.geographical_unit.name
            original_pattern = household.properties.get('original_pattern', '')
            key = (geo_unit_code, original_pattern)
            if key not in households_by_geo_unit_pattern:
                households_by_geo_unit_pattern[key] = []
            households_by_geo_unit_pattern[key].append(household)

        total_eligible_households = sum(len(hhs) for hhs in households_by_geo_unit_pattern.values())
        logger.info(f"Found {total_eligible_households} eligible households across {len(households_by_geo_unit_pattern)} geo_unit-pattern combinations")

        # Track statistics
        people_added = 0
        households_modified = 0

        # Progress tracking
        geo_units_list = list(set(k[0] for k in households_by_geo_unit_pattern.keys()))
        total_geo_units = len(geo_units_list)
        geo_units_processed = 0
        progress_interval = max(1, total_geo_units // 10)  # Update every 10%

        logger.info(f"Processing {total_geo_units} geo_units...")

        # Process each geo_unit
        for geo_unit_code in geo_units_list:
            geo_units_processed += 1
            if geo_unit_code not in self.distributor.person_pool_by_geo_unit:
                continue

            pools = self.distributor.person_pool_by_geo_unit[geo_unit_code]
            pool_dict = pools[add_cat_idx]

            if not pool_dict:
                continue

            # Convert to list for complex indexing in overflow mode
            available_people = list(pool_dict.values())
            logger.debug(f"geo_unit {geo_unit_code}: {len(available_people)} {add_category} available")

            # Get all households in this geo_unit across all patterns
            geo_unit_households_by_pattern = {}
            for (ac, pattern), hhs in households_by_geo_unit_pattern.items():
                if ac == geo_unit_code:
                    geo_unit_households_by_pattern[pattern] = hhs

            # Calculate distribution with bias
            total_to_allocate = len(available_people)

            # Apply bias weights
            pattern_weights = {}
            for pattern in geo_unit_households_by_pattern.keys():
                weight = pattern_bias.get(pattern, 1.0) if pattern_bias else 1.0
                num_households = len(geo_unit_households_by_pattern[pattern])
                pattern_weights[pattern] = weight * num_households

            total_weight = sum(pattern_weights.values())

            if total_weight == 0:
                continue

            # Allocate to each pattern proportionally
            pattern_allocations = {}
            allocated_so_far = 0

            for pattern in geo_unit_households_by_pattern.keys():
                proportion = pattern_weights[pattern] / total_weight
                allocation = int(total_to_allocate * proportion)
                pattern_allocations[pattern] = allocation
                allocated_so_far += allocation

            # Distribute remainder to highest-weight patterns
            remainder = total_to_allocate - allocated_so_far
            if remainder > 0:
                sorted_patterns = sorted(pattern_weights.keys(), key=lambda p: pattern_weights[p], reverse=True)
                for i in range(remainder):
                    pattern = sorted_patterns[i % len(sorted_patterns)]
                    pattern_allocations[pattern] += 1

            # Track global people index across all patterns
            global_people_index = 0

            # Now distribute within each pattern's households
            for pattern, num_to_add in pattern_allocations.items():
                if num_to_add == 0:
                    continue

                pattern_households = geo_unit_households_by_pattern[pattern]
                num_hh = len(pattern_households)

                # Distribute balancedly
                base_per_household = num_to_add // num_hh
                remainder_hh = num_to_add % num_hh

                # Shuffle for fairness
                shuffled_hh = pattern_households.copy()
                np.random.shuffle(shuffled_hh)

                for hh_idx, household in enumerate(shuffled_hh):
                    # Determine how many to add to this household
                    to_add = base_per_household + (1 if hh_idx < remainder_hh else 0)

                    if to_add == 0:
                        continue

                    # Add people to this household
                    added_to_hh = 0
                    for _ in range(to_add):
                        if global_people_index >= len(available_people):
                            break

                        person = available_people[global_people_index]
                        # Key the subset by the person's actual age category, otherwise
                        # add_to_subset() falls back to the household's first existing
                        # subset and contaminates it (e.g. an adult landing in "Kids").
                        household.add_to_subset(
                            person,
                            subset_key=self.distributor._get_person_category_name(person),
                        )
                        self.distributor.allocated_people.add(person.id)
                        global_people_index += 1
                        added_to_hh += 1
                        people_added += 1

                    if added_to_hh > 0:
                        households_modified += 1
                        logger.debug(f"Added {added_to_hh} {add_category} to household {household.id} (pattern: {pattern}, now size: {household.size()})")

            # Remove allocated people from pool dictionary
            if global_people_index > 0:
                ids_to_remove = [p.id for p in available_people[:global_people_index]]
                for pid in ids_to_remove:
                    pool_dict.pop(pid, None)

            # Log progress at intervals
            if geo_units_processed % progress_interval == 0 or geo_units_processed == total_geo_units:
                percent_complete = (geo_units_processed / total_geo_units) * 100
                logger.info(f"  Progress: {geo_units_processed}/{total_geo_units} geo_units processed ({percent_complete:.1f}%) - {households_modified} households modified, {people_added} people added")

        # Statistics
        stats = {
            'round_name': round_label,
            'round_number': self.distributor.current_round,
            'people_added': people_added,
            'households_modified': households_modified,
            'total_people_allocated': len(self.distributor.allocated_people),
            'total_people_remaining': len(self.distributor.population.get_all_people()) - len(self.distributor.allocated_people)
        }

        # Get remaining people by category
        remaining_by_category = self.distributor.get_available_people_by_category()

        # Log summary
        logger.info("=" * 60)
        logger.info(f"{round_label} complete!")
        logger.info(f"  Households modified: {households_modified:,}")
        logger.info(f"  People added (overflow): {people_added:,}")
        logger.info(f"  Total people allocated: {len(self.distributor.allocated_people):,}")
        logger.info(f"  People remaining: {stats['total_people_remaining']:,}")
        logger.info("")
        logger.info("  Remaining by category:")
        for cat_name in [cat.name for cat in self.distributor.categories]:
            count = remaining_by_category.get(cat_name, 0)
            logger.info(f"    {cat_name}: {count:,}")
        logger.info("=" * 60)

        return stats

    def _select_person_for_excess_with_rule(self, household: Venue,
                                           candidates: List['Person'],
                                           add_category: str,
                                           rule) -> Optional['Person']:
        """
        Select a person to add to an existing household using relationship rules.

        This validates the candidate against existing household members based on
        the relationship rule constraints (e.g., age differences).

        Args:
            household: The household to add to
            candidates: List of candidate people to choose from
            add_category: Category name being added (e.g., "Young Adults")
            rule: The relationship rule to use for validation

        Returns:
            Selected person if valid candidate found, None otherwise
        """
        # Organize existing household members by their roles based on the rule
        existing_people_by_role = {}

        # Map each rule role to its category names
        for role_name, role_config in rule.roles.items():
            category_names = role_config['categories']
            existing_people_by_role[role_name] = []

            # Find all household members that belong to this role's categories
            for resident in household.get_all_members():
                resident_cat_name = self.distributor._get_person_category_name(resident)
                if resident_cat_name in category_names:
                    existing_people_by_role[role_name].append(resident)

        # Find which role the person being added belongs to
        current_role = None
        for role_name, role_config in rule.roles.items():
            if add_category in role_config['categories']:
                current_role = role_name
                break

        candidate_list = (
            list(candidates.values()) if isinstance(candidates, dict) else list(candidates)
        )

        if not current_role:
            # Category not in any role - just return first candidate
            logger.debug(f"Category '{add_category}' not found in rule roles, using first candidate")
            return candidate_list[0] if candidate_list else None

        rr = self.distributor.relationship_rules

        # Couple completion: if this role carries a `pair_matching` constraint and
        # the household already holds exactly one partner, the person we add
        # *completes* the couple. The base excess path ignores pair_matching
        # entirely (it only honors numerical_attribute_difference), so a "second
        # adult" would be added as an un-coupled individual. Here we honor the
        # pair: pick a partner compatible with the existing member (sex + age
        # gap), still subject to the role's numerical constraints, and flag the
        # cohabiting couple. Falls back to the plain selection when no
        # couple-compatible-and-role-valid candidate exists.
        role_count = (rule.roles.get(current_role) or {}).get('count')
        pair_constraint = self.distributor._find_pair_constraint_for_role(
            rule, current_role, role_count
        )
        existing_partners = existing_people_by_role.get(current_role, [])
        required_count = (pair_constraint or {}).get('require_exact_count') or 2
        if pair_constraint is not None and required_count == 2 and len(existing_partners) == 1:
            partner = existing_partners[0]
            geo_unit_code = getattr(
                getattr(household, 'geographical_unit', None), 'name', None
            )
            pool = rr.couple_compatible_candidates(
                partner, candidate_list, pair_constraint, geo_unit_code=geo_unit_code
            )
            if pool:
                person = rr.select_person_with_constraint(
                    candidates=pool,
                    existing_people_by_role=existing_people_by_role,
                    constraints=rule.constraints,
                    current_role=current_role,
                    show_detailed_logs=False,
                )
                if person is not None:
                    if pair_constraint.get('creates_romantic_couple', False):
                        person.properties['cohabiting_couple'] = [partner.id]
                        partner.properties['cohabiting_couple'] = [person.id]
                    return person
            # else: fall through to the plain (un-coupled) selection below.

        # Use relationship rules to select a valid person
        person = rr.select_person_with_constraint(
            candidates=candidate_list,
            existing_people_by_role=existing_people_by_role,
            constraints=rule.constraints,
            current_role=current_role,
            show_detailed_logs=False  # Keep logs minimal for performance
        )

        return person

`__init__(household_distributor)`

Initialize the excess handler.

Parameters:

Name	Type	Description	Default
`household_distributor`		Reference to parent HouseholdDistributor	required

Source code in may/residence/household_excess_handler.py

def __init__(self, household_distributor):
    """
    Create a handler bound to its owning distributor.

    Args:
        household_distributor: The parent HouseholdDistributor. It is kept
            on ``self.distributor`` without being copied or validated.
    """
    self.distributor = household_distributor

`allocate_excess_to_households(target_patterns, add_category, constraints=None, max_per_household=None, add_distribution=None, refresh_pools=False, round_name=None, rule_name=None)`

Allocate excess people to existing households created in previous steps.

This method allows you to add people to households that were created earlier, respecting flexible patterns and configurable constraints.

Parameters:

Name	Type	Description	Default
`target_patterns`	`List[str]`	List of original patterns to target for adding people. Only households created with these patterns will be modified. Example: [">=2 >=0 2 0", "1 >=0 2 0"]	required
`add_category`	`str`	Name of age category to add (e.g., "Young Adults", "Kids")	required
`constraints`	`Optional[List[Dict]]`	List of constraint dicts defining limits. Example: [{"category_sum": ["Kids", "Young Adults"], "max": 4}]	`None`
`max_per_household`	`Optional[int]`	Maximum number of people to add per household (None = no limit)	`None`
`add_distribution`	`Optional[Dict]`	Distribution config for how many to add per household. Example: {"type": "weighted", "probabilities": {0: 0.3, 1: 0.5, 2: 0.2}} Or: {"type": "poisson", "mean": 1.2} Or: {"type": "normal", "mean": 1.5, "std": 0.7}	`None`
`refresh_pools`	`bool`	If True, refresh person pools to get latest unallocated people	`False`
`round_name`	`Optional[str]`	Optional name for this round (for logging)	`None`
`rule_name`	`Optional[str]`	Optional relationship rule name to validate people against existing household members	`None`

Returns:

Name	Type	Description
`dict`		Statistics about this excess allocation

Source code in may/residence/household_excess_handler.py

def allocate_excess_to_households(self,
                                  target_patterns: List[str],
                                  add_category: str,
                                  constraints: Optional[List[Dict]] = None,
                                  max_per_household: Optional[int] = None,
                                  add_distribution: Optional[Dict] = None,
                                  refresh_pools: bool = False,
                                  round_name: Optional[str] = None,
                                  rule_name: Optional[str] = None):
    """
    Allocate excess people to existing households created in previous steps.

    Households matching ``target_patterns`` are visited in random order. For
    each one, people of ``add_category`` are taken from the person pool of the
    household's geo_unit until the per-household target is reached, the pool
    is empty, a constraint would be violated, or (when a relationship rule is
    used) no valid person can be found.

    Args:
        target_patterns: List of original patterns to target for adding people.
                       Only households created with these patterns will be modified.
                       Example: [">=2 >=0 2 0", "1 >=0 2 0"]
        add_category: Name of age category to add (e.g., "Young Adults", "Kids")
        constraints: List of constraint dicts defining limits.
                    Example: [{"category_sum": ["Kids", "Young Adults"], "max": 4}]
        max_per_household: Maximum number of people to add per household (None = no limit)
        add_distribution: Distribution config for how many to add per household.
                        Example: {"type": "weighted", "probabilities": {0: 0.3, 1: 0.5, 2: 0.2}}
                        Or: {"type": "poisson", "mean": 1.2}
                        Or: {"type": "normal", "mean": 1.5, "std": 0.7}
                        When omitted, each household is filled up to
                        ``max_per_household`` (or without limit if that is None).
        refresh_pools: If True, refresh person pools to get latest unallocated people
        round_name: Optional name for this round (for logging)
        rule_name: Optional relationship rule name to validate people against existing household members

    Returns:
        dict: Statistics about this excess allocation. A completed round has the
        keys ``round_name``, ``round_number``, ``people_added``,
        ``households_modified``, ``target_households_count``,
        ``total_people_allocated`` and ``total_people_remaining``. A round that
        stops early (unknown rule, unknown category, no matching households)
        only has ``round_name``, ``people_added`` and ``households_modified``,
        plus ``error`` for the unknown rule/category cases.
    """
    distributor = self.distributor

    round_label = distributor._log_round_start(round_name, "Excess Allocation Round")
    logger.info(f"Target patterns: {target_patterns}")
    logger.info(f"Adding category: {add_category}")
    logger.info(f"Constraints: {constraints}")
    if rule_name:
        logger.info(f"Using relationship rule: '{rule_name}'")
    logger.info("")

    def early_exit(error=None):
        """Build the reduced stats dict for a round that ends before any household is visited."""
        result = {
            'round_name': round_label,
            'people_added': 0,
            'households_modified': 0,
        }
        if error is not None:
            result['error'] = error
        return result

    # Get rule if specified
    rule = None
    if rule_name:
        rule = distributor.relationship_rules.get_rule_by_name(rule_name)
        if not rule:
            logger.error(f"Unknown relationship rule '{rule_name}'")
            return early_exit(f"Unknown relationship rule '{rule_name}'")

    # Refresh pools if requested
    if refresh_pools:
        distributor._prepare_person_pools(refresh=True)

    # Find category index for the category to add
    add_cat_idx = distributor._validate_category_index(add_category)
    if add_cat_idx is None:
        return early_exit(f"Unknown category '{add_category}'")

    # Filter households by target patterns
    target_households = distributor._filter_households_by_patterns(target_patterns)
    logger.info(f"Found {len(target_households)} households matching target patterns")

    if not target_households:
        logger.warning("No households found matching target patterns")
        return early_exit()

    def fill_household(household, available_people):
        """Move people from the pool into one household; return how many were added."""
        # Determine target number to add for this household
        if add_distribution:
            target_to_add = distributor._sample_from_distribution(add_distribution)
        else:
            # Default: fill to max allowed
            target_to_add = max_per_household if max_per_household is not None else float('inf')

        # Apply max_per_household limit
        if max_per_household is not None:
            target_to_add = min(target_to_add, max_per_household)

        # None means "no numeric cap": keep adding until the pool is empty,
        # a constraint blocks the household, or the rule finds nobody.
        limit = None if target_to_add == float('inf') else int(target_to_add)

        added = 0
        while available_people and (limit is None or added < limit):
            # Check if adding this person would violate constraints
            if constraints and not distributor._check_constraints_if_added(
                household, add_category, constraints
            ):
                break

            if rule:
                # Use relationship rules to validate against existing household members
                person = self._select_person_for_excess_with_rule(
                    household, available_people, add_category, rule
                )
                if not person:
                    # No valid person found for this household
                    break
            else:
                # No rule - the pool is a dict, take its first value
                person = next(iter(available_people.values()))

            distributor._allocate_person_to_household(household, person, available_people)
            added += 1
        return added

    # Shuffle households for fairness
    np.random.shuffle(target_households)

    # Track statistics
    people_added = 0
    households_modified = 0

    # Progress tracking
    total_households = len(target_households)
    progress_interval = max(1, total_households // 10)  # Update every 10%

    logger.info(f"Processing {total_households:,} target households...")

    # Iterate through target households and try to add people
    for households_processed, household in enumerate(target_households, start=1):
        geo_unit_code = household.geographical_unit.name

        # Get the pool of the requested category for this geo_unit (if any)
        available_people = None
        if geo_unit_code in distributor.person_pool_by_geo_unit:
            available_people = distributor.person_pool_by_geo_unit[geo_unit_code][add_cat_idx]

        if available_people:
            added_to_this_household = fill_household(household, available_people)
            if added_to_this_household > 0:
                people_added += added_to_this_household
                households_modified += 1
                logger.debug(f"Added {added_to_this_household} {add_category} to household {household.id}")

        # Log progress at intervals. This runs for skipped households as well,
        # so the final (100%) progress line is always emitted.
        if households_processed % progress_interval == 0 or households_processed == total_households:
            percent_complete = (households_processed / total_households) * 100
            logger.info(f"  Progress: {households_processed}/{total_households} households processed ({percent_complete:.1f}%) - {households_modified} modified, {people_added} people added")

    # Statistics
    stats = {
        'round_name': round_label,
        'round_number': distributor.current_round,
        'people_added': people_added,
        'households_modified': households_modified,
        'target_households_count': len(target_households),
        'total_people_allocated': len(distributor.allocated_people),
        'total_people_remaining': len(distributor.population.get_all_people()) - len(distributor.allocated_people)
    }

    # Log summary
    logger.info("=" * 60)
    logger.info(f"{round_label} complete!")
    logger.info(f"  Target households: {len(target_households):,}")
    logger.info(f"  Households modified: {households_modified:,}")
    logger.info(f"  People added: {people_added:,}")
    logger.info(f"  Total people allocated: {len(distributor.allocated_people):,}")
    logger.info(f"  People remaining: {stats['total_people_remaining']:,}")
    logger.info("")

    # Show remaining by category
    remaining_by_category = distributor.get_available_people_by_category()
    logger.info("  Remaining by category:")
    for cat_name in [cat.name for cat in distributor.categories]:
        count = remaining_by_category.get(cat_name, 0)
        logger.info(f"    {cat_name}: {count:,}")
    logger.info("=" * 60)

    return stats

`allocate_overflow_to_households(target_patterns, add_category, pattern_bias=None, refresh_pools=False, round_name=None)`

Allocate ALL remaining people from a category to existing households, IGNORING max household size constraints (overflow mode).

This is a "desperation round" that distributes the remaining people evenly across eligible households, optionally biasing certain patterns.

Parameters:

Name	Type	Description	Default
`target_patterns`	`List[str]`	List of patterns to target for adding people. Example: ["2 >=0 2 0", "0 >=0 0 0"]	required
`add_category`	`str`	Name of age category to add (e.g., "Young Adults")	required
`pattern_bias`	`Optional[Dict[str, float]]`	Dict mapping patterns to bias weights. Higher weight = more likely to receive people. Example: {"0 >=0 0 0": 2.0, "2 >=0 2 0": 1.0} Households with pattern "0 >=0 0 0" get 2x allocation	`None`
`refresh_pools`	`bool`	If True, refresh person pools	`False`
`round_name`	`Optional[str]`	Optional name for this round (for logging)	`None`

Returns:

Name	Type	Description
`dict`		Statistics about this overflow allocation

Source code in may/residence/household_excess_handler.py

def allocate_overflow_to_households(self,
                                   target_patterns: List[str],
                                   add_category: str,
                                   pattern_bias: Optional[Dict[str, float]] = None,
                                   refresh_pools: bool = False,
                                   round_name: Optional[str] = None):
    """
    Allocate ALL remaining people from a category to existing households,
    IGNORING max household size constraints (overflow mode).

    This is a "desperation round" that distributes the remaining people evenly
    across eligible households, optionally biasing certain patterns. Within
    each geo_unit the pool is first split between patterns in proportion to
    ``bias weight * number of households`` and then spread over each pattern's
    households (in random order) so that household shares differ by at most one.

    Geo_units are processed in the order in which their first household is
    returned by the household filter, so a seeded ``np.random`` yields the
    same allocation on every run.

    Args:
        target_patterns: List of patterns to target for adding people.
                       Example: ["2 >=0 2 0", "0 >=0 0 0"]
        add_category: Name of age category to add (e.g., "Young Adults")
        pattern_bias: Dict mapping patterns to bias weights.
                     Higher weight = more likely to receive people.
                     Patterns missing from the dict get weight 1.0.
                     Example: {"0 >=0 0 0": 2.0, "2 >=0 2 0": 1.0}
                     Households with pattern "0 >=0 0 0" get 2x allocation
        refresh_pools: If True, refresh person pools
        round_name: Optional name for this round (for logging)

    Returns:
        dict: Statistics about this overflow allocation with the keys
        ``round_name``, ``round_number``, ``people_added``,
        ``households_modified``, ``total_people_allocated`` and
        ``total_people_remaining``. For an unknown category only
        ``round_name``, ``people_added``, ``households_modified`` and
        ``error`` are returned.
    """
    distributor = self.distributor

    round_label = distributor._log_round_start(round_name, "Overflow Allocation Round")
    logger.info(f"Target patterns: {target_patterns}")
    logger.info(f"Adding category: {add_category}")
    logger.info(f"Pattern bias: {pattern_bias}")
    logger.info("WARNING: This step IGNORES max household size constraints!")
    logger.info("")

    # Refresh pools if requested
    if refresh_pools:
        distributor._prepare_person_pools(refresh=True)

    # Find category index
    add_cat_idx = distributor._validate_category_index(add_category)
    if add_cat_idx is None:
        return {
            'round_name': round_label,
            'people_added': 0,
            'households_modified': 0,
            'error': f"Unknown category '{add_category}'"
        }

    # Group households as geo_unit -> pattern -> [households]. Dicts keep
    # insertion order, so iteration is deterministic (unlike a set of codes)
    # and each geo_unit finds its patterns without rescanning every group.
    households_by_geo_unit = {}
    total_eligible_households = 0
    for household in distributor._filter_households_by_patterns(target_patterns):
        geo_unit_code = household.geographical_unit.name
        original_pattern = household.properties.get('original_pattern', '')
        by_pattern = households_by_geo_unit.setdefault(geo_unit_code, {})
        by_pattern.setdefault(original_pattern, []).append(household)
        total_eligible_households += 1

    combination_count = sum(len(by_pattern) for by_pattern in households_by_geo_unit.values())
    logger.info(f"Found {total_eligible_households} eligible households across {combination_count} geo_unit-pattern combinations")

    def distribute_in_geo_unit(geo_unit_code, households_by_pattern):
        """Hand out one geo_unit's pool; return (people added, households modified)."""
        if geo_unit_code not in distributor.person_pool_by_geo_unit:
            return 0, 0

        pool_dict = distributor.person_pool_by_geo_unit[geo_unit_code][add_cat_idx]
        if not pool_dict:
            return 0, 0

        # Snapshot the pool as a list so people can be handed out by position
        available_people = list(pool_dict.values())
        logger.debug(f"geo_unit {geo_unit_code}: {len(available_people)} {add_category} available")
        total_to_allocate = len(available_people)

        # Weight of a pattern = bias weight * number of its households
        pattern_weights = {}
        for pattern, pattern_households in households_by_pattern.items():
            weight = pattern_bias.get(pattern, 1.0) if pattern_bias else 1.0
            pattern_weights[pattern] = weight * len(pattern_households)

        total_weight = sum(pattern_weights.values())
        if total_weight == 0:
            return 0, 0

        # Proportional share per pattern, truncated to whole people
        pattern_allocations = {
            pattern: int(total_to_allocate * (weight / total_weight))
            for pattern, weight in pattern_weights.items()
        }

        # People lost to truncation go to the highest-weight patterns first
        remainder = total_to_allocate - sum(pattern_allocations.values())
        if remainder > 0:
            sorted_patterns = sorted(pattern_weights, key=pattern_weights.get, reverse=True)
            for i in range(remainder):
                pattern_allocations[sorted_patterns[i % len(sorted_patterns)]] += 1

        # Position of the next unassigned person, shared across all patterns
        next_person_index = 0
        added_here = 0
        modified_here = 0

        for pattern, num_to_add in pattern_allocations.items():
            if num_to_add == 0:
                continue

            pattern_households = households_by_pattern[pattern]
            base_per_household, remainder_hh = divmod(num_to_add, len(pattern_households))

            # Shuffle a copy so the households receiving the extra person are random
            shuffled_hh = pattern_households.copy()
            np.random.shuffle(shuffled_hh)

            for hh_idx, household in enumerate(shuffled_hh):
                to_add = base_per_household + (1 if hh_idx < remainder_hh else 0)
                if to_add == 0:
                    continue

                added_to_hh = 0
                for _ in range(to_add):
                    if next_person_index >= len(available_people):
                        break

                    person = available_people[next_person_index]
                    # Key the subset by the person's actual age category, otherwise
                    # add_to_subset() falls back to the household's first existing
                    # subset and contaminates it (e.g. an adult landing in "Kids").
                    household.add_to_subset(
                        person,
                        subset_key=distributor._get_person_category_name(person),
                    )
                    distributor.allocated_people.add(person.id)
                    next_person_index += 1
                    added_to_hh += 1

                if added_to_hh > 0:
                    added_here += added_to_hh
                    modified_here += 1
                    logger.debug(f"Added {added_to_hh} {add_category} to household {household.id} (pattern: {pattern}, now size: {household.size()})")

        # Remove allocated people from pool dictionary
        for person in available_people[:next_person_index]:
            pool_dict.pop(person.id, None)

        return added_here, modified_here

    # Track statistics
    people_added = 0
    households_modified = 0

    # Progress tracking
    total_geo_units = len(households_by_geo_unit)
    progress_interval = max(1, total_geo_units // 10)  # Update every 10%

    logger.info(f"Processing {total_geo_units} geo_units...")

    # Process each geo_unit
    for geo_units_processed, (geo_unit_code, households_by_pattern) in enumerate(
        households_by_geo_unit.items(), start=1
    ):
        added, modified = distribute_in_geo_unit(geo_unit_code, households_by_pattern)
        people_added += added
        households_modified += modified

        # Log progress at intervals. This runs for skipped geo_units as well,
        # so the final (100%) progress line is always emitted.
        if geo_units_processed % progress_interval == 0 or geo_units_processed == total_geo_units:
            percent_complete = (geo_units_processed / total_geo_units) * 100
            logger.info(f"  Progress: {geo_units_processed}/{total_geo_units} geo_units processed ({percent_complete:.1f}%) - {households_modified} households modified, {people_added} people added")

    # Statistics
    stats = {
        'round_name': round_label,
        'round_number': distributor.current_round,
        'people_added': people_added,
        'households_modified': households_modified,
        'total_people_allocated': len(distributor.allocated_people),
        'total_people_remaining': len(distributor.population.get_all_people()) - len(distributor.allocated_people)
    }

    # Get remaining people by category
    remaining_by_category = distributor.get_available_people_by_category()

    # Log summary
    logger.info("=" * 60)
    logger.info(f"{round_label} complete!")
    logger.info(f"  Households modified: {households_modified:,}")
    logger.info(f"  People added (overflow): {people_added:,}")
    logger.info(f"  Total people allocated: {len(distributor.allocated_people):,}")
    logger.info(f"  People remaining: {stats['total_people_remaining']:,}")
    logger.info("")
    logger.info("  Remaining by category:")
    for cat_name in [cat.name for cat in distributor.categories]:
        count = remaining_by_category.get(cat_name, 0)
        logger.info(f"    {cat_name}: {count:,}")
    logger.info("=" * 60)

    return stats

Household excess handler