Skip to content

Graph relationship builder

Graph-based relationship builder using clustered random graphs.

Uses NetworkX to generate a Watts-Strogatz graph with controllable clustering, then maps edges to relationships between Person objects.

GraphRelationshipBuilder

Builds relationship networks between people using a graph-based approach.

Uses a random graph to create relationships, ideally with controllable clustering. Each node in the graph corresponds to a person, and each edge represents a relationship. Default is to use the Watts-Strogatz small-world graph.

Attributes:

Name Type Description
people list[Person]

List of Person objects to create relationships for.

n_people int

Number of people in the population.

mean_connections_per_person int

Target average connections per person.

clustering_level float

Target clustering coefficient (0.0 to 1.0).

storage_key str

Key used to store relationships in person.properties.

Source code in may/social_networks/builder_functions/graph/graph_relationship_builder.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
class GraphRelationshipBuilder:
    """
    Builds relationship networks between people using a graph-based approach.

    Uses a random graph to create relationships, ideally with
    controllable clustering. Each node in the graph corresponds to a person
    (node ``i`` is ``people[i]``), and each edge represents a relationship.
    Default is to use the Watts-Strogatz small-world graph.

    Attributes:
        people (list[Person]): List of Person objects to create relationships for.
        n_people (int): Number of people in the population.
        mean_connections_per_person (int): Target average connections per person.
        clustering_level (float): Target clustering coefficient (0.0 to 1.0).
        storage_key (str): Key used to store relationships in person.properties.
        connection_filters (list): Pairwise edge filters; an empty list when
            none were supplied.
        symmetric (bool): If True, each edge is recorded for both endpoints.
        max_rewire_attempts (int): Retry cap passed to the rewiring step when
            an edge fails the connection filters.
        kwargs (dict): Extra keyword arguments forwarded to
            ``create_clustered_graph``.
    """
    def __init__(
        self,
        people: list[Person],
        mean_connections_per_person: int = 6,
        clustering_level: float = 0.7,
        storage_key: str = "social_contacts",
        connection_filters: Optional[list] = None,
        symmetric: bool = True,
        max_rewire_attempts: int = 10,
        **kwargs,
    ):
        """
        Initialize the graph relationship builder.

        Args:
            people (list[Person]): List of Person objects to create relationships for.
            mean_connections_per_person (int): Average number of connections per person
                (must be even for the Watts-Strogatz method).
            clustering_level (float): 0.0 (random-like) to 1.0 (high clustering).
            storage_key (str): Key to use when storing relationships in person.properties.
            connection_filters (list | None): Pairwise edge filters. ``None`` (or any
                falsy value) is stored as an empty list, which disables filtering.
            symmetric (bool): If True, both u→v and v→u are stored per edge.
            max_rewire_attempts (int): Retry cap when an edge fails connection_filters.
            **kwargs: Stored as-is and forwarded to ``create_clustered_graph``
                by ``build_all``.
        """
        self.people = people
        self.n_people = len(people)
        self.mean_connections_per_person = mean_connections_per_person
        self.clustering_level = clustering_level
        self.storage_key = storage_key
        self.connection_filters = connection_filters or []
        self.symmetric = symmetric
        self.max_rewire_attempts = max_rewire_attempts

        # Two-way lookup between graph node index (position in `people`) and person id
        self._idx_to_person_id = {i: person.id for i, person in enumerate(people)}
        self._person_id_to_idx = {person.id: i for i, person in enumerate(people)}
        self.kwargs = kwargs

    def build_all(self) -> dict[int, list[Person]]:
        """
        Build relationships for all people using a graph-based approach.

        Generates the clustered graph, optionally filters/rewires its edges
        through ``connection_filters``, converts the remaining edges into
        per-person contact lists, and passes every non-empty list to
        ``store_contacts`` under ``storage_key``.

        Returns:
            dict[int, list[Person]]: Mapping of person_id to list of connected Person objects.
                Every person has an entry (possibly an empty list). An empty
                dict is returned when there are fewer than 2 people.

        Raises:
            ValueError: If ``mean_connections_per_person`` is negative.

        Example:
            >>> builder = GraphRelationshipBuilder(people, mean_connections_per_person=6)
            >>> relationships = builder.build_all()
            >>> print(f"Person 0 connected to {len(relationships[0])} others")
        """
        nx = _require_networkx()

        logger.debug(f"Building graph-based relationships for {self.n_people:,} people")
        logger.debug(f"  mean_connections_per_person={self.mean_connections_per_person}, clustering_level={self.clustering_level}")

        # An edge needs two endpoints, so at least 2 people are required
        if self.n_people < 2:
            logger.warning("Need at least 2 people to create relationships")
            return {}

        k = self.mean_connections_per_person

        # A node can be linked to at most every other node: clamp k to n_people - 1
        max_k = self.n_people - 1
        if k > max_k:
            logger.warning(f'Average connections {k} exceeds the max possible for the graph n_people-1={max_k}. Reducing to the max possible')
            k = max_k

        if k < 0:
            logger.error(f"Average connections {k} is below zero.")
            raise ValueError(f"Average connections {k} is below zero.")

        # Generate the clustered graph from the validated, clamped k
        G = create_clustered_graph(
            n_nodes=self.n_people,
            k=k,
            clustering_level=self.clustering_level,
            **self.kwargs,
        )

        # Apply connection filters with Numba-accelerated rewiring on rejection
        if self.connection_filters:
            local_attr_arrays = build_local_attribute_arrays(self.people, self.connection_filters)
            stacked, match_types, attr_indices, range_values = encode_connection_filters_for_numba(
                self.connection_filters, local_attr_arrays
            )
            adj = nx.to_scipy_sparse_array(G, nodelist=range(self.n_people), format='csr', dtype=np.int32)
            # reshape keeps the (n_edges, 2) layout even for an edgeless graph,
            # where np.array([]) would otherwise be 1-D with shape (0,).
            # NOTE(review): assumes _apply_filters_and_rewire expects a 2-D edge array — confirm.
            edge_array = np.array(list(G.edges()), dtype=np.int32).reshape(-1, 2)
            rng_seed = int(np.random.randint(0, 2**31))
            kept_array = _apply_filters_and_rewire(
                edge_array, adj.indices, adj.indptr, self.n_people,
                stacked, match_types, attr_indices, range_values,
                self.max_rewire_attempts, rng_seed,
            )
            # Rebuild the graph from the surviving edges, keeping isolated nodes
            G = nx.Graph()
            G.add_nodes_from(range(self.n_people))
            G.add_edges_from(kept_array.tolist())

        # Convert graph edges to relationships (Person objects, not IDs)
        relationships: dict[int, list[Person]] = {person.id: [] for person in self.people}

        for node_u, node_v in G.edges():
            person_u = self.people[node_u]
            person_v = self.people[node_v]

            relationships[person_u.id].append(person_v)
            # Without symmetry only the first endpoint of each edge records the contact
            if self.symmetric:
                relationships[person_v.id].append(person_u)

        # People without any contact are not written to
        for person in self.people:
            if relationships[person.id]:
                store_contacts(person, relationships[person.id], self.storage_key)

        # Log statistics (n_people >= 2 is guaranteed by the early return above)
        total_connections = sum(len(conns) for conns in relationships.values())
        avg_actual = total_connections / self.n_people

        # Clustering is diagnostic only: a failure here must not lose the result
        try:
            actual_clustering = nx.average_clustering(G)
            logger.debug(f"Built {total_connections:,} total connections "
                       f"(avg {avg_actual:.1f} per person, clustering={actual_clustering:.3f})")
        except Exception:
            logger.debug(f"Built {total_connections:,} total connections "
                       f"(avg {avg_actual:.1f} per person)")

        return relationships

    @staticmethod
    def build_graph_relationships(
        people: list[Person],
        mean_connections_per_person: int = 6,
        clustering_level: float = 0.7,
        storage_key: str = "social_contacts",
        connection_filters: Optional[list] = None,
        symmetric: bool = True,
        max_rewire_attempts: int = 10,
        **kwargs,
    ) -> dict[int, list[Person]]:
        """
        Convenience static method to build graph-based relationships.

        Constructs a ``GraphRelationshipBuilder`` with the given arguments and
        returns the result of its ``build_all``.

        Args:
            people (list[Person]): List of Person objects.
            mean_connections_per_person (int): Average connections per person.
            clustering_level (float): 0.0 (low clustering) to 1.0 (high clustering).
            storage_key (str): Key for storing in person.properties.
            connection_filters (list[ConnectionFilter] | None): Pairwise edge filters.
            symmetric (bool): If True, both u→v and v→u are stored per edge.
            max_rewire_attempts (int): Retry cap when an edge fails connection_filters.
            **kwargs: Forwarded unchanged to the builder constructor.

        Returns:
            dict[int, list[Person]]: Mapping of person_id to list of connected Person objects.
        """
        builder = GraphRelationshipBuilder(
            people=people,
            mean_connections_per_person=mean_connections_per_person,
            clustering_level=clustering_level,
            storage_key=storage_key,
            connection_filters=connection_filters,
            symmetric=symmetric,
            max_rewire_attempts=max_rewire_attempts,
            **kwargs,
        )
        return builder.build_all()

__init__(people, mean_connections_per_person=6, clustering_level=0.7, storage_key='social_contacts', connection_filters=None, symmetric=True, max_rewire_attempts=10, **kwargs)

Initialize the graph relationship builder.

Parameters:

Name Type Description Default
people list[Person]

List of Person objects to create relationships for.

required
mean_connections_per_person int

Average number of connections per person (must be even for the Watts-Strogatz method).

6
clustering_level float

0.0 (random-like) to 1.0 (high clustering).

0.7
storage_key str

Key to use when storing relationships in person.properties.

'social_contacts'
Source code in may/social_networks/builder_functions/graph/graph_relationship_builder.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def __init__(
    self,
    people: list[Person],
    mean_connections_per_person: int = 6,
    clustering_level: float = 0.7,
    storage_key: str = "social_contacts",
    connection_filters: Optional[list] = None,
    symmetric: bool = True,
    max_rewire_attempts: int = 10,
    **kwargs,
):
    """
    Set up a builder for the given population.

    Args:
        people (list[Person]): Person objects that become the graph nodes;
            node index ``i`` refers to ``people[i]``.
        mean_connections_per_person (int): Target average number of connections
            per person (must be even for the Watts-Strogatz method).
        clustering_level (float): 0.0 (random-like) to 1.0 (high clustering).
        storage_key (str): Key under which relationships are stored in
            person.properties.
        connection_filters (list | None): Pairwise edge filters; a falsy value
            is replaced by an empty list.
        symmetric (bool): Whether each edge is recorded for both endpoints.
        max_rewire_attempts (int): Retry cap used when an edge fails the filters.
        **kwargs: Kept on ``self.kwargs`` for the graph generator.
    """
    # Population
    self.people = people
    self.n_people = len(people)

    # Graph-shape targets and storage location
    self.mean_connections_per_person = mean_connections_per_person
    self.clustering_level = clustering_level
    self.storage_key = storage_key

    # Edge filtering / rewiring options
    self.connection_filters = connection_filters if connection_filters else []
    self.symmetric = symmetric
    self.max_rewire_attempts = max_rewire_attempts

    # Two-way lookup between node index (position in `people`) and person id
    person_ids = [member.id for member in people]
    self._idx_to_person_id = dict(enumerate(person_ids))
    self._person_id_to_idx = {pid: position for position, pid in enumerate(person_ids)}

    self.kwargs = kwargs

build_all()

Build relationships for all people using a graph-based approach.

Returns:

Type Description
dict[int, list[Person]]

Mapping of person_id to list of connected Person objects.

Example

>>> builder = GraphRelationshipBuilder(people, mean_connections_per_person=6)
>>> relationships = builder.build_all()
>>> print(f"Person 0 connected to {len(relationships[0])} others")

Source code in may/social_networks/builder_functions/graph/graph_relationship_builder.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
def build_all(self) -> dict[int, list[Person]]:
    """
    Build relationships for all people using a graph-based approach.

    Generates the clustered graph, optionally filters/rewires its edges
    through ``connection_filters``, converts the remaining edges into
    per-person contact lists, and passes every non-empty list to
    ``store_contacts`` under ``storage_key``.

    Returns:
        dict[int, list[Person]]: Mapping of person_id to list of connected Person objects.
            Every person has an entry (possibly an empty list). An empty
            dict is returned when there are fewer than 2 people.

    Raises:
        ValueError: If ``mean_connections_per_person`` is negative.

    Example:
        >>> builder = GraphRelationshipBuilder(people, mean_connections_per_person=6)
        >>> relationships = builder.build_all()
        >>> print(f"Person 0 connected to {len(relationships[0])} others")
    """
    nx = _require_networkx()

    logger.debug(f"Building graph-based relationships for {self.n_people:,} people")
    logger.debug(f"  mean_connections_per_person={self.mean_connections_per_person}, clustering_level={self.clustering_level}")

    # An edge needs two endpoints, so at least 2 people are required
    if self.n_people < 2:
        logger.warning("Need at least 2 people to create relationships")
        return {}

    k = self.mean_connections_per_person

    # A node can be linked to at most every other node: clamp k to n_people - 1
    max_k = self.n_people - 1
    if k > max_k:
        logger.warning(f'Average connections {k} exceeds the max possible for the graph n_people-1={max_k}. Reducing to the max possible')
        k = max_k

    if k < 0:
        logger.error(f"Average connections {k} is below zero.")
        raise ValueError(f"Average connections {k} is below zero.")

    # Generate the clustered graph from the validated, clamped k
    G = create_clustered_graph(
        n_nodes=self.n_people,
        k=k,
        clustering_level=self.clustering_level,
        **self.kwargs,
    )

    # Apply connection filters with Numba-accelerated rewiring on rejection
    if self.connection_filters:
        local_attr_arrays = build_local_attribute_arrays(self.people, self.connection_filters)
        stacked, match_types, attr_indices, range_values = encode_connection_filters_for_numba(
            self.connection_filters, local_attr_arrays
        )
        adj = nx.to_scipy_sparse_array(G, nodelist=range(self.n_people), format='csr', dtype=np.int32)
        # reshape keeps the (n_edges, 2) layout even for an edgeless graph,
        # where np.array([]) would otherwise be 1-D with shape (0,).
        # NOTE(review): assumes _apply_filters_and_rewire expects a 2-D edge array — confirm.
        edge_array = np.array(list(G.edges()), dtype=np.int32).reshape(-1, 2)
        rng_seed = int(np.random.randint(0, 2**31))
        kept_array = _apply_filters_and_rewire(
            edge_array, adj.indices, adj.indptr, self.n_people,
            stacked, match_types, attr_indices, range_values,
            self.max_rewire_attempts, rng_seed,
        )
        # Rebuild the graph from the surviving edges, keeping isolated nodes
        G = nx.Graph()
        G.add_nodes_from(range(self.n_people))
        G.add_edges_from(kept_array.tolist())

    # Convert graph edges to relationships (Person objects, not IDs)
    relationships: dict[int, list[Person]] = {person.id: [] for person in self.people}

    for node_u, node_v in G.edges():
        person_u = self.people[node_u]
        person_v = self.people[node_v]

        relationships[person_u.id].append(person_v)
        # Without symmetry only the first endpoint of each edge records the contact
        if self.symmetric:
            relationships[person_v.id].append(person_u)

    # People without any contact are not written to
    for person in self.people:
        if relationships[person.id]:
            store_contacts(person, relationships[person.id], self.storage_key)

    # Log statistics (n_people >= 2 is guaranteed by the early return above)
    total_connections = sum(len(conns) for conns in relationships.values())
    avg_actual = total_connections / self.n_people

    # Clustering is diagnostic only: a failure here must not lose the result
    try:
        actual_clustering = nx.average_clustering(G)
        logger.debug(f"Built {total_connections:,} total connections "
                   f"(avg {avg_actual:.1f} per person, clustering={actual_clustering:.3f})")
    except Exception:
        logger.debug(f"Built {total_connections:,} total connections "
                   f"(avg {avg_actual:.1f} per person)")

    return relationships

build_graph_relationships(people, mean_connections_per_person=6, clustering_level=0.7, storage_key='social_contacts', connection_filters=None, symmetric=True, max_rewire_attempts=10, **kwargs) staticmethod

Convenience static method to build graph-based relationships.

Parameters:

Name Type Description Default
people list[Person]

List of Person objects.

required
mean_connections_per_person int

Average connections per person.

6
clustering_level float

0.0 (low clustering) to 1.0 (high clustering).

0.7
storage_key str

Key for storing in person.properties.

'social_contacts'
connection_filters list[ConnectionFilter] | None

Pairwise edge filters.

None
symmetric bool

If True, both u→v and v→u are stored per edge.

True
max_rewire_attempts int

Retry cap when an edge fails connection_filters.

10
Source code in may/social_networks/builder_functions/graph/graph_relationship_builder.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
@staticmethod
def build_graph_relationships(
    people: list[Person],
    mean_connections_per_person: int = 6,
    clustering_level: float = 0.7,
    storage_key: str = "social_contacts",
    connection_filters: Optional[list] = None,
    symmetric: bool = True,
    max_rewire_attempts: int = 10,
    **kwargs,
) -> dict[int, list[Person]]:
    """
    One-shot helper: create a GraphRelationshipBuilder and run it immediately.

    Args:
        people (list[Person]): List of Person objects.
        mean_connections_per_person (int): Average connections per person.
        clustering_level (float): 0.0 (low clustering) to 1.0 (high clustering).
        storage_key (str): Key for storing in person.properties.
        connection_filters (list[ConnectionFilter] | None): Pairwise edge filters.
        symmetric (bool): If True, both u→v and v→u are stored per edge.
        max_rewire_attempts (int): Retry cap when an edge fails connection_filters.
        **kwargs: Passed through unchanged to the builder constructor.

    Returns:
        dict[int, list[Person]]: Whatever the builder's ``build_all`` returns.
    """
    # Gather the explicitly named settings; extra kwargs are appended untouched.
    named_options = {
        "people": people,
        "mean_connections_per_person": mean_connections_per_person,
        "clustering_level": clustering_level,
        "storage_key": storage_key,
        "connection_filters": connection_filters,
        "symmetric": symmetric,
        "max_rewire_attempts": max_rewire_attempts,
    }
    return GraphRelationshipBuilder(**named_options, **kwargs).build_all()