Skip to content

Typifier

The typifier module is responsible for atom typing and chemical perception.

Atomistic

OplsAngleTypifier

OplsAngleTypifier(forcefield)

Bases: TypifierBase[Angle]

Match angle type based on atom types of three atoms in Angle

Source code in src/molpy/typifier/atomistic.py
150
151
152
def __init__(self, forcefield: ForceField) -> None:
    """Initialize the angle typifier.

    Args:
        forcefield: Force field handed to the base-class initializer.
    """
    super().__init__(forcefield)
    # typify() reads self._angle_table and self.class_to_types; presumably
    # this helper populates them (its body is not shown here).
    self._build_table()

typify

typify(angle)

Assign type to angle

Source code in src/molpy/typifier/atomistic.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
@override
def typify(self, angle: Angle) -> Angle:
    """Look up the angle type for ``angle`` and copy its parameters onto it.

    The atom types of the three angle atoms are compared against every entry
    of the angle table, accepting either atom order around the unchanged
    central atom. If nothing matches, each atom type is translated to a class
    through ``class_to_types`` and the table is scanned again with the class
    names.

    Args:
        angle: Angle whose three atoms already carry a ``"type"`` entry.

    Returns:
        The same ``angle`` object, with ``data["type"]`` set to the matched
        type name and the matched type's parameters merged into ``data``.

    Raises:
        ValueError: If an atom has no type, or no angle type matches.
    """
    names = (
        angle.itom.get("type", None),
        angle.jtom.get("type", None),
        angle.ktom.get("type", None),
    )
    if None in names:
        raise ValueError(f"Angle atoms must have 'type' attribute: {angle}")

    for name in names:
        assert isinstance(name, str)
    first, center, last = names

    def _hits(pattern, left, middle, right):
        """Return whether ``pattern`` matches the triple in either direction."""
        p1, p2, p3 = pattern
        if (
            atomtype_matches(p1, left)
            and atomtype_matches(p2, middle)
            and atomtype_matches(p3, right)
        ):
            return True
        # Reverse direction: outer atoms swapped, center atom unchanged.
        return (
            atomtype_matches(p1, right)
            and atomtype_matches(p2, middle)
            and atomtype_matches(p3, left)
        )

    def _apply(angle_type):
        """Write the matched type name and parameters onto the angle."""
        angle.data["type"] = angle_type.name
        angle.data.update(**angle_type.params.kwargs)
        return angle

    # First pass: match on the atom type names themselves.
    for pattern, angle_type in self._angle_table.items():
        if _hits(pattern, first, center, last):
            return _apply(angle_type)

    # Second pass: translate every atom type to a class. When a type is listed
    # under several classes, the class iterated last is the one kept.
    resolved = [None, None, None]
    for cls, members in self.class_to_types.items():
        for slot, name in enumerate(names):
            if name in members:
                resolved[slot] = cls

    if all(resolved):
        for pattern, angle_type in self._angle_table.items():
            if _hits(pattern, *resolved):
                return _apply(angle_type)

    raise ValueError(
        f"No angle type found for atom types: {first} - {center} - {last}"
    )

OplsAtomTypifier

OplsAtomTypifier(forcefield, strict=False)

Bases: TypifierBase['Atomistic']

Assign atom types using SMARTS matcher (support type references and dependency resolution)

Initialize OPLS atom typifier.

Parameters:

Name Type Description Default
forcefield ForceField

Force field to use for typing

required
strict bool

If True, raise error when atoms cannot be typed. If False (default), silently skip untyped atoms.

False
Source code in src/molpy/typifier/atomistic.py
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
def __init__(
    self,
    forcefield: ForceField,
    strict: bool = False,
) -> None:
    """
    Set up SMARTS-based atom typing for a force field.

    Args:
        forcefield: Force field to use for typing
        strict: If True, typify() raises when atoms remain untyped.
               If False (default), untyped atoms are left as they are.
    """
    super().__init__(forcefield)

    # Imported at call time rather than at module import time.
    from .adapter import build_mol_graph
    from .layered_engine import LayeredTypingEngine

    self.strict = strict
    self._build_mol_graph = build_mol_graph

    # The patterns extracted from the force field drive the layered engine.
    self.pattern_dict = self._extract_patterns()
    self.engine = LayeredTypingEngine(self.pattern_dict)

typify

typify(struct)

Assign types to all atoms in Atomistic structure (using dependency-aware layered matching)

Source code in src/molpy/typifier/atomistic.py
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
@override
def typify(self, struct: "Atomistic") -> "Atomistic":
    """Assign atom types to every atom of an Atomistic structure.

    The structure is converted to a graph, the layered typing engine returns
    a mapping from ``id(atom)`` to atom type name, and each matched atom gets
    that name plus the parameters of the corresponding force-field AtomType.

    Args:
        struct: Structure whose atoms should be typed (modified in place).

    Returns:
        The same ``struct`` object.

    Raises:
        ValueError: In strict mode, if any atom is left without a type.
    """
    graph, vs_to_atomid, _ = self._build_mol_graph(struct)
    assigned = self.engine.typify(graph, vs_to_atomid)

    for atom in struct.atoms:
        key = id(atom)
        if key not in assigned:
            continue
        type_name = assigned[key]
        atom.data["type"] = type_name

        # Copy the remaining parameters from the force-field AtomType, if any.
        type_obj = self._find_atomtype_by_name(type_name)
        if type_obj:
            atom.data.update(**type_obj.params.kwargs)

    if not self.strict:
        return struct

    missing = [atom for atom in struct.atoms if atom.get("type") is None]
    if not missing:
        return struct

    # Only the first ten offenders are listed to keep the message readable.
    shown = ", ".join(
        f"{atom.get('symbol', '?')} (id={id(atom)})" for atom in missing[:10]
    )
    message = f"Failed to assign types to {len(missing)} atom(s). Examples: {shown}"
    if len(missing) > 10:
        message += f" (and {len(missing) - 10} more)"
    raise ValueError(message)

OplsAtomisticTypifier

OplsAtomisticTypifier(forcefield, skip_atom_typing=False, skip_pair_typing=False, skip_bond_typing=False, skip_angle_typing=False, skip_dihedral_typing=False, strict_typing=True)

Bases: TypifierBase[Atomistic]

Assign all types (bond, angle, dihedral) to entire Atomistic structure

Note: By default (skip_atom_typing=False) atom types are assigned first using OplsAtomTypifier. If you pass skip_atom_typing=True, the atoms must already have types assigned, for example by running OplsAtomTypifier beforehand.

Initialize OPLS atomistic typifier.

Parameters:

Name Type Description Default
forcefield ForceField

Force field to use for typing

required
skip_atom_typing bool

If True, skip atom type assignment

False
skip_pair_typing bool

If True, skip pair type assignment

False
skip_bond_typing bool

If True, skip bond type assignment

False
skip_angle_typing bool

If True, skip angle type assignment

False
skip_dihedral_typing bool

If True, skip dihedral type assignment

False
strict_typing bool

If True (default), raise error when atoms cannot be typed. If False, atoms that cannot be typed are left untyped and are skipped by the later pair, bond, angle and dihedral typing steps.

True
Source code in src/molpy/typifier/atomistic.py
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
def __init__(
    self,
    forcefield: ForceField,
    skip_atom_typing: bool = False,
    skip_pair_typing: bool = False,
    skip_bond_typing: bool = False,
    skip_angle_typing: bool = False,
    skip_dihedral_typing: bool = False,
    strict_typing: bool = True,
) -> None:
    """
    Set up the per-entity typifiers used by typify().

    A sub-typifier is only constructed, and only then stored as an attribute,
    when the matching ``skip_*`` flag is False.

    Args:
        forcefield: Force field to use for typing
        skip_atom_typing: If True, skip atom type assignment
        skip_pair_typing: If True, skip pair type assignment
        skip_bond_typing: If True, skip bond type assignment
        skip_angle_typing: If True, skip angle type assignment
        skip_dihedral_typing: If True, skip dihedral type assignment
        strict_typing: Forwarded to OplsAtomTypifier as its ``strict`` flag
                      (default True); only used when atom typing is enabled.
    """
    super().__init__(forcefield)
    self.strict_typing = strict_typing

    # Atoms
    self.skip_atom_typing = skip_atom_typing
    if not skip_atom_typing:
        self.atom_typifier = OplsAtomTypifier(forcefield, strict=strict_typing)

    # Nonbonded (pair) parameters
    self.skip_pair_typing = skip_pair_typing
    if not skip_pair_typing:
        self.pair_typifier = PairTypifier(forcefield)

    # Bonds
    self.skip_bond_typing = skip_bond_typing
    if not skip_bond_typing:
        self.bond_typifier = OplsBondTypifier(forcefield)

    # Angles
    self.skip_angle_typing = skip_angle_typing
    if not skip_angle_typing:
        self.angle_typifier = OplsAngleTypifier(forcefield)

    # Dihedrals
    self.skip_dihedral_typing = skip_dihedral_typing
    if not skip_dihedral_typing:
        self.dihedral_typifier = OplsDihedralTypifier(forcefield)

typify

typify(struct)

Assign types to all bonds, angles, dihedrals in Atomistic structure

Parameters:

Name Type Description Default
struct Atomistic

Atomistic structure

required
Prerequisites
  • If skip_atom_typing=True, all atoms must already have 'type' attribute
  • If skip_atom_typing=False (default), will assign atom types first
Source code in src/molpy/typifier/atomistic.py
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
@override
def typify(self, struct: Atomistic) -> Atomistic:
    """
    Assign atom, pair, bond, angle and dihedral types to an Atomistic structure.

    Every stage runs unless its ``skip_*_typing`` flag is set. Atoms that have
    no 'type' when a later stage runs, and any bond, angle or dihedral that
    involves such an atom, are left untouched by that stage.

    Args:
        struct: Atomistic structure

    Returns:
        The same ``struct`` object, typed in place.

    Prerequisites:
        - If skip_atom_typing=False (default), atom types are assigned first
        - If skip_atom_typing=True, atoms must already have a 'type' attribute
          to be picked up by the later stages
    """

    def _all_typed(atoms) -> bool:
        """Return True when every given atom already carries a type."""
        return all(atom.get("type") is not None for atom in atoms)

    # Optional: first assign atom types
    if not self.skip_atom_typing:
        self.atom_typifier.typify(struct)

    # Pair types (nonbond parameters) only for atoms that have a type
    if not self.skip_pair_typing:
        for atom in struct.atoms:
            if atom.get("type") is not None:
                self.pair_typifier.typify(atom)

    # Bonds where both endpoints have atom types
    if not self.skip_bond_typing:
        for bond in struct.bonds:
            if _all_typed((bond.itom, bond.jtom)):
                self.bond_typifier.typify(bond)

    # Angles where all endpoints have atom types
    if not self.skip_angle_typing:
        for angle in struct.links.bucket(Angle):
            if _all_typed(angle.endpoints):
                self.angle_typifier.typify(angle)

    # Dihedrals where all endpoints have atom types
    if not self.skip_dihedral_typing:
        for dihedral in struct.links.bucket(Dihedral):
            if _all_typed(dihedral.endpoints):
                self.dihedral_typifier.typify(dihedral)

    return struct

OplsBondTypifier

OplsBondTypifier(forcefield)

Bases: TypifierBase[Bond]

Match bond type based on atom types at both ends of the bond.

Strategy: Build class_to_types table and manually compare each AtomType

Source code in src/molpy/typifier/atomistic.py
60
61
62
def __init__(self, forcefield: ForceField) -> None:
    """Initialize the bond typifier.

    Args:
        forcefield: Force field handed to the base-class initializer.
    """
    super().__init__(forcefield)
    # typify() reads self._bond_table; presumably this helper populates it
    # (its body is not shown here).
    self._build_table()

typify

typify(bond)

Assign type to bond

Source code in src/molpy/typifier/atomistic.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
@override
def typify(self, bond: Bond) -> Bond:
    """Look up the bond type for ``bond`` and copy its parameters onto it.

    The bond table is scanned first with the atom type names of both ends
    (either order). If that fails, the class of each end is read from the
    force-field AtomType with the same name and the table is scanned again,
    comparing classes only.

    Args:
        bond: Bond whose two atoms already carry a ``"type"`` entry.

    Returns:
        The same ``bond`` object, with ``data["type"]`` set to the matched
        type name and the matched type's parameters merged into ``data``.

    Raises:
        ValueError: If an atom has no type, or no bond type matches.
    """
    first = bond.itom.get("type", None)
    second = bond.jtom.get("type", None)
    if first is None or second is None:
        raise ValueError(f"Bond atoms must have 'type' attribute: {bond}")

    def _apply(bond_type):
        """Write the matched type name and parameters onto the bond."""
        bond.data["type"] = bond_type.name
        bond.data.update(**bond_type.params.kwargs)
        return bond

    def _declared_class(entry):
        """Class stored on a table entry, or the wildcard when unavailable."""
        if hasattr(entry, "params"):
            return entry.params.kwargs.get("class_", "*")
        return "*"

    # First pass: match on atom type names, forward or reversed.
    for (at1, at2), bond_type in self._bond_table.items():
        forward = atomtype_matches(at1, first) and atomtype_matches(at2, second)
        if forward or (
            atomtype_matches(at1, second) and atomtype_matches(at2, first)
        ):
            return _apply(bond_type)

    # Second pass: find the AtomType objects named like the two ends; when
    # several share a name, the one iterated last is kept.
    first_atomtype = None
    second_atomtype = None
    for candidate in self.ff.get_types(AtomType):
        if candidate.name == first:
            first_atomtype = candidate
        if candidate.name == second:
            second_atomtype = candidate

    first_class = None
    if first_atomtype:
        first_class = first_atomtype.params.kwargs.get("class_", "*")
    second_class = None
    if second_atomtype:
        second_class = second_atomtype.params.kwargs.get("class_", "*")

    # Class matching needs a concrete (non-wildcard) class on both ends.
    if all(cls and cls != "*" for cls in (first_class, second_class)):
        wanted = (first_class, second_class)
        for (at1, at2), bond_type in self._bond_table.items():
            found = (_declared_class(at1), _declared_class(at2))
            if found == wanted or found == wanted[::-1]:
                return _apply(bond_type)

    raise ValueError(
        f"No bond type found for atom types: {first} - {second}"
    )

OplsDihedralTypifier

OplsDihedralTypifier(forcefield)

Bases: TypifierBase[Dihedral]

Match dihedral type based on atom types of four atoms in Dihedral

Source code in src/molpy/typifier/atomistic.py
237
238
239
def __init__(self, forcefield: ForceField) -> None:
    """Initialize the dihedral typifier.

    Args:
        forcefield: Force field handed to the base-class initializer.
    """
    super().__init__(forcefield)
    # typify() reads self._dihedral_list and self.class_to_types; presumably
    # this helper populates them (its body is not shown here).
    self._build_table()

typify

typify(dihedral)

Assign type to dihedral

Source code in src/molpy/typifier/atomistic.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
@override
def typify(self, dihedral: Dihedral) -> Dihedral:
    """Look up the dihedral type for ``dihedral`` and copy its parameters.

    The atom types of the four atoms are compared against every known
    dihedral type, in the given order and fully reversed. If nothing matches,
    each atom type is translated to a class through ``class_to_types`` and
    the list is scanned again with the class names.

    Args:
        dihedral: Dihedral whose four atoms already carry a ``"type"`` entry.

    Returns:
        The same ``dihedral`` object, with ``data["type"]`` set to the matched
        type name and the matched type's parameters merged into ``data``.

    Raises:
        ValueError: If an atom has no type, or no dihedral type matches.
    """
    names = (
        dihedral.itom.get("type", None),
        dihedral.jtom.get("type", None),
        dihedral.ktom.get("type", None),
        dihedral.ltom.get("type", None),
    )
    if None in names:
        raise ValueError(f"Dihedral atoms must have 'type' attribute: {dihedral}")

    for name in names:
        assert isinstance(name, str)

    def _hits(dihedral_type, quad):
        """Return whether the type matches ``quad`` forward or reversed."""
        pattern = (
            dihedral_type.itom,
            dihedral_type.jtom,
            dihedral_type.ktom,
            dihedral_type.ltom,
        )
        if all(atomtype_matches(p, n) for p, n in zip(pattern, quad)):
            return True
        return all(atomtype_matches(p, n) for p, n in zip(pattern, reversed(quad)))

    def _apply(dihedral_type):
        """Write the matched type name and parameters onto the dihedral."""
        dihedral.data["type"] = dihedral_type.name
        dihedral.data.update(**dihedral_type.params.kwargs)
        return dihedral

    # First pass: match on the atom type names themselves.
    for dihedral_type in self._dihedral_list:
        if _hits(dihedral_type, names):
            return _apply(dihedral_type)

    # Second pass: translate every atom type to a class. When a type is listed
    # under several classes, the class iterated last is the one kept.
    resolved = [None, None, None, None]
    for cls, members in self.class_to_types.items():
        for slot, name in enumerate(names):
            if name in members:
                resolved[slot] = cls

    if all(resolved):
        for dihedral_type in self._dihedral_list:
            if _hits(dihedral_type, resolved):
                return _apply(dihedral_type)

    first, second, third, fourth = names
    raise ValueError(
        f"No dihedral type found for atom types: {first} - {second} - {third} - {fourth}"
    )

PairTypifier

PairTypifier(forcefield)

Bases: TypifierBase[Atom]

Assign nonbonded parameters (charge, sigma, epsilon) to atoms based on their types.

This typifier reads PairType parameters from the forcefield and assigns them to atoms.

Source code in src/molpy/typifier/atomistic.py
346
347
348
def __init__(self, forcefield: ForceField) -> None:
    """Initialize the pair typifier.

    Args:
        forcefield: Force field handed to the base-class initializer.
    """
    super().__init__(forcefield)
    # typify() reads self._pair_table; presumably this helper populates it
    # (its body is not shown here).
    self._build_pair_table()

typify

typify(atom)

Assign nonbonded parameters to atom based on its type

Source code in src/molpy/typifier/atomistic.py
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
@override
def typify(self, atom: Atom) -> Atom:
    """Copy the nonbonded parameters of the atom's type onto the atom.

    Args:
        atom: Atom that already carries a ``"type"`` entry.

    Returns:
        The same ``atom`` object, updated with the parameters of the
        PairType registered for its type.

    Raises:
        ValueError: If the atom has no type, or no pair type is registered
            for that type.
    """
    type_name = atom.get("type", None)
    if type_name is None:
        raise ValueError(
            f"Atom must have 'type' attribute before pair typification: {atom}"
        )

    matched = self._pair_table.get(type_name)
    if not matched:
        raise ValueError(f"No pair type found for atom type: {type_name}")

    atom.update(**matched.params.kwargs)
    return atom

atomtype_matches

atomtype_matches(atomtype, type_str)

Check if an AtomType matches a given type string.

Matching rules: 1. If atomtype has a specific type (not "*"), compare by type 2. If type doesn't match, compare by class

Parameters:

Name Type Description Default
atomtype AtomType

AtomType instance

required
type_str str

Type string to match (from Atom.data["type"] or class name)

required

Returns:

Type Description
bool

True if matches, False otherwise

Source code in src/molpy/typifier/atomistic.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def atomtype_matches(atomtype: AtomType, type_str: str) -> bool:
    """
    Tell whether an AtomType accepts the given type or class name.

    The AtomType's ``type_`` and ``class_`` parameters are read, each
    defaulting to the wildcard "*" when absent:
    - an AtomType with both set to "*" matches anything;
    - otherwise it matches when a non-wildcard ``type_`` or ``class_``
      equals ``type_str``.

    Args:
        atomtype: AtomType instance
        type_str: Type string to match (from Atom.data["type"] or class name)

    Returns:
        True if matches, False otherwise
    """
    params = atomtype.params.kwargs
    declared_type = params.get("type_", "*")
    declared_class = params.get("class_", "*")

    # A pure wildcard entry accepts every name.
    if declared_type == "*" and declared_class == "*":
        return True

    # Otherwise only a concrete type or class equal to the name counts.
    return any(
        declared != "*" and declared == type_str
        for declared in (declared_type, declared_class)
    )

Adapter

Adapter utilities for converting molecules to graphs for matching.

This module provides functions to convert Atomistic structures into igraph.Graph representations suitable for SMARTS pattern matching.

build_mol_graph

build_mol_graph(structure)

Convert Atomistic structure to igraph.Graph for matching.

Parameters:

Name Type Description Default
structure Atomistic

Atomistic structure with atoms and bonds

required

Returns:

Type Description
tuple[Graph, dict[int, int], dict[int, int]]

Tuple of (graph, vs_to_atomid, atomid_to_vs) where:
  • graph: igraph.Graph with vertex/edge attributes
  • vs_to_atomid: mapping from vertex index to atom ID
  • atomid_to_vs: mapping from atom ID to vertex index
Vertex attributes set
  • element: str (e.g., "C", "N", "O")
  • number: int (atomic number)
  • is_aromatic: bool
  • charge: int
  • degree: int (number of bonds)
  • hyb: int | None (1=sp, 2=sp2, 3=sp3)
  • in_ring: bool
  • cycles: set of tuples (ring membership)
Edge attributes set
  • order: int | str (1, 2, 3, or ":")
  • is_aromatic: bool
  • is_in_ring: bool
Source code in src/molpy/typifier/adapter.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def build_mol_graph(
    structure: "Atomistic",
) -> tuple[Graph, dict[int, int], dict[int, int]]:
    """Build an undirected igraph.Graph from an Atomistic structure.

    One vertex is created per atom, in the iteration order of
    ``structure.atoms``, and one edge per bond whose two atoms are both part
    of the structure. Atoms are identified by ``id(atom)``.

    Args:
        structure: Atomistic structure with atoms and bonds

    Returns:
        Tuple of (graph, vs_to_atomid, atomid_to_vs) where:
        - graph: igraph.Graph with vertex/edge attributes
        - vs_to_atomid: mapping from vertex index to atom ID
        - atomid_to_vs: mapping from atom ID to vertex index

    Raises:
        TypeError: If ``structure`` is not an Atomistic instance.

    Vertex attributes set:
        - atom_id: int (``id`` of the atom, used for type assignment lookup)
        - element: str (e.g., "C", "N", "O")
        - number: int (atomic number)
        - is_aromatic: bool
        - charge: int
        - degree: int (number of bonds)
        - hyb: int | None (1=sp, 2=sp2, 3=sp3)
        - in_ring: bool
        - cycles: set of tuples (ring membership)

    Edge attributes set:
        - order: int | str (1, 2, 3, or ":")
        - is_aromatic: bool
        - is_in_ring: bool
    """
    if not isinstance(structure, Atomistic):
        raise TypeError(f"Expected Atomistic structure, got {type(structure).__name__}")

    atoms = list(structure.atoms)
    bonds = list(structure.bonds)

    # Index mappings follow the storage order of the atoms.
    vs_to_atomid = {index: id(atom) for index, atom in enumerate(atoms)}
    atomid_to_vs = {atom_id: index for index, atom_id in vs_to_atomid.items()}
    atom_to_vs = {atom: index for index, atom in enumerate(atoms)}

    g = Graph(n=len(atoms), directed=False)

    # Vertex attributes come from the helper, plus the atom id for later lookup.
    for index, atom in enumerate(atoms):
        vertex_attrs = _extract_atom_attributes(atom, structure)
        vertex_attrs["atom_id"] = id(atom)
        for key, value in vertex_attrs.items():
            g.vs[index][key] = value

    # Collect edges first so they can be added in a single call; bonds that
    # reference an atom outside this structure are ignored.
    edge_list = []
    edge_attrs_list = []
    for bond in bonds:
        source = atom_to_vs.get(bond.itom)
        target = atom_to_vs.get(bond.jtom)
        if source is None or target is None:
            continue
        edge_list.append((source, target))
        edge_attrs_list.append(_extract_bond_attributes(bond, structure))

    if edge_list:
        g.add_edges(edge_list)

        # The graph started without edges, so edge ids follow the list order.
        for eid, edge_attrs in enumerate(edge_attrs_list):
            for key, value in edge_attrs.items():
                g.es[eid][key] = value

    # Compute derived properties
    _compute_derived_properties(g, structure, atom_to_vs)

    return g, vs_to_atomid, atomid_to_vs

Dependency Analyzer

Dependency analysis for SMARTS patterns with type references.

DependencyAnalyzer

DependencyAnalyzer(patterns)

Analyzes dependencies between SMARTS patterns and computes matching levels.

Attributes:

Name Type Description
patterns

Dictionary mapping atom type names to their SMARTSGraph patterns

dependency_graph dict[str, set[str]]

Adjacency list of dependencies (type -> depends_on)

levels dict[str, int]

Dictionary mapping atom type names to their topological levels

circular_groups list[set[str]]

List of sets containing types with circular dependencies

Initialize dependency analyzer.

Parameters:

Name Type Description Default
patterns dict[str, SMARTSGraph]

Dictionary of {atom_type_name: SMARTSGraph}

required
Source code in src/molpy/typifier/dependency_analyzer.py
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self, patterns: dict[str, SMARTSGraph]) -> None:
    """Initialize dependency analyzer.

    Stores the patterns, creates empty result containers and then runs the
    dependency-graph and level computations immediately.

    Args:
        patterns: Dictionary of {atom_type_name: SMARTSGraph}
    """
    self.patterns = patterns
    # Adjacency sets; the defaultdict creates an empty set on first access.
    self.dependency_graph: dict[str, set[str]] = defaultdict(set)
    # Level per atom type name; read by get_max_level / get_patterns_by_level.
    self.levels: dict[str, int] = {}
    # Read by has_circular_dependencies().
    self.circular_groups: list[set[str]] = []

    # The containers are created before these helpers run; presumably the
    # helpers fill them (their bodies are not shown here).
    self._build_dependency_graph()
    self._compute_levels()

get_max_level

get_max_level()

Get the maximum level number.

Returns:

Type Description
int

Maximum level, or -1 if no patterns

Source code in src/molpy/typifier/dependency_analyzer.py
158
159
160
161
162
163
164
def get_max_level(self) -> int:
    """Return the highest level stored in ``self.levels``.

    Returns:
        Maximum level, or -1 if no patterns
    """
    return max(self.levels.values(), default=-1)

get_patterns_by_level

get_patterns_by_level(level)

Get all patterns at a specific level.

Parameters:

Name Type Description Default
level int

Topological level number

required

Returns:

Type Description
list[SMARTSGraph]

List of SMARTSGraph patterns at that level

Source code in src/molpy/typifier/dependency_analyzer.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def get_patterns_by_level(self, level: int) -> list[SMARTSGraph]:
    """Collect the patterns whose atom type is assigned to ``level``.

    Args:
        level: Topological level number

    Returns:
        List of SMARTSGraph patterns at that level, in the iteration order
        of ``self.patterns``
    """
    level_of = self.levels.get
    return [
        pattern
        for name, pattern in self.patterns.items()
        if level_of(name) == level
    ]

has_circular_dependencies

has_circular_dependencies()

Check if there are any circular dependencies.

Returns:

Type Description
bool

True if circular dependencies exist

Source code in src/molpy/typifier/dependency_analyzer.py
166
167
168
169
170
171
172
def has_circular_dependencies(self) -> bool:
    """Report whether any circular dependency group was recorded.

    Returns:
        True if circular dependencies exist
    """
    return bool(self.circular_groups)

Graph

Module for SMARTSGraph and SMARTS matching logic.

SMARTSGraph

SMARTSGraph(smarts_string=None, parser=None, name=None, atomtype_name=None, priority=0, target_vertices=None, source='', overrides=None, *args, **kwargs)

Bases: Graph

A graph representation of a SMARTS pattern.

This class supports two modes of construction: 1. From SMARTS string (legacy mode) 2. From predicates (new predicate-based mode)

Attributes

atomtype_name : str
    The atom type this pattern assigns
priority : int
    Priority for conflict resolution (higher wins)
target_vertices : list[int]
    Which pattern vertices should receive the atom type (empty = all)
source : str
    Source identifier for debugging
smarts_string : str | None
    The SMARTS string (if constructed from string)
ir : SmartsIR | None
    The intermediate representation (if constructed from string)

Notes

SMARTSGraph inherits from igraph.Graph

Vertex attributes
  • preds: list[Callable] - list of predicates that must all pass
Edge attributes
  • preds: list[Callable] - list of predicates that must all pass
Graph attributes
  • atomtype_name: str
  • priority: int
  • target_vertices: list[int]
  • source: str
  • specificity_score: int (computed)
Source code in src/molpy/typifier/graph.py
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def __init__(
    self,
    smarts_string: str | None = None,
    parser: SmartsParser | None = None,
    name: str | None = None,
    atomtype_name: str | None = None,
    priority: int = 0,
    target_vertices: list[int] | None = None,
    source: str = "",
    overrides: set | None = None,
    *args,
    **kwargs,
):
    """Create a pattern graph, optionally populated from a SMARTS string.

    Args:
        smarts_string: SMARTS pattern to parse; when None the graph is left
            without nodes and edges from this constructor.
        parser: Parser used for ``smarts_string``; a new SmartsParser is
            created when None.
        name: Fallback for ``atomtype_name``.
        atomtype_name: Atom type this pattern assigns; falls back to
            ``name`` and then to the empty string.
        priority: Priority for conflict resolution, stored as ``_priority``.
        target_vertices: Pattern vertices that receive the atom type; stored
            as an empty list when not given.
        source: Source identifier.
        overrides: Stored unchanged on the instance.
        *args: Forwarded to the base Graph initializer.
        **kwargs: Forwarded to the base Graph initializer.
    """
    super().__init__(*args, **kwargs)

    # Pattern metadata
    self.atomtype_name = atomtype_name or name or ""
    self._priority = priority
    self.target_vertices = target_vertices or []
    self.source = source
    self.overrides = overrides

    # Dependency bookkeeping: referenced atom type names and topological level
    self.dependencies: set[str] = set()
    self.level: int | None = None

    # SMARTS-string (legacy) construction state
    self.smarts_string = smarts_string
    self.ir: SmartsIR | None = None

    if smarts_string is not None:
        active_parser = SmartsParser() if parser is None else parser
        self.ir = active_parser.parse_smarts(smarts_string)

        self._atom_indices = OrderedDict()
        self._add_nodes()
        self._add_edges()

        # Type references found in the parsed pattern
        self.dependencies = self.extract_dependencies()

    self._graph_matcher = None
    self._specificity_score: int | None = None

priority property

priority

Get priority for conflict resolution (higher wins).

calc_signature

calc_signature(graph)

Calculate graph signatures for pattern matching.

Source code in src/molpy/typifier/graph.py
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
def calc_signature(self, graph):
    """Annotate ``graph`` vertices with their cycles when the pattern needs it.

    The cycle search only runs when at least one atom expression in
    ``self.ir`` contains a ``ring_size`` or ``ring_count`` primitive. In that
    case every vertex receives a ``"cycles"`` attribute (a set of vertex
    tuples) filled from ``_find_chordless_cycles(graph, max_cycle_size=6)``.

    Args:
        graph: Graph to annotate in place; its ``vs`` sequence must support
            attribute assignment and integer indexing.

    Returns:
        None. ``graph`` is modified in place.
    """
    # A pattern that was not built from a SMARTS string has ``ir`` set to
    # None, so there are no ring primitives to look for and nothing to do.
    if self.ir is None:
        return

    def check_expr_for_rings(expr):
        """Recursively check expression for ring-related primitives."""
        if isinstance(expr, AtomPrimitiveIR):
            return expr.type in ("ring_size", "ring_count")
        if isinstance(expr, AtomExpressionIR):
            return any(check_expr_for_rings(child) for child in expr.children)
        return False

    has_ring_rules = any(
        check_expr_for_rings(atom.expression) for atom in self.ir.atoms
    )
    if not has_ring_rules:
        return

    # Start every vertex with an empty set, then add each cycle reported for
    # the vertex at the same position.
    graph.vs["cycles"] = [set() for _ in graph.vs]
    all_cycles = _find_chordless_cycles(graph, max_cycle_size=6)
    for i, cycles in enumerate(all_cycles):
        for cycle in cycles:
            graph.vs[i]["cycles"].add(tuple(cycle))

extract_dependencies

extract_dependencies()

Extract type references from SMARTS IR.

Finds all has_label primitives that reference atom types (e.g., %opls_154). These are parsed by Lark as AtomPrimitiveIR(type="has_label", value="%opls_154").

Returns:

Type Description
set[str]

Set of referenced atom type names (e.g., {'opls_154', 'opls_135'})

Source code in src/molpy/typifier/graph.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def extract_dependencies(self) -> set[str]:
    """Collect the atom type names referenced by this pattern's SMARTS IR.

    Every ``has_label`` primitive whose string value starts with ``%opls_``
    contributes that value minus the leading ``%`` (``"%opls_154"`` becomes
    ``"opls_154"``). ``AtomExpressionIR`` nodes are searched through their
    ``children``.

    Returns:
        Set of referenced atom type names; empty when there is no IR or the
        IR has no atoms.
    """
    if not self.ir or not self.ir.atoms:
        return set()

    found = set()
    # Walk the expression trees with an explicit stack. Visit order does not
    # matter because the result is a set.
    pending = [atom.expression for atom in self.ir.atoms]
    while pending:
        node = pending.pop()
        if isinstance(node, AtomPrimitiveIR):
            if (
                node.type == "has_label"
                and isinstance(node.value, str)
                and node.value.startswith("%opls_")
            ):
                # Drop the "%" marker, keeping e.g. "opls_154".
                found.add(node.value[1:])
        elif isinstance(node, AtomExpressionIR):
            pending.extend(node.children)

    return found

find_matches

find_matches(graph)

Return sets of atoms that match this SMARTS pattern in a topology.

Parameters

structure : TopologyGraph The topology that we are trying to atomtype. typemap : dict The target typemap being used/edited

Notes

When this function gets used in atomtyper.py, we actively modify the white- and blacklists of the atoms in topology after finding a match. This means that between every successive call of subgraph_isomorphisms_iter(), the topology against which we are matching may have actually changed. Currently, we take advantage of this behavior in some edge cases (e.g. see test_hexa_coordinated in test_smarts.py).

Source code in src/molpy/typifier/graph.py
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
def find_matches(self, graph):
    """Return the host vertices matched by the first atom of this pattern.

    Parameters
    ----------
    graph
        Graph to search. It is passed to ``calc_signature`` first and then
        handed to ``SMARTSMatcher`` as the host graph, with this pattern as
        the graph to look for.

    Returns
    -------
    set
        The first entry (``match[0]``) of every match returned by
        ``SMARTSMatcher.subgraph_isomorphisms()``.

    Notes
    -----
    Carried over from the earlier docstring (the files it names are not part
    of this module): when this function gets used in atomtyper.py, the
    white- and blacklists of the atoms in the topology are modified after
    finding a match, so the topology being matched against may change
    between successive matches. Some edge cases rely on this (e.g. see
    `test_hexa_coordinated` in `test_smarts.py`).

    """
    self.calc_signature(graph)

    matcher = SMARTSMatcher(
        graph,
        self,
        node_match_fn=self._node_match_fn,
        edge_match_fn=self._edge_match_fn,
    )
    # Keep the matcher on the instance, as callers may inspect it afterwards.
    self._graph_matcher = matcher

    return {hit[0] for hit in matcher.subgraph_isomorphisms()}

from_igraph classmethod

from_igraph(graph, atomtype_name, priority=0, target_vertices=None, source='')

Create SmartsGraph from an existing igraph.Graph.

Parameters:

Name Type Description Default
graph Graph

igraph.Graph with vertex/edge predicates

required
atomtype_name str

Atom type this pattern assigns

required
priority int

Priority for conflict resolution

0
target_vertices list[int] | None

Which vertices should be typed (empty = all)

None
source str

Source identifier

''

Returns:

Type Description
SMARTSGraph

SMARTSGraph instance

Source code in src/molpy/typifier/graph.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
@classmethod
def from_igraph(
    cls,
    graph: Graph,
    atomtype_name: str,
    priority: int = 0,
    target_vertices: list[int] | None = None,
    source: str = "",
) -> "SMARTSGraph":
    """Build a pattern by copying the structure and attributes of ``graph``.

    Args:
        graph: Source graph whose vertices, edges and attributes are copied.
        atomtype_name: Atom type this pattern assigns.
        priority: Priority for conflict resolution.
        target_vertices: Which vertices should be typed (empty = all).
        source: Source identifier.

    Returns:
        A new instance of ``cls`` with the same vertex count, edge list and
        vertex/edge attributes as ``graph``.
    """
    # No SMARTS string is passed, so the instance starts without parsed IR.
    pattern = cls(
        atomtype_name=atomtype_name,
        priority=priority,
        target_vertices=target_vertices or [],
        source=source,
    )

    # Topology first; attributes can only be attached to existing elements.
    pattern.add_vertices(graph.vcount())
    if graph.ecount() > 0:
        pattern.add_edges(graph.get_edgelist())

    # Copy every vertex attribute, then every edge attribute, column by column.
    for origin, destination in ((graph.vs, pattern.vs), (graph.es, pattern.es)):
        for attr_name in origin.attributes():
            destination[attr_name] = origin[attr_name]

    return pattern

get_priority

get_priority()

Get priority value (supports both new and legacy modes).

Source code in src/molpy/typifier/graph.py
214
215
216
217
218
219
220
221
def get_priority(self) -> int:
    """Return the priority used for conflict resolution.

    An explicitly stored ``_priority`` always wins. Without one, the value is
    derived from ``overrides``: one more than the highest ``priority`` among
    the overridden patterns, or 0 when there is nothing to override.

    Returns:
        The priority value.
    """
    if hasattr(self, "_priority"):
        return self._priority
    # Legacy fallback. An empty collection is treated like None, because
    # max() over an empty iterable raises ValueError.
    if not self.overrides:
        return 0
    return max(item.priority for item in self.overrides) + 1

get_specificity_score

get_specificity_score()

Compute specificity score for this pattern.

Scoring heuristic

+0 per element predicate (baseline); +1 per charge/degree/hyb constraint; +2 per aromatic/in_ring constraint; +3 per bond order predicate; +4 per custom predicate.

Returns:

Type Description
int

Specificity score (higher = more specific)

Source code in src/molpy/typifier/graph.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
def get_specificity_score(self) -> int:
    """Return the specificity score of this pattern, computing it once.

    The score is the sum of ``pred.meta.weight`` over every predicate stored
    under the ``"preds"`` attribute of each vertex and each edge. Predicates
    without a ``meta`` attribute contribute nothing. The result is cached in
    ``_specificity_score`` and returned unchanged on later calls.

    Intended weights (set on the predicates themselves, not in this method):
        +0 per element predicate (baseline)
        +1 per charge/degree/hyb constraint
        +2 per aromatic/in_ring constraint
        +3 per bond order predicate
        +4 per custom predicate

    Returns:
        Specificity score (higher = more specific)
    """
    cached = self._specificity_score
    if cached is not None:
        return cached

    total = 0
    # Vertices first, then edges, accumulating into one running total.
    for elements in (self.vs, self.es):
        for element in elements:
            if "preds" not in element.attributes():
                continue
            for predicate in element["preds"]:
                if hasattr(predicate, "meta"):
                    total += predicate.meta.weight

    self._specificity_score = total
    return total

override

override(overrides)

Set the priority of this SMARTS pattern.

Source code in src/molpy/typifier/graph.py
201
202
203
204
205
206
207
208
209
210
211
212
def override(self, overrides):
    """Record the patterns that this SMARTS pattern overrides.

    ``overrides`` is stored on the instance. A priority is derived from it
    (highest overridden ``priority`` plus one) only when no ``_priority``
    attribute exists yet and ``overrides`` is non-empty; an explicitly set
    priority is never replaced.

    Args:
        overrides: Collection of objects exposing a ``priority`` attribute.
    """
    self.overrides = overrides

    # New mode: a priority was set explicitly, so leave it untouched.
    if hasattr(self, "_priority"):
        return

    # Legacy mode: no stored priority, so rank just above the overridden ones.
    if self.overrides:
        self._priority = max(item.priority for item in overrides) + 1

plot

plot(*args, **kwargs)

Plot the SMARTS graph.

Source code in src/molpy/typifier/graph.py
195
196
197
198
199
def plot(self, *args, **kwargs):
    """Plot the topology of the SMARTS graph with vertex indices as labels.

    A plain ``Graph`` with the same vertex count and edge list is built,
    each vertex is labelled with its index, and the result is handed to the
    ``plot`` function in scope together with ``*args`` and ``**kwargs``.

    Returns:
        Whatever the underlying ``plot`` call returns.
    """
    # Pass the vertex count explicitly: from the edge list alone, vertices
    # that appear in no edge (for example a single-atom pattern) would be
    # missing and the label list below would have the wrong length.
    graph = Graph(n=self.vcount(), edges=self.get_edgelist())
    graph.vs["label"] = [v.index for v in self.vs]
    return plot(graph, *args, **kwargs)

SMARTSMatcher

SMARTSMatcher(G1, G2, node_match_fn, edge_match_fn=None)

Inherits and implements VF2 for a SMARTSGraph.

Source code in src/molpy/typifier/graph.py
501
502
503
504
505
def __init__(self, G1: Graph, G2: Graph, node_match_fn, edge_match_fn=None):
    """Store the two graphs and the compatibility callbacks.

    Args:
        G1: Graph that is searched.
        G2: Graph that is looked for inside ``G1``.
        node_match_fn: Callback used as the node compatibility check.
        edge_match_fn: Optional callback used as the edge compatibility check.
    """
    self.G1, self.G2 = G1, G2
    self.node_match_fn, self.edge_match_fn = node_match_fn, edge_match_fn

is_isomorphic property

is_isomorphic

Return True if the two graphs are isomorphic.

candidate_pairs_iter

candidate_pairs_iter()

Iterate over candidate pairs of nodes in G1 and G2.

Source code in src/molpy/typifier/graph.py
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
def candidate_pairs_iter(self):
    """Yield ``(G1 node, G2 node)`` candidate pairs for the current state.

    When both terminal sets are non-empty, every node of the G1 terminal set
    is paired with the smallest node of the G2 terminal set. Otherwise the
    smallest G2 node not yet in ``core_2`` is paired with every host node
    that is not in ``core_1``; the host nodes come from ``valid_nodes`` when
    that G2 node is 0 and from ``G1.nodes()`` otherwise.
    """
    # Everything below is computed from the matcher's current state.
    pattern_nodes = self.G2_nodes

    # Terminal sets: nodes present in the in/out maps but not yet mapped.
    frontier_1 = set(self.inout_1.keys()) - set(self.core_1.keys())
    frontier_2 = set(self.inout_2.keys()) - set(self.core_2.keys())

    # P(s) = T1_inout x {min T2_inout}
    if frontier_1 and frontier_2:
        smallest = min(frontier_2)
        for host in frontier_1:
            yield host, smallest
        return

    # No usable terminal sets: pick the next unmapped pattern node.
    pattern_node = min(pattern_nodes - set(self.core_2))
    pool = self.valid_nodes if pattern_node == 0 else self.G1.nodes()
    for host in pool:
        if host not in self.core_1:
            yield host, pattern_node

subgraph_isomorphisms

subgraph_isomorphisms()

Iterate over all subgraph isomorphisms between G1 and G2.

Source code in src/molpy/typifier/graph.py
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
def subgraph_isomorphisms(self):
    """Return all subgraph isomorphisms between G1 and G2 as a list.

    Matches come from ``G1.get_subisomorphisms_vf2`` using the stored node
    (and, if present, edge) compatibility callbacks. A match is kept only if
    the subgraph of ``G1`` built from its vertices has at least one
    isomorphism with ``G2``.

    Returns:
        List of matches, in the order they were reported.
    """

    def _delegate_edge_match(g1, g2, e1, e2):
        # Looked up at call time, like the stored callback itself.
        return self.edge_match_fn(g1, g2, e1, e2)

    # Only pass an edge check when one was supplied.
    edge_compat = _delegate_edge_match if self.edge_match_fn is not None else None

    found = self.G1.get_subisomorphisms_vf2(
        self.G2, node_compat_fn=self.node_match_fn, edge_compat_fn=edge_compat
    )
    return [
        mapping
        for mapping in found
        if self.G1.subgraph(mapping).get_isomorphisms_vf2(self.G2)
    ]

Layered Engine

Layered typing engine for dependency-aware SMARTS matching.

LayeredTypingEngine

LayeredTypingEngine(patterns)

Orchestrates level-by-level atom typing with dependency resolution.

This engine handles: (1) dependency analysis and topological sorting; (2) layered matching (level 0 first, then level 1, etc.); (3) conflict resolution within each level; (4) fixed-point iteration for circular dependencies.

Attributes:

Name Type Description
patterns

Dictionary mapping atom type names to SMARTSGraph patterns

matcher

SmartsMatcher instance for pattern matching

analyzer

DependencyAnalyzer for computing levels

Initialize layered typing engine.

Parameters:

Name Type Description Default
patterns dict[str, SMARTSGraph]

Dictionary of {atom_type_name: SMARTSGraph}

required
Source code in src/molpy/typifier/layered_engine.py
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(self, patterns: dict[str, SMARTSGraph]) -> None:
    """Set up the dependency analyzer and the matcher for ``patterns``.

    Args:
        patterns: Dictionary of {atom_type_name: SMARTSGraph}
    """
    self.patterns = patterns
    # The analyzer is created before the sorted pattern list is requested.
    self.analyzer = DependencyAnalyzer(patterns)

    # The matcher works on the patterns in level-sorted order.
    self.matcher = SmartsMatcher(self._get_sorted_patterns())

get_explain_data

get_explain_data(mol_graph, vs_to_atomid)

Generate detailed explanation of typing process.

Parameters:

Name Type Description Default
mol_graph Graph

Molecule graph

required
vs_to_atomid dict[int, int]

Vertex to atom ID mapping

required

Returns:

Type Description
dict

Dictionary with detailed typing information

Source code in src/molpy/typifier/layered_engine.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def get_explain_data(self, mol_graph: Graph, vs_to_atomid: dict[int, int]) -> dict:
    """Run the typing level by level and report what happened at each level.

    Every level is processed with one ``find_candidates`` / ``resolve`` pass
    of a matcher built for that level; assignments from earlier levels are
    passed to later ones.

    Args:
        mol_graph: Molecule graph
        vs_to_atomid: Vertex to atom ID mapping

    Returns:
        Dictionary with three keys: ``"levels"`` (per level, the pattern
        names and the matcher's explain output), ``"circular_groups"`` (the
        analyzer's groups as lists) and ``"final_assignments"`` (the
        accumulated atom type assignments).
    """
    assignments = {}
    last_level = self.analyzer.get_max_level()
    circular = [list(group) for group in self.analyzer.circular_groups]
    levels_report = {}

    for level in range(last_level + 1):
        patterns_here = self.analyzer.get_patterns_by_level(level)
        if not patterns_here:
            continue

        # A dedicated matcher restricted to this level's patterns.
        matcher = SmartsMatcher(patterns_here)
        found = matcher.find_candidates(mol_graph, vs_to_atomid, assignments)

        explanation = matcher.explain(found)
        levels_report[level] = {
            "patterns": [pattern.atomtype_name for pattern in patterns_here],
            "assignments": explanation,
        }

        # Winners of this level become visible to the following levels.
        assignments.update(matcher.resolve(found))

    return {
        "levels": levels_report,
        "circular_groups": circular,
        "final_assignments": assignments,
    }

typify

typify(mol_graph, vs_to_atomid, max_iterations=10)

Perform layered atom typing with dependency resolution.

Parameters:

Name Type Description Default
mol_graph Graph

Molecule graph with vertex/edge attributes

required
vs_to_atomid dict[int, int]

Mapping from vertex index to atom ID

required
max_iterations int

Maximum iterations for circular dependency resolution

10

Returns:

Type Description
dict[int, str]

Dictionary mapping atom_id -> atom_type

Source code in src/molpy/typifier/layered_engine.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def typify(
    self, mol_graph: Graph, vs_to_atomid: dict[int, int], max_iterations: int = 10
) -> dict[int, str]:
    """Assign atom types level by level, from level 0 up to the maximum level.

    Levels without patterns are skipped. A level is resolved with
    ``_resolve_circular`` when any of its patterns is found in one of the
    analyzer's circular groups, and with ``_resolve_level`` otherwise.

    Args:
        mol_graph: Molecule graph with vertex/edge attributes
        vs_to_atomid: Mapping from vertex index to atom ID
        max_iterations: Maximum iterations for circular dependency resolution

    Returns:
        Dictionary mapping atom_id -> atom_type
    """
    assigned: dict[int, str] = {}
    top_level = self.analyzer.get_max_level()

    for level in range(top_level + 1):
        batch = self.analyzer.get_patterns_by_level(level)
        if not batch:
            continue

        # Membership is tested with the pattern object registered under the
        # pattern's atom type name.
        needs_fixed_point = any(
            self.patterns[pattern.atomtype_name] in group
            for group in self.analyzer.circular_groups
            for pattern in batch
        )

        if needs_fixed_point:
            # Circular dependencies: iterate until stable or out of rounds.
            assigned = self._resolve_circular(
                batch, mol_graph, vs_to_atomid, assigned, max_iterations
            )
            continue

        # Ordinary level: a single matching pass.
        assigned = self._resolve_level(batch, mol_graph, vs_to_atomid, assigned)

    return assigned

Matcher

SMARTS matcher for atomtyping with conflict resolution.

This module provides the main SmartsMatcher class that: 1. Finds all candidate atom type assignments from SMARTS patterns 2. Resolves conflicts using deterministic priority rules 3. Provides explain functionality for debugging

Candidate dataclass

Candidate(atom_id, atomtype, source, priority, score, pattern_size, definition_order)

Represents a candidate atom type assignment.

The ordering is designed for conflict resolution: (1) higher priority wins; (2) higher specificity score wins; (3) larger pattern size wins; (4) later definition order wins.

ScoringPolicy

Policy for computing pattern specificity scores.

The default policy uses predicate weights

+0 per element predicate (baseline); +1 per charge/degree/hyb constraint; +2 per aromatic/in_ring constraint; +3 per bond order predicate; +4 per custom predicate.

custom staticmethod

custom(pattern, vertex_weight=1.0, edge_weight=1.5)

Compute custom specificity score.

Parameters:

Name Type Description Default
pattern SMARTSGraph

SmartsGraph pattern

required
vertex_weight float

Multiplier for vertex predicate weights

1.0
edge_weight float

Multiplier for edge predicate weights

1.5

Returns:

Type Description
int

Specificity score

Source code in src/molpy/typifier/matcher.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@staticmethod
def custom(
    pattern: SMARTSGraph, vertex_weight: float = 1.0, edge_weight: float = 1.5
) -> int:
    """Compute custom specificity score.

    Args:
        pattern: SmartsGraph pattern
        vertex_weight: Multiplier for vertex predicate weights
        edge_weight: Multiplier for edge predicate weights

    Returns:
        Specificity score
    """
    score = 0

    # Score vertex predicates
    for v in pattern.vs:
        if "preds" in v.attributes():
            preds = v["preds"]
            for pred in preds:
                if hasattr(pred, "meta"):
                    score += int(pred.meta.weight * vertex_weight)

    # Score edge predicates
    for e in pattern.es:
        if "preds" in e.attributes():
            preds = e["preds"]
            for pred in preds:
                if hasattr(pred, "meta"):
                    score += int(pred.meta.weight * edge_weight)

    return score

default staticmethod

default(pattern)

Compute default specificity score for a pattern.

Uses pattern size (vertices + edges) as specificity metric. Larger patterns are considered more specific.

Parameters:

Name Type Description Default
pattern SMARTSGraph

SmartsGraph pattern

required

Returns:

Type Description
int

Specificity score (higher = more specific)

Source code in src/molpy/typifier/matcher.py
78
79
80
81
82
83
84
85
86
87
88
89
90
91
@staticmethod
def default(pattern: SMARTSGraph) -> int:
    """Compute default specificity score for a pattern.

    Uses pattern size (vertices + edges) as specificity metric.
    Larger patterns are considered more specific.

    Args:
        pattern: SmartsGraph pattern

    Returns:
        Specificity score (higher = more specific)
    """
    return pattern.vcount() + pattern.ecount()

SmartsMatcher

SmartsMatcher(patterns, scoring=None)

Main matcher for atomtyping with SMARTS patterns.

This class finds all candidate atom type assignments from a list of SMARTS patterns and resolves conflicts using a deterministic priority system.

Example

patterns = [pattern1, pattern2, pattern3]
matcher = SmartsMatcher(patterns)
mol_graph = build_mol_graph(structure)
candidates = matcher.find_candidates(mol_graph, vs_to_atomid)
result = matcher.resolve(candidates)

Initialize matcher with patterns.

Parameters:

Name Type Description Default
patterns list[SMARTSGraph]

List of SmartsGraph patterns

required
scoring ScoringPolicy | None

Scoring policy (default: ScoringPolicy.default)

None
Source code in src/molpy/typifier/matcher.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def __init__(
    self, patterns: list[SMARTSGraph], scoring: ScoringPolicy | None = None
):
    """Store the patterns and scoring policy and number the patterns.

    Args:
        patterns: List of SmartsGraph patterns
        scoring: Scoring policy; a new ``ScoringPolicy`` is created when the
            argument is omitted or falsy
    """
    self.patterns = patterns
    self.scoring = scoring or ScoringPolicy()

    # Record each pattern's position in the list, but never overwrite a
    # definition order the pattern already carries.
    for position, pattern in enumerate(self.patterns):
        if hasattr(pattern, "definition_order"):
            continue
        pattern.definition_order = position

explain

explain(candidates)

Generate explain data for debugging.

Parameters:

Name Type Description Default
candidates list[Candidate]

List of Candidate objects

required

Returns:

Type Description
dict[int, Any]

Dict mapping atom_id -> explain data with all candidates

dict[int, Any]

and their ordering keys

Source code in src/molpy/typifier/matcher.py
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def explain(self, candidates: list[Candidate]) -> dict[int, Any]:
    """Describe, per atom, every candidate and how the candidates rank.

    Candidates are grouped by ``atom_id`` and each group is sorted with the
    candidates' own ordering; the first one after sorting is the winner.

    Args:
        candidates: List of Candidate objects

    Returns:
        Dict mapping atom_id -> ``{"winner": ..., "candidates": [...]}``,
        where each candidate entry lists its ordering keys and 1-based rank
    """
    # Bucket the candidates per atom, keeping first-seen atom order.
    grouped: dict[int, list[Candidate]] = {}
    for cand in candidates:
        grouped.setdefault(cand.atom_id, []).append(cand)

    report = {}
    for atom_id, group in grouped.items():
        ranked = sorted(group)
        entries = [
            {
                "atomtype": cand.atomtype,
                "source": cand.source,
                "priority": cand.priority,
                "score": cand.score,
                "pattern_size": cand.pattern_size,
                "definition_order": cand.definition_order,
                "rank": rank,
            }
            for rank, cand in enumerate(ranked, start=1)
        ]
        report[atom_id] = {
            "winner": ranked[0].atomtype if ranked else None,
            "candidates": entries,
        }

    return report

find_candidates

find_candidates(mol_graph, vs_to_atomid, type_assignments=None)

Find all candidate atom type assignments.

Parameters:

Name Type Description Default
mol_graph Graph

Molecule graph with vertex/edge attributes

required
vs_to_atomid dict[int, int]

Mapping from vertex index to atom ID

required
type_assignments dict[int, str] | None

Current type assignments (for reference checking)

None

Returns:

Type Description
list[Candidate]

List of Candidate objects

Source code in src/molpy/typifier/matcher.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def find_candidates(
    self,
    mol_graph: Graph,
    vs_to_atomid: dict[int, int],
    type_assignments: dict[int, str] | None = None,
) -> list[Candidate]:
    """Collect one candidate per typed vertex of every pattern match.

    Args:
        mol_graph: Molecule graph with vertex/edge attributes
        vs_to_atomid: Mapping from vertex index to atom ID
        type_assignments: Current type assignments, passed on to the
            per-pattern match search

    Returns:
        List of Candidate objects, in pattern order and then match order
    """
    found = []

    for position, pattern in enumerate(self.patterns):
        hits = self._find_pattern_matches(pattern, mol_graph, type_assignments)

        # Metadata shared by every candidate produced from this pattern.
        shared = {
            "atomtype": pattern.atomtype_name,
            "priority": pattern.priority,
            "score": self.scoring.default(pattern),
            "pattern_size": (pattern.vcount(), pattern.ecount()),
            "source": pattern.source or f"pattern_{position}",
            "definition_order": getattr(pattern, "definition_order", position),
        }

        # Without explicit targets, every pattern vertex is typed.
        typed_vertices = pattern.target_vertices or list(range(pattern.vcount()))

        for hit in hits:
            # hit[i] is the molecule vertex matched to pattern vertex i.
            for pattern_v in typed_vertices:
                if pattern_v >= len(hit):
                    continue
                atom_id = vs_to_atomid.get(hit[pattern_v])
                if atom_id is None:
                    continue
                found.append(Candidate(atom_id=atom_id, **shared))

    return found

resolve

resolve(candidates)

Resolve conflicts and return final atom type assignments.

Parameters:

Name Type Description Default
candidates list[Candidate]

List of Candidate objects

required

Returns:

Type Description
dict[int, str]

Dict mapping atom_id -> atomtype

Source code in src/molpy/typifier/matcher.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def resolve(self, candidates: list[Candidate]) -> dict[int, str]:
    """Pick one atom type per atom from the given candidates.

    Candidates are grouped by ``atom_id``; within each group the candidate
    that sorts first under the candidates' own ordering wins.

    Args:
        candidates: List of Candidate objects

    Returns:
        Dict mapping atom_id -> atomtype
    """
    # Bucket the candidates per atom, keeping first-seen atom order.
    grouped: dict[int, list[Candidate]] = {}
    for cand in candidates:
        grouped.setdefault(cand.atom_id, []).append(cand)

    # The best candidate is the first one after sorting each bucket.
    return {
        atom_id: sorted(group)[0].atomtype for atom_id, group in grouped.items()
    }