Molecular Graph Model Tutorial¶
Learn how MolPy uses graph models to represent molecular structures and perform pattern matching with SMARTS!
What is a Molecular Graph Model?¶
MolPy uses graph-based representations for:
- SMARTS Pattern Matching: Match chemical patterns in molecules
- Atom Type Assignment: Identify atom types based on connectivity
- Reaction Modeling: Find and transform molecular substructures
The graph model converts Atomistic structures into igraph.Graph objects for efficient pattern matching.
In [1]:
Copied!
from molpy.typifier.adapter import build_mol_graph
from molpy.typifier.graph import SMARTSGraph
from molpy.typifier.adapter import build_mol_graph
from molpy.typifier.graph import SMARTSGraph
Building Molecular Graphs¶
Convert an Atomistic structure to a graph representation:
In [2]:
Copied!
# Assemble a water molecule (H2O): one oxygen bonded to two hydrogens.
from molpy.core.atomistic import Atomistic

atomistic = Atomistic()
o = atomistic.def_atom(symbol="O", xyz=[0.0, 0.0, 0.0])

# Create both hydrogens in order from their coordinates.
h1, h2 = (
    atomistic.def_atom(symbol="H", xyz=position)
    for position in ([0.96, 0.0, 0.0], [-0.24, 0.93, 0.0])
)

# Each hydrogen gets its own O-H bond.
for hydrogen in (h1, h2):
    atomistic.def_bond(o, hydrogen)

print(f"Created molecule: {atomistic}")

# Turn the structure into a graph; the call also returns two lookup tables,
# bound here as vs_to_atomid and atomid_to_vs.
graph, vs_to_atomid, atomid_to_vs = build_mol_graph(atomistic)

print(f"Graph vertices: {graph.vcount()}")
print(f"Graph edges: {graph.ecount()}")

# List the element and degree stored on every vertex.
print("\nVertex attributes:")
for i, v in enumerate(graph.vs):
    print(f" Vertex {i}: element={v['element']}, degree={v['degree']}")
# Assemble a water molecule (H2O): one oxygen bonded to two hydrogens.
from molpy.core.atomistic import Atomistic

atomistic = Atomistic()
o = atomistic.def_atom(symbol="O", xyz=[0.0, 0.0, 0.0])

# Create both hydrogens in order from their coordinates.
h1, h2 = (
    atomistic.def_atom(symbol="H", xyz=position)
    for position in ([0.96, 0.0, 0.0], [-0.24, 0.93, 0.0])
)

# Each hydrogen gets its own O-H bond.
for hydrogen in (h1, h2):
    atomistic.def_bond(o, hydrogen)

print(f"Created molecule: {atomistic}")

# Turn the structure into a graph; the call also returns two lookup tables,
# bound here as vs_to_atomid and atomid_to_vs.
graph, vs_to_atomid, atomid_to_vs = build_mol_graph(atomistic)

print(f"Graph vertices: {graph.vcount()}")
print(f"Graph edges: {graph.ecount()}")

# List the element and degree stored on every vertex.
print("\nVertex attributes:")
for i, v in enumerate(graph.vs):
    print(f" Vertex {i}: element={v['element']}, degree={v['degree']}")
Created molecule: <Atomistic, 3 atoms (H:2 O:1), 2 bonds, with coords> Graph vertices: 3 Graph edges: 2 Vertex attributes: Vertex 0: element=O, degree=2 Vertex 1: element=H, degree=1 Vertex 2: element=H, degree=1
Graph Attributes¶
The graph includes rich vertex and edge attributes:
In [3]:
Copied!
# Reference listing only: the single statement in this cell prints a marker line.
#
# Vertex attributes include:
# - element: str (e.g., "C", "N", "O")
# - number: int (atomic number)
# - is_aromatic: bool
# - charge: int
# - degree: int (number of bonds)
# - hyb: int | None (1=sp, 2=sp2, 3=sp3)
# - in_ring: bool
# - cycles: set of tuples (ring membership)
#
# Edge attributes include:
# - order: int | str (1, 2, 3, or ":")
# - is_aromatic: bool
# - is_in_ring: bool
#
# NOTE(review): vertices are listed with `in_ring` but edges with `is_in_ring`;
# confirm the actual attribute names against build_mol_graph.
print("Graph attributes documentation")
# Reference listing only: the single statement in this cell prints a marker line.
#
# Vertex attributes include:
# - element: str (e.g., "C", "N", "O")
# - number: int (atomic number)
# - is_aromatic: bool
# - charge: int
# - degree: int (number of bonds)
# - hyb: int | None (1=sp, 2=sp2, 3=sp3)
# - in_ring: bool
# - cycles: set of tuples (ring membership)
#
# Edge attributes include:
# - order: int | str (1, 2, 3, or ":")
# - is_aromatic: bool
# - is_in_ring: bool
#
# NOTE(review): vertices are listed with `in_ring` but edges with `is_in_ring`;
# confirm the actual attribute names against build_mol_graph.
print("Graph attributes documentation")
Graph attributes documentation
SMARTS Graphs¶
SMARTSGraph represents a SMARTS pattern as a graph for matching:
In [4]:
Copied!
from molpy.parser.smarts import SmartsParser

# SMARTS pattern for water: an oxygen carrying two hydrogens.
# The first hydrogen is written as a branch so that BOTH hydrogens bond to the
# oxygen. A linear "[O][H][H]" would instead describe an O-H-H chain, in which
# the second hydrogen is bonded to the first hydrogen rather than to oxygen.
# NOTE(review): assumes the MolPy SMARTS grammar accepts standard "(...)"
# branch syntax — confirm against smarts.lark.
smarts_string = "[O]([H])[H]"
parser = SmartsParser()

# Parse the pattern and wrap it as a SMARTSGraph.
smarts_graph = SMARTSGraph(
    smarts_string=smarts_string,
    parser=parser,
    atomtype_name="water",
    priority=1,
)

print(f"Created SMARTSGraph: {smarts_graph}")
print(f"Pattern vertices: {smarts_graph.vcount()}")
print(f"Pattern edges: {smarts_graph.ecount()}")
from molpy.parser.smarts import SmartsParser

# SMARTS pattern for water: an oxygen carrying two hydrogens.
# The first hydrogen is written as a branch so that BOTH hydrogens bond to the
# oxygen. A linear "[O][H][H]" would instead describe an O-H-H chain, in which
# the second hydrogen is bonded to the first hydrogen rather than to oxygen.
# NOTE(review): assumes the MolPy SMARTS grammar accepts standard "(...)"
# branch syntax — confirm against smarts.lark.
smarts_string = "[O]([H])[H]"
parser = SmartsParser()

# Parse the pattern and wrap it as a SMARTSGraph.
smarts_graph = SMARTSGraph(
    smarts_string=smarts_string,
    parser=parser,
    atomtype_name="water",
    priority=1,
)

print(f"Created SMARTSGraph: {smarts_graph}")
print(f"Pattern vertices: {smarts_graph.vcount()}")
print(f"Pattern edges: {smarts_graph.ecount()}")
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) Cell In[4], line 5 3 # Create a SMARTS pattern (e.g., match an oxygen with 2 hydrogens - water) 4 smarts_string = "[O][H][H]" ----> 5 parser = SmartsParser() 7 # Parse and create SMARTSGraph 8 smarts_graph = SMARTSGraph( 9 smarts_string=smarts_string, parser=parser, atomtype_name="water", priority=1 10 ) File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/parser/smarts.py:544, in SmartsParser.__init__(self) 535 def __init__(self): 536 config = GrammarConfig( 537 grammar_path=Path(__file__).parent / "grammar" / "smarts.lark", 538 start="start", (...) 542 auto_reload=True, 543 ) --> 544 super().__init__(config) File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/parser/base.py:44, in GrammarParserBase.__init__(self, config) 42 self._lark: Lark | None = None 43 self._mtime: float | None = None ---> 44 self._compile_grammar(force=True) File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/parser/base.py:77, in GrammarParserBase._compile_grammar(self, force) 75 path = self.config.grammar_path 76 if not path.exists(): ---> 77 raise FileNotFoundError(f"Grammar file not found: {path}") 79 grammar_text = path.read_text(encoding="utf-8") 81 self._lark = Lark( 82 grammar_text, 83 start=self.config.start, (...) 88 keep_all_tokens=True, 89 ) FileNotFoundError: Grammar file not found: /opt/buildhome/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/parser/grammar/smarts.lark
Pattern Matching¶
Use subgraph isomorphism (igraph's VF2 search) to find where the pattern occurs in the molecular graph:
In [5]:
Copied!
# Match the SMARTS pattern graph against the molecular graph.
# Each match is a list of molecular-graph vertex indices, one per pattern vertex.
# NOTE(review): get_subisomorphisms_vf2 is called here without colour or
# compatibility arguments, so presumably only connectivity is compared —
# confirm whether SMARTSGraph offers an element-aware matcher.
matches = graph.get_subisomorphisms_vf2(smarts_graph)

print(f"Found {len(matches)} matches")

if matches:
    # vs_to_atomid stores id(atom) values, so index the atoms by id() once
    # instead of rescanning atomistic.atoms for every matched vertex.
    atoms_by_id = {id(atom): atom for atom in atomistic.atoms}

    print("\nMatch details:")
    for i, match in enumerate(matches):
        print(f" Match {i + 1}: vertices {match}")
        # Show which atom each matched vertex corresponds to.
        for v_idx in match:
            atom = atoms_by_id.get(vs_to_atomid[v_idx])
            if atom is not None:
                print(f" Vertex {v_idx} -> {atom.get('symbol')}")
# Match the SMARTS pattern graph against the molecular graph.
# Each match is a list of molecular-graph vertex indices, one per pattern vertex.
# NOTE(review): get_subisomorphisms_vf2 is called here without colour or
# compatibility arguments, so presumably only connectivity is compared —
# confirm whether SMARTSGraph offers an element-aware matcher.
matches = graph.get_subisomorphisms_vf2(smarts_graph)

print(f"Found {len(matches)} matches")

if matches:
    # vs_to_atomid stores id(atom) values, so index the atoms by id() once
    # instead of rescanning atomistic.atoms for every matched vertex.
    atoms_by_id = {id(atom): atom for atom in atomistic.atoms}

    print("\nMatch details:")
    for i, match in enumerate(matches):
        print(f" Match {i + 1}: vertices {match}")
        # Show which atom each matched vertex corresponds to.
        for v_idx in match:
            atom = atoms_by_id.get(vs_to_atomid[v_idx])
            if atom is not None:
                print(f" Vertex {v_idx} -> {atom.get('symbol')}")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[5], line 6 1 # Match SMARTS pattern against molecular graph 2 # Note: The graph needs to match the pattern structure 3 # For a simple pattern like [O][H][H], we need to check if it matches 4 5 # Get subgraph isomorphisms ----> 6 matches = graph.get_subisomorphisms_vf2(smarts_graph) 8 print(f"Found {len(matches)} matches") 10 if matches: NameError: name 'smarts_graph' is not defined
Use Cases¶
Molecular graph models are used in:
- Typifiers: Assign atom types based on SMARTS patterns
- Reaction Modeling: Find reaction sites and transform structures
- Structure Analysis: Identify functional groups and substructures
The graph representation makes these operations efficient and flexible!