Skip to content

IO

The IO module handles reading and writing of various molecular file formats.

Factory Functions

Data file reader factory functions.

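This module provides convenient factory functions for reading various molecular file formats. Structure readers return a Frame, either newly created or the one passed through the optional frame parameter; force-field readers return a ForceField (read_amber_prmtop returns a (Frame, ForceField) tuple), and trajectory readers return a trajectory reader object.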

PathLike module-attribute

PathLike = str | Path

_ensure_frame

_ensure_frame(frame)

Ensure a Frame object exists.

Source code in src/molpy/io/readers.py
15
16
17
18
19
20
21
def _ensure_frame(frame):
    """Return ``frame`` unchanged, or a newly constructed Frame when it is None."""
    if frame is not None:
        return frame

    # Deferred import: molpy.core.frame is only loaded when a new Frame is needed.
    from molpy.core.frame import Frame

    return Frame()

read_amber_ac

read_amber_ac(file, frame=None)

Read AC file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to AC file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def read_amber_ac(file: PathLike, frame: Any = None) -> Any:
    """
    Load an AC file into a Frame.

    Args:
        file: Path to AC file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``AcReader.read``
    """
    from .data.ac import AcReader

    target = _ensure_frame(frame)
    return AcReader(Path(file)).read(target)

read_amber_inpcrd

read_amber_inpcrd(inpcrd, frame=None)

Read AMBER inpcrd file and return a Frame object.

Parameters:

Name Type Description Default
inpcrd PathLike

Path to AMBER inpcrd file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def read_amber_inpcrd(inpcrd: PathLike, frame: Any = None) -> Any:
    """
    Load an AMBER inpcrd file into a Frame.

    Args:
        inpcrd: Path to AMBER inpcrd file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``AmberInpcrdReader.read``
    """
    from .data.amber import AmberInpcrdReader

    target = _ensure_frame(frame)
    return AmberInpcrdReader(Path(inpcrd)).read(target)

read_amber_prmtop

read_amber_prmtop(prmtop, inpcrd=None, frame=None)

Read AMBER prmtop and optional inpcrd files.

Parameters:

Name Type Description Default
prmtop PathLike

Path to AMBER prmtop file

required
inpcrd PathLike | None

Optional path to AMBER inpcrd file

None
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Tuple of (Frame, ForceField)

Source code in src/molpy/io/readers.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def read_amber_prmtop(
    prmtop: PathLike, inpcrd: PathLike | None = None, frame: Any = None
) -> Any:
    """
    Load an AMBER prmtop file, optionally followed by an inpcrd file.

    The prmtop reader is run first and yields both a Frame and a ForceField.
    When ``inpcrd`` is given, that Frame is then passed through
    ``AmberInpcrdReader.read``.

    Args:
        prmtop: Path to AMBER prmtop file
        inpcrd: Optional path to AMBER inpcrd file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        Tuple of (Frame, ForceField)
    """
    from .forcefield.amber import AmberPrmtopReader

    target = _ensure_frame(frame)
    target, ff = AmberPrmtopReader(Path(prmtop)).read(target)

    # Nothing more to do when no coordinate file was supplied.
    if inpcrd is None:
        return target, ff

    from .data.amber import AmberInpcrdReader

    target = AmberInpcrdReader(Path(inpcrd)).read(target)
    return target, ff

read_gro

read_gro(file, frame=None)

Read GROMACS gro file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to gro file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def read_gro(file: PathLike, frame: Any = None) -> Any:
    """
    Load a GROMACS gro file into a Frame.

    Args:
        file: Path to gro file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``GroReader.read``
    """
    from .data.gro import GroReader

    target = _ensure_frame(frame)
    return GroReader(Path(file)).read(target)

read_h5

read_h5(file, frame=None)

Read HDF5 file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to HDF5 file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Examples:

>>> frame = read_h5("structure.h5")
>>> frame["atoms"]["x"]
array([0., 1., 2.])
Source code in src/molpy/io/readers.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
def read_h5(file: PathLike, frame: Any = None) -> Any:
    """
    Load an HDF5 file into a Frame.

    Args:
        file: Path to HDF5 file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``HDF5Reader.read``

    Examples:
        >>> frame = read_h5("structure.h5")
        >>> frame["atoms"]["x"]
        array([0., 1., 2.])
    """
    from .data.h5 import HDF5Reader

    target = _ensure_frame(frame)
    return HDF5Reader(Path(file)).read(target)

read_h5_trajectory

read_h5_trajectory(file)

Read HDF5 trajectory file and return a trajectory reader.

Parameters:

Name Type Description Default
file PathLike

Path to HDF5 trajectory file

required

Returns:

Type Description
Any

HDF5TrajectoryReader object

Examples:

>>> reader = read_h5_trajectory("trajectory.h5")
>>> frame = reader.read_frame(0)
>>> for frame in reader:
...     process(frame)
Source code in src/molpy/io/readers.py
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
def read_h5_trajectory(file: PathLike) -> Any:
    """
    Create a trajectory reader for an HDF5 trajectory file.

    Args:
        file: Path to HDF5 trajectory file

    Returns:
        HDF5TrajectoryReader object constructed for ``Path(file)``

    Examples:
        >>> reader = read_h5_trajectory("trajectory.h5")
        >>> frame = reader.read_frame(0)
        >>> for frame in reader:
        ...     process(frame)
    """
    from .trajectory.h5 import HDF5TrajectoryReader

    trajectory_path = Path(file)
    return HDF5TrajectoryReader(trajectory_path)

read_lammps_data

read_lammps_data(file, atom_style, frame=None)

Read LAMMPS data file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to LAMMPS data file

required
atom_style str

LAMMPS atom style (e.g., 'full', 'atomic')

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def read_lammps_data(file: PathLike, atom_style: str, frame: Any = None) -> Any:
    """
    Load a LAMMPS data file into a Frame.

    Args:
        file: Path to LAMMPS data file
        atom_style: LAMMPS atom style (e.g., 'full', 'atomic')
        frame: Optional existing Frame to populate; forwarded as-is
            (including None) to the reader

    Returns:
        The Frame returned by ``LammpsDataReader.read``
    """
    from .data.lammps import LammpsDataReader

    data_path = Path(file)
    return LammpsDataReader(data_path, atom_style).read(frame=frame)

read_lammps_forcefield

read_lammps_forcefield(scripts, forcefield=None)

Read LAMMPS force field file and return a ForceField object.

Parameters:

Name Type Description Default
scripts PathLike | list[PathLike]

Path or list of paths to LAMMPS force field scripts

required
forcefield Any

Optional existing ForceField to populate

None

Returns:

Type Description
Any

Populated ForceField object

Source code in src/molpy/io/readers.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def read_lammps_forcefield(
    scripts: PathLike | list[PathLike], forcefield: Any = None
) -> Any:
    """
    Load LAMMPS force field script(s) into a ForceField.

    Args:
        scripts: Path or list of paths to LAMMPS force field scripts; passed
            to the reader without conversion
        forcefield: Optional existing ForceField to populate; forwarded as-is
            (including None) to the reader

    Returns:
        The object returned by ``LAMMPSForceFieldReader.read``
    """
    from .forcefield.lammps import LAMMPSForceFieldReader

    return LAMMPSForceFieldReader(scripts).read(forcefield=forcefield)

read_lammps_molecule

read_lammps_molecule(file, frame=None)

Read LAMMPS molecule file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to LAMMPS molecule file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def read_lammps_molecule(file: PathLike, frame: Any = None) -> Any:
    """
    Load a LAMMPS molecule file into a Frame.

    Args:
        file: Path to LAMMPS molecule file
        frame: Optional existing Frame to populate; forwarded as-is
            (including None) to the reader

    Returns:
        The Frame returned by ``LammpsMoleculeReader.read``
    """
    from .data.lammps_molecule import LammpsMoleculeReader

    molecule_path = Path(file)
    return LammpsMoleculeReader(molecule_path).read(frame=frame)

read_lammps_trajectory

read_lammps_trajectory(traj, frame=None)

Read LAMMPS trajectory file and return a trajectory reader.

Parameters:

Name Type Description Default
traj PathLike

Path to LAMMPS trajectory file

required
frame Any

Optional reference Frame for topology

None

Returns:

Type Description
Any

LammpsTrajectoryReader object

Source code in src/molpy/io/readers.py
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def read_lammps_trajectory(traj: PathLike, frame: Any = None) -> Any:
    """
    Create a trajectory reader for a LAMMPS trajectory file.

    Args:
        traj: Path to LAMMPS trajectory file
        frame: Optional reference Frame for topology; passed as the second
            positional argument of the reader

    Returns:
        LammpsTrajectoryReader object
    """
    from .trajectory.lammps import LammpsTrajectoryReader

    trajectory_path = Path(traj)
    return LammpsTrajectoryReader(trajectory_path, frame)

read_mol2

read_mol2(file, frame=None)

Read mol2 file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to mol2 file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def read_mol2(file: PathLike, frame: Any = None) -> Any:
    """
    Load a mol2 file into a Frame.

    Args:
        file: Path to mol2 file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``Mol2Reader.read``
    """
    from .data.mol2 import Mol2Reader

    target = _ensure_frame(frame)
    return Mol2Reader(Path(file)).read(target)

read_pdb

read_pdb(file, frame=None)

Read PDB file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to PDB file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def read_pdb(file: PathLike, frame: Any = None) -> Any:
    """
    Load a PDB file into a Frame.

    Args:
        file: Path to PDB file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``PDBReader.read``
    """
    from .data.pdb import PDBReader

    target = _ensure_frame(frame)
    return PDBReader(Path(file)).read(target)

read_top

read_top(file, forcefield=None)

Read GROMACS topology file and return a ForceField object.

Parameters:

Name Type Description Default
file PathLike

Path to GROMACS .top file

required
forcefield Any

Optional existing ForceField to populate

None

Returns:

Type Description
Any

Populated ForceField object

Source code in src/molpy/io/readers.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def read_top(file: PathLike, forcefield: Any = None) -> Any:
    """
    Load a GROMACS topology file into a ForceField.

    Args:
        file: Path to GROMACS .top file
        forcefield: Optional existing ForceField to populate; a new
            ``ForceField()`` is created when None

    Returns:
        The object returned by ``GromacsTopReader.read``
    """
    from molpy.core.forcefield import ForceField

    from .forcefield.top import GromacsTopReader

    target = ForceField() if forcefield is None else forcefield
    return GromacsTopReader(Path(file)).read(target)

read_xml_forcefield

read_xml_forcefield(file)

Read XML force field file and return a ForceField object.

Parameters:

Name Type Description Default
file PathLike

Path to XML force field file

required

Returns:

Type Description
Any

ForceField object

Source code in src/molpy/io/readers.py
236
237
238
239
240
241
242
243
244
245
246
247
248
def read_xml_forcefield(file: PathLike) -> Any:
    """
    Load an XML force field file.

    Thin wrapper that delegates to ``read_xml_forcefield`` in the
    ``.forcefield.xml`` submodule; ``file`` is passed through unchanged.

    Args:
        file: Path to XML force field file

    Returns:
        Whatever the delegated reader returns (documented as a ForceField)
    """
    from .forcefield.xml import read_xml_forcefield as _delegate

    result = _delegate(file)
    return result

read_xsf

read_xsf(file, frame=None)

Read XSF file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to XSF file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def read_xsf(file: PathLike, frame: Any = None) -> Any:
    """
    Load an XSF file into a Frame.

    Args:
        file: Path to XSF file
        frame: Optional existing Frame to populate; forwarded as-is
            (including None) to the reader

    Returns:
        The Frame returned by ``XsfReader.read``
    """
    from .data.xsf import XsfReader

    xsf_path = Path(file)
    return XsfReader(xsf_path).read(frame)

read_xyz

read_xyz(file, frame=None)

Read XYZ file and return a Frame object.

Parameters:

Name Type Description Default
file PathLike

Path to XYZ file

required
frame Any

Optional existing Frame to populate

None

Returns:

Type Description
Any

Populated Frame object

Source code in src/molpy/io/readers.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def read_xyz(file: PathLike, frame: Any = None) -> Any:
    """
    Load an XYZ file into a Frame.

    Args:
        file: Path to XYZ file
        frame: Optional existing Frame to populate; a new one is created
            when None

    Returns:
        The Frame returned by ``XYZReader.read``
    """
    from .data.xyz import XYZReader

    target = _ensure_frame(frame)
    return XYZReader(Path(file)).read(target)

read_xyz_trajectory

read_xyz_trajectory(file)

Read XYZ trajectory file and return a trajectory reader.

Parameters:

Name Type Description Default
file PathLike

Path to XYZ trajectory file

required

Returns:

Type Description
Any

XYZTrajectoryReader object

Source code in src/molpy/io/readers.py
324
325
326
327
328
329
330
331
332
333
334
335
336
def read_xyz_trajectory(file: PathLike) -> Any:
    """
    Create a trajectory reader for an XYZ trajectory file.

    Args:
        file: Path to XYZ trajectory file

    Returns:
        XYZTrajectoryReader object constructed for ``Path(file)``
    """
    from .trajectory.xyz import XYZTrajectoryReader

    trajectory_path = Path(file)
    return XYZTrajectoryReader(trajectory_path)

Data file writer factory functions.

This module provides convenient factory functions for creating various data file writers. All functions write Frame or ForceField objects to files.

PathLike module-attribute

PathLike = str | Path

write_gro

write_gro(file, frame)

Write a Frame object to a GROMACS GRO file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
Source code in src/molpy/io/writers.py
48
49
50
51
52
53
54
55
56
57
58
59
def write_gro(file: PathLike, frame: Any) -> None:
    """
    Serialize a Frame to a GROMACS GRO file.

    Args:
        file: Output file path
        frame: Frame object to write
    """
    from .data.gro import GroWriter

    GroWriter(Path(file)).write(frame)

write_h5

write_h5(file, frame, compression='gzip', compression_opts=4)

Write a Frame object to an HDF5 file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
compression str | None

Compression algorithm (None, 'gzip', 'lzf', 'szip'). Defaults to 'gzip'.

'gzip'
compression_opts int

Compression level (for gzip: 0-9). Defaults to 4.

4

Examples:

>>> frame = Frame(blocks={"atoms": {"x": [0, 1, 2], "y": [0, 0, 0]}})
>>> write_h5("structure.h5", frame)
Source code in src/molpy/io/writers.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def write_h5(
    file: PathLike,
    frame: Any,
    compression: str | None = "gzip",
    compression_opts: int = 4,
) -> None:
    """
    Serialize a Frame to an HDF5 file.

    Args:
        file: Output file path
        frame: Frame object to write
        compression: Compression algorithm (None, 'gzip', 'lzf', 'szip').
            Defaults to 'gzip'.
        compression_opts: Compression level (for gzip: 0-9). Defaults to 4.

    Examples:
        >>> frame = Frame(blocks={"atoms": {"x": [0, 1, 2], "y": [0, 0, 0]}})
        >>> write_h5("structure.h5", frame)
    """
    from .data.h5 import HDF5Writer

    # Both compression settings are forwarded to the writer as keywords.
    out_path = Path(file)
    HDF5Writer(
        out_path, compression=compression, compression_opts=compression_opts
    ).write(frame)

write_h5_trajectory

write_h5_trajectory(file, frames, compression='gzip', compression_opts=4)

Write frames to an HDF5 trajectory file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frames list

List of Frame objects to write

required
compression str | None

Compression algorithm (None, 'gzip', 'lzf', 'szip'). Defaults to 'gzip'.

'gzip'
compression_opts int

Compression level (for gzip: 0-9). Defaults to 4.

4

Examples:

>>> frames = [frame0, frame1, frame2]
>>> write_h5_trajectory("trajectory.h5", frames)
Source code in src/molpy/io/writers.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def write_h5_trajectory(
    file: PathLike,
    frames: list,
    compression: str | None = "gzip",
    compression_opts: int = 4,
) -> None:
    """
    Serialize a sequence of frames to an HDF5 trajectory file.

    The writer is used as a context manager and each frame is written in
    iteration order with ``write_frame``.

    Args:
        file: Output file path
        frames: List of Frame objects to write
        compression: Compression algorithm (None, 'gzip', 'lzf', 'szip').
            Defaults to 'gzip'.
        compression_opts: Compression level (for gzip: 0-9). Defaults to 4.

    Examples:
        >>> frames = [frame0, frame1, frame2]
        >>> write_h5_trajectory("trajectory.h5", frames)
    """
    from .trajectory.h5 import HDF5TrajectoryWriter

    out_path = Path(file)
    with HDF5TrajectoryWriter(
        out_path, compression=compression, compression_opts=compression_opts
    ) as sink:
        for snapshot in frames:
            sink.write_frame(snapshot)

write_lammps_data

write_lammps_data(file, frame, atom_style='full')

Write a Frame object to a LAMMPS data file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
atom_style str

LAMMPS atom style (default: 'full')

'full'
Source code in src/molpy/io/writers.py
19
20
21
22
23
24
25
26
27
28
29
30
31
def write_lammps_data(file: PathLike, frame: Any, atom_style: str = "full") -> None:
    """
    Serialize a Frame to a LAMMPS data file.

    Args:
        file: Output file path
        frame: Frame object to write
        atom_style: LAMMPS atom style (default: 'full')
    """
    from .data.lammps import LammpsDataWriter

    out_path = Path(file)
    LammpsDataWriter(out_path, atom_style=atom_style).write(frame)

write_lammps_forcefield

write_lammps_forcefield(file, forcefield, precision=6)

Write a ForceField object to a LAMMPS force field file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
forcefield Any

ForceField object to write

required
precision int

Number of decimal places for floating point values

6
Source code in src/molpy/io/writers.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def write_lammps_forcefield(
    file: PathLike, forcefield: Any, precision: int = 6
) -> None:
    """
    Serialize a ForceField to a LAMMPS force field file.

    Args:
        file: Output file path
        forcefield: ForceField object to write
        precision: Number of decimal places for floating point values
    """
    from .forcefield.lammps import LAMMPSForceFieldWriter

    out_path = Path(file)
    LAMMPSForceFieldWriter(out_path, precision=precision).write(forcefield)

write_lammps_molecule

write_lammps_molecule(file, frame, format_type='native')

Write a Frame object to a LAMMPS molecule file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
format_type str

Format type (default: 'native')

'native'
Source code in src/molpy/io/writers.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def write_lammps_molecule(
    file: PathLike, frame: Any, format_type: str = "native"
) -> None:
    """
    Serialize a Frame to a LAMMPS molecule file.

    Args:
        file: Output file path
        frame: Frame object to write
        format_type: Format type (default: 'native'); passed positionally
            to the writer
    """
    from .data.lammps_molecule import LammpsMoleculeWriter

    out_path = Path(file)
    LammpsMoleculeWriter(out_path, format_type).write(frame)

write_lammps_system

write_lammps_system(workdir, frame, forcefield)

Write a complete LAMMPS system (data + forcefield) to a directory.

Parameters:

Name Type Description Default
workdir PathLike

Output directory path

required
frame Any

Frame object containing structure

required
forcefield Any

ForceField object containing parameters

required
Source code in src/molpy/io/writers.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
def write_lammps_system(workdir: PathLike, frame: Any, forcefield: Any) -> None:
    """
    Write a complete LAMMPS system (data + forcefield) to a directory.

    Two files named after the directory are written inside ``workdir``:
    ``<name>.data`` (through ``write_lammps_data`` with its default atom
    style) and ``<name>.ff`` (through ``LAMMPSForceFieldWriter``). The force
    field writer receives, for every topology section present in ``frame``,
    the set of values found in that section's ``"type"`` column; sections
    that are missing (or have no ``"type"`` column) are passed as None.

    Args:
        workdir: Output directory path; created (with parents) if missing
        frame: Frame object containing structure
        forcefield: ForceField object containing parameters
    """
    workdir_path = Path(workdir)
    # exist_ok=True already tolerates an existing directory, so a separate
    # exists() check is redundant and only opens a check-then-create race.
    workdir_path.mkdir(parents=True, exist_ok=True)

    # Use directory name as file stem. Path(".").stem is "", which would make
    # with_suffix() raise ValueError, so fall back to the resolved directory
    # name in that case.
    stem = workdir_path.stem or workdir_path.resolve().stem
    file_stem = workdir_path / stem
    write_lammps_data(file_stem.with_suffix(".data"), frame)

    def _types_in(section: str):
        """Return the set of type values of ``section``, or None if absent."""
        if section in frame and "type" in frame[section]:
            return set(frame[section]["type"])
        return None

    # Extract type names from frame to create whitelist (keyword -> section).
    whitelist = {
        keyword: _types_in(section)
        for keyword, section in (
            ("atom_types", "atoms"),
            ("bond_types", "bonds"),
            ("angle_types", "angles"),
            ("dihedral_types", "dihedrals"),
            ("improper_types", "impropers"),
        )
    }

    # Write forcefield with whitelist
    from .forcefield.lammps import LAMMPSForceFieldWriter

    writer = LAMMPSForceFieldWriter(file_stem.with_suffix(".ff"))
    writer.write(forcefield, **whitelist)

write_lammps_trajectory

write_lammps_trajectory(file, frames, atom_style='full')

Write frames to a LAMMPS trajectory file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frames list

List of Frame objects to write

required
atom_style str

LAMMPS atom style (default: 'full')

'full'
Source code in src/molpy/io/writers.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def write_lammps_trajectory(
    file: PathLike, frames: list, atom_style: str = "full"
) -> None:
    """
    Serialize a sequence of frames to a LAMMPS trajectory file.

    Each frame is written with a timestep taken from its ``step`` attribute
    when it has one, otherwise from its position in ``frames``.

    Args:
        file: Output file path
        frames: List of Frame objects to write
        atom_style: LAMMPS atom style (default: 'full')
    """
    from .trajectory.lammps import LammpsTrajectoryWriter

    out_path = Path(file)
    with LammpsTrajectoryWriter(out_path, atom_style) as sink:
        for index, snapshot in enumerate(frames):
            sink.write_frame(snapshot, getattr(snapshot, "step", index))

write_pdb

write_pdb(file, frame)

Write a Frame object to a PDB file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
Source code in src/molpy/io/writers.py
34
35
36
37
38
39
40
41
42
43
44
45
def write_pdb(file: PathLike, frame: Any) -> None:
    """
    Serialize a Frame to a PDB file.

    Args:
        file: Output file path
        frame: Frame object to write
    """
    from .data.pdb import PDBWriter

    PDBWriter(Path(file)).write(frame)

write_xsf

write_xsf(file, frame)

Write a Frame object to an XSF file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frame Any

Frame object to write

required
Source code in src/molpy/io/writers.py
62
63
64
65
66
67
68
69
70
71
72
73
def write_xsf(file: PathLike, frame: Any) -> None:
    """
    Serialize a Frame to an XSF file.

    Args:
        file: Output file path
        frame: Frame object to write
    """
    from .data.xsf import XsfWriter

    XsfWriter(Path(file)).write(frame)

write_xyz_trajectory

write_xyz_trajectory(file, frames)

Write frames to an XYZ trajectory file.

Parameters:

Name Type Description Default
file PathLike

Output file path

required
frames list

List of Frame objects to write

required
Source code in src/molpy/io/writers.py
167
168
169
170
171
172
173
174
175
176
177
178
179
def write_xyz_trajectory(file: PathLike, frames: list) -> None:
    """
    Serialize a sequence of frames to an XYZ trajectory file.

    ``file`` is handed to the writer unchanged; frames are written in
    iteration order with ``write_frame``.

    Args:
        file: Output file path
        frames: List of Frame objects to write
    """
    from .trajectory.xyz import XYZTrajectoryWriter

    with XYZTrajectoryWriter(file) as sink:
        for snapshot in frames:
            sink.write_frame(snapshot)

Data Modules

Base

DataReader

DataReader(path, **open_kwargs)

Bases: FileBase, ABC

Base class for data file readers.

Source code in src/molpy/io/data/base.py
47
48
def __init__(self, path: PathLike, **open_kwargs):
    """
    Initialise the reader for ``path``.

    Args:
        path: Location of the file to read
        **open_kwargs: Extra keyword arguments forwarded to the base
            initializer
    """
    # Readers always use text read mode.
    super().__init__(path, mode="r", **open_kwargs)

read abstractmethod

read(frame=None)

Populate / update a Frame from the underlying file.

Parameters:

Name Type Description Default
frame Frame | None

Optional existing Frame to populate. If None, creates a new one.

None

Returns:

Type Description
Frame

The populated Frame object

Source code in src/molpy/io/data/base.py
68
69
70
71
72
73
74
75
76
77
78
79
@abstractmethod
def read(self, frame: Frame | None = None) -> Frame:
    """
    Populate / update a Frame from the underlying file.

    This is an abstract method with no implementation here; each concrete
    reader supplies its own.

    Args:
        frame: Optional existing Frame to populate. If None, creates a new one.

    Returns:
        The populated Frame object
    """
    ...

read_lines

read_lines()

Return all lines at once.

Source code in src/molpy/io/data/base.py
63
64
65
def read_lines(self) -> list[str]:
    """Return everything ``self.fh.readlines()`` yields, as a new list."""
    lines = self.fh.readlines()
    return list(lines)

DataWriter

DataWriter(path, **open_kwargs)

Bases: FileBase, ABC

Base class for data file writers.

Source code in src/molpy/io/data/base.py
88
89
def __init__(self, path: PathLike, **open_kwargs):
    """
    Initialise the writer for ``path``.

    Args:
        path: Location of the file to write
        **open_kwargs: Extra keyword arguments forwarded to the base
            initializer
    """
    # Writers always use text write mode.
    super().__init__(path, mode="w", **open_kwargs)

write abstractmethod

write(frame)

Serialize frame into the underlying file.

Parameters:

Name Type Description Default
frame Frame

Frame object to write

required
Source code in src/molpy/io/data/base.py
91
92
93
94
95
96
97
98
99
@abstractmethod
def write(self, frame: Frame) -> None:
    """
    Serialize frame into the underlying file.

    This is an abstract method with no implementation here; each concrete
    writer supplies its own.

    Args:
        frame: Frame object to write
    """
    ...

FileBase

FileBase(path, mode, **open_kwargs)

Bases: ABC

Common logic for Context-manager + lazy file handle.

Source code in src/molpy/io/data/base.py
17
18
19
20
21
def __init__(self, path: PathLike, mode: str, **open_kwargs):
    """
    Record how the file should be opened without opening it yet.

    Args:
        path: File location; stored as a ``Path``
        mode: Mode string stored for the later open
        **open_kwargs: Extra keyword arguments stored for the later open
    """
    # No handle exists until something opens the file.
    self._fh: IO[str] | None = None
    self._open_kwargs = open_kwargs
    self._mode = mode
    self._path = Path(path)

AC

Antechamber (AC) file format reader.

Reads Antechamber .ac files containing atom and bond information with force field types and charges.

AcReader

AcReader(file)

Bases: DataReader

Reader for Antechamber .ac format files.

Parses ATOM and BOND sections from Antechamber output files, extracting coordinates, charges, atom types, and connectivity.

Parameters:

Name Type Description Default
file str | Path

Path to .ac file

required
Source code in src/molpy/io/data/ac.py
26
27
28
def __init__(self, file: str | Path):
    """
    Initialise the reader for an Antechamber .ac file.

    Args:
        file: Path to .ac file
    """
    super().__init__(Path(file))
    # The path is also kept on the instance as ``_file``.
    self._file = Path(file)

assign_atomic_numbers

assign_atomic_numbers(atoms)

Assign atomic numbers by guessing from atom names/types.

Source code in src/molpy/io/data/ac.py
109
110
111
112
113
114
115
116
117
def assign_atomic_numbers(self, atoms):
    """
    Set ``atom["number"]`` for every atom dict, in place.

    The atom name is tried first through ``self._guess_atomic_number``; when
    that yields number 0, the atom type is tried instead and its result is
    stored as-is.
    """
    for entry in atoms:
        number = self._guess_atomic_number(entry["name"]).number
        if number == 0:
            # The name gave no match; fall back to the force-field atom type.
            number = self._guess_atomic_number(entry["type"]).number
        entry["number"] = number

read

read(frame)

Read .ac file and populate Frame with atoms and bonds.

Parameters:

Name Type Description Default
frame Frame

Frame to populate

required

Returns:

Type Description
Frame

Frame with atoms and bonds data

Source code in src/molpy/io/data/ac.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def read(self, frame: Frame) -> Frame:
    """
    Parse the .ac file and store its atoms and bonds on ``frame``.

    Lines are stripped and blank lines dropped. Lines starting with ``ATOM``
    or ``BOND`` are parsed; every other line is ignored. Each bond gets a
    ``"type"`` of the form ``"<type_i>-<type_j>"`` built from the types of
    the atoms it references. ``frame["atoms"]`` / ``frame["bonds"]`` are only
    set when at least one such record was found.

    Args:
        frame: Frame to populate

    Returns:
        The same ``frame`` object, with atoms and bonds data
    """

    def _as_columns(records):
        # Row dicts -> column dict, using the first record's keys.
        return {key: [row[key] for row in records] for key in records[0].keys()}

    with open(self._file) as handle:
        content = [text for raw in handle if (text := raw.strip())]

    self.atoms = []
    self.bonds = []
    self.atomtype_map = {}

    for record in content:
        if record.startswith("ATOM"):
            self.atoms.append(self._parse_atom_section(record))
            continue
        if not record.startswith("BOND"):
            continue
        bond = self._parse_bond_section(record)
        # The bond's atom references index directly into the atoms parsed so far.
        type_i = self.atoms[bond["atomi"]]["type"]
        type_j = self.atoms[bond["atomj"]]["type"]
        bond["type"] = f"{type_i}-{type_j}"
        self.bonds.append(bond)

    if self.atoms:
        # Numbers are assigned first so the "number" key becomes a column too.
        self.assign_atomic_numbers(self.atoms)
        frame["atoms"] = _as_columns(self.atoms)

    if self.bonds:
        frame["bonds"] = _as_columns(self.bonds)

    return frame

Amber

AmberInpcrdReader

AmberInpcrdReader(file, **kwargs)

Bases: DataReader

Reader for AMBER ASCII *.inpcrd (old-style) coordinate files.

  • Coordinates: 12.7/12.8 format, 6 numbers per line
  • Optional velocities section (same length as coordinates)
  • Optional final box line (3-6 floats)
Source code in src/molpy/io/data/amber.py
36
37
def __init__(self, file: str | Path, **kwargs):
    """Pass the file (as a ``Path``) and any extra keyword arguments to the base initializer."""
    inpcrd_path = Path(file)
    super().__init__(path=inpcrd_path, **kwargs)

GRO

GroReader

GroReader(file)

Bases: DataReader

Robust GROMACS GRO file reader.

Features:

  • Parses GRO format atoms with proper field extraction
  • Handles box information (orthogonal and triclinic)
  • Robust error handling for malformed files
  • Proper coordinate precision preservation

Source code in src/molpy/io/data/gro.py
24
25
26
def __init__(self, file: str | Path):
    """Initialize the base reader with the GRO path and keep it on ``self._file``."""
    gro_path = Path(file)
    super().__init__(gro_path)
    self._file = gro_path

assign_numbers

assign_numbers(atoms)

Assign atomic numbers based on atom names.

Source code in src/molpy/io/data/gro.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def assign_numbers(self, atoms):
    """Fill in missing atomic numbers by guessing the element from each atom name.

    Atoms that already carry a non-zero ``"number"`` are left alone, as are
    atoms whose name contains no letters. For the rest, the first two letters
    of the name are tried as an element symbol, then the first letter; if
    neither resolves, the atom is treated as hydrogen (number 1). The atom
    dictionaries are modified in place.
    """
    for atom in atoms:
        if "number" in atom and atom["number"] != 0:
            continue

        # Only the alphabetic characters of the name take part in the guess.
        letters = "".join(filter(str.isalpha, atom.get("name", "").strip()))
        if not letters:
            continue

        try:
            # Two-letter symbol first, then a single letter.
            for width in (2, 1):
                if len(letters) < width:
                    continue
                try:
                    atom["number"] = Element(letters[:width].upper()).number
                except (KeyError, ValueError):
                    continue
                break
            else:
                # No candidate symbol was accepted: default to hydrogen.
                atom["number"] = 1
        except Exception:
            # Any other failure during the lookup also falls back to hydrogen.
            atom["number"] = 1

read

read(frame=None)

Read GRO file and populate frame.

Source code in src/molpy/io/data/gro.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def read(self, frame: Frame | None = None) -> Frame:
    """Read GRO file and populate frame.

    The first line (title) is ignored, the second line gives the atom count,
    the following ``natoms`` lines are atom records and the line after them,
    if present, is the box line.

    Args:
        frame: Frame to populate. When ``None`` a new Frame is created.

    Returns:
        The frame, with ``frame["atoms"]`` set to a Block of per-atom arrays
        and ``frame.box`` set from the box line (or a default ``Box()`` when
        no box line follows the atom records).
    """
    if frame is None:
        # The signature advertises ``frame=None``; create the Frame here so
        # the assignments below do not fail on None.
        from molpy.core.frame import Frame

        frame = Frame()

    # Read file content
    with open(self._file) as f:
        lines = [self.sanitizer(line) for line in f]

    # Parse number of atoms (second line); an unreadable count means no atoms
    try:
        natoms = int(lines[1]) if len(lines) > 1 else 0
    except (ValueError, IndexError):
        natoms = 0

    # Parse atoms
    atoms_data: dict = {
        "res_number": [],
        "res_name": [],
        "name": [],
        "number": [],
        "xyz": [],
        "vx": [],  # Separate velocity components
        "vy": [],
        "vz": [],
    }

    has_velocity = False

    atom_lines = lines[2 : 2 + natoms] if len(lines) > 2 else []
    for line in atom_lines:
        atom_data = self._parse_atom_line(line)
        if not atom_data:
            # Lines the parser rejects are skipped
            continue

        atoms_data["res_number"].append(atom_data["res_number"])
        atoms_data["res_name"].append(atom_data["res_name"])
        atoms_data["name"].append(atom_data["name"])
        atoms_data["number"].append(atom_data["number"])
        atoms_data["xyz"].append([atom_data["x"], atom_data["y"], atom_data["z"]])

        if "vx" in atom_data:
            atoms_data["vx"].append(atom_data["vx"])
            atoms_data["vy"].append(atom_data["vy"])
            atoms_data["vz"].append(atom_data["vz"])
            has_velocity = True
        else:
            # Pad with zeros so the velocity columns stay aligned with atoms
            atoms_data["vx"].append(0.0)
            atoms_data["vy"].append(0.0)
            atoms_data["vz"].append(0.0)

    # Remove velocity components if not present in any atom
    if not has_velocity:
        del atoms_data["vx"]
        del atoms_data["vy"]
        del atoms_data["vz"]

    # Assign atomic numbers if missing: assign_numbers works on per-atom
    # dicts, so build them from the columns and copy the numbers back.
    n_parsed = len(atoms_data["res_number"])
    if n_parsed:
        atom_dicts = [
            {
                key: values[i] if values else None
                for key, values in atoms_data.items()
            }
            for i in range(n_parsed)
        ]
        self.assign_numbers(atom_dicts)
        atoms_data["number"] = [atom_dict["number"] for atom_dict in atom_dicts]

    # Convert xyz to separate x, y, z fields (keep both formats)
    if "xyz" in atoms_data and atoms_data["xyz"]:
        xyz_array = np.array(atoms_data["xyz"], dtype=float)
        atoms_data["x"] = xyz_array[:, 0]
        atoms_data["y"] = xyz_array[:, 1]
        atoms_data["z"] = xyz_array[:, 2]
        # Keep xyz field for backward compatibility
        atoms_data["xyz"] = xyz_array

    # Convert to numpy arrays
    for key in list(atoms_data.keys()):
        values = atoms_data[key]
        # Check if values is not empty (works for lists and arrays)
        if values is not None and len(values) > 0:
            if key in ["xyz", "velocity"]:
                # Already numpy array or should be 2D array
                if not isinstance(values, np.ndarray):
                    atoms_data[key] = np.array(values, dtype=float)
            elif key == "number":
                atoms_data[key] = np.array(values, dtype=int)
            elif key in ["x", "y", "z", "vx", "vy", "vz"]:
                # Coordinate/velocity components
                atoms_data[key] = np.array(values, dtype=float)
            else:
                # Every remaining column is stored as fixed-width text,
                # sized to the longest entry
                max_len = max(len(str(v)) for v in values)
                atoms_data[key] = np.array(values, dtype=f"U{max_len}")

    # Create dataset
    frame["atoms"] = Block(atoms_data)

    # Parse box from the line following the atom records
    if len(lines) > 2 + natoms:
        box_line = lines[2 + natoms]
        box_matrix = self._parse_box_line(box_line)
        frame.box = Box(matrix=box_matrix)
    else:
        frame.box = Box()  # Default box

    return frame

sanitizer staticmethod

sanitizer(line)

Clean line while preserving GRO format.

Source code in src/molpy/io/data/gro.py
28
29
30
31
@staticmethod
def sanitizer(line: str) -> str:
    """Drop trailing whitespace (including the newline) and keep leading columns intact."""
    trimmed = line.rstrip()
    return trimmed

GroWriter

GroWriter(path)

Bases: DataWriter

GROMACS GRO file writer.

Features:

  • Writes properly formatted GRO files
  • Handles orthogonal and triclinic boxes
  • Supports velocity information
  • Maintains precision for coordinates

Source code in src/molpy/io/data/gro.py
265
266
267
def __init__(self, path: str | Path):
    """Initialize the base writer with the output path and keep it on ``self._path``."""
    gro_path = Path(path)
    super().__init__(gro_path)
    self._path = gro_path

write

write(frame)

Write frame to GRO file.

Source code in src/molpy/io/data/gro.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
def write(self, frame):
    """Serialize ``frame`` to the GRO file at ``self._path``.

    The output consists of a title line, the atom count, one formatted line
    per atom and a final box line.

    Raises:
        ValueError: If an atom has neither x/y/z fields nor a usable xyz entry.
    """
    with open(self._path, "w") as out:
        # Title line, tagged with the frame's timestep when it has one.
        header = frame.metadata.get("name", "Generated by molpy")
        timestep = getattr(frame, "timestep", None)
        if timestep is not None:
            header += f", t= {timestep}"
        out.write(header + "\n")

        # Atom count line (0 when the frame has no "atoms" block).
        atoms = None
        n_atoms = 0
        if "atoms" in frame:
            atoms = frame["atoms"]
            n_atoms = atoms.nrows
        out.write(f"{n_atoms:>5d}\n")

        if n_atoms > 0:
            # Velocities are written only if all three components exist.
            has_velocity = all(axis in atoms for axis in ("vx", "vy", "vz"))

            for i in range(n_atoms):
                record = atoms[i]  # per-atom mapping

                # Residue number: textual values that are not integers
                # fall back to 1; other conversion errors propagate.
                res_num = record.get("res_number", 1)
                try:
                    res_num = int(res_num)
                except ValueError:
                    if not isinstance(res_num, str):
                        raise
                    res_num = 1

                res_name = str(record.get("res_name", "MOL"))
                atom_name = str(record.get("name", "X"))
                atom_num = int(record.get("number", i + 1))

                # Coordinates: separate x/y/z fields take precedence over xyz.
                if all(axis in record for axis in ("x", "y", "z")):
                    x = float(record["x"])
                    y = float(record["y"])
                    z = float(record["z"])
                elif "xyz" in record:
                    xyz = record["xyz"]
                    if isinstance(xyz, str) or not hasattr(xyz, "__iter__"):
                        raise ValueError(f"Invalid xyz format for atom {i}")
                    x, y, z = float(xyz[0]), float(xyz[1]), float(xyz[2])
                else:
                    raise ValueError(
                        f"Atom {i} missing coordinate information (need x/y/z or xyz)"
                    )

                if has_velocity:
                    vx = float(record.get("vx", 0.0))
                    vy = float(record.get("vy", 0.0))
                    vz = float(record.get("vz", 0.0))
                else:
                    vx = vy = vz = None

                out.write(
                    self._format_atom_line(
                        res_num, res_name, atom_name, atom_num, x, y, z, vx, vy, vz
                    )
                    + "\n"
                )

        # Box line closes the file.
        out.write(self._format_box_line(getattr(frame, "box", None)) + "\n")

H5

HDF5 file format support for Frame objects.

This module provides reading and writing of Frame objects to/from HDF5 format using h5py. The HDF5 format is efficient for storing large molecular datasets and supports compression and chunking.

HDF5 Structure:

/                        # Root group
├── blocks/              # Group containing all data blocks
│   ├── atoms/           # Block group (e.g., "atoms")
│   │   ├── x            # Dataset (variable)
│   │   ├── y            # Dataset
│   │   └── z            # Dataset
│   └── bonds/           # Another block group
│       ├── i            # Dataset
│       └── j            # Dataset
└── metadata/            # Group containing metadata
    ├── timestep         # Attribute or dataset
    └── ...              # Other metadata

HDF5Reader

HDF5Reader(path, **open_kwargs)

Read Frame objects from HDF5 files.

The HDF5 file structure should follow the format:

  • /blocks/{block_name}/{variable_name} for data arrays
  • /metadata/ for frame metadata

Examples:

>>> reader = HDF5Reader("frame.h5")
>>> frame = reader.read()
>>> frame["atoms"]["x"]
array([0., 1., 2.])

Initialize HDF5 reader.

Parameters:

Name Type Description Default
path PathLike

Path to HDF5 file

required
**open_kwargs Any

Additional arguments passed to h5py.File

{}
Source code in src/molpy/io/data/h5.py
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def __init__(self, path: PathLike, **open_kwargs: Any):
    """Set up a reader for one HDF5 file.

    Args:
        path: Path to HDF5 file
        **open_kwargs: Additional arguments passed to h5py.File

    Raises:
        ImportError: If h5py is not available.
    """
    if h5py is None:
        message = "h5py is required for HDF5 support. Install it with: pip install h5py"
        raise ImportError(message)

    # No file handle is held until one is opened.
    self._file: h5py.File | None = None
    self._open_kwargs = open_kwargs
    self._path = Path(path)

read

read(frame=None)

Read Frame from HDF5 file.

Parameters:

Name Type Description Default
frame Frame | None

Optional existing Frame to populate. If None, creates a new one.

None

Returns:

Name Type Description
Frame Frame

Populated Frame object with blocks and metadata from HDF5 file.

Source code in src/molpy/io/data/h5.py
423
424
425
426
427
428
429
430
431
432
433
def read(self, frame: Frame | None = None) -> Frame:
    """Read Frame from HDF5 file.

    The file is opened read-only for the duration of the call, using the
    extra ``h5py.File`` options given to the constructor.

    Args:
        frame: Optional existing Frame to populate. If None, creates a new one.

    Returns:
        Frame: Populated Frame object with blocks and metadata from HDF5 file.
    """
    # Forward the constructor's open_kwargs (previously ignored here); the
    # mode stays pinned to "r" whatever was supplied.
    open_kwargs = {**self._open_kwargs, "mode": "r"}
    with h5py.File(self._path, **open_kwargs) as f:
        return h5_group_to_frame(f, frame)

HDF5Writer

HDF5Writer(path, compression='gzip', compression_opts=4, **open_kwargs)

Write Frame objects to HDF5 files.

The HDF5 file structure follows:

  • /blocks/{block_name}/{variable_name} for data arrays
  • /metadata/ for frame metadata

Examples:

>>> frame = Frame(blocks={"atoms": {"x": [0, 1, 2], "y": [0, 0, 0]}})
>>> writer = HDF5Writer("frame.h5")
>>> writer.write(frame)

Initialize HDF5 writer.

Parameters:

Name Type Description Default
path PathLike

Path to output HDF5 file

required
compression str | None

Compression algorithm (None, 'gzip', 'lzf', 'szip'). Defaults to 'gzip'.

'gzip'
compression_opts int

Compression level (for gzip: 0-9). Defaults to 4.

4
**open_kwargs Any

Additional arguments passed to h5py.File

{}
Source code in src/molpy/io/data/h5.py
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
def __init__(
    self,
    path: PathLike,
    compression: str | None = "gzip",
    compression_opts: int = 4,
    **open_kwargs: Any,
):
    """Set up a writer for one HDF5 file.

    Args:
        path: Path to output HDF5 file
        compression: Compression algorithm (None, 'gzip', 'lzf', 'szip').
            Defaults to 'gzip'.
        compression_opts: Compression level (for gzip: 0-9). Defaults to 4.
        **open_kwargs: Additional arguments passed to h5py.File

    Raises:
        ImportError: If h5py is not available.
    """
    if h5py is None:
        message = "h5py is required for HDF5 support. Install it with: pip install h5py"
        raise ImportError(message)

    # Compression settings are stored as given and used when writing.
    self.compression = compression
    self.compression_opts = compression_opts

    # No file handle is held until one is opened.
    self._file: h5py.File | None = None
    self._open_kwargs = open_kwargs
    self._path = Path(path)

write

write(frame)

Write Frame to HDF5 file.

Parameters:

Name Type Description Default
frame Frame

Frame object to write.

required

Raises:

Type Description
ValueError

If frame is empty (no blocks).

Source code in src/molpy/io/data/h5.py
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
def write(self, frame: Frame) -> None:
    """Write Frame to HDF5 file.

    If the writer already holds an open file handle, the frame is written
    into it. Otherwise the file at ``self._path`` is opened in write mode
    (with the extra ``h5py.File`` options given to the constructor) and
    closed again when the frame has been written.

    Args:
        frame: Frame object to write.

    Raises:
        ValueError: If frame is empty (no blocks).
    """
    # If file is already open (context manager), use it
    if self._file is not None:
        frame_to_h5_group(
            frame, self._file, self.compression, self.compression_opts
        )
        return

    # Otherwise, open and close the file here. Forward the constructor's
    # open_kwargs (previously ignored); the mode stays pinned to "w".
    open_kwargs = {**self._open_kwargs, "mode": "w"}
    with h5py.File(self._path, **open_kwargs) as f:
        frame_to_h5_group(frame, f, self.compression, self.compression_opts)

frame_to_h5_group

frame_to_h5_group(frame, h5_group, compression='gzip', compression_opts=4)

Write a Frame to an HDF5 group.

This function can be used to write a Frame to any HDF5 group, making it reusable for both single Frame files and trajectory files.

Parameters:

Name Type Description Default
frame Frame

Frame object to write

required
h5_group Group

HDF5 group to write to

required
compression str | None

Compression algorithm (None, 'gzip', 'lzf', 'szip'). Defaults to 'gzip'.

'gzip'
compression_opts int

Compression level (for gzip: 0-9). Defaults to 4.

4

Raises:

Type Description
ValueError

If frame is empty (no blocks).

Source code in src/molpy/io/data/h5.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def frame_to_h5_group(
    frame: Frame,
    h5_group: "h5py.Group",
    compression: str | None = "gzip",
    compression_opts: int = 4,
) -> None:
    """Write a Frame to an HDF5 group.

    This function can be used to write a Frame to any HDF5 group, making it
    reusable for both single Frame files and trajectory files. Each block
    becomes ``blocks/<block_name>`` and each of its variables a dataset in
    that group; frame metadata, if any, goes under ``metadata``.

    Args:
        frame: Frame object to write
        h5_group: HDF5 group to write to
        compression: Compression algorithm (None, 'gzip', 'lzf', 'szip').
            Defaults to 'gzip'.
        compression_opts: Compression level (for gzip: 0-9). Defaults to 4.
            Only forwarded to h5py when ``compression`` is 'gzip'.

    Raises:
        ImportError: If h5py is not available.
        ValueError: If frame is empty (no blocks).
    """
    if h5py is None:
        raise ImportError(
            "h5py is required for HDF5 support. " "Install it with: pip install h5py"
        )

    if not frame._blocks:
        raise ValueError("Cannot write empty frame (no blocks)")

    # The filter settings are the same for every dataset, so build them once.
    # Shuffle is enabled whenever any compression is requested.
    create_kwargs = {
        "compression": compression,
        "shuffle": bool(compression),
    }
    if compression == "gzip" and compression_opts is not None:
        create_kwargs["compression_opts"] = compression_opts

    # Write blocks
    blocks_group = h5_group.create_group("blocks")
    for block_name, block in frame._blocks.items():
        block_group = blocks_group.create_group(block_name)

        # Write each variable in the block
        for var_name, var_data in block._vars.items():
            # Ensure data is a numpy array
            data = np.asarray(var_data)

            if data.dtype.kind == "U":
                # Unicode arrays are stored as variable-length UTF-8
                # strings, passed to h5py as an object array.
                dataset = block_group.create_dataset(
                    var_name,
                    data=data.astype(object),
                    dtype=h5py.string_dtype(encoding="utf-8"),
                    **create_kwargs,
                )
            else:
                dataset = block_group.create_dataset(
                    var_name,
                    data=data,
                    **create_kwargs,
                )

            # Store dtype information as attribute for better reconstruction
            dataset.attrs["dtype"] = str(data.dtype)

    # Write metadata
    if frame.metadata:
        metadata_group = h5_group.create_group("metadata")
        _write_metadata_to_group(
            metadata_group, frame.metadata, compression, compression_opts
        )

h5_group_to_frame

h5_group_to_frame(h5_group, frame=None)

Read a Frame from an HDF5 group.

This function can be used to read a Frame from any HDF5 group, making it reusable for both single Frame files and trajectory files.

Parameters:

Name Type Description Default
h5_group Group

HDF5 group to read from

required
frame Frame | None

Optional existing Frame to populate. If None, creates a new one.

None

Returns:

Name Type Description
Frame Frame

Populated Frame object with blocks and metadata from HDF5 group.

Source code in src/molpy/io/data/h5.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def h5_group_to_frame(h5_group: "h5py.Group", frame: Frame | None = None) -> Frame:
    """Read a Frame from an HDF5 group.

    This function can be used to read a Frame from any HDF5 group, making it
    reusable for both single Frame files and trajectory files.

    Args:
        h5_group: HDF5 group to read from
        frame: Optional existing Frame to populate. If None, creates a new one.

    Returns:
        Frame: Populated Frame object with blocks and metadata from HDF5 group.

    Raises:
        ImportError: If h5py is not available.
    """
    if h5py is None:
        raise ImportError(
            "h5py is required for HDF5 support. " "Install it with: pip install h5py"
        )

    # Test against None explicitly so a caller-supplied frame is always the
    # one populated, even if it happens to evaluate as false.
    if frame is None:
        frame = Frame()

    # Read blocks
    if "blocks" in h5_group:
        blocks_group = h5_group["blocks"]
        for block_name in blocks_group.keys():
            block_group = blocks_group[block_name]
            block = Block()

            # Read all variables in this block
            for var_name in block_group.keys():
                dataset = block_group[var_name]

                # A stored "dtype" attribute beginning with "<U" or ">U"
                # marks data that was a Unicode array when written.
                original_dtype_str = None
                if "dtype" in dataset.attrs:
                    original_dtype_str = dataset.attrs["dtype"]

                if original_dtype_str is not None and original_dtype_str.startswith(
                    ("<U", ">U")
                ):
                    # Use asstr() method for variable-length strings
                    try:
                        data = dataset.asstr()[:]
                        # Convert to numpy array with original dtype
                        data = np.array(data, dtype=original_dtype_str)
                    except (AttributeError, TypeError):
                        # Fallback: read as array and decode
                        raw_data = np.array(dataset)
                        if raw_data.dtype.kind == "O":  # Object dtype
                            data = np.array(
                                [
                                    (
                                        s.decode("utf-8")
                                        if isinstance(s, bytes)
                                        else str(s)
                                    )
                                    for s in raw_data
                                ],
                                dtype=original_dtype_str,
                            )
                        else:
                            data = raw_data.astype(original_dtype_str)
                else:
                    # Non-string data (or no dtype attribute): read as is
                    data = np.array(dataset)

                block[var_name] = data

            frame[block_name] = block

    # Read metadata
    if "metadata" in h5_group:
        metadata_group = h5_group["metadata"]
        _read_metadata_from_group(metadata_group, frame.metadata)

    return frame

LAMMPS

Modern LAMMPS data file I/O using Block.from_csv.

This module provides a clean, imperative approach to reading and writing LAMMPS data files using the Block.from_csv functionality.

LammpsDataReader

LammpsDataReader(path, atom_style='full')

Bases: DataReader

Modern LAMMPS data file reader using Block.from_csv.

Source code in src/molpy/io/data/lammps.py
26
27
28
def __init__(self, path: str | Path, atom_style: str = "full") -> None:
    """Initialize the base reader with the data-file path and remember the atom style."""
    data_path = Path(path)
    super().__init__(data_path)
    self.atom_style = atom_style

read

read(frame=None)

Read LAMMPS data file into a Frame.

Source code in src/molpy/io/data/lammps.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def read(self, frame: Frame | None = None) -> Frame:
    """Read LAMMPS data file into a Frame.

    Args:
        frame: Optional existing Frame to populate. If None, a new Frame is
            created.

    Returns:
        The frame, with an "atoms" block when the file has an Atoms section,
        one block per non-empty connectivity section (bonds, angles,
        dihedrals, impropers), and metadata holding the box, format, atom
        style, header counts, source file and force field parameters.
    """
    # Test against None explicitly so a caller-supplied frame is always the
    # one populated, even if it happens to evaluate as false.
    if frame is None:
        frame = Frame()

    # Read and parse the file
    lines = self._read_lines()
    sections = self._extract_sections(lines)

    # Parse header and set up box
    header_info = self._parse_header(sections.get("header", []))
    frame.metadata["box"] = self._create_box(header_info["box_bounds"])

    # Parse masses, type labels and force field parameters
    masses = self._parse_masses(sections.get("Masses", []))
    type_labels = self._parse_type_labels(sections)
    forcefield = self._parse_force_field(sections)

    # Parse atoms section
    if "Atoms" in sections:
        frame["atoms"] = self._parse_atoms_section(
            sections["Atoms"], masses, type_labels.get("atom", {})
        )

    # Build id to index mapping for connectivity
    id_to_idx = {}
    if "atoms" in frame:
        id_to_idx = {
            int(atom_id): i for i, atom_id in enumerate(frame["atoms"]["id"])
        }

    # Parse connectivity sections: (file section, kind, frame block / count key).
    # A section is read only when the header reports a positive count for it.
    connectivity = (
        ("Bonds", "bond", "bonds"),
        ("Angles", "angle", "angles"),
        ("Dihedrals", "dihedral", "dihedrals"),
        ("Impropers", "improper", "impropers"),
    )
    for section_name, kind, block_name in connectivity:
        if (
            section_name in sections
            and header_info["counts"].get(block_name, 0) > 0
        ):
            frame[block_name] = self._parse_connectivity_section(
                sections[section_name],
                kind,
                type_labels.get(kind, {}),
                id_to_idx,
            )

    # Store metadata
    frame.metadata.update(
        {
            "format": "lammps_data",
            "atom_style": self.atom_style,
            "counts": header_info["counts"],
            "source_file": str(self._path),
            "forcefield": forcefield,
        }
    )

    return frame

LammpsDataWriter

LammpsDataWriter(path, atom_style='full')

Bases: DataWriter

Modern LAMMPS data file writer using Block.to_csv approach.

Important Requirements:

  • Atoms in the frame must have an 'id' field. This field is required to map atom indices to atom IDs for LAMMPS output.
  • Connectivity data (bonds, angles, dihedrals) in the frame uses atom indices (0-based from to_frame()). The writer automatically converts these indices to atom IDs using the index->ID mapping from the atoms 'id' field.

Frame Structure:

  • Atoms: Must include 'id' field. Other required fields depend on atom_style.
  • Bonds/Angles/Dihedrals: Use atom indices in 'atomi', 'atomj', 'atomk', 'atoml' (from to_frame()). These are 0-based indices that will be converted to 1-based atom IDs.

Source code in src/molpy/io/data/lammps.py
511
512
513
def __init__(self, path: str | Path, atom_style: str = "full") -> None:
    """Initialize the base writer with the output path and remember the atom style."""
    data_path = Path(path)
    super().__init__(data_path)
    self.atom_style = atom_style

write

write(frame)

Write Frame to LAMMPS data file.

Parameters:

Name Type Description Default
frame Frame

Frame containing atoms and optionally bonds/angles/dihedrals. Atoms must have 'id' field.

required

Raises:

Type Description
ValueError

If atoms are missing 'id' field.

Source code in src/molpy/io/data/lammps.py
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
def write(self, frame: Frame) -> None:
    """Assemble the LAMMPS data file for ``frame`` and write it to ``self._path``.

    Args:
        frame: Frame containing atoms and optionally bonds/angles/dihedrals.
              Atoms must have 'id' field.

    Raises:
        ValueError: If atoms are missing 'id' field.
    """
    # Header comment with the time of writing, followed by a blank line.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [f"# LAMMPS data file written by molpy on {stamp}", ""]

    # Section counts
    counts = self._get_counts(frame)
    self._write_counts(lines, counts)
    lines.append("")

    # Type counts
    self._write_type_counts(lines, frame)
    lines.append("")

    # Box bounds
    self._write_box_bounds(lines, frame)
    lines.append("")

    # Type labels sections (must come before Masses for LAMMPS)
    self._write_type_labels_sections(lines, frame)

    has_atoms = "atoms" in frame

    # Masses section
    if has_atoms:
        self._write_masses_section(lines, frame)

    # Force field coefficients sections
    self._write_force_field_coeffs_sections(lines, frame)

    # Data sections: atoms first, then each non-empty connectivity block.
    if has_atoms:
        self._write_atoms_section(lines, frame)

    for section in ("bonds", "angles", "dihedrals", "impropers"):
        if section in frame and counts.get(section, 0) > 0:
            self._write_connectivity_section(lines, frame, section)

    # Write to file
    content = "\n".join(lines)
    with open(self._path, "w") as out:
        out.write(content)

LAMMPS molecule file I/O.

This module provides readers and writers for LAMMPS molecule template files, supporting both native format and JSON format as described in the LAMMPS documentation.

LammpsMoleculeReader

LammpsMoleculeReader(path)

Bases: DataReader

LAMMPS molecule file reader supporting both native and JSON formats.

Source code in src/molpy/io/data/lammps_molecule.py
23
24
25
def __init__(self, path: str | Path) -> None:
    """Initialize the base reader and flag JSON input by its ``.json`` suffix (case-insensitive)."""
    super().__init__(Path(path))
    suffix = self._path.suffix.lower()
    self.is_json = suffix == ".json"

read

read(frame=None)

Read LAMMPS molecule file into a Frame.

Source code in src/molpy/io/data/lammps_molecule.py
27
28
29
30
31
32
33
34
def read(self, frame: Frame | None = None) -> Frame:
    """Read LAMMPS molecule file into a Frame.

    Args:
        frame: Optional existing Frame to populate. If None, a new Frame is
            created.

    Returns:
        The result of the JSON reader when ``self.is_json`` is set, otherwise
        the result of the native-format reader.
    """
    # Test against None explicitly so a caller-supplied frame is always the
    # one handed to the format reader, even if it evaluates as false.
    if frame is None:
        frame = Frame()

    if self.is_json:
        return self._read_json_format(frame)
    return self._read_native_format(frame)

LammpsMoleculeWriter

LammpsMoleculeWriter(path, format_type='native')

Bases: DataWriter

LAMMPS molecule file writer supporting both native and JSON formats.

Source code in src/molpy/io/data/lammps_molecule.py
619
620
621
622
623
624
625
626
627
def __init__(self, path: str | Path, format_type: str = "native") -> None:
    """Create a writer for a LAMMPS molecule file.

    Args:
        path: Destination path.
        format_type: ``"native"`` or ``"json"`` (case-insensitive).

    Raises:
        ValueError: If ``format_type`` is neither ``"native"`` nor ``"json"``.
    """
    super().__init__(Path(path))
    normalized = format_type.lower()
    self.format_type = normalized
    if normalized != "native" and normalized != "json":
        raise ValueError("format_type must be 'native' or 'json'")

    # JSON output always ends in ``.json``; any other suffix is replaced.
    wants_json = normalized == "json"
    if wants_json and self._path.suffix.lower() != ".json":
        self._path = self._path.with_suffix(".json")

write

write(frame)

Write Frame to LAMMPS molecule file.

Source code in src/molpy/io/data/lammps_molecule.py
629
630
631
632
633
634
def write(self, frame: Frame) -> None:
    """Write ``frame`` using the writer selected by ``self.format_type``.

    ``"json"`` dispatches to the JSON writer; every other value dispatches
    to the native writer.
    """
    emit = (
        self._write_json_format
        if self.format_type == "json"
        else self._write_native_format
    )
    emit(frame)

Mol2

Mol2Reader

Mol2Reader(file)

Bases: DataReader

Robust MOL2 file reader following TRIPOS MOL2 format specification.

Features: - Parses MOLECULE, ATOM, BOND, and SUBSTRUCTURE sections - Handles various atom types and bond types - Robust error handling for malformed files - Supports partial files with missing sections - Assigns atomic numbers from atom names/types

Source code in src/molpy/io/data/mol2.py
23
24
25
26
def __init__(self, file: str | Path):
    """Create a MOL2 reader for ``file`` with empty molecule info."""
    source = Path(file)
    super().__init__(source)
    self._file = source
    self.molecule_info = {}

read

read(frame)

Read MOL2 file and populate frame.

Source code in src/molpy/io/data/mol2.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def read(self, frame: Frame) -> Frame:
    """Read MOL2 file and populate frame.

    Each non-empty line is routed to the parser of the most recent
    ``@<TRIPOS>`` section header (MOLECULE, ATOM or BOND). Lines whose
    parser raises ``ValueError`` or ``IndexError`` are reported with a
    printed warning and skipped.

    Args:
        frame: Frame to populate; ``"atoms"`` and ``"bonds"`` are assigned
            only when at least one such record was parsed.

    Returns:
        The ``frame`` that was passed in.

    Raises:
        ValueError: If the file cannot be opened or read. The underlying
            ``OSError`` is attached as ``__cause__``.
    """

    try:
        with open(self._file) as f:
            lines = f.readlines()
    except OSError as e:
        # Chain explicitly so the original errno/filename stay inspectable.
        raise ValueError(f"Cannot read MOL2 file {self._file}: {e}") from e

    self.atoms = []
    self.bonds = []
    self.molecule_info = {}

    section = None
    for line_num, line in enumerate(map(self.sanitizer, lines), 1):
        if not line:  # Skip empty lines
            continue

        if line.startswith("@<TRIPOS>"):
            # Everything after the 9-character tag is the section name.
            section = line[9:].strip()
            continue

        try:
            if section == "MOLECULE":
                self._parse_molecule_section(line, line_num)

            elif section == "ATOM":
                self._parse_atom_section(line, line_num)

            elif section == "BOND":
                self._parse_bond_section(line, line_num)

            elif section == "SUBSTRUCTURE":
                # We can extend this to parse substructure info if needed
                pass

        except (ValueError, IndexError) as e:
            # Best-effort parsing: report the malformed line and move on.
            print(
                f"Warning: Error parsing line {line_num} in section {section}: {e}"
            )
            continue

    # Assign atomic numbers after parsing all atoms
    self._assign_atomic_numbers()

    # Build datasets
    if self.atoms:
        # Convert atom list to Frame Block structure (one array per key;
        # the key set is taken from the first atom record).
        atoms_dict = {}
        for key in self.atoms[0]:
            values = [atom[key] for atom in self.atoms]
            if key == "xyz":
                # Convert tuples to separate x, y, z arrays
                xyz_array = np.array(values)
                atoms_dict["x"] = xyz_array[:, 0]
                atoms_dict["y"] = xyz_array[:, 1]
                atoms_dict["z"] = xyz_array[:, 2]
            else:
                atoms_dict[key] = np.array(values)

        frame["atoms"] = atoms_dict

    if self.bonds:
        # Convert bond list to Frame Block structure
        bonds_dict = {}
        for key in self.bonds[0]:
            bonds_dict[key] = np.array([bond[key] for bond in self.bonds])
        frame["bonds"] = bonds_dict

    return frame

sanitizer staticmethod

sanitizer(line)

Clean up line by stripping whitespace.

Source code in src/molpy/io/data/mol2.py
28
29
30
31
@staticmethod
def sanitizer(line: str) -> str:
    """Clean up line by stripping whitespace."""
    return line.strip()

PDB

PDBReader

PDBReader(file, **kwargs)

Bases: DataReader

Minimal-yet-robust PDB reader.

  • ATOM / HETATM parsed per PDB v3.3 fixed columns
  • CRYST1 -> frame.box
  • CONECT -> bond list
Source code in src/molpy/io/data/pdb.py
93
94
def __init__(self, file: Path, **kwargs):
    """Create a PDB reader for ``file``.

    ``file`` is handed to the base class as ``path``; any extra keyword
    arguments are forwarded unchanged.
    """
    super().__init__(
        path=file,
        **kwargs,
    )

PDBWriter

PDBWriter(path)

Bases: DataWriter

Robust PDB file writer that creates properly formatted PDB files.

Features: - Writes ATOM/HETATM records with proper formatting - Handles missing fields with sensible defaults - Writes CRYST1 records from box information - Writes CONECT records for bonds - Ensures PDB format compliance

Source code in src/molpy/io/data/pdb.py
235
236
def __init__(self, path: Path):
    """Create a PDB writer that targets ``path``."""
    super().__init__(
        path=path,
    )

write

write(frame)

Write frame to PDB file.

Required fields in frame["atoms"]: - x, y, z: coordinates (float, required) - id: atom ID (int, optional, defaults to index+1)

Optional fields in frame["atoms"]: - name: atom name (str) - resName: residue name (str) - element: element symbol (str) - resSeq: residue sequence number (int) - chainID: chain identifier (str) - occupancy: occupancy (float) - tempFactor: temperature factor (float)

Optional metadata: - elements: space-separated string of element symbols (one per atom) - name: frame name (str) - box: Box object for CRYST1 record

Raises:

Type Description
ValueError

If required fields (x, y, z) are missing or contain None

Source code in src/molpy/io/data/pdb.py
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
def write(self, frame):
    """Write frame to PDB file.

    Required fields in frame["atoms"]:
    - x, y, z: coordinates (float, required)
    - id: atom ID (int, optional, defaults to index+1)

    Optional fields in frame["atoms"]:
    - name: atom name (str)
    - resName: residue name (str)
    - element: element symbol (str)
    - resSeq: residue sequence number (int)
    - chainID: chain identifier (str)
    - occupancy: occupancy (float)
    - tempFactor: temperature factor (float)

    Optional metadata:
    - elements: space-separated string of element symbols (one per atom)
    - name: frame name (str)
    - box: Box object for CRYST1 record

    The required-field checks and the atom-id mapping are computed before
    the output file is opened, so a frame that fails those checks does not
    create or truncate the target file.

    Raises:
        ValueError: If required fields (x, y, z) are missing or contain None
    """
    # Extract elements from metadata if available
    elements_list = None
    if "elements" in frame.metadata:
        elements_str = frame.metadata["elements"]
        if isinstance(elements_str, str):
            elements_list = elements_str.split()

    atoms = frame["atoms"]
    n_atoms = atoms.nrows

    # Validate required fields exist and are not None *before* touching the
    # output path; opening with "w" truncates whatever was there.
    for field in ("x", "y", "z"):
        if field not in atoms:
            raise ValueError(
                f"Required field '{field}' is missing in frame['atoms']"
            )
        # Check if any values are None
        values = atoms[field]
        if values is None:
            raise ValueError(f"Required field '{field}' contains None")
        # Check for None in array (if object dtype)
        if hasattr(values, "dtype") and values.dtype == object:
            if any(v is None for v in values):
                raise ValueError(
                    f"Required field '{field}' contains None values"
                )

    # Build index -> id mapping for bonds (also used as the ATOM serial)
    if "id" in atoms:
        index_to_id = {
            i: int(atom_id) if atom_id is not None else i + 1
            for i, atom_id in enumerate(atoms["id"])
        }
    else:
        index_to_id = {i: i + 1 for i in range(n_atoms)}

    with open(self._path, "w") as f:
        # Write header
        frame_name = frame.metadata.get("name", "MOL")
        f.write(f"REMARK  {frame_name}\n")

        # Write CRYST1 record; the formatter receives None when the frame
        # has no box (missing key or explicit None).
        f.write(self._format_cryst1_line(frame.metadata.get("box")) + "\n")

        for i in range(n_atoms):
            # Extract required fields - raise error if None
            x_val = atoms["x"][i]
            y_val = atoms["y"][i]
            z_val = atoms["z"][i]

            # Guards containers without an object dtype (e.g. plain lists),
            # which the up-front check above does not scan element-wise.
            if x_val is None or y_val is None or z_val is None:
                raise ValueError(
                    f"Required coordinate fields contain None at index {i}: "
                    f"x={x_val}, y={y_val}, z={z_val}"
                )

            x = float(x_val)
            y = float(y_val)
            z = float(z_val)

            # Extract optional fields with defaults
            atom_id = index_to_id[i]

            # Get element from metadata list or atom_data
            if elements_list and i < len(elements_list):
                element = elements_list[i]
            elif "element" in atoms and atoms["element"][i] is not None:
                element = str(atoms["element"][i])
            elif "symbol" in atoms and atoms["symbol"][i] is not None:
                element = str(atoms["symbol"][i]).upper()
            else:
                element = "X"  # Default unknown element

            # Get atom name (use element if not specified)
            if "name" in atoms and atoms["name"][i] is not None:
                atom_name = str(atoms["name"][i])
            else:
                atom_name = element  # Use element as fallback

            # Get residue name
            res_name = "UNK"
            if "resName" in atoms and atoms["resName"][i] is not None:
                res_name = str(atoms["resName"][i])

            # Get residue sequence number
            res_seq = 1
            if "resSeq" in atoms and atoms["resSeq"][i] is not None:
                res_seq = int(atoms["resSeq"][i])

            # Get chain ID (only the first character is kept)
            chain_id = " "
            if "chainID" in atoms and atoms["chainID"][i] is not None:
                chain_id = str(atoms["chainID"][i])[:1]

            # Get optional fields with defaults
            occupancy = 1.0
            if "occupancy" in atoms and atoms["occupancy"][i] is not None:
                occupancy = float(atoms["occupancy"][i])

            temp_factor = 0.0
            if "tempFactor" in atoms and atoms["tempFactor"][i] is not None:
                temp_factor = float(atoms["tempFactor"][i])

            # Format and write atom line
            line = self._format_atom_line_fast(
                serial=atom_id,
                atom_name=atom_name,
                res_name=res_name,
                chain_id=chain_id,
                res_seq=res_seq,
                x=x,
                y=y,
                z=z,
                occupancy=occupancy,
                temp_factor=temp_factor,
                element=element,
            )
            f.write(line)
        f.write("\n")

        # Write bonds as CONECT records
        if "bonds" in frame:
            bonds = frame["bonds"]
            if "atomi" in bonds and "atomj" in bonds:
                connect = defaultdict(list)
                # atomi, atomj are stored as atom indices (0-based), use index_to_id
                for idx1, idx2 in zip(
                    bonds["atomi"].tolist(), bonds["atomj"].tolist()
                ):
                    id1 = index_to_id[int(idx1)]
                    id2 = index_to_id[int(idx2)]
                    # Record the bond in both directions so each atom lists
                    # all of its partners.
                    connect[id1].append(id2)
                    connect[id2].append(id1)
                for id1, id2s in sorted(connect.items()):
                    js = [str(id2).rjust(5) for id2 in sorted(id2s)]
                    f.write(f"CONECT{str(id1).rjust(5)}{''.join(js)}\n")
        # Write END record
        f.write("END\n")

Top

GROMACS topology file reader for Frame objects.

This module provides a reader for GROMACS topology files that extracts structural information (atoms, bonds, angles, dihedrals, pairs) and creates Frame objects with Block containers.

TopReader

TopReader(file, **open_kwargs)

Bases: DataReader

Read GROMACS topology files and create Frame objects.

This reader parses GROMACS .top files and extracts structural information from sections like [atoms], [bonds], [angles], [dihedrals], and [pairs].

Examples:

>>> reader = TopReader("molecule.top")
>>> frame = reader.read()
>>> frame["atoms"]  # Block with atom data
>>> frame["bonds"]  # Block with bond data

Initialize GROMACS topology reader.

Parameters:

Name Type Description Default
file PathLike

Path to GROMACS .top file

required
**open_kwargs

Additional arguments passed to file open

{}
Source code in src/molpy/io/data/top.py
33
34
35
36
37
38
39
40
41
def __init__(self, file: PathLike, **open_kwargs):
    """Set up a reader for a GROMACS topology file.

    Args:
        file: Path to GROMACS .top file
        **open_kwargs: Extra keyword arguments forwarded to the base class
    """
    super().__init__(file, **open_kwargs)
    topology_path = Path(file)
    self._file = topology_path

read

read(frame=None)

Read GROMACS topology file and populate Frame.

Parameters:

Name Type Description Default
frame Frame | None

Optional existing Frame to populate. If None, creates a new one.

None

Returns:

Type Description
Frame

Frame object with atoms, bonds, angles, dihedrals, and pairs blocks.

Source code in src/molpy/io/data/top.py
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def read(self, frame: Frame | None = None) -> Frame:
    """Read GROMACS topology file and populate Frame.

    Text after ``;`` is dropped as a comment, blank lines and lines
    starting with ``#`` are skipped, and every remaining line is handed to
    the parser of the current ``[ section ]``. Section headers are matched
    case-insensitively and regardless of the whitespace inside the
    brackets, so ``[atoms]``, ``[ atoms ]`` and ``[  Atoms  ]`` are
    equivalent. Lines in any other section are ignored.

    Args:
        frame: Optional existing Frame to populate. If None, creates a new one.

    Returns:
        Frame object with atoms, bonds, angles, dihedrals, and pairs blocks
        (each block is set only if at least one record was parsed).
    """
    if frame is None:
        frame = Frame()

    # Initialize data containers
    atoms_data: list[dict[str, Any]] = []
    bonds_data: list[dict[str, Any]] = []
    pairs_data: list[dict[str, Any]] = []
    angles_data: list[dict[str, Any]] = []
    dihedrals_data: list[dict[str, Any]] = []

    # Normalised section name -> (line parser, destination list)
    handlers = {
        "atoms": (self._parse_atom_line, atoms_data),
        "bonds": (self._parse_bond_line, bonds_data),
        "pairs": (self._parse_pair_line, pairs_data),
        "angles": (self._parse_angle_line, angles_data),
        "dihedrals": (self._parse_dihedral_line, dihedrals_data),
    }

    # Read and parse file
    with self._file.open("r", encoding="utf-8") as f:
        section: str | None = None
        for line in f:
            # Remove inline comments first, then surrounding whitespace
            line = line.partition(";")[0].strip()

            # Skip empty lines
            if not line:
                continue

            # Check for section header (must be on its own line). Only the
            # name between the brackets is kept so that spacing inside the
            # brackets does not affect matching.
            if line.startswith("[") and line.endswith("]"):
                section = line[1:-1].strip().lower()
                continue

            # Skip lines starting with '#'
            if line.startswith("#"):
                continue

            # Parse content based on current section
            handler = handlers.get(section)
            if handler is None:
                continue
            parse_line, rows = handler
            record = parse_line(line)
            if record:
                rows.append(record)

    # Convert lists of dicts to Blocks
    if atoms_data:
        frame["atoms"] = self._dicts_to_block(atoms_data)
        # Assign atomic numbers
        self._assign_atomic_numbers(frame["atoms"])

    if bonds_data:
        frame["bonds"] = self._dicts_to_block(bonds_data)

    if pairs_data:
        frame["pairs"] = self._dicts_to_block(pairs_data)

    if angles_data:
        frame["angles"] = self._dicts_to_block(angles_data)

    if dihedrals_data:
        frame["dihedrals"] = self._dicts_to_block(dihedrals_data)

    return frame

XSF

XSF (XCrySDen Structure File) format reader and writer.

XSF is a format for crystal structure visualization, supporting both periodic and non-periodic structures. It can contain atomic coordinates, unit cell parameters, and other structural information.

XsfReader

XsfReader(file)

Bases: DataReader

Parse an XSF file into a Frame.

XSF format supports both crystal structures (with unit cell) and molecular structures. The format can contain: - CRYSTAL or MOLECULE keyword - PRIMVEC or CONVVEC for unit cell vectors - PRIMCOORD for atomic coordinates - Optional comment lines starting with #

Source code in src/molpy/io/data/xsf.py
33
34
35
def __init__(self, file: str | Path):
    """Create an XSF reader for ``file``."""
    source = Path(file)
    super().__init__(source)
    self._file = source

read

read(frame=None)

Read XSF file and return Frame.

Returns

Frame Frame with: - frame containing atomic data - box: unit cell for CRYSTAL, Free Box for MOLECULE

Source code in src/molpy/io/data/xsf.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def read(self, frame: Frame | None = None) -> Frame:
    """
    Parse the XSF file into a Frame.

    Blank lines and lines starting with ``#`` are ignored. The keywords
    CRYSTAL, MOLECULE, PRIMVEC, CONVVEC and PRIMCOORD are recognised
    case-insensitively; any other line is skipped.

    Parameters
    ----------
    frame
        Optional frame to populate; a new one is created when it is None.

    Returns
    -------
    Frame
        Frame with:
        - an ``"atoms"`` block (``atomic_number``, ``element``, ``x``,
          ``y``, ``z``) when atom records were parsed
        - ``metadata["box"]``: unit cell for CRYSTAL, free Box otherwise

    Raises
    ------
    ValueError
        If the file has no content lines, or the PRIMCOORD header line is
        missing or has fewer than two fields.
    """
    if frame is None:
        frame = Frame()

    # Drop blank lines and '#' comments in a single pass.
    stripped = (raw.strip() for raw in self.read_lines())
    lines = [text for text in stripped if text and not text.startswith("#")]

    if not lines:
        raise ValueError("Empty XSF file")

    structure_type = None
    primvec_matrix = None
    convvec_matrix = None
    atoms_data = []

    cursor = 0
    total = len(lines)
    while cursor < total:
        keyword = lines[cursor].upper()
        advance = 1  # unrecognised lines and bare keywords consume one line

        if keyword in ("CRYSTAL", "MOLECULE"):
            structure_type = keyword
        elif keyword == "PRIMVEC":
            # Keyword line followed by three vector lines.
            primvec_matrix = self._parse_vectors(lines[cursor + 1 : cursor + 4])
            advance = 4
        elif keyword == "CONVVEC":
            convvec_matrix = self._parse_vectors(lines[cursor + 1 : cursor + 4])
            advance = 4
        elif keyword == "PRIMCOORD":
            if cursor + 1 >= total:
                raise ValueError("PRIMCOORD section incomplete")

            # Header line: "<number of atoms> <multiplicity>"
            header = lines[cursor + 1].split()
            if len(header) < 2:
                raise ValueError("Invalid PRIMCOORD header")

            n_atoms = int(header[0])
            # The second field is parsed only so a non-integer raises.
            int(header[1])

            first = cursor + 2
            atoms_data = self._parse_atoms(lines[first : first + n_atoms])
            advance = 2 + n_atoms

        cursor += advance

    # Coordinates are stored as separate x, y, z columns.
    if atoms_data:
        coords = np.array([entry["xyz"] for entry in atoms_data])
        numbers = np.array([entry["atomic_number"] for entry in atoms_data])
        symbols = np.array([entry["element"] for entry in atoms_data])
        frame["atoms"] = Block(
            {
                "atomic_number": numbers,
                "element": symbols,
                "x": coords[:, 0],
                "y": coords[:, 1],
                "z": coords[:, 2],
            }
        )

    # Anything not declared CRYSTAL gets a free (argument-less) Box.
    if structure_type != "CRYSTAL":
        box = Box()
    elif primvec_matrix is not None:
        box = Box(primvec_matrix)
    elif convvec_matrix is not None:
        box = Box(convvec_matrix)
    else:
        # CRYSTAL without vectors falls back to an identity cell.
        box = Box(np.eye(3))

    frame.metadata["box"] = box

    return frame

XsfWriter

XsfWriter(file)

Bases: DataWriter

Write Frame to XSF format.

Features: - Supports both CRYSTAL and MOLECULE structures - Writes PRIMVEC for unit cell vectors - Writes PRIMCOORD for atomic coordinates - Automatically determines structure type based on presence of box

Source code in src/molpy/io/data/xsf.py
200
201
202
def __init__(self, file: str | Path):
    """Create an XSF writer that targets ``file``."""
    target = Path(file)
    super().__init__(target)
    self._file = target

write

write(frame)

Write Frame to XSF file.

Parameters

frame : Frame Frame containing atomic data and optional box information

Source code in src/molpy/io/data/xsf.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
def write(self, frame: Frame) -> None:
    """
    Serialise a Frame to the XSF file at ``self._file``.

    A CRYSTAL section (PRIMVEC and CONVVEC, both taken from the box
    matrix) is written when ``frame.metadata["box"]`` exists, is not of
    FREE style and has a matrix; otherwise MOLECULE is written. A
    PRIMCOORD section always follows, with zero atoms if the frame has no
    ``"atoms"`` block.

    Parameters
    ----------
    frame : Frame
        Frame containing atomic data and optional box information
    """
    box: Box | None = frame.metadata.get("box", None)

    with open(self._file, "w") as out:
        out.write("# XSF file generated by molpy\n")

        periodic = (
            box is not None
            and box.style != Box.Style.FREE
            and box.matrix is not None
        )

        if not periodic:
            out.write("MOLECULE\n")
        else:
            out.write("CRYSTAL\n")

            # Primitive vectors
            out.write("PRIMVEC\n")
            cell = box.matrix
            for row in range(3):
                out.write(
                    f"    {cell[row, 0]:12.8f}    {cell[row, 1]:12.8f}    {cell[row, 2]:12.8f}\n"
                )

            # Conventional vectors repeat the primitive ones.
            out.write("CONVVEC\n")
            for row in range(3):
                out.write(
                    f"    {cell[row, 0]:12.8f}    {cell[row, 1]:12.8f}    {cell[row, 2]:12.8f}\n"
                )

        if "atoms" not in frame:
            # No atoms block: emit an empty coordinate section.
            out.write("PRIMCOORD\n")
            out.write("        0 1\n")
        else:
            block = frame["atoms"]
            numbers = block["atomic_number"]
            xs = block["x"]
            ys = block["y"]
            zs = block["z"]
            count = len(numbers)

            out.write("PRIMCOORD\n")
            out.write(f"       {count} 1\n")

            for k in range(count):
                number = numbers[k]
                px = xs[k]
                py = ys[k]
                pz = zs[k]
                out.write(
                    f"{number:2d}    {px:12.8f}    {py:12.8f}    {pz:12.8f}\n"
                )

XYZ

XYZReader

XYZReader(path, **open_kwargs)

Bases: DataReader

Parse an XYZ file (single model) into an :class:Frame.

Supports both standard XYZ and extended XYZ (extxyz) formats.

Standard XYZ Format

1. integer `N`  - number of atoms
2. comment line - stored in frame.metadata
3. N lines: `symbol  x  y  z`

Extended XYZ Format

1. integer `N`  - number of atoms
2. comment line with key=value pairs (e.g., Properties=species:S:1:pos:R:3)
3. N lines with columns defined by Properties specification
Source code in src/molpy/io/data/base.py
47
48
def __init__(self, path: PathLike, **open_kwargs):
    """Create the reader for ``path`` in read mode.

    ``mode="r"`` is always passed to the base class; the remaining keyword
    arguments are forwarded unchanged.
    """
    super().__init__(
        path,
        mode="r",
        **open_kwargs,
    )

read

read(frame=None)
Parameters

frame Optional frame to populate; if None, a new one is created.

Returns

Frame Frame with: * block "atoms": - element -> (N,) <U3 array - x -> (N,) float array - y -> (N,) float array - z -> (N,) float array - number -> (N,) int array (atomic numbers) - additional columns from Properties if extxyz * metadata from comment line

Source code in src/molpy/io/data/xyz.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def read(self, frame: Frame | None = None) -> Frame:
    """
    Read a single XYZ / extended-XYZ record into a Frame.

    Parameters
    ----------
    frame
        Optional frame to populate; a new one is created only when this
        is *None* (a supplied frame is always reused, even if it is empty).

    Returns
    -------
    Frame
        Frame with:
          * block ``"atoms"`` built by the extxyz parser when the comment
            line defines ``Properties``, otherwise by the standard parser
          * ``box`` set from ``Lattice`` (reshaped to 3x3) when present
          * metadata from the comment line, excluding ``Properties`` and
            ``Lattice``

    Raises
    ------
    ValueError
        If the file has fewer than two lines, or fewer atom lines than
        the count on the first line announces.
    """
    # --- collect lines ------------------------------------------------
    lines: list[str] = self.read_lines()
    if len(lines) < 2:
        raise ValueError("XYZ file too short")

    natoms = int(lines[0])
    if len(lines) < natoms + 2:
        raise ValueError("XYZ record truncated")

    comment = lines[1]
    records = lines[2 : 2 + natoms]

    # --- build / update frame ----------------------------------------
    # Explicit None check: ``frame or Frame()`` would swap out a supplied
    # frame that evaluates falsy, leaving the caller's object unpopulated.
    if frame is None:
        frame = Frame()

    # Parse comment line for extxyz metadata
    metadata = self._parse_xyz_comment(comment)

    # Check if this is extxyz format with Properties
    if "Properties" in metadata:
        atoms_blk = self._parse_extxyz_atoms(records, metadata["Properties"])
    else:
        atoms_blk = self._parse_standard_xyz_atoms(records)

    # Set box if Lattice is present (pop removes it from the metadata)
    if "Lattice" in metadata:
        lattice_str = metadata.pop("Lattice")
        lattice_values = [float(x) for x in lattice_str.split()]
        frame.box = Box(np.array(lattice_values).reshape(3, 3))

    # Properties is structural, not user metadata; Lattice was popped above.
    frame_metadata = {k: v for k, v in metadata.items() if k != "Properties"}
    frame.metadata.update(frame_metadata)

    frame["atoms"] = atoms_blk
    return frame

Trajectory Modules

Base

BaseTrajectoryReader

BaseTrajectoryReader(fpath)

Bases: ABC, Iterable['Frame']

Base class for trajectory file readers that act as lazy-loading iterators.

This class provides memory-mapped file reading and directly returns Frame objects without loading everything into memory. Supports reading from multiple files.

Implements Iterable[Frame] for lazy iteration over frames.

Initialize the trajectory reader.

Parameters:

Name Type Description Default
fpath PathLike | list[PathLike]

Path to trajectory file or list of paths to multiple trajectory files

required
Source code in src/molpy/io/trajectory/base.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(self, fpath: PathLike | list[PathLike]):
    """
    Set up the reader for one or several trajectory files.

    Args:
        fpath: A single path (``str`` or ``Path``) or an iterable of paths

    Raises:
        FileNotFoundError: If any of the given files does not exist
    """
    # A lone str/Path is wrapped so the rest of the code sees a sequence.
    single = isinstance(fpath, (str, Path))
    candidates = [fpath] if single else fpath
    self.fpaths = [Path(entry) for entry in candidates]

    # Fail on the first missing file, in the order given.
    missing = next((p for p in self.fpaths if not p.exists()), None)
    if missing is not None:
        raise FileNotFoundError(f"File not found: {missing}")

    # Per-frame location records and per-file memory maps start empty;
    # index paths are resolved before the files are opened.
    self._frame_locations: list[FrameLocation] = []
    self._mms: list[mmap.mmap] = []
    self._total_frames = 0
    self._index_files = self._get_index_file_paths()
    self._open_files()

fpath property

fpath

For backward compatibility - returns the first file path.

n_frames property

n_frames

Number of frames in the trajectory.

close

close()

Close all memory-mapped files.

Source code in src/molpy/io/trajectory/base.py
70
71
72
73
74
75
def close(self):
    """Close every open memory map and empty the map list."""
    for handle in (m for m in self._mms if m is not None):
        handle.close()
    self._mms.clear()

read_all

read_all()

Read all frames from the trajectory file.

Source code in src/molpy/io/trajectory/base.py
143
144
145
def read_all(self) -> list["Frame"]:
    """Return every frame, read in order from index 0 to the last one."""
    frame_indices = range(self._total_frames)
    return list(map(self.read_frame, frame_indices))

read_frame

read_frame(index)

Read a specific frame from the trajectory file(s).

Parameters:

Name Type Description Default
index int

Global frame index to read

required

Returns:

Type Description
Frame

The Frame object

Source code in src/molpy/io/trajectory/base.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def read_frame(self, index: int) -> "Frame":
    """
    Read a specific frame from the trajectory file(s).

    Args:
        index: Global frame index to read. Negative values count from the
            end (``-1`` is the last frame).

    Returns:
        The Frame object produced by ``_parse_frame``

    Raises:
        IndexError: If ``index`` is outside the trajectory; the message
            quotes the index exactly as the caller passed it.
    """
    # Keep the caller's value for the error message; reporting the
    # normalised index (e.g. -8 for a requested -10) is misleading.
    requested = index
    if index < 0:
        index = self._total_frames + index

    if index < 0 or index >= self._total_frames:
        raise IndexError(
            f"Frame index {requested} out of range [0, {self._total_frames})"
        )

    # Get location info for this frame
    location = self._get_frame_location(index)

    # The frame ends where the next frame of the same file begins; a slice
    # end of None reads to the end of the file.
    frame_end = None
    if index + 1 < len(self._frame_locations):
        next_location = self._frame_locations[index + 1]
        if next_location.file_index == location.file_index:
            frame_end = next_location.byte_offset

    # Get the memory-mapped file and read frame data
    mm = self._get_mmap(location.file_index)
    frame_bytes = mm[location.byte_offset : frame_end]
    frame_lines = frame_bytes.decode().splitlines()

    # Parse the frame lines using the derived class implementation
    return self._parse_frame(frame_lines)

read_frames

read_frames(indices)

Read multiple frames from the trajectory file.

Parameters:

Name Type Description Default
indices list[int]

list of frame indices to read

required

Returns:

Type Description
list[Frame]

list of Frame objects

Source code in src/molpy/io/trajectory/base.py
116
117
118
119
120
121
122
123
124
125
126
def read_frames(self, indices: list[int]) -> list["Frame"]:
    """
    Read several frames, one ``read_frame`` call per requested index.

    Args:
        indices: Frame indices to read, in the order they should be returned

    Returns:
        list of Frame objects, in the same order as ``indices``
    """
    return list(map(self.read_frame, indices))

read_range

read_range(start, stop, step=1)

Read a range of frames from the trajectory file.

Parameters:

Name Type Description Default
start int

Starting frame index

required
stop int

Stopping frame index (exclusive)

required
step int

Step size

1

Returns:

Type Description
list[Frame]

list of Frame objects

Source code in src/molpy/io/trajectory/base.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def read_range(self, start: int, stop: int, step: int = 1) -> list["Frame"]:
    """
    Read the frames addressed by ``range(start, stop, step)``.

    Args:
        start: First frame index
        stop: Index at which to stop (exclusive)
        step: Increment between consecutive indices

    Returns:
        list of Frame objects, as returned by ``read_frames``
    """
    return self.read_frames([*range(start, stop, step)])

FrameLocation

Bases: NamedTuple

Location information for a frame.

TrajectoryWriter

TrajectoryWriter(fpath)

Bases: ABC

Base class for all trajectory file writers.

Source code in src/molpy/io/trajectory/base.py
340
341
342
def __init__(self, fpath: str | Path):
    """Store the output path and open it for binary read/write.

    Args:
        fpath: Destination file. It is opened in ``"w+b"`` mode, so an
            existing file at this path is truncated.
    """
    target = Path(fpath)
    self.fpath = target
    self._fp = target.open("w+b")

write_frame abstractmethod

write_frame(frame)

Write a single frame to the file.

Source code in src/molpy/io/trajectory/base.py
350
351
352
353
@abstractmethod
def write_frame(self, frame: "Frame"):
    """Write a single frame to the file.

    Declared abstract; this base body performs no work.

    Args:
        frame: The Frame to write.
    """

H5

HDF5 trajectory file format support.

This module provides reading and writing of Trajectory objects to/from HDF5 format using h5py. The HDF5 format is efficient for storing large trajectory datasets and supports compression and chunking.

HDF5 Trajectory Structure:

/                           # Root group
├── frames/                 # Group containing all frames
│   ├── 0/                  # Frame 0
│   │   ├── blocks/         # Data blocks (same structure as single Frame)
│   │   └── metadata/       # Frame metadata
│   ├── 1/                  # Frame 1
│   │   ├── blocks/
│   │   └── metadata/
│   └── ...
├── n_frames                # Attribute: total number of frames
└── metadata/               # Optional trajectory-level metadata

HDF5TrajectoryReader

HDF5TrajectoryReader(path, **open_kwargs)

Read Trajectory objects from HDF5 files.

The HDF5 file structure should follow:

- /frames/{frame_index}/blocks/ for data blocks
- /frames/{frame_index}/metadata/ for frame metadata
- /n_frames attribute for total frame count

Examples:

>>> reader = HDF5TrajectoryReader("trajectory.h5")
>>> frame = reader.read_frame(0)
>>> for frame in reader:
...     process(frame)

Initialize HDF5 trajectory reader.

Parameters:

Name Type Description Default
path PathLike

Path to HDF5 trajectory file

required
**open_kwargs

Additional arguments passed to h5py.File

{}
Source code in src/molpy/io/trajectory/h5.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(self, path: PathLike, **open_kwargs):
    """Initialize HDF5 trajectory reader.

    No file is opened here; the file handle and cached frame count both
    start out as ``None``.

    Args:
        path: Path to HDF5 trajectory file
        **open_kwargs: Additional arguments passed to h5py.File

    Raises:
        ImportError: If h5py is not available
        FileNotFoundError: If ``path`` does not exist
    """
    if h5py is None:
        raise ImportError(
            "h5py is required for HDF5 support. Install it with: pip install h5py"
        )

    target = Path(path)
    self._path = target
    if not target.exists():
        raise FileNotFoundError(f"HDF5 trajectory file not found: {target}")

    self._open_kwargs = open_kwargs
    self._file: h5py.File | None = None
    self._n_frames: int | None = None

n_frames property

n_frames

Number of frames in the trajectory.

read_frame

read_frame(index)

Read a specific frame from the trajectory.

Parameters:

Name Type Description Default
index int

Frame index (0-based)

required

Returns:

Type Description
Frame

Frame object

Raises:

Type Description
IndexError

If index is out of range

Source code in src/molpy/io/trajectory/h5.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def read_frame(self, index: int) -> Frame:
    """Read a specific frame from the trajectory.

    Negative indices count back from the end of the trajectory.

    Args:
        index: Frame index (0-based)

    Returns:
        Frame object

    Raises:
        IndexError: If index is out of range
    """
    # Read the property once; it is consulted for wrapping, bounds checking
    # and the error message.
    total = self.n_frames
    # Keep the caller's value for the error message; only the lookup uses
    # the normalized (non-negative) position.
    position = index + total if index < 0 else index

    if not 0 <= position < total:
        raise IndexError(f"Frame index {index} out of range [0, {total})")

    # Reuse the already-open handle when there is one
    if self._file is not None:
        return self._read_frame_from_file(self._file, position)

    # Otherwise open the file just for the duration of this read
    with h5py.File(self._path, "r") as f:
        return self._read_frame_from_file(f, position)

HDF5TrajectoryWriter

HDF5TrajectoryWriter(path, compression='gzip', compression_opts=4, **open_kwargs)

Bases: TrajectoryWriter

Write Trajectory objects to HDF5 files.

The HDF5 file structure follows:

- /frames/{frame_index}/blocks/ for data blocks
- /frames/{frame_index}/metadata/ for frame metadata
- /n_frames attribute for total frame count

Examples:

>>> writer = HDF5TrajectoryWriter("trajectory.h5")
>>> writer.write_frame(frame0)
>>> writer.write_frame(frame1)
>>> writer.close()

Initialize HDF5 trajectory writer.

Parameters:

Name Type Description Default
path PathLike

Path to output HDF5 file

required
compression str | None

Compression algorithm (None, 'gzip', 'lzf', 'szip'). Defaults to 'gzip'.

'gzip'
compression_opts int

Compression level (for gzip: 0-9). Defaults to 4.

4
**open_kwargs

Additional arguments passed to h5py.File

{}
Source code in src/molpy/io/trajectory/h5.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def __init__(
    self,
    path: PathLike,
    compression: str | None = "gzip",
    compression_opts: int = 4,
    **open_kwargs,
):
    """Initialize HDF5 trajectory writer.

    An existing file is opened in append mode and writing resumes after the
    highest numeric key found under ``/frames``; otherwise a new file is
    created and numbering starts at 0.

    Args:
        path: Path to output HDF5 file
        compression: Compression algorithm (None, 'gzip', 'lzf', 'szip').
            Defaults to 'gzip'.
        compression_opts: Compression level (for gzip: 0-9). Defaults to 4.
        **open_kwargs: Additional arguments passed to h5py.File

    Raises:
        ImportError: If h5py is not available
    """
    if h5py is None:
        raise ImportError(
            "h5py is required for HDF5 support. Install it with: pip install h5py"
        )

    self._path = Path(path)
    self._open_kwargs = open_kwargs
    self.compression = compression
    self.compression_opts = compression_opts
    self._file: h5py.File | None = None
    self._frame_count = 0

    # Append to a file that is already there, otherwise start a fresh one
    appending = self._path.exists()
    self._file = h5py.File(
        self._path, mode="a" if appending else "w", **self._open_kwargs
    )

    # Only a pre-existing file can already hold frames to count
    if appending and "frames" in self._file:
        numbered = [int(key) for key in self._file["frames"].keys() if key.isdigit()]
        if numbered:
            self._frame_count = max(numbered) + 1

close

close()

Close the HDF5 file.

Source code in src/molpy/io/trajectory/h5.py
308
309
310
311
312
def close(self) -> None:
    """Close the HDF5 file and drop the handle; a no-op when already closed."""
    handle = self._file
    if handle is None:
        return
    handle.close()
    self._file = None

write_frame

write_frame(frame)

Write a single frame to the trajectory file.

Parameters:

Name Type Description Default
frame Frame

Frame object to write

required
Source code in src/molpy/io/trajectory/h5.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def write_frame(self, frame: Frame) -> None:
    """Write a single frame to the trajectory file.

    The frame is stored in a new group under ``/frames`` keyed by the current
    frame counter; afterwards the counter is incremented and mirrored into
    the file's ``n_frames`` attribute.

    Args:
        frame: Frame object to write

    Raises:
        ValueError: If the file handle is not open
    """
    handle = self._file
    if handle is None:
        raise ValueError("File not open. Use 'with' statement or call __enter__")

    # Reuse the frames group when present, otherwise create it
    frames_group = (
        handle["frames"] if "frames" in handle else handle.create_group("frames")
    )

    # Each frame gets its own group named after its index
    frame_group = frames_group.create_group(str(self._frame_count))

    # Serialization is delegated to the conversion function
    frame_to_h5_group(frame, frame_group, self.compression, self.compression_opts)

    # Advance the counter and keep the n_frames attribute in sync
    self._frame_count += 1
    handle.attrs["n_frames"] = self._frame_count

LAMMPS

LammpsTrajectoryReader

LammpsTrajectoryReader(fpath, frame=None, extra_type_mappings=None)

Bases: BaseTrajectoryReader

Reader for LAMMPS trajectory files, supporting multiple files.

Source code in src/molpy/io/trajectory/lammps.py
53
54
55
56
57
58
59
60
61
62
63
def __init__(
    self,
    fpath: PathLike | list[PathLike],
    frame: Frame | None = None,
    extra_type_mappings: dict | None = None,
):
    """Initialize the LAMMPS trajectory reader.

    Args:
        fpath: A single trajectory path or a list of paths, forwarded
            unchanged to the base class initializer
        frame: Optional Frame stored on the reader as ``self.frame``
        extra_type_mappings: Optional entries merged into
            ``column_type_mappings``; ignored when empty or None
    """
    # The base class handles both the single-file and multi-file forms
    super().__init__(fpath)
    self.frame = frame

    if not extra_type_mappings:
        return

    # NOTE(review): column_type_mappings is not an instance attribute, so
    # this update is visible outside this reader -- confirm that is intended.
    column_type_mappings.update(extra_type_mappings)

LammpsTrajectoryWriter

LammpsTrajectoryWriter(fpath, atom_style='full')

Bases: TrajectoryWriter

Writer for LAMMPS trajectory files.

Source code in src/molpy/io/trajectory/lammps.py
178
179
180
def __init__(self, fpath: str | Path, atom_style: str = "full"):
    """Initialize the LAMMPS trajectory writer.

    Args:
        fpath: Output file path, forwarded to the base class initializer
        atom_style: Atom style name stored as ``self.atom_style``
    """
    # The base initializer runs first, before the style is recorded
    super().__init__(fpath)
    self.atom_style = atom_style

write_frame

write_frame(frame, timestep=None)

Write a single frame to the file.

Source code in src/molpy/io/trajectory/lammps.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def write_frame(self, frame: "Frame", timestep: int | None = None):
    """Write a single frame to the file."""
    if timestep is None:
        timestep = frame.metadata.get("timestep", 0)

    # Write timestep
    if self._fp is None:
        raise ValueError("File is not open for writing.")
    self._fp.write(f"ITEM: TIMESTEP\n{timestep}\n".encode())

    # Write number of atoms
    if "atoms" in frame:
        atoms = frame["atoms"]
        first_col = next(iter(atoms.keys()))
        n_atoms = len(atoms[first_col])
        self._fp.write(f"ITEM: NUMBER OF ATOMS\n{n_atoms}\n".encode())

    # Write box bounds
    # Get box from metadata
    box = frame.metadata.get("box")
    if box:
        matrix = box.matrix
        origin = box.origin

        # Check if box is orthogonal
        if np.allclose(matrix, np.diag(np.diag(matrix))):
            # Orthogonal box
            self._fp.write(b"ITEM: BOX BOUNDS pp pp pp\n")
            for i in range(3):
                self._fp.write(f"{origin[i]} {origin[i] + matrix[i, i]}\n".encode())
        else:
            # Triclinic box
            self._fp.write(b"ITEM: BOX BOUNDS pp pp pp xy xz yz\n")
            for i in range(3):
                if i == 0:
                    self._fp.write(
                        f"{origin[i]} {origin[i] + matrix[i, i]} {matrix[0, 1]}\n".encode()
                    )
                elif i == 1:
                    self._fp.write(
                        f"{origin[i]} {origin[i] + matrix[i, i]} {matrix[0, 2]}\n".encode()
                    )
                else:
                    self._fp.write(
                        f"{origin[i]} {origin[i] + matrix[i, i]} {matrix[1, 2]}\n".encode()
                    )

    # Write atoms
    if "atoms" in frame:
        atoms = frame["atoms"]

        # Determine column order based on available data
        columns = []
        if "id" in atoms:
            columns.append("id")
        if "mol_id" in atoms:
            columns.append("mol_id")
        if "type" in atoms:
            columns.append("type")
        if "q" in atoms:
            columns.append("q")
        if "x" in atoms and "y" in atoms and "z" in atoms:
            columns.extend(["x", "y", "z"])
        elif "xu" in atoms and "yu" in atoms and "zu" in atoms:
            columns.extend(["xu", "yu", "zu"])
        elif "xs" in atoms and "ys" in atoms and "zs" in atoms:
            columns.extend(["xs", "ys", "zs"])
        if "vx" in atoms and "vy" in atoms and "vz" in atoms:
            columns.extend(["vx", "vy", "vz"])
        if "fx" in atoms and "fy" in atoms and "fz" in atoms:
            columns.extend(["fx", "fy", "fz"])

        # Write atom header
        self._fp.write(f"ITEM: ATOMS {' '.join(columns)}\n".encode())

        # Write atom data
        n_atoms = len(atoms)

        # Get first available column to determine actual atom count
        first_col = next(iter(atoms.keys()))
        actual_n_atoms = len(atoms[first_col])

        for i in range(actual_n_atoms):
            row_data = []
            for col in columns:
                if col in [
                    "x",
                    "y",
                    "z",
                    "xu",
                    "yu",
                    "zu",
                    "xs",
                    "ys",
                    "zs",
                    "vx",
                    "vy",
                    "vz",
                    "fx",
                    "fy",
                    "fz",
                    "q",
                ]:
                    row_data.append(f"{atoms[col][i]:.6f}")
                else:
                    row_data.append(f"{atoms[col][i]}")
            self._fp.write(f"{' '.join(row_data)}\n".encode())

    self._fp.flush()

XYZ

XYZTrajectoryReader

XYZTrajectoryReader(fpath)

Bases: BaseTrajectoryReader

Reader for XYZ trajectory files.

Source code in src/molpy/io/trajectory/base.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(self, fpath: PathLike | list[PathLike]):
    """
    Initialize the trajectory reader.

    Args:
        fpath: Path to trajectory file or list of paths to multiple trajectory files

    Raises:
        FileNotFoundError: If any of the given paths does not exist
    """
    # A lone path is treated as a one-element collection
    sources = [fpath] if isinstance(fpath, (str, Path)) else fpath
    self.fpaths = [Path(source) for source in sources]

    # Fail on the first path that is missing
    missing = next((path for path in self.fpaths if not path.exists()), None)
    if missing is not None:
        raise FileNotFoundError(f"File not found: {missing}")

    self._frame_locations: list[FrameLocation] = []  # one entry per frame
    self._mms: list[mmap.mmap] = []  # one memory-mapped object per file
    self._total_frames = 0
    self._index_files = self._get_index_file_paths()
    self._open_files()

XYZTrajectoryWriter

XYZTrajectoryWriter(fpath)

Bases: TrajectoryWriter

Writer for XYZ trajectory files.

Source code in src/molpy/io/trajectory/xyz.py
161
162
163
def __init__(self, fpath: str | Path):
    """Initialize the XYZ writer and open ``fpath`` for text output.

    Args:
        fpath: Output file path; opened in ``"w"`` mode as ``self.fobj``
    """
    super().__init__(fpath)
    # NOTE(review): TrajectoryWriter.__init__ has already opened this same
    # path in "w+b" mode as self._fp, so two handles now point at the file.
    self.fobj = open(fpath, mode="w")

close

close()

Close the file.

Source code in src/molpy/io/trajectory/xyz.py
196
197
198
199
def close(self):
    """Close the file.

    Closes the text handle ``fobj`` and also the ``_fp`` handle that
    ``TrajectoryWriter.__init__`` opens on the same path, so neither is
    leaked. Missing or already-closed handles are skipped, which makes
    repeated calls safe.
    """
    for attr in ("fobj", "_fp"):
        handle = getattr(self, attr, None)
        if handle is not None and not handle.closed:
            handle.close()

write_frame

write_frame(frame)

Write a single frame to the XYZ file.

Source code in src/molpy/io/trajectory/xyz.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def write_frame(self, frame: Frame):
    """Write a single frame to the XYZ file.

    Output is the atom count, a comment line, then one ``element x y z``
    line per atom. The comment is ``frame.metadata["comment"]`` when present,
    otherwise ``Step=<step>`` with ``step`` defaulting to 0; when the
    metadata holds a box, a ``Lattice="..."`` entry is appended to it.

    Args:
        frame: Frame whose ``"atoms"`` entry provides ``x``, ``y`` and ``z``
            for every row; a missing ``element`` is written as ``X``.
    """
    atoms = frame["atoms"]
    box = frame.metadata.get("box", None)

    # Count atoms on a coordinate column rather than with len(atoms): the
    # LAMMPS writer likewise measures a column to get the real atom count.
    # NOTE(review): presumably len(atoms) is not the row count for every
    # atoms container -- confirm against the Frame/Block implementation.
    n_atoms = len(atoms["x"]) if "x" in atoms else 0

    self.fobj.write(f"{n_atoms}\n")

    # Write comment line
    comment = frame.metadata.get("comment", f"Step={frame.metadata.get('step', 0)}")
    if box is not None:
        comment += f' Lattice="{box.matrix.tolist()}"'
    self.fobj.write(f"{comment}\n")

    # One line per atom
    self.fobj.writelines(
        f"{atom.get('element', 'X')} {atom['x']} {atom['y']} {atom['z']}\n"
        for _, atom in atoms.iterrows()
    )

write_traj

write_traj(trajectory)

Write multiple frames to the XYZ file.

Source code in src/molpy/io/trajectory/xyz.py
191
192
193
194
def write_traj(self, trajectory):
    """Write every frame of ``trajectory`` to the XYZ file, in iteration order.

    Args:
        trajectory: Iterable of frames; each one is passed to ``write_frame``
    """
    emit = self.write_frame
    for snapshot in trajectory:
        emit(snapshot)