Molecular Models

This chapter introduces basics of modeling at the level of molecules.

Interacting molecules: two atoms

As we have seen in the quantum-level model, real atoms interact continuously via a potential that both attracts at long range and repels at short range. Instead of computing the quantum potential, a standard minimal model capturing this effect is the Lennard-Jones (LJ) potential.

For two atoms this is given by

\[ V(r) = 4\varepsilon\!\left[\left(\frac{\sigma}{r}\right)^{12} - \left(\frac{\sigma}{r}\right)^{6}\right], \]

where, \(r\) is the distance between two atoms, \(\sigma\) is the collision diameter (\(r\) with \(V(r) = 0\)), and \(\varepsilon\) is the well depth. The \(r^{-12}\) term accounts for repulsion and the \(-r^{-6}\) term for attraction. The potential is minimal at \(r^* = 2^{1/6}\sigma \approx 1.12\,\sigma\) with \(V(r^*) = -\varepsilon\).

The potential is visualized below. For simplicity, we use reduced LJ units (\(\sigma = \varepsilon = m = 1\)) so numbers stay of order one.

Code

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import animation
from IPython.display import HTML

r = np.linspace(0.88, 3.5, 400)
eps = 1.0
V = 4 * eps * ((1 / r) ** 12 - (1 / r) ** 6)

fig, ax = plt.subplots(figsize=(4.5, 3))
ax.plot(r, V, color="C0", lw=1.8)
ax.axhline(0, color="gray", lw=0.7, linestyle="--")

# eps annotation
r_min = 2 ** (1 / 6)
ax.axhline(-1, color="gray", lw=0.7, linestyle="--")
ax.annotate("", xy=(r_min, -1), xytext=(r_min, 0),
            arrowprops=dict(arrowstyle="<->", color="C1", lw=1.2))
ax.text(r_min + 0.07, -0.52, r"$\varepsilon$", color="C1", fontsize=10)

# sigma annotation
ax.axvline(1, color="C2", lw=0.8, linestyle="--")

# horizontal double arrow from r=0 to r=sigma=1
y_sigma = 0.0
ax.annotate("",
    xy=(1, y_sigma),
    xytext=(0, y_sigma),
    arrowprops=dict(arrowstyle="<->", color="C2", lw=1.2)
)

ax.text(0.5, y_sigma + 0.12, r"$\sigma$",
        color="C2",
        fontsize=11,
        ha="center",
        va="bottom")

ax.set_xlabel(r"$r / \sigma$")
ax.set_ylabel(r"$V / \varepsilon$")
ax.set_xlim(0, 3.5)
ax.set_ylim(-1.5, 2.5)
ax.set_title("Lennard-Jones potential", fontsize=10)

plt.tight_layout()
plt.show()

Two-body collision

LJ alone is an accurate model for noble gases. As an example consider argon (Ar) with the parameters \(\sigma = 3.4\,\text{Å}\) (see Angstrom, \(1\,\text{Å} = 0.1\,\text{nm}\), about 2 Bohr radii \(a_0\)), \(\varepsilon = 1.65\times10^{-21}\,\text{J}\), and mass \(m \approx 40\,\text{Da}\) (Dalton).

Again, biology is about the interaction of molecules - arguably not noble gases, but this will be useful for a simple “hello world” on the molecular abstraction level.

Let us simulate a collision of two Ar atoms at the molecular level. The force on atom 0 from atom 1 is

\[\mathbf{F} = -\nabla_{\mathbf{r}_0} V = F(r)\,\hat{\mathbf{r}},\]

where the force is given by

\[ F(r) = \frac{4\varepsilon}{r^2}\left[12\left(\frac{\sigma}{r}\right)^{12} - 6\left(\frac{\sigma}{r}\right)^{6}\right], \]

and \(\hat{\mathbf{r}} = (\mathbf{r}_0 - \mathbf{r}_1)/r\) points from atom 1 to atom 0. Newton’s third law also gives \(-\mathbf{F}\) on atom 1.

For simplicity we integrate Newton’s equations with forward Euler at a small time step \(\Delta t = 0.002\,\tau\), where \(\tau = \sqrt{m\sigma^2/\varepsilon}\) is the characteristic time scale determined by the particle mass \(m\), size \(\sigma\), and interaction energy \(\varepsilon\).

Code

# collision setup (in LJ units)
SIGMA = 1.0
EPS   = 1.0
MASS  = 1.0


def lj_force(r_vec: np.ndarray) -> np.ndarray:
    """Force on atom 0 from atom 1.
    
    Input: r_vec = pos[0] - pos[1].
    Output: force vector on atom 0 from atom 1.
    """
    r: float = max(float(np.linalg.norm(r_vec)), 0.5)  # hard floor avoids singularity
    sr6: float = (SIGMA / r) ** 6
    return (4 * EPS * (12 * sr6 ** 2 - 6 * sr6) / r ** 2) * r_vec


def simulate(
    pos0: np.ndarray,
    vel0: np.ndarray,
    dt: float = 0.002,
    n_steps: int = 5000,
    subsample: int = 50,
) -> tuple[np.ndarray, np.ndarray]:
    """Forward Euler trajectory.
    
    Returns (pos_traj, vel_traj) of shape (T, 2, 2)."""
    pos, vel = pos0.copy(), vel0.copy()
    
    # T: number of frames to save (after subsampling)
    T = n_steps // subsample
    
    # preallocate arrays for trajectory
    pos_traj = np.zeros((T, 2, 2))
    vel_traj = np.zeros((T, 2, 2))
    
    # Euler integration loop
    for step in range(n_steps):
        # compute force on atom 0
        f = lj_force(pos[0] - pos[1])
        
        # Newton step for both atoms:
        # dv/dt = f / m
        vel[0] +=  (f / MASS) * dt
        vel[1] -= (f / MASS) * dt

        # Newton step for both atoms: dx/dt = v
        pos    += vel * dt

        # save trajectory at subsampled steps
        if step % subsample == 0:
            idx = step // subsample
            if idx < T:
                pos_traj[idx] = pos
                vel_traj[idx] = vel
    return pos_traj, vel_traj


def make_animation(
    pos_traj: np.ndarray,
    vel_traj: np.ndarray,
    dt_frame: float,
    interval: int = 40,
) -> animation.FuncAnimation:
    # distance between atoms
    r = np.linalg.norm(pos_traj[:, 0] - pos_traj[:, 1], axis=1)
    
    # potential energy (pe):
    # V = 4 * eps * ((sigma / r) ** 12 - (sigma / r) ** 6)
    sr6 = (SIGMA / np.maximum(r, 0.5)) ** 6
    pe = 4 * EPS * (sr6 ** 2 - sr6)
    
    # kinetic energy (ke):
    # E = mv^2/2
    ke = 0.5 * MASS * np.sum(vel_traj ** 2, axis=2)  # (T, 2)
    
    # total energy (total = ke + pe)
    total = ke.sum(axis=1) + pe

    # time points for plotting energy
    t = np.arange(len(pos_traj)) * dt_frame

    # two axes:
    # - left: simulation frame
    # - right: energies over time
    fig, (ax_sim, ax_e) = plt.subplots(
        1, 2, figsize=(5.5, 2.8), gridspec_kw={"width_ratios": [1.3, 1]}
    )

    # simulation frame
    ax_sim.set_title("Ar + Ar colliding (LJ force)", fontsize=9)
    ax_sim.set_aspect("equal")
    ax_sim.set_xlim(-4, 4)
    ax_sim.set_ylim(-2, 2)
    ax_sim.set_axis_off()
    colors = ["C0", "C1"]
    # draw atoms as circles with radius 0.42 (in LJ units)
    circles = [
        ax_sim.add_patch(patches.Circle(
            pos_traj[0, i], 0.42,
            lw=1.2, edgecolor=colors[i], facecolor=colors[i], alpha=0.55
        ))
        for i in range(2)
    ]

    # energy plot
    e_lo = min(pe.min(), 0) * 1.4
    e_hi = ke.sum(axis=1).max() * 1.4
    ax_e.set_xlim(0, t[-1])
    ax_e.set_ylim(e_lo, e_hi)
    ax_e.set_xlabel(r"time ($\tau$)", fontsize=8)
    ax_e.set_ylabel(r"energy ($\varepsilon$)", fontsize=8)
    ax_e.tick_params(labelsize=7)
    ax_e.axhline(total[0], color="gray", lw=0.7, ls=":", label="E(0)")

    ke_lines = [
        ax_e.plot([], [], color=colors[i], lw=1, label=f"kinetic E (atom {i})")[0]
        for i in range(2)
    ]
    pe_line  = ax_e.plot([], [], color="C4",   lw=1,   label="potential E")[0]
    tot_line = ax_e.plot([], [], color="black", lw=1.2, ls="--", label="E")[0]
    ax_e.legend(fontsize=6, loc="upper right")

    plt.tight_layout()

    # scale bar: 1 nm = 10/3.4 σ
    _sb = 10.0 / 3.4
    _sx, _sy = -3.6, -1.75
    ax_sim.plot([_sx, _sx + _sb], [_sy, _sy], 'k-', lw=2, solid_capstyle='butt')
    ax_sim.plot([_sx, _sx], [_sy - 0.08, _sy + 0.08], 'k-', lw=1.5)
    ax_sim.plot([_sx + _sb, _sx + _sb], [_sy - 0.08, _sy + 0.08], 'k-', lw=1.5)
    ax_sim.text(_sx + _sb / 2, _sy - 0.15, '1 nm', ha='center', va='top', fontsize=7)

    # animation callback for frame update
    def update(frame: int) -> list:
        # update atom positions
        for i, c in enumerate(circles):
            c.center = pos_traj[frame, i]
        s = slice(None, frame + 1)
        for i, ln in enumerate(ke_lines):
            ln.set_data(t[s], ke[s, i])
        pe_line.set_data(t[s], pe[s])
        tot_line.set_data(t[s], total[s])
        return circles + ke_lines + [pe_line, tot_line]

    anim = animation.FuncAnimation(fig, update, frames=len(pos_traj), interval=interval)
    plt.close(fig)
    return anim

Code

# collision setup (in LJ units)
# feel free to change initial conditions and see what happens
pos0 = np.array([[-2.5, 0.0], [2.5, -1.2]])
vel0 = np.array([[ 0.5, 0.1], [-0.4, 0.0]])

pos_traj, vel_traj = simulate(pos0, vel0)
HTML(make_animation(pos_traj, vel_traj, dt_frame=0.002 * 50).to_jshtml())

One clearly sees the two atoms colliding without “touching” each other.

Side note: Simulation via Euler causes drifting energy

In the above simulation total energy may grow slowly over time. This is a known artefact of the simple forward Euler method that we used for didactic purposes: the integrator is first-order and not time-reversible, so it systematically injects energy into the system. The drift is small here because \(\Delta t = 0.002\,\tau\) is short, but it accumulates in long runs. Real molecular dynamic simulations use velocity Verlet integration.

Colliding water molecules

Argon is not very relevant biologically and moreover is a single atom, unlike the chapter’s title that promised molecules. Water is considered the E. coli of computational chemistry: it is a not trivial molecule and living forms are water to a large extent.

Unfortunately, for water, the Lennard-Jones model alone is not enough: the hydrogen bond is electrostatic and directional which is not captured by Lennard-Jones.

Instead the TIP3P model is commonly used. It represents each water molecule as three point charges on a rigid frame:

site	charge	\(\sigma\) (Å)	\(\varepsilon\) (kJ/mol)
O	−0.834 e	3.151	0.636
H×2	+0.417 e	0	0

The O–H bond length is \(0.9572\,\text{Å}\) and the H–O–H angle is \(104.52°\). The geometry is kept rigid during the simulation.

We use OpenMM to run the simulation with a Verlet integrator at \(\Delta t = 1\,\text{fs}\). Again we let our constructs collide gently and show the animation below. We will see that the water molecules attract each other by forming transient hydrogen-bonds (shown as dashed line in the animation). This phenomenon is known as water dimers. Hydrogen bonds are all over the place in biology: remember for example the hydrogen bonds in DNA between the base pairs.

Code

# %pip install openmm
# %pip install matplotlib

from openmm import app, unit, Vec3
import openmm
import matplotlib.patches as mpatches

def simulate_water_dimer(
    d0: float = 1,
    v_approach: float = 0.04,
    n_steps: int = 7500,
    subsample: int = 50,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """TIP3P water dimer via OpenMM Verlet.

    Parameters
    ----------
    d0          Initial O-O distance (nm).
    v_approach  Initial approach speed of each molecule (nm/ps).
    n_steps     Total integration steps (1 fs each).
    subsample   Record every `subsample` steps.

    Returns
    -------
    pos_traj  shape (T, 6, 3) in Angstrom
    ke_traj   shape (T,) in kJ/mol
    pe_traj   shape (T,) in kJ/mol
    """
    # topology: 2 water molecules, each with 1 O and 2 H atoms, and bonds between them
    top = app.Topology()
    chain = top.addChain()
    for _ in range(2):
        # for each water molecule (2 in total)
        res = top.addResidue('HOH', chain)
        O  = top.addAtom('O',  app.Element.getBySymbol('O'), res)
        H1 = top.addAtom('H1', app.Element.getBySymbol('H'), res)
        H2 = top.addAtom('H2', app.Element.getBySymbol('H'), res)
        top.addBond(O, H1)
        top.addBond(O, H2)

    # use the TIP3P water model for forces and energies
    ff = app.ForceField('tip3p.xml')
    system = ff.createSystem(
        top,
        nonbondedMethod=app.NoCutoff,
        constraints=app.HAngles,
    )

    # Remove center-of-mass motion remover (not desired for small systems)
    for i in reversed(range(system.getNumForces())):
        if isinstance(system.getForce(i), openmm.CMMotionRemover):
            system.removeForce(i)

    # water geometry parameters
    r_OH = 0.09572  # O-H bond length (nm)
    a    = 104.52 * np.pi / 180  # H-O-H angle (radians)
    c52  = np.cos(a / 2)  # cosine of half the angle
    s52  = np.sin(a / 2)  # sine of half the angle

    # initial positions of the 6 atoms (2 waters x 3 atoms each)
    # "H" of molecule 0 points towards "O" of molecule 1
    # all positions with z = 0
    positions = unit.Quantity([
        Vec3(0.0,           0.0,       0),
        Vec3(r_OH,          0.0,       0),
        Vec3(-r_OH * c52,   r_OH * s52, 0),
        Vec3(d0,            0.0,       0),
        Vec3(d0 + r_OH * c52,  r_OH * s52,  0),
        Vec3(d0 + r_OH * c52, -r_OH * s52,  0),
    ], unit.nanometer)

    integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond)
    sim = app.Simulation(top, system, integrator)
    sim.context.setPositions(positions)

    # initial velocities: both molecules approach each other along x-axis
    sim.context.setVelocities(unit.Quantity([
        Vec3( v_approach, 0, 0), Vec3( v_approach, 0, 0), Vec3( v_approach, 0, 0),
        Vec3(-v_approach, 0, 0), Vec3(-v_approach, 0, 0), Vec3(-v_approach, 0, 0),
    ], unit.nanometer / unit.picosecond))

    # frames to save (after subsampling)
    T = n_steps // subsample

    # allocate arrays for trajectory
    pos_traj = np.zeros((T, 6, 3))
    ke_traj = np.zeros(T)
    pe_traj = np.zeros(T)

    # integration loop
    for step in range(n_steps):
        # make one integration step
        sim.step(1)

        # save trajectory at subsampled steps
        if step % subsample == 0:
            idx = step // subsample
            if idx < T:
                s = sim.context.getState(getPositions=True, getEnergy=True)
                pos_traj[idx] = s.getPositions(asNumpy=True).value_in_unit(unit.angstrom)
                ke_traj[idx]  = s.getKineticEnergy().value_in_unit(unit.kilojoule_per_mole)
                pe_traj[idx]  = s.getPotentialEnergy().value_in_unit(unit.kilojoule_per_mole)
    return pos_traj, ke_traj, pe_traj


def make_water_animation(
    pos_traj: np.ndarray,
    ke_traj: np.ndarray,
    pe_traj: np.ndarray,
    dt_frame: float,  # in ps
    interval: int = 50,  # in ms between frames in the animation
) -> animation.FuncAnimation:
    # time points for plotting energy
    t = np.arange(len(pos_traj)) * dt_frame
    
    # total energy (total = ke + pe)
    total = ke_traj + pe_traj

    # two axes:
    # - left: simulation frame
    # - right: energies over time
    fig, (ax_m, ax_e) = plt.subplots(
        1, 2, figsize=(5.5, 3), gridspec_kw={'width_ratios': [1.2, 1]}
    )

    # simulation frame
    ax_m.set_aspect('equal')
    ax_m.set_xlim(-1.5, 11.5)
    ax_m.set_ylim(-3.0, 3.0)
    ax_m.set_axis_off()
    ax_m.set_title('TIP3P water dimer', fontsize=9)

    r_O, r_H = 0.66, 0.31  # display radii in Å
    o_colors = ['C3', 'steelblue']

    # atom patches — created once, positions updated each frame
    o_patches = [
        ax_m.add_patch(mpatches.Circle(
            pos_traj[0, 3 * mol, :2], r_O,
            lw=1.2, edgecolor='black', facecolor=o_colors[mol], zorder=3
        ))
        for mol in range(2)
    ]
    h_patches = [
        ax_m.add_patch(mpatches.Circle(
            pos_traj[0, 3 * mol + hi, :2], r_H,
            lw=0.8, edgecolor='gray', facecolor='white', zorder=3
        ))
        for mol in range(2) for hi in [1, 2]
    ]
    bond_lines = [ax_m.plot([], [], 'k-', lw=1.5, zorder=2)[0] for _ in range(4)]
    hbond_line = ax_m.plot([], [], 'b--', lw=1.0, alpha=0.8, zorder=1)[0]

    # energy plot
    ax_e.set_xlim(0, t[-1])
    ax_e.set_ylim(min(pe_traj.min(), -25), max(ke_traj.max(), 1) * 1.3)
    ax_e.set_xlabel('time (ps)', fontsize=8)
    ax_e.set_ylabel('energy (kJ/mol)', fontsize=8)
    ax_e.tick_params(labelsize=7)
    ax_e.axhline(total[0], color='gray', lw=0.7, ls=':')
    ke_line  = ax_e.plot([], [], color='C1',   lw=1,   label='kinetic energy')[0]
    pe_line  = ax_e.plot([], [], color='C4',   lw=1,   label='potential energy')[0]
    tot_line = ax_e.plot([], [], color='black', lw=1.2, ls='--', label='total energy')[0]
    ax_e.legend(fontsize=6, loc='upper right')
    plt.tight_layout()

    # scale bar: 1 nm = 10 Å
    _sb = 10.0
    _sx, _sy = -1.2, -2.7
    ax_m.plot([_sx, _sx + _sb], [_sy, _sy], 'k-', lw=2, solid_capstyle='butt')
    ax_m.plot([_sx, _sx], [_sy - 0.1, _sy + 0.1], 'k-', lw=1.5)
    ax_m.plot([_sx + _sb, _sx + _sb], [_sy - 0.1, _sy + 0.1], 'k-', lw=1.5)
    ax_m.text(_sx + _sb / 2, _sy - 0.15, '1 nm', ha='center', va='top', fontsize=7)

    # animation callback for frame update
    def update(frame: int) -> list:
        # update atom positions
        for mol in range(2):
            base = 3 * mol
            o_patches[mol].set_center(pos_traj[frame, base, :2])
            h_patches[2 * mol    ].set_center(pos_traj[frame, base + 1, :2])
            h_patches[2 * mol + 1].set_center(pos_traj[frame, base + 2, :2])
            for hi, bl in enumerate(bond_lines[2 * mol: 2 * mol + 2]):
                xy = np.array([pos_traj[frame, base, :2],
                               pos_traj[frame, base + 1 + hi, :2]])
                bl.set_data(xy[:, 0], xy[:, 1])
        
        # H-bond indicator:
        # closest H of mol 0 to O of mol 1
        h_dists = [np.linalg.norm(pos_traj[frame, h] - pos_traj[frame, 3])
                   for h in [1, 2]]
        best_h  = 1 + int(np.argmin(h_dists))
        if min(h_dists) < 2.8:  # 2.8 Å H-bond cutoff
            xy = np.array([pos_traj[frame, best_h, :2], pos_traj[frame, 3, :2]])
            hbond_line.set_data(xy[:, 0], xy[:, 1])
        else:
            hbond_line.set_data([], [])
        s = slice(None, frame + 1)
        ke_line.set_data(t[s], ke_traj[s])
        pe_line.set_data(t[s], pe_traj[s])
        tot_line.set_data(t[s], total[s])
        return o_patches + h_patches + bond_lines + [hbond_line, ke_line, pe_line, tot_line]

    anim = animation.FuncAnimation(fig, update, frames=len(pos_traj), interval=interval)
    plt.close(fig)
    return anim

Code

pos_w, ke_w, pe_w = simulate_water_dimer()
HTML(make_water_animation(pos_w, ke_w, pe_w, dt_frame=1e-3 * 50).to_jshtml())

More water

Water is more then 2 molecules. The following shows a simulation of water at 300 K (nice summer temperature of 26.85°C) of a 1nm cube of water. Since the tight bounding box would influence the water behavior, we use periodic boundary conditions: molecules that exit on the right return from the left.

Instead of building molecule by molecule, we just add the right amount via the function addSolvent in openmm.

The animation is shown as a projection to 2d for simplicity.

Code

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import animation
from IPython.display import HTML

from openmm import app, unit, Vec3
import openmm

# integration parameters
dt = 1.0 * unit.femtosecond
n_steps: int = 500


def simulate_water_box(
    box_length_nm: float = 1.0,
    temperature_K: float = 300.0,
    n_steps: int = n_steps,
    subsample: int = 20,
):
    """Simulate a small periodic TIP3P water box.

    Returns
    -------
    pos_traj : shape (T, 3*n_waters, 3), positions in Å
    box_size_A : box length in Å
    """

    ff = app.ForceField("tip3p.xml")

    modeller = app.Modeller(app.Topology(), [])

    modeller.addSolvent(
        ff,
        model="tip3p",
        boxSize=Vec3(box_length_nm, box_length_nm, box_length_nm) * unit.nanometer,
    )

    system = ff.createSystem(
        modeller.topology,
        nonbondedMethod=app.PME,
        nonbondedCutoff=0.45 * unit.nanometer,  # must be < box_length/2
        constraints=app.HBonds,
    )

    integrator = openmm.LangevinMiddleIntegrator(
        temperature_K * unit.kelvin,
        1.0 / unit.picosecond,
        dt,
    )

    sim = app.Simulation(modeller.topology, system, integrator)
    sim.context.setPositions(modeller.positions)
    sim.context.setVelocitiesToTemperature(temperature_K * unit.kelvin)

    sim.minimizeEnergy()

    T = n_steps // subsample
    n_atoms = modeller.topology.getNumAtoms()
    pos_traj = np.zeros((T, n_atoms, 3))

    # integration loop
    for step in range(n_steps):
        # step of duration dt
        sim.step(1)

        if step % subsample == 0:
            idx = step // subsample
            if idx < T:
                state = sim.context.getState(getPositions=True)
                pos = state.getPositions(asNumpy=True).value_in_unit(unit.angstrom)

                # wrap positions into the periodic box for nicer display
                box_size_A = box_length_nm * 10.0
                pos_traj[idx] = pos % box_size_A

    return pos_traj, box_length_nm * 10.0


def make_water_box_animation(
    pos_traj: np.ndarray,
    box_size_A: float,
    interval: int = 40,
):
    fig, ax = plt.subplots(figsize=(4, 4))

    ax.set_aspect("equal")
    ax.set_xlim(0, box_size_A)
    ax.set_ylim(0, box_size_A)
    ax.set_xlabel("x (Å)")
    ax.set_ylabel("y (Å)")
    ax.set_title(f"water box (TIP3P model) during {n_steps*dt}")

    # draw periodic box
    ax.add_patch(
        mpatches.Rectangle(
            (0, 0), box_size_A, box_size_A,
            fill=False, lw=1.5, edgecolor="black"
        )
    )

    n_atoms = pos_traj.shape[1]
    n_waters = n_atoms // 3

    r_O, r_H = 0.55, 0.28  # display radii in Å

    o_patches = []
    h_patches = []
    bond_lines = []

    for mol in range(n_waters):
        base = 3 * mol

        o = ax.add_patch(
            mpatches.Circle(
                pos_traj[0, base, :2],
                r_O,
                lw=0.8,
                edgecolor="black",
                facecolor="C3",
                zorder=3,
            )
        )
        o_patches.append(o)

        for hi in [1, 2]:
            h = ax.add_patch(
                mpatches.Circle(
                    pos_traj[0, base + hi, :2],
                    r_H,
                    lw=0.6,
                    edgecolor="gray",
                    facecolor="white",
                    zorder=3,
                )
            )
            h_patches.append(h)

            line = ax.plot([], [], "k-", lw=0.8, zorder=2)[0]
            bond_lines.append(line)

    def update(frame: int):
        artists = []

        for mol in range(n_waters):
            base = 3 * mol

            O = pos_traj[frame, base]
            H1 = pos_traj[frame, base + 1]
            H2 = pos_traj[frame, base + 2]

            o_patches[mol].set_center(O[:2])
            h_patches[2 * mol].set_center(H1[:2])
            h_patches[2 * mol + 1].set_center(H2[:2])

            artists.extend([
                o_patches[mol],
                h_patches[2 * mol],
                h_patches[2 * mol + 1],
            ])

            for j, H in enumerate([H1, H2]):
                line = bond_lines[2 * mol + j]

                # only draw bond if molecule is not split across periodic boundary
                if np.linalg.norm(O[:2] - H[:2]) < 2.0:
                    xy = np.array([O[:2], H[:2]])
                    line.set_data(xy[:, 0], xy[:, 1])
                else:
                    line.set_data([], [])

                artists.append(line)

        return artists

    anim = animation.FuncAnimation(
        fig,
        update,
        frames=len(pos_traj),
        interval=interval,
        blit=True,
    )

    plt.close(fig)
    return anim

Code

pos_traj, box_size_A = simulate_water_box(
    box_length_nm=1.0,
    n_steps=500,
    subsample=20,
)

anim = make_water_box_animation(pos_traj, box_size_A)
HTML(anim.to_jshtml())

Simulating proteins

We are getting in position to simulate larger molecules such as proteins as chains of amino acids. However, the explicit water box above does not scale. A protein in water requires a large amount of water molecules and salts as an environment. The vast majority of simulation time would be spent computing forces between water molecules rather than on the protein itself.

The standard approach is using a so-called implicit solvent: replace the water entirely with a mean-field approximation of the forces it generates. The Generalized Born (GB) model computes the electrostatic contribution to solvation as if the protein were embedded in a dielectric continuum. The result is accurate enough for many applications in biology.

As an example we simulate the “hello world” of peptides (short proteins): tri-alanine (Ala–Ala–Ala), i.e., a chain of three alanine residues.

Code

import nglview.datafiles as nv_data
from openmm.app import PDBFile, ForceField, Simulation

_ELEM_COLOR  = {'N': 'royalblue', 'O': 'tomato', 'C': '#a0a0a0', 'H': '#e0e0e0'}
_ELEM_RADIUS = {'N': 0.60, 'O': 0.60, 'C': 0.60, 'H': 0.32}

dt = 2.0 * unit.femtosecond
n_steps: int = 20000

def simulate_ala3(
    n_steps: int = n_steps,
    subsample: int = 100,
    temperature_K: float = 300.0,
) -> tuple[np.ndarray, list[tuple[int, int]], list[str]]:
    """Tri-alanine in implicit solvent (AMBER14 + OBC2 GB).

    Returns
    -------
    pos_traj  shape (T, n_atoms, 3) in Å
    bonds     list of (i, j) covalent bond index pairs
    elements  list of element symbols per atom
    """

    # load the peptide
    pdb = PDBFile(nv_data.ALA3)

    # implicit/obc2.xml adds the Generalized Born force (OBC model II)
    ff  = ForceField('amber14-all.xml', 'implicit/obc2.xml')
    system = ff.createSystem(
        pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=app.HBonds,
    )
    integrator = openmm.LangevinMiddleIntegrator(
        temperature_K * unit.kelvin,
        1.0 / unit.picosecond,
        dt,
    )
    sim = Simulation(pdb.topology, system, integrator)
    sim.context.setPositions(pdb.positions)
    sim.context.setVelocitiesToTemperature(temperature_K * unit.kelvin)
    sim.minimizeEnergy()

    bonds    = [(b.atom1.index, b.atom2.index) for b in pdb.topology.bonds()]
    elements = [a.element.symbol for a in pdb.topology.atoms()]

    T        = n_steps // subsample
    n_atoms  = pdb.topology.getNumAtoms()
    pos_traj = np.zeros((T, n_atoms, 3))

    # integration loop
    for step in range(n_steps):
        sim.step(1)
        if step % subsample == 0:
            idx = step // subsample
            if idx < T:
                state = sim.context.getState(getPositions=True)
                pos_traj[idx] = state.getPositions(asNumpy=True).value_in_unit(unit.angstrom)

    return pos_traj, bonds, elements


def make_ala3_animation(
    pos_traj: np.ndarray,
    bonds: list[tuple[int, int]],
    elements: list[str],
    interval: int = 50,
) -> animation.FuncAnimation:
    half = 8.0  # viewport half-width in Å (COM-centred)

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.set_aspect('equal')
    ax.set_xlim(-half, half)
    ax.set_ylim(-half, half)
    ax.set_axis_off()
    ax.set_title(f'Tri-alanine (implicit solvent, 300 K) during {n_steps*dt.in_units_of(unit.picosecond)}', fontsize=9)

    ax.legend(handles=[
        mpatches.Patch(facecolor=c, label=s)
        for s, c in [('C', '#a0a0a0'), ('N', 'royalblue'), ('O', 'tomato')]
    ], fontsize=7, loc='upper right')

    # scale bar: 1 nm = 10 Å
    _sb = 10.0
    _sx, _sy = -7.5, -7.0
    ax.plot([_sx, _sx + _sb], [_sy, _sy], 'k-', lw=2, solid_capstyle='butt')
    ax.plot([_sx, _sx], [_sy - 0.2, _sy + 0.2], 'k-', lw=1.5)
    ax.plot([_sx + _sb, _sx + _sb], [_sy - 0.2, _sy + 0.2], 'k-', lw=1.5)
    ax.text(_sx + _sb / 2, _sy - 0.3, '1 nm', ha='center', va='top', fontsize=7)

    bond_lines = [ax.plot([], [], '-', color='#606060', lw=1.0, zorder=1)[0]
                  for _ in bonds]

    com0 = pos_traj[0, :, :2].mean(axis=0)
    p0   = pos_traj[0, :, :2] - com0
    atom_patches = [
        ax.add_patch(mpatches.Circle(
            p0[i], _ELEM_RADIUS.get(elements[i], 0.5),
            lw=0.6, edgecolor='#404040',
            facecolor=_ELEM_COLOR.get(elements[i], 'gray'), zorder=3,
        ))
        for i in range(len(elements))
    ]

    def update(frame: int) -> list:
        com = pos_traj[frame, :, :2].mean(axis=0)
        p   = pos_traj[frame, :, :2] - com
        for i, patch in enumerate(atom_patches):
            patch.set_center(p[i])
        for k, (i, j) in enumerate(bonds):
            bond_lines[k].set_data([p[i, 0], p[j, 0]], [p[i, 1], p[j, 1]])
        return atom_patches + bond_lines

    anim = animation.FuncAnimation(fig, update, frames=len(pos_traj), interval=interval)
    plt.close(fig)
    return anim

Code

pos_ala, bonds_ala, elems_ala = simulate_ala3()
HTML(make_ala3_animation(pos_ala, bonds_ala, elems_ala).to_jshtml())