"""
================
Residence
================
:Author: Orion Cohen, Tingzheng Hou, Kara Fong
:Year: 2021
:Copyright: GNU Public License v3
Understand the dynamic coordination of solvents with the solute.
Residence times for all solvents are automatically calculated from autocovariance
of the solvent-solute adjacency matrix.
While ``residence`` can be used in isolation, it is meant to be used
as an attribute of the Solute class. This makes instantiating it and calculating the
solvation data a non-issue.
"""
import math
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import acovf
from scipy.optimize import curve_fit
import solvation_analysis
from solvation_analysis._column_names import (
SOLVENT,
SOLUTE_ATOM_IX,
SOLVENT_ATOM_IX,
SOLUTE_IX,
)
from solvation_analysis._utils import calculate_adjacency_dataframe
[docs]
class Residence:
"""
Calculate the residence times of solvents.
This class calculates the residence time of each solvent on the solute.
The residence time is in units of Solute frames, so if the Solute object
has 1000 frames over 1 nanosecond, then each frame will be 1 picosecond.
Thus a residence time of 100 would translate to 100 picoseconds.
Two residence time implementations are available. Both calculate the
solute-solvent autocorrelation function for each solute-solvent pair,
take and take the mean over the solvents of each type, this should yield
an exponentially decaying autocorrelation function.
The first implementation fits an exponential curve to the autocorrelation
function and extract the time constant, which is inversely proportional to the
residence time. This result is saved in the ``residence_times_fit`` attribute.
Unfortunately, the fit often fails to converge (value is set to np.nan),
making this method unreliable.
Instead, the default implementation is to simply find point where the
value of the autocorrelation function is 1/e, which is the time constant
of an exact exponential. These values are saved in ``residence_times``.
It is recommended that the user visually inspect the autocorrelation function
with ``Residence.plot_autocorrelation_function`` to ensure an approximately
exponential decay. The residence times are only valid insofar as the autocorrelation
function resembles an exact exponential, it should decays to zero with a long tail.
If the exponential does not decay to zero or its slope does not level off, increasing
the simulation time may help. For this technique to be appropriate, the simulation time
should exceed the residence time.
A fuller description of the method can be found in
`Self, Fong, and Persson <https://pubs-acs-org.libproxy.berkeley.edu/doi/full/10.1021/acsenergylett.9b02118>`_
Parameters
----------
solvation_data : pandas.DataFrame
The solvation data frame output by Solute.
step : int
The spacing of frames in solvation_data. This should be equal
to solute.step.
Examples
--------
.. code-block:: python
# first define Li, BN, and FEC AtomGroups
>>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6})
>>> residence = Residence.from_solute(solute)
>>> residence.residence_times_cutoff
{'BN': 4.02, 'FEC': 3.79, 'PF6': 1.15}
"""
[docs]
def __init__(self, solvation_data: pd.DataFrame, step: int) -> None:
self.solvation_data = solvation_data
self._auto_covariances = self._calculate_auto_covariance_dict()
self._residence_times_cutoff = self._calculate_residence_times_with_cutoff(
self._auto_covariances, step
)
self._residence_times_fit, self._fit_parameters = (
self._calculate_residence_times_with_fit(self._auto_covariances, step)
)
[docs]
@staticmethod
def from_solute(solute: "solvation_analysis.Solute") -> "Residence":
"""
Generate a Residence object from a solute.
Parameters
----------
solute : Solute
Returns
-------
Residence
"""
assert solute.has_run, "The solute must be run before calling from_solute"
return Residence(solute.solvation_data, solute.step)
def _calculate_auto_covariance_dict(self) -> dict[str, np.ndarray]:
partial_index = self.solvation_data.index.droplevel(SOLVENT_ATOM_IX)
unique_indices = np.unique(partial_index)
frame_solute_index = pd.MultiIndex.from_tuples(
unique_indices, names=partial_index.names
)
auto_covariance_dict = {}
for res_name, res_solvation_data in self.solvation_data.groupby([SOLVENT]):
if isinstance(res_name, tuple):
res_name = res_name[0]
adjacency_mini = calculate_adjacency_dataframe(res_solvation_data)
adjacency_df = adjacency_mini.reindex(frame_solute_index, fill_value=0)
auto_covariance = Residence._calculate_auto_covariance(adjacency_df)
# normalize
auto_covariance = auto_covariance / np.max(auto_covariance)
auto_covariance_dict[res_name] = auto_covariance
return auto_covariance_dict
@staticmethod
def _calculate_residence_times_with_cutoff(
auto_covariances: dict[str, np.ndarray],
step: int,
convergence_cutoff: float = 0.1,
) -> dict[str, float]:
residence_times = {}
for res_name, auto_covariance in auto_covariances.items():
if np.min(auto_covariance) > convergence_cutoff:
residence_times[res_name] = np.nan
warnings.warn(
f"the autocovariance for {res_name} does not converge to zero "
"so a residence time cannot be calculated. A longer simulation "
"is required to get a valid estimate of the residence time."
)
unassigned = True
for frame, val in enumerate(auto_covariance):
if val < 1 / math.e:
residence_times[res_name] = frame * step
unassigned = False
break
if unassigned:
residence_times[res_name] = np.nan
return residence_times
@staticmethod
def _calculate_residence_times_with_fit(
auto_covariances: dict[str, np.ndarray], step: int
) -> tuple[dict[str, float], dict[str, tuple[float, float, float]]]:
# calculate the residence times
residence_times = {}
fit_parameters = {}
for res_name, auto_covariance in auto_covariances.items():
res_time, params = Residence._fit_exponential(auto_covariance, res_name)
residence_times[res_name], fit_parameters[res_name] = (
round(res_time * step, 2),
params,
)
return residence_times, fit_parameters
[docs]
def plot_auto_covariance(self, res_name: str) -> tuple[plt.Figure, plt.Axes]:
"""
Plot the autocovariance of a solvent on the solute.
See the discussion in the class docstring for more information.
Parameters
----------
res_name : str
the name of a solvent in the solute.
Returns
-------
fig : matplotlib.Figure
ax : matplotlib.Axes
"""
auto_covariance = self.auto_covariances[res_name]
frames = np.arange(len(auto_covariance))
params = self.fit_parameters[res_name]
def exp_func(x):
return self._exponential_decay(x, *params)
exp_fit = np.array(map(exp_func, frames))
fig, ax = plt.subplots()
ax.plot(frames, auto_covariance, "b-", label="auto covariance")
try:
ax.scatter(frames, exp_fit, label="exponential fit")
except RuntimeError:
warnings.warn(
f"The fit for {res_name} failed so the exponential "
f"fit will not be plotted."
)
ax.hlines(y=1 / math.e, xmin=frames[0], xmax=frames[-1], label="1/e cutoff")
ax.set_xlabel("Timestep (frames)")
ax.set_ylabel("Normalized Autocovariance")
ax.set_ylim(0, 1)
ax.legend()
return fig, ax
@staticmethod
def _exponential_decay(x: np.ndarray, a: float, b: float, c: float) -> np.ndarray:
"""
An exponential decay function.
Args:
x: Independent variable.
a: Initial quantity.
b: Exponential decay constant.
c: Constant.
Returns:
The acf
"""
return a * np.exp(-b * x) + c
@staticmethod
def _fit_exponential(
auto_covariance: np.ndarray, res_name: str
) -> tuple[float, tuple[float, float, float]]:
auto_covariance_norm = auto_covariance / auto_covariance[0]
try:
params, param_covariance = curve_fit(
Residence._exponential_decay,
np.arange(len(auto_covariance_norm)),
auto_covariance_norm,
p0=(1, 0.1, 0.01),
)
tau = 1 / params[1] # p
except RuntimeError:
warnings.warn(
f"The fit for {res_name} failed so its values in"
f"residence_time_fits and fit_parameters will be"
f"set to np.nan."
)
tau, params = np.nan, (np.nan, np.nan, np.nan)
return tau, params
@staticmethod
def _calculate_auto_covariance(adjacency_matrix: pd.DataFrame) -> np.ndarray:
auto_covariances = []
timesteps = adjacency_matrix.index.levels[0]
for solute_ix, solute_df in adjacency_matrix.groupby(
[SOLUTE_IX, SOLUTE_ATOM_IX]
):
# this is needed to make sure auto-covariances can be concatenated later
new_solute_df = solute_df.droplevel([SOLUTE_IX, SOLUTE_ATOM_IX]).reindex(
timesteps, fill_value=0
)
non_zero_cols = new_solute_df.loc[:, (solute_df != 0).any(axis=0)]
auto_covariance_df = non_zero_cols.apply(
acovf,
axis=0,
result_type="expand",
demean=False,
adjusted=True,
fft=True,
)
# timesteps with no binding are getting skipped, we need to make sure to include all timesteps
auto_covariances.append(auto_covariance_df.values)
auto_covariance = np.mean(np.concatenate(auto_covariances, axis=1), axis=1)
return auto_covariance
@property
def auto_covariances(self) -> dict[str, np.ndarray]:
"""
A dictionary where keys are residue names and values are the
autocovariance of the that residue on the solute.
"""
return self._auto_covariances
@property
def residence_times_cutoff(self) -> dict[str, float]:
"""
A dictionary where keys are residue names and values are the
residence times of the that residue on the solute, calculated
with the 1/e cutoff method.
"""
return self._residence_times_cutoff
@property
def residence_times_fit(self) -> dict[str, float]:
"""
A dictionary where keys are residue names and values are the
residence times of the that residue on the solute, calculated
with the exponential fit method.
"""
return self._residence_times_fit
@property
def fit_parameters(self) -> dict[str, tuple[float, float, float]]:
"""
A dictionary where keys are residue names and values are the
parameters for the exponential fit to the autocorrelation function.
"""
return self._fit_parameters