Source code for solvation_analysis.pairing

"""
================
Pairing
================
:Author: Orion Cohen, Tingzheng Hou, Kara Fong
:Year: 2021
:Copyright: GNU Public License v3

Elucidate the composition of the the uncoordinated solvent molecules.

Pairing tracks the fraction of all solvent molecules paired with the solute, as well
as the composition of the diluent.

While ``pairing`` can be used in isolation, it is meant to be used
as an attribute of the Solute class. This makes instantiating it and calculating the
solvation data a non-issue.
"""

import pandas as pd
import numpy as np

import solvation_analysis
from solvation_analysis._column_names import (
    FRAME,
    SOLUTE_IX,
    SOLVENT,
    SOLVENT_IX,
    DISTANCE,
)


[docs] class Pairing: """ Calculate the fraction of solutes that are coordinated with each solvent. The pairing fraction is the fraction of solutes that are coordinated with ANY solvent with matching type. So if the pairing of mol1 is 0.5, then 50% of solutes are coordinated with at least 1 mol1. The pairing fractions are made available as an mean over the whole simulation and by frame. Parameters ---------- solvation_data : pandas.DataFrame The solvation dataframe output by Solute. n_frames : int The number of frames in solvation_data. n_solutes : int The number of solutes in solvation_data. n_solvents : dict of {str: int} The number of each kind of solvent. Examples -------- .. code-block:: python # first define Li, BN, and FEC AtomGroups >>> solute = Solute(Li, {'BN': BN, 'FEC': FEC, 'PF6': PF6}) >>> solute.run() >>> solute.pairing.solvent_pairing {'BN': 1.0, 'FEC': 0.210, 'PF6': 0.120} """
[docs] def __init__( self, solvation_data: pd.DataFrame, n_frames: int, n_solutes: int, n_solvents: dict[str, int], ) -> None: self.solvation_data = solvation_data self.n_frames = n_frames self.n_solutes = n_solutes self.solvent_counts = n_solvents self._solvent_pairing, self._pairing_by_frame = self._fraction_coordinated() self._fraction_free_solvents = self._fraction_free_solvent() ( self._diluent_composition, self._diluent_composition_by_frame, self._diluent_counts, ) = self._diluent_composition()
[docs] @staticmethod def from_solute(solute: "solvation_analysis.Solute") -> "Pairing": """ Generate a Pairing object from a solute. Parameters ---------- solute : Solute Returns ------- Pairing """ assert solute.has_run, "The solute must be run before calling from_solute" return Pairing( solute.solvation_data, solute.n_frames, solute.n_solutes, solute.solvent_counts, )
def _fraction_coordinated(self) -> tuple[dict[str, float], pd.DataFrame]: # calculate the fraction of solute coordinated with each solvent counts = self.solvation_data.groupby([FRAME, SOLUTE_IX, SOLVENT]).count()[ SOLVENT_IX ] pairing_series = counts.astype(bool).groupby([SOLVENT, FRAME]).sum() / ( self.n_solutes ) # mean coordinated overall pairing_by_frame = pairing_series.unstack() pairing_normalized = pairing_series / self.n_frames pairing_dict = pairing_normalized.groupby([SOLVENT]).sum().to_dict() return pairing_dict, pairing_by_frame def _fraction_free_solvent(self) -> dict[str, float]: # calculate the fraction of each solvent NOT coordinated with the solute counts = self.solvation_data.groupby([FRAME, SOLVENT_IX, SOLVENT]).count()[ DISTANCE ] totals = counts.groupby([SOLVENT]).count() / self.n_frames n_solvents = np.array( [self.solvent_counts[name] for name in totals.index.values] ) free_solvents = np.ones(len(totals)) - totals / n_solvents return free_solvents.to_dict() def _diluent_composition( self, ) -> tuple[dict[str, float], pd.DataFrame, pd.DataFrame]: coordinated_solvents = self.solvation_data.groupby([FRAME, SOLVENT]).nunique()[ SOLVENT_IX ] solvent_counts = pd.Series(self.solvent_counts) total_solvents = solvent_counts.reindex(coordinated_solvents.index, level=1) diluent_solvents = total_solvents - coordinated_solvents diluent_series = diluent_solvents / diluent_solvents.groupby([FRAME]).sum() diluent_by_frame = diluent_series.unstack().T diluent_counts = diluent_solvents.unstack().T diluent_dict = diluent_by_frame.mean(axis=1).to_dict() return diluent_dict, diluent_by_frame, diluent_counts @property def solvent_pairing(self) -> dict[str, float]: """ A dictionary where keys are residue names (str) and values are the fraction of solutes that contain that residue (float). """ return self._solvent_pairing @property def pairing_by_frame(self) -> pd.DataFrame: """ A pd.Dataframe tracking the mean fraction of each residue across frames. """ return self._pairing_by_frame @property def fraction_free_solvents(self) -> dict[str, float]: """ A dictionary containing the fraction of each solvent that is free. e.g. not coordinated to a solute. """ return self._fraction_free_solvents @property def diluent_composition(self) -> dict[str, float]: """ The fraction of the diluent constituted by each solvent. The diluent is defined as everything that is not coordinated with the solute. """ return self._diluent_composition @property def diluent_composition_by_frame(self) -> pd.DataFrame: """ A DataFrame of the diluent composition in each frame of the trajectory. """ return self._diluent_composition_by_frame @property def diluent_counts(self) -> pd.DataFrame: """ A DataFrame of the raw solvent counts in the diluent in each frame of the trajectory. """ return self._diluent_counts