Source code for mass.thermo.conc_sampling.conc_achr

# -*- coding: utf-8 -*-
"""Provides concentration sampling through an ACHR sampler.

Based on sampling implementations in :mod:`cobra.sampling.achr`

"""
import numpy as np
import pandas as pd

from mass.thermo.conc_sampling.conc_hr_sampler import ConcHRSampler, step


[docs]class ConcACHRSampler(ConcHRSampler): """Artificial Centering Hit-and-Run sampler for concentration sampling. A sampler with low memory footprint and good convergence :cite:`KS98`. Notes ----- ACHR generates samples by choosing new directions from the sampling space's center and the warmup points. The implementation used here is the similar as in the Python :mod:`cobra` package. This implementation uses only the initial warmup points to generate new directions and not any other previous iterates. This usually gives better mixing since the startup points are chosen to span the space in a wide manner. This also makes the generated sampling chain quasi-markovian since the center converges rapidly. Memory usage is roughly in the order of:: (number included reactions + number included metabolites)^2 due to the required nullspace matrices and warmup points. So large models easily take up a few GB of RAM. Parameters ---------- concentration_solver : ConcSolver The :class:`.ConcSolver` to use in generating samples. thinning : int The thinning factor for the generated sampling chain as a positive ``int`` > 0. A thinning factor of 10 means samples are returned every 10 steps. nproj : int or None A positive ``int`` > 0 indicating how often to reporject the sampling point into the feasibility space. Avoids numerical issues at the cost of lower sampling. If ``None`` then the value is determined via the following:: nproj = int(min(len(self.concentration_solver.variables)**3, 1e6)) Default is ``None`` seed : int or None A positive ``int`` > 0 indiciating random number seed that should be used. If ``None`` provided, the current time stamp is used. Default is ``None``. Attributes ---------- concentration_solver : ConcSolver The :class:`.ConcSolver` used to generate samples. feasibility_tol : float The tolerance used for checking equalities feasibility. bounds_tol : float The tolerance used for checking bounds feasibility. thinning : int The currently used thinning factor. n_samples : int The total number of samples that have been generated by this sampler instance. retries : int The overall of sampling retries the sampler has observed. Larger values indicate numerical instabilities. problem : collections.namedtuple A :class:`~collections.namedtuple` whose attributes define the entire sampling problem in matrix form. See docstring of :class:`~cobra.sampling.hr_sampler.Problem` for more information. warmup : numpy.matrix A matrix of with as many columns as variables in the model of the :class:`.ConcSolver` and more than 3 rows containing a warmup sample in each row. ``None`` if no warmup points have been generated yet. nproj : int How often to reproject the sampling point into the feasibility space. """ def __init__(self, concentration_solver, thinning=100, nproj=None, seed=None): """Initialize a new ConcACHRSampler.""" super(ConcACHRSampler, self).__init__( concentration_solver, thinning, nproj=nproj, seed=seed ) self.generate_cva_warmup() self.prev = self.center = self.warmup.mean(axis=0) np.random.seed(self.seed)
[docs] def sample(self, n, concs=True): """Generate a set of samples. This is the basic sampling function for all hit-and-run samplers. Notes ----- Performance of this function linearly depends on the number of variables in the model of the :class:`.ConcSolver` and the thinning factor. Parameters ---------- n : int The number of samples that are generated at once. concs : bool Whether to return concentrations or the internal solver variables. If ``False`` will return a variable for each metabolite and reaction equilibrium constant as well as all additional variables that may have been defined in the model of the :class:`.ConcSolver`. Returns ------- numpy.matrix A matrix with ``n`` rows, each containing a concentration sample.
[docs] """ samples = np.zeros((n, self.warmup.shape[1])) for i in range(1, self.thinning * n + 1): self.__single_iteration() if i % self.thinning == 0: samples[ i // self.thinning - 1, ] = self.prev names = [v.name for v in self.concentration_solver.variables] df = pd.DataFrame(samples, columns=names) # Map from logspace back to linspace
df = df.apply(np.exp) if concs: df = df.loc[:, self.concentration_solver.included_metabolites] return df
[docs] def __single_iteration(self): """Perform a single iteration of sampling. Warnings -------- This method is intended for internal use only. """ pi = np.random.randint(self.n_warmup) # Mix in the original warmup points to not get stuck delta = ( self.warmup[ pi, ] - self.center ) self.prev = step(self, self.prev, delta) self.center = (self.n_samples * self.center) / ( self.n_samples + 1 ) + self.prev / (self.n_samples + 1) self.n_samples += 1
__all__ = ("ConcACHRSampler",)