################################################################################
# Copyright (C) 2011-2014 Jaakko Luttinen
#
# This file is licensed under the MIT License.
################################################################################
"""
Module for the Gaussian distribution and similar distributions.
"""
import numpy as np
from scipy import special
import truncnorm
from bayespy.utils import (random,
misc,
linalg)
from bayespy.utils.linalg import dot, mvdot
from .expfamily import (ExponentialFamily,
ExponentialFamilyDistribution,
useconstructor)
from .wishart import (WishartMoments,
WishartPriorMoments)
from .gamma import (GammaMoments,
GammaDistribution,
GammaPriorMoments)
from .deterministic import Deterministic
from .node import (Moments,
ensureparents)
#
# MOMENTS
#
[docs]class GaussianMoments(Moments):
r"""
Class for the moments of Gaussian variables.
"""
[docs] def __init__(self, shape):
self.shape = shape
self.ndim = len(shape)
self.dims = (shape, 2*shape)
super().__init__()
[docs] def compute_fixed_moments(self, x):
r"""
Compute the moments for a fixed value
"""
x = np.asanyarray(x)
x = misc.atleast_nd(x, self.ndim)
return [x, linalg.outer(x, x, ndim=self.ndim)]
[docs] @classmethod
def from_values(cls, x, ndim):
r"""
Return the shape of the moments for a fixed value.
"""
if ndim == 0:
return cls(())
else:
return cls(np.shape(x)[-ndim:])
[docs] def get_instance_conversion_kwargs(self):
return dict(ndim=self.ndim)
[docs] def get_instance_converter(self, ndim):
if ndim == self.ndim or ndim is None:
return None
return GaussianToGaussian(self, ndim)
class GaussianToGaussian():
def __init__(self, moments_from, ndim_to):
if not isinstance(moments_from, GaussianMoments):
raise ValueError()
if ndim_to < 0:
return ValueError("ndim_to must be non-negative")
self.shape_from = moments_from.shape
self.ndim_from = moments_from.ndim
self.ndim_to = ndim_to
if self.ndim_to > self.ndim_from:
raise ValueError()
if self.ndim_to == 0:
self.moments = GaussianMoments(())
else:
self.moments = GaussianMoments(self.shape_from[-self.ndim_to:])
return
def compute_moments(self, u):
if self.ndim_to == self.ndim_from:
return u
u0 = u[0]
u1 = misc.get_diag(u[1], ndim=self.ndim_from, ndim_to=self.ndim_to)
return [u0, u1]
def compute_message_to_parent(self, m, u_parent):
# Handle broadcasting in m_child
m0 = m[0] * np.ones(self.shape_from)
m1 = (
misc.make_diag(m[1], ndim=self.ndim_from, ndim_from=self.ndim_to)
* misc.identity(*self.shape_from)
)
return [m0, m1]
def compute_weights_to_parent(self, weights):
diff = self.ndim_from - self.ndim_to
if diff == 0:
return weights
return np.sum(
weights * np.ones(self.shape_from[:diff]),
#misc.atleast_nd(weights, diff),
axis=tuple(range(-diff, 0))
)
def plates_multiplier_from_parent(self, plates_multiplier):
diff = self.ndim_from - self.ndim_to
return plates_multiplier + diff * (1,)
def plates_from_parent(self, plates):
diff = self.ndim_from - self.ndim_to
if diff == 0:
return plates
return plates + self.shape_from[:diff]
def plates_to_parent(self, plates):
diff = self.ndim_from - self.ndim_to
if diff == 0:
return plates
return plates[:-diff]
[docs]class GaussianGammaMoments(Moments):
r"""
Class for the moments of Gaussian-gamma-ISO variables.
"""
[docs] def __init__(self, shape):
r"""
Create moments object for Gaussian-gamma isotropic variables
ndim=0: scalar
ndim=1: vector
ndim=2: matrix
...
"""
self.shape = shape
self.ndim = len(shape)
self.dims = (shape, 2*shape, (), ())
super().__init__()
[docs] def compute_fixed_moments(self, x_alpha):
r"""
Compute the moments for a fixed value
`x` is a mean vector.
`alpha` is a precision scale
"""
(x, alpha) = x_alpha
x = np.asanyarray(x)
alpha = np.asanyarray(alpha)
u0 = x * misc.add_trailing_axes(alpha, self.ndim)
u1 = (linalg.outer(x, x, ndim=self.ndim)
* misc.add_trailing_axes(alpha, 2*self.ndim))
u2 = np.copy(alpha)
u3 = np.log(alpha)
u = [u0, u1, u2, u3]
return u
[docs] @classmethod
def from_values(cls, x_alpha, ndim):
r"""
Return the shape of the moments for a fixed value.
"""
(x, alpha) = x_alpha
if ndim == 0:
shape = ( (), (), (), () )
else:
shape = np.shape(x)[-ndim:]
return cls(shape)
[docs] def get_instance_conversion_kwargs(self):
return dict(ndim=self.ndim)
[docs] def get_instance_converter(self, ndim):
# FIXME/TODO: IMPLEMENT THIS CORRECTLY!
if ndim != self.ndim:
raise NotImplementedError(
"Conversion to different ndim in GaussianMoments not yet "
"implemented."
)
return None
[docs]class GaussianWishartMoments(Moments):
r"""
Class for the moments of Gaussian-Wishart variables.
"""
[docs] def __init__(self, shape):
self.shape = shape
self.ndim = len(shape)
self.dims = ( shape, (), 2*shape, () )
super().__init__()
[docs] def compute_fixed_moments(self, x, Lambda):
r"""
Compute the moments for a fixed value
`x` is a vector.
`Lambda` is a precision matrix
"""
x = np.asanyarray(x)
Lambda = np.asanyarray(Lambda)
u0 = linalg.mvdot(Lambda, x, ndim=self.ndim)
u1 = np.einsum(
'...i,...ij,...j->...',
misc.flatten_axes(x, self.ndim),
misc.flatten_axes(Lambda, self.ndim, self.ndim),
misc.flatten_axes(x, self.ndim)
)
u2 = np.copy(Lambda)
u3 = linalg.logdet_cov(Lambda, ndim=self.ndim)
return [u0, u1, u2, u3]
[docs] @classmethod
def from_values(self, x, Lambda, ndim):
r"""
Return the shape of the moments for a fixed value.
"""
if ndim == 0:
return cls(())
else:
if np.ndim(x) < ndim:
raise ValueError("Mean must be a vector")
shape = np.shape(x)[-ndim:]
if np.shape(Lambda)[-2*ndim:] != shape + shape:
raise ValueError("Shapes inconsistent")
return cls(shape)
#
# DISTRIBUTIONS
#
[docs]class GaussianDistribution(ExponentialFamilyDistribution):
r"""
Class for the VMP formulas of Gaussian variables.
Currently, supports only vector variables.
Notes
-----
Message passing equations:
.. math::
\mathbf{x} &\sim \mathcal{N}(\boldsymbol{\mu}, \mathbf{\Lambda}),
.. math::
\mathbf{x},\boldsymbol{\mu} \in \mathbb{R}^{D},
\quad \mathbf{\Lambda} \in \mathbb{R}^{D \times D},
\quad \mathbf{\Lambda} \text{ symmetric positive definite}
.. math::
\log\mathcal{N}( \mathbf{x} | \boldsymbol{\mu}, \mathbf{\Lambda} )
&=
- \frac{1}{2} \mathbf{x}^{\mathrm{T}} \mathbf{\Lambda} \mathbf{x}
+ \mathbf{x}^{\mathrm{T}} \mathbf{\Lambda} \boldsymbol{\mu}
- \frac{1}{2} \boldsymbol{\mu}^{\mathrm{T}} \mathbf{\Lambda}
\boldsymbol{\mu}
+ \frac{1}{2} \log |\mathbf{\Lambda}|
- \frac{D}{2} \log (2\pi)
"""
[docs] def __init__(self, shape):
self.shape = shape
self.ndim = len(shape)
self.set_limits(None, None)
super().__init__()
[docs] def set_limits(self, minimum=None, maximum=None):
self.minimum = minimum
self.maximum = maximum
self.has_limits = minimum is not None or maximum is not None
return
[docs] def compute_message_to_parent(self, parent, index, u, u_mu_Lambda):
r"""
Compute the message to a parent node.
.. math::
\boldsymbol{\phi}_{\boldsymbol{\mu}} (\mathbf{x}, \mathbf{\Lambda})
&=
\left[ \begin{matrix}
\mathbf{\Lambda} \mathbf{x}
\\
- \frac{1}{2} \mathbf{\Lambda}
\end{matrix} \right]
\\
\boldsymbol{\phi}_{\mathbf{\Lambda}} (\mathbf{x}, \boldsymbol{\mu})
&=
\left[ \begin{matrix}
- \frac{1}{2} \mathbf{xx}^{\mathrm{T}}
+ \frac{1}{2} \mathbf{x}\boldsymbol{\mu}^{\mathrm{T}}
+ \frac{1}{2} \boldsymbol{\mu}\mathbf{x}^{\mathrm{T}}
- \frac{1}{2} \boldsymbol{\mu\mu}^{\mathrm{T}}
\\
\frac{1}{2}
\end{matrix} \right]
"""
if index == 0:
x = u[0]
xx = u[1]
m0 = x
m1 = -0.5
m2 = -0.5*xx
m3 = 0.5
return [m0, m1, m2, m3]
else:
raise ValueError("Index out of bounds")
[docs] def compute_phi_from_parents(self, u_mu_Lambda, mask=True):
r"""
Compute the natural parameter vector given parent moments.
.. math::
\boldsymbol{\phi} (\boldsymbol{\mu}, \mathbf{\Lambda})
&=
\left[ \begin{matrix}
\mathbf{\Lambda} \boldsymbol{\mu}
\\
- \frac{1}{2} \mathbf{\Lambda}
\end{matrix} \right]
"""
Lambda_mu = u_mu_Lambda[0]
Lambda = u_mu_Lambda[2]
return [Lambda_mu,
-0.5 * Lambda]
[docs] def compute_moments_and_cgf(self, phi, mask=True):
r"""
Compute the moments and :math:`g(\phi)`.
.. math::
\overline{\mathbf{u}} (\boldsymbol{\phi})
&=
\left[ \begin{matrix}
- \frac{1}{2} \boldsymbol{\phi}^{-1}_2 \boldsymbol{\phi}_1
\\
\frac{1}{4} \boldsymbol{\phi}^{-1}_2 \boldsymbol{\phi}_1
\boldsymbol{\phi}^{\mathrm{T}}_1 \boldsymbol{\phi}^{-1}_2
- \frac{1}{2} \boldsymbol{\phi}^{-1}_2
\end{matrix} \right]
\\
g_{\boldsymbol{\phi}} (\boldsymbol{\phi})
&=
\frac{1}{4} \boldsymbol{\phi}^{\mathrm{T}}_1 \boldsymbol{\phi}^{-1}_2
\boldsymbol{\phi}_1
+ \frac{1}{2} \log | -2 \boldsymbol{\phi}_2 |
"""
# TODO: Compute -2*phi[1] and simplify the formulas
L = linalg.chol(-2*phi[1], ndim=self.ndim)
k = np.shape(phi[0])[-1]
Cov = linalg.chol_inv(L, ndim=self.ndim)
mu = linalg.chol_solve(L, phi[0], ndim=self.ndim)
# G
g = (-0.5 * linalg.inner(mu, phi[0], ndim=self.ndim)
+ 0.5 * linalg.chol_logdet(L, ndim=self.ndim))
if self.has_limits:
if self.ndim != 1:
raise NotImplementedError("Limits for ndim!=1 not yet supported")
(p, u0, u1)= truncnorm.moments(
mu,
Cov,
self.minimum,
self.maximum,
2,
)
logp = np.log(p)
else:
u0 = mu
u1 = Cov + linalg.outer(u0, u0, ndim=self.ndim)
logp = 0
u = [u0, u1]
return (u, g - logp)
[docs] def compute_cgf_from_parents(self, u_mu_Lambda):
r"""
Compute :math:`\mathrm{E}_{q(p)}[g(p)]`
.. math::
g (\boldsymbol{\mu}, \mathbf{\Lambda})
&=
- \frac{1}{2} \operatorname{tr}(\boldsymbol{\mu\mu}^{\mathrm{T}}
\mathbf{\Lambda} )
+ \frac{1}{2} \log |\mathbf{\Lambda}|
"""
mu_Lambda_mu = u_mu_Lambda[1]
logdet_Lambda = u_mu_Lambda[3]
g = -0.5*mu_Lambda_mu + 0.5*logdet_Lambda
return g
[docs] def compute_fixed_moments_and_f(self, x, mask=True):
r"""
Compute the moments and :math:`f(x)` for a fixed value.
.. math::
\mathbf{u} (\mathbf{x})
&=
\left[ \begin{matrix}
\mathbf{x}
\\
\mathbf{xx}^{\mathrm{T}}
\end{matrix} \right]
\\
f(\mathbf{x})
&= - \frac{D}{2} \log(2\pi)
"""
k = np.shape(x)[-1]
u = [x, linalg.outer(x, x, ndim=self.ndim)]
f = -k/2*np.log(2*np.pi)
return (u, f)
[docs] def compute_gradient(self, g, u, phi):
r"""
Compute the standard gradient with respect to the natural parameters.
Gradient of the moments:
.. math::
\mathrm{d}\overline{\mathbf{u}} &=
\begin{bmatrix}
\frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1} \phi_1
- \frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_1
\\
- \frac{1}{4} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1} \phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1}
- \frac{1}{4} \phi_2^{-1} \phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1}
+ \frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1}
+ \frac{1}{4} \phi_2^{-1} \mathrm{d}\phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1}
+ \frac{1}{4} \phi_2^{-1} \phi_1 \mathrm{d}\phi_1^{\mathrm{T}} \phi_2^{-1}
\end{bmatrix}
\\
&=
\begin{bmatrix}
2 (\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \mathrm{d}\phi_2 \overline{u}_1
+ (\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \mathrm{d}\phi_1
\\
u_2 d\phi_2 u_2 - 2 u_1 u_1^T d\phi_2 u_1 u_1^T
+ 2 (u_2 - u_1 u_1^T) d\phi_1 u_1^T
\end{bmatrix}
Standard gradient given the gradient with respect to the moments, that
is, given the Riemannian gradient :math:`\tilde{\nabla}`:
.. math::
\nabla =
\begin{bmatrix}
(\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \tilde{\nabla}_1
+ 2 (u_2 - u_1 u_1^T) \tilde{\nabla}_2 u_1
\\
(u_2 - u_1 u_1^T) \tilde{\nabla}_1 u_1^T
+ u_1 \tilde{\nabla}_1^T (u_2 - u_1 u_1^T)
+ 2 u_2 \tilde{\nabla}_2 u_2
- 2 u_1 u_1^T \tilde{\nabla}_2 u_1 u_1^T
\end{bmatrix}
"""
ndim = 1
x = u[0]
xx = u[1]
# Some helpful variables
x_x = linalg.outer(x, x, ndim=self.ndim)
Cov = xx - x_x
cov_g0 = linalg.mvdot(Cov, g[0], ndim=self.ndim)
cov_g0_x = linalg.outer(cov_g0, x, ndim=self.ndim)
g1_x = linalg.mvdot(g[1], x, ndim=self.ndim)
# Compute gradient terms
d0 = cov_g0 + 2 * linalg.mvdot(Cov, g1_x, ndim=self.ndim)
d1 = (cov_g0_x + linalg.transpose(cov_g0_x, ndim=self.ndim)
+ 2 * linalg.mmdot(xx,
linalg.mmdot(g[1], xx, ndim=self.ndim),
ndim=self.ndim)
- 2 * x_x * misc.add_trailing_axes(linalg.inner(g1_x,
x,
ndim=self.ndim),
2*self.ndim))
return [d0, d1]
[docs] def random(self, *phi, plates=None):
r"""
Draw a random sample from the distribution.
"""
# TODO/FIXME: You shouldn't draw random values for
# observed/fixed elements!
# Note that phi[1] is -0.5*inv(Cov)
U = linalg.chol(-2*phi[1], ndim=self.ndim)
mu = linalg.chol_solve(U, phi[0], ndim=self.ndim)
shape = plates + self.shape
z = np.random.randn(*shape)
# Denote Lambda = -2*phi[1]
# Then, Cov = inv(Lambda) = inv(U'*U) = inv(U) * inv(U')
# Thus, compute mu + U\z
z = linalg.solve_triangular(U, z, trans='N', lower=False, ndim=self.ndim)
return mu + z
[docs]class GaussianARDDistribution(ExponentialFamilyDistribution):
r"""
...
Log probability density function:
.. math::
\log p(x|\mu, \alpha) = -\frac{1}{2} x^T \mathrm{diag}(\alpha) x + x^T
\mathrm{diag}(\alpha) \mu - \frac{1}{2} \mu^T \mathrm{diag}(\alpha) \mu
+ \frac{1}{2} \sum_i \log \alpha_i - \frac{D}{2} \log(2\pi)
Parent has moments:
.. math::
\begin{bmatrix}
\alpha \circ \mu
\\
\alpha \circ \mu \circ \mu
\\
\alpha
\\
\log(\alpha)
\end{bmatrix}
"""
[docs] def __init__(self, shape):
self.shape = shape
self.ndim = len(shape)
super().__init__()
[docs] def compute_message_to_parent(self, parent, index, u, u_mu_alpha):
r"""
...
.. math::
m =
\begin{bmatrix}
x
\\
[-\frac{1}{2}, \ldots, -\frac{1}{2}]
\\
-\frac{1}{2} \mathrm{diag}(xx^T)
\\
[\frac{1}{2}, \ldots, \frac{1}{2}]
\end{bmatrix}
"""
if index == 0:
x = u[0]
x2 = misc.get_diag(u[1], ndim=self.ndim)
m0 = x
m1 = -0.5 * np.ones(self.shape)
m2 = -0.5 * x2
m3 = 0.5 * np.ones(self.shape)
return [m0, m1, m2, m3]
else:
raise ValueError("Invalid parent index")
[docs] def compute_weights_to_parent(self, index, weights):
r"""
Maps the mask to the plates of a parent.
"""
if index != 0:
raise IndexError()
return misc.add_trailing_axes(weights, self.ndim)
[docs] def compute_phi_from_parents(self, u_mu_alpha, mask=True):
alpha_mu = u_mu_alpha[0]
alpha = u_mu_alpha[2]
#mu = u_mu[0]
#alpha = u_alpha[0]
## if np.ndim(mu) < self.ndim_mu:
## raise ValueError("Moment of mu does not have enough dimensions")
## mu = misc.add_axes(mu,
## axis=np.ndim(mu)-self.ndim_mu,
## num=self.ndim-self.ndim_mu)
phi0 = alpha_mu
phi1 = -0.5 * alpha
if self.ndim > 0:
# Ensure that phi is not using broadcasting for variable
# dimension axes
ones = np.ones(self.shape)
phi0 = ones * phi0
phi1 = ones * phi1
# Make a diagonal matrix
phi1 = misc.diag(phi1, ndim=self.ndim)
return [phi0, phi1]
[docs] def compute_moments_and_cgf(self, phi, mask=True):
if self.ndim == 0:
# Use scalar equations
u0 = -phi[0] / (2*phi[1])
u1 = u0**2 - 1 / (2*phi[1])
u = [u0, u1]
g = (-0.5 * u[0] * phi[0] + 0.5 * np.log(-2*phi[1]))
# TODO/FIXME: You could use these equations if phi is a scalar
# in practice although ndim>0 (because the shape can be, e.g.,
# (1,1,1,1) for ndim=4).
else:
# Reshape to standard vector and matrix
D = np.prod(self.shape)
phi0 = np.reshape(phi[0], phi[0].shape[:-self.ndim] + (D,))
phi1 = np.reshape(phi[1], phi[1].shape[:-2*self.ndim] + (D,D))
# Compute the moments
L = linalg.chol(-2*phi1)
Cov = linalg.chol_inv(L)
u0 = linalg.chol_solve(L, phi0)
u1 = linalg.outer(u0, u0) + Cov
# Compute CGF
g = (- 0.5 * np.einsum('...i,...i', u0, phi0)
+ 0.5 * linalg.chol_logdet(L))
# Reshape to arrays
u0 = np.reshape(u0, u0.shape[:-1] + self.shape)
u1 = np.reshape(u1, u1.shape[:-2] + self.shape + self.shape)
u = [u0, u1]
return (u, g)
[docs] def compute_cgf_from_parents(self, u_mu_alpha):
r"""
Compute the value of the cumulant generating function.
"""
# Compute sum(mu^2 * alpha) correctly for broadcasted shapes
alpha_mu2 = u_mu_alpha[1]
logdet_alpha = u_mu_alpha[3]
axes = tuple(range(-self.ndim, 0))
# TODO/FIXME: You could use plate multiplier type of correction instead
# of explicitly broadcasting with ones.
if self.ndim > 0:
alpha_mu2 = misc.sum_multiply(alpha_mu2, np.ones(self.shape),
axis=axes)
if self.ndim > 0:
logdet_alpha = misc.sum_multiply(logdet_alpha, np.ones(self.shape),
axis=axes)
# Compute g
g = -0.5*alpha_mu2 + 0.5*logdet_alpha
return g
[docs] def compute_fixed_moments_and_f(self, x, mask=True):
r""" Compute u(x) and f(x) for given x. """
if self.ndim > 0 and np.shape(x)[-self.ndim:] != self.shape:
raise ValueError("Invalid shape")
k = np.prod(self.shape)
u = [x, linalg.outer(x, x, ndim=self.ndim)]
f = -k/2*np.log(2*np.pi)
return (u, f)
[docs] def plates_to_parent(self, index, plates):
r"""
Resolves the plate mapping to a parent.
Given the plates of the node's moments, this method returns the plates
that the message to a parent has for the parent's distribution.
"""
if index != 0:
raise IndexError()
return plates + self.shape
[docs] def plates_from_parent(self, index, plates):
r"""
Resolve the plate mapping from a parent.
Given the plates of a parent's moments, this method returns the plates
that the moments has for this distribution.
"""
if index != 0:
raise IndexError()
if self.ndim == 0:
return plates
else:
return plates[:-self.ndim]
[docs] def random(self, *phi, plates=None):
r"""
Draw a random sample from the Gaussian distribution.
"""
# TODO/FIXME: You shouldn't draw random values for
# observed/fixed elements!
D = self.ndim
if D == 0:
dims = ()
else:
dims = np.shape(phi[0])[-D:]
if np.prod(dims) == 1.0:
# Scalar Gaussian
phi1 = phi[1]
if D > 0:
# Because the covariance matrix has shape (1,1,...,1,1),
# that is 2*D number of ones, remove the extra half of the
# shape
phi1 = np.reshape(phi1, np.shape(phi1)[:-2*D] + D*(1,))
var = -0.5 / phi1
std = np.sqrt(var)
mu = var * phi[0]
shape = plates + dims
z = np.random.randn(*shape)
x = mu + std * z
else:
N = np.prod(dims)
dims_cov = dims + dims
# Reshape precision matrix
plates_cov = np.shape(phi[1])[:-2*D]
V = -2 * np.reshape(phi[1], plates_cov + (N,N))
# Compute Cholesky
U = linalg.chol(V)
# Reshape mean vector
plates_phi0 = np.shape(phi[0])[:-D]
phi0 = np.reshape(phi[0], plates_phi0 + (N,))
mu = linalg.chol_solve(U, phi0)
# Compute mu + U\z
shape = plates + (N,)
z = np.random.randn(*shape)
# Denote Lambda = -2*phi[1]
# Then, Cov = inv(Lambda) = inv(U'*U) = inv(U) * inv(U')
# Thus, compute mu + U\z
x = mu + linalg.solve_triangular(U, z,
trans='N',
lower=False)
x = np.reshape(x, plates + dims)
return x
[docs] def compute_gradient(self, g, u, phi):
r"""
Compute the standard gradient with respect to the natural parameters.
Gradient of the moments:
.. math::
\mathrm{d}\overline{\mathbf{u}} &=
\begin{bmatrix}
\frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1} \phi_1
- \frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_1
\\
- \frac{1}{4} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1} \phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1}
- \frac{1}{4} \phi_2^{-1} \phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1}
+ \frac{1}{2} \phi_2^{-1} \mathrm{d}\phi_2 \phi_2^{-1}
+ \frac{1}{4} \phi_2^{-1} \mathrm{d}\phi_1 \phi_1^{\mathrm{T}} \phi_2^{-1}
+ \frac{1}{4} \phi_2^{-1} \phi_1 \mathrm{d}\phi_1^{\mathrm{T}} \phi_2^{-1}
\end{bmatrix}
\\
&=
\begin{bmatrix}
2 (\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \mathrm{d}\phi_2 \overline{u}_1
+ (\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \mathrm{d}\phi_1
\\
u_2 d\phi_2 u_2 - 2 u_1 u_1^T d\phi_2 u_1 u_1^T
+ 2 (u_2 - u_1 u_1^T) d\phi_1 u_1^T
\end{bmatrix}
Standard gradient given the gradient with respect to the moments, that
is, given the Riemannian gradient :math:`\tilde{\nabla}`:
.. math::
\nabla =
\begin{bmatrix}
(\overline{u}_2 - \overline{u}_1 \overline{u}_1^{\mathrm{T}}) \tilde{\nabla}_1
+ 2 (u_2 - u_1 u_1^T) \tilde{\nabla}_2 u_1
\\
(u_2 - u_1 u_1^T) \tilde{\nabla}_1 u_1^T
+ u_1 \tilde{\nabla}_1^T (u_2 - u_1 u_1^T)
+ 2 u_2 \tilde{\nabla}_2 u_2
- 2 u_1 u_1^T \tilde{\nabla}_2 u_1 u_1^T
\end{bmatrix}
"""
ndim = self.ndim
x = u[0]
xx = u[1]
# Some helpful variables
x_x = linalg.outer(x, x, ndim=ndim)
Cov = xx - x_x
cov_g0 = linalg.mvdot(Cov, g[0], ndim=ndim)
cov_g0_x = linalg.outer(cov_g0, x, ndim=ndim)
g1_x = linalg.mvdot(g[1], x, ndim=ndim)
# Compute gradient terms
d0 = cov_g0 + 2 * linalg.mvdot(Cov, g1_x, ndim=ndim)
d1 = (cov_g0_x + linalg.transpose(cov_g0_x, ndim=ndim)
+ 2 * linalg.mmdot(xx,
linalg.mmdot(g[1], xx, ndim=ndim),
ndim=ndim)
- 2 * x_x * misc.add_trailing_axes(linalg.inner(g1_x,
x,
ndim=ndim),
2*ndim))
return [d0, d1]
[docs]class GaussianGammaDistribution(ExponentialFamilyDistribution):
r"""
Class for the VMP formulas of Gaussian-Gamma-ISO variables.
Currently, supports only vector variables.
Log pdf of the prior:
.. math::
\log p(\mathbf{x}, \tau | \boldsymbol{\mu}, \mathbf{\Lambda}, a, b) =&
- \frac{1}{2} \tau \mathbf{x}^T \mathbf{\Lambda} \mathbf{x}
+ \frac{1}{2} \tau \mathbf{x}^T \mathbf{\Lambda} \boldsymbol{\mu}
+ \frac{1}{2} \tau \boldsymbol{\mu}^T \mathbf{\Lambda} \mathbf{x}
- \frac{1}{2} \tau \boldsymbol{\mu}^T \mathbf{\Lambda} \boldsymbol{\mu}
+ \frac{1}{2} \log|\mathbf{\Lambda}|
+ \frac{D}{2} \log\tau
- \frac{D}{2} \log(2\pi)
\\ &
- b \tau
+ a \log\tau
- \log\tau
+ a \log b
- \log \Gamma(a)
Log pdf of the posterior approximation:
.. math::
\log q(\mathbf{x}, \tau) =&
\tau \mathbf{x}^T \boldsymbol{\phi}_1
+ \tau \mathbf{x}^T \mathbf{\Phi}_2 \mathbf{x}
+ \tau \phi_3
+ \log\tau \phi_4
+ g(\boldsymbol{\phi}_1, \mathbf{\Phi}_2, \phi_3, \phi_4)
+ f(x, \tau)
"""
[docs] def __init__(self, shape):
self.shape = shape
self.ndim = len(shape)
super().__init__()
[docs] def compute_message_to_parent(self, parent, index, u, u_mu_Lambda, u_a, u_b):
r"""
Compute the message to a parent node.
- Parent :math:`(\boldsymbol{\mu}, \mathbf{\Lambda})`
Moments:
.. math::
\begin{bmatrix}
\mathbf{\Lambda}\boldsymbol{\mu}
\\
\boldsymbol{\mu}^T\mathbf{\Lambda}\boldsymbol{\mu}
\\
\mathbf{\Lambda}
\\
\log|\mathbf{\Lambda}|
\end{bmatrix}
Message:
.. math::
\begin{bmatrix}
\langle \tau \mathbf{x} \rangle
\\
- \frac{1}{2} \langle \tau \rangle
\\
- \frac{1}{2} \langle \tau \mathbf{xx}^T \rangle
\\
\frac{1}{2}
\end{bmatrix}
- Parent :math:`a`:
Moments:
.. math::
\begin{bmatrix}
a
\\
\log \Gamma(a)
\end{bmatrix}
Message:
.. math::
\begin{bmatrix}
\langle \log\tau \rangle + \langle \log b \rangle
\\
-1
\end{bmatrix}
- Parent :math:`b`:
Moments:
.. math::
\begin{bmatrix}
b
\\
\log b
\end{bmatrix}
Message:
.. math::
\begin{bmatrix}
- \langle \tau \rangle
\\
\langle a \rangle
\end{bmatrix}
"""
x_tau = u[0]
xx_tau = u[1]
tau = u[2]
logtau = u[3]
if index == 0:
m0 = x_tau
m1 = -0.5 * tau
m2 = -0.5 * xx_tau
m3 = 0.5
return [m0, m1, m2, m3]
elif index == 1:
logb = u_b[1]
m0 = logtau + logb
m1 = -1
return [m0, m1]
elif index == 2:
a = u_a[0]
m0 = -tau
m1 = a
return [m0, m1]
else:
raise ValueError("Index out of bounds")
[docs] def compute_phi_from_parents(self, u_mu_Lambda, u_a, u_b, mask=True):
r"""
Compute the natural parameter vector given parent moments.
"""
Lambda_mu = u_mu_Lambda[0]
mu_Lambda_mu = u_mu_Lambda[1]
Lambda = u_mu_Lambda[2]
a = u_a[0]
b = u_b[0]
phi = [Lambda_mu,
-0.5*Lambda,
-0.5*mu_Lambda_mu - b,
a]
return phi
[docs] def compute_moments_and_cgf(self, phi, mask=True):
r"""
Compute the moments and :math:`g(\phi)`.
"""
# Compute helpful variables
V = -2*phi[1]
L_V = linalg.chol(V, ndim=self.ndim)
logdet_V = linalg.chol_logdet(L_V, ndim=self.ndim)
mu = linalg.chol_solve(L_V, phi[0], ndim=self.ndim)
Cov = linalg.chol_inv(L_V, ndim=self.ndim)
a = phi[3]
b = -phi[2] - 0.5 * linalg.inner(mu, phi[0], ndim=self.ndim)
log_b = np.log(b)
# Compute moments
u2 = a / b
u3 = -log_b + special.psi(a)
u0 = mu * misc.add_trailing_axes(u2, self.ndim)
u1 = Cov + (
linalg.outer(mu, mu, ndim=self.ndim)
* misc.add_trailing_axes(u2, 2 * self.ndim)
)
u = [u0, u1, u2, u3]
# Compute g
g = 0.5*logdet_V + a*log_b - special.gammaln(a)
return (u, g)
[docs] def compute_cgf_from_parents(self, u_mu_Lambda, u_a, u_b):
r"""
Compute :math:`\mathrm{E}_{q(p)}[g(p)]`
"""
logdet_Lambda = u_mu_Lambda[3]
a = u_a[0]
gammaln_a = u_a[1]
log_b = u_b[1]
g = 0.5*logdet_Lambda + a*log_b - gammaln_a
return g
[docs] def compute_fixed_moments_and_f(self, x_alpha, mask=True):
r"""
Compute the moments and :math:`f(x)` for a fixed value.
"""
(x, alpha) = x_alpha
logalpha = np.log(alpha)
u0 = x * misc.add_trailing_axes(alpha, self.ndim)
u1 = linalg.outer(x, x, ndim=self.ndim) * misc.add_trailing_axes(alpha, 2*self.ndim)
u2 = alpha
u3 = logalpha
u = [u0, u1, u2, u3]
if self.ndim > 0:
D = np.prod(np.shape(x)[-self.ndim:])
else:
D = 1
f = (D/2 - 1) * logalpha - D/2 * np.log(2*np.pi)
return (u, f)
[docs] def random(self, *phi, plates=None):
r"""
Draw a random sample from the distribution.
"""
# TODO/FIXME: This is incorrect, I think. Gamma distribution parameters
# aren't directly those, because phi has some parts from the Gaussian
# distribution.
alpha = GammaDistribution().random(
phi[2],
phi[3],
plates=plates
)
mu = GaussianARDDistribution(self.shape).random(
misc.add_trailing_axes(alpha, self.ndim) * phi[0],
misc.add_trailing_axes(alpha, 2*self.ndim) * phi[1],
plates=plates
)
return (mu, alpha)
[docs]class GaussianWishartDistribution(ExponentialFamilyDistribution):
r"""
Class for the VMP formulas of Gaussian-Wishart variables.
Currently, supports only vector variables.
.. math::
\log p(\mathbf{x}, \mathbf{\Lambda} | \boldsymbol{\mu},
\alpha, n, \mathbf{V})
=&
- \frac{1}{2} \alpha \mathbf{x}^T \mathbf{\Lambda} \mathbf{x}
+ \frac{1}{2} \alpha \mathbf{x}^T \mathbf{\Lambda} \boldsymbol{\mu}
+ \frac{1}{2} \alpha \boldsymbol{\mu}^T \mathbf{\Lambda} \mathbf{x}
- \frac{1}{2} \alpha \boldsymbol{\mu}^T \mathbf{\Lambda} \boldsymbol{\mu}
+ \frac{1}{2} \log|\mathbf{\Lambda}|
+ \frac{D}{2} \log\alpha
- \frac{D}{2} \log(2\pi)
\\ &
- \frac{1}{2} \mathrm{tr}(\mathbf{V}\mathbf{\Lambda})
+ \frac{n-d-1}{2} \log|\mathbf{\Lambda}|
- \frac{nd}{2}\log 2
- \frac{n}{2} \log|\mathbf{V}|
- \log\Gamma_d(\frac{n}{2})
Posterior approximation:
.. math::
\log q(\mathbf{x}, \mathbf{\Lambda})
=&
\mathbf{x}^T \mathbf{\Lambda} \boldsymbol{\phi}_1
+ \phi_2 \mathbf{x}^T \mathbf{\Lambda} \mathbf{x}
+ \mathrm{tr}(\mathbf{\Lambda} \mathbf{\Phi}_3)
+ \phi_4 \log|\mathbf{\Lambda}|
+ g(\boldsymbol{\phi}_1, \phi_2, \mathbf{\Phi}_3, \phi_4)
+ f(\mathbf{x}, \mathbf{\Lambda})
"""
[docs] def compute_message_to_parent(self, parent, index, u, u_mu_alpha, u_n, u_V):
r"""
Compute the message to a parent node.
For parent :math:`q(\boldsymbol{\mu}, \alpha)`:
.. math::
\alpha \boldsymbol{\mu}^T \mathbf{m}_1
\Rightarrow &
\mathbf{m}_1 = \langle \mathbf{\Lambda x} \rangle
\\
\alpha \boldsymbol{\mu}^T \mathbf{M}_2 \boldsymbol{\mu}
\Rightarrow &
\mathbf{M}_2 = - \frac{1}{2} \langle \mathbf{\Lambda} \rangle
\\
\alpha m_3
\Rightarrow &
m_3 = - \frac{1}{2} \langle \mathbf{x}^T \mathbf{\Lambda} \mathbf{x} \rangle
\\
m_4 \log \alpha
\Rightarrow &
m_4 = \frac{d}{2}
For parent :math:`q(\mathbf{V})`:
.. math::
\mathbf{M}_1 &= \frac{\partial \langle \log p \rangle}{\partial
\langle \mathbf{V} \rangle} = -\frac{1}{2} \langle \mathbf{\Lambda} \rangle
\\
\mathbf{M}_2 &= \frac{\partial \langle \log p \rangle}{\partial \langle \log|\mathbf{V}| \rangle}
= ...
"""
if index == 0:
m0
m1
m2
m3
raise NotImplementedError()
elif index == 1:
raise NotImplementedError()
elif index == 2:
raise NotImplementedError()
else:
raise ValueError("Index out of bounds")
[docs] def compute_phi_from_parents(self, u_mu_alpha, u_n, u_V, mask=True):
r"""
Compute the natural parameter vector given parent moments.
"""
alpha_mu = u_mu_alpha[0]
alpha_mumu = u_mu_alpha[1]
alpha = u_mu_alpha[2]
V = u_V[0]
n = u_n[0]
phi0 = alpha_mu
phi1 = -0.5 * alpha
phi2 = -0.5 * (V + alpha_mumu)
phi3 = 0.5 * n
return [phi0, phi1, phi2, phi3]
[docs] def compute_moments_and_cgf(self, phi, mask=True):
r"""
Compute the moments and :math:`g(\phi)`.
"""
# TODO/FIXME: This isn't probably correct. Phi[2:] has terms that are
# related to the Gaussian also, not only Wishart.
u_Lambda = WishartDistribution((D,)).compute_moments_and_cgf(phi[2:])
raise NotImplementedError()
return (u, g)
[docs] def compute_cgf_from_parents(self, u_mu_alpha, u_n, u_V):
r"""
Compute :math:`\mathrm{E}_{q(p)}[g(p)]`
"""
raise NotImplementedError()
return g
[docs] def compute_fixed_moments_and_f(self, x, Lambda, mask=True):
r"""
Compute the moments and :math:`f(x)` for a fixed value.
"""
raise NotImplementedError()
return (u, f)
[docs] def random(self, *params, plates=None):
r"""
Draw a random sample from the distribution.
"""
raise NotImplementedError()
#
# NODES
#
class _GaussianTemplate(ExponentialFamily):
def translate(self, b, debug=False):
"""
Transforms the current posterior by adding a bias to the mean
Parameters
----------
b : array
Constant to add
"""
ndim = len(self.dims[0])
if ndim > 0 and np.shape(b)[-ndim:] != self.dims[0]:
raise ValueError("Bias has incorrect shape")
x = self.u[0]
xb = linalg.outer(x, b, ndim=ndim)
bx = linalg.transpose(xb, ndim=ndim)
bb = linalg.outer(b, b, ndim=ndim)
uh = [
self.u[0] + b,
self.u[1] + xb + bx + bb
]
Lambda = -2 * self.phi[1]
Lambda_b = linalg.mvdot(Lambda, b, ndim=ndim)
dg = -0.5 * (
linalg.inner(b, Lambda_b, ndim=ndim)
+ 2 * linalg.inner(x, Lambda_b, ndim=ndim)
)
phih = [
self.phi[0] + Lambda_b,
self.phi[1]
]
self._check_shape(uh)
self._check_shape(phih)
self.u = uh
self.phi = phih
self.g = self.g + dg
# TODO: This is all just debugging stuff and can be removed
if debug:
uh = [ui.copy() for ui in uh]
gh = self.g.copy()
self._update_moments_and_cgf()
if any(not np.allclose(uih, ui, atol=1e-6) for (uih, ui) in zip(uh, self.u)):
raise RuntimeError("BUG")
if not np.allclose(self.g, gh, atol=1e-6):
raise RuntimeError("BUG")
return
[docs]class Gaussian(_GaussianTemplate):
r"""
Node for Gaussian variables.
The node represents a :math:`D`-dimensional vector from the Gaussian
distribution:
.. math::
\mathbf{x} &\sim \mathcal{N}(\boldsymbol{\mu}, \mathbf{\Lambda}),
where :math:`\boldsymbol{\mu}` is the mean vector and
:math:`\mathbf{\Lambda}` is the precision matrix (i.e., inverse of the
covariance matrix).
.. math::
\mathbf{x},\boldsymbol{\mu} \in \mathbb{R}^{D},
\quad \mathbf{\Lambda} \in \mathbb{R}^{D \times D},
\quad \mathbf{\Lambda} \text{ symmetric positive definite}
Parameters
----------
mu : Gaussian-like node or GaussianGamma-like node or GaussianWishart-like node or array
Mean vector
Lambda : Wishart-like node or array
Precision matrix
See also
--------
Wishart, GaussianARD, GaussianWishart, GaussianGamma
"""
[docs] def __init__(self, mu, Lambda, **kwargs):
r"""
Create Gaussian node
"""
super().__init__(mu, Lambda, **kwargs)
@classmethod
def _constructor(cls, mu, Lambda, ndim=1, **kwargs):
r"""
Constructs distribution and moments objects.
"""
mu_Lambda = WrapToGaussianWishart(mu, Lambda, ndim=ndim)
shape = mu_Lambda._moments.shape
moments = GaussianMoments(shape)
parent_moments = (mu_Lambda._moments,)
if mu_Lambda.dims != ( shape, (), shape+shape, () ):
raise Exception("Parents have wrong dimensionality")
distribution = GaussianDistribution(shape)
parents = [mu_Lambda]
return (parents,
kwargs,
moments.dims,
cls._total_plates(kwargs.get('plates'),
distribution.plates_from_parent(0, mu_Lambda.plates)),
distribution,
moments,
parent_moments)
[docs] def initialize_from_parameters(self, mu, Lambda):
u = self._parent_moments[0].compute_fixed_moments(mu, Lambda)
self._initialize_from_parent_moments(u)
[docs] def observe_limits(self, minimum=-np.inf, maximum=np.inf):
self._distribution.set_limits(minimum, maximum)
self._update_mask()
return
def _set_mask(self, mask):
self.mask = np.logical_or(
mask,
np.logical_or(
self.observed,
self._distribution.has_limits,
),
)
def __str__(self):
ndim = len(self.dims[0])
mu = self.u[0]
Cov = self.u[1] - linalg.outer(mu, mu, ndim=ndim)
return ("%s ~ Gaussian(mu, Cov)\n"
" mu = \n"
"%s\n"
" Cov = \n"
"%s\n"
% (self.name, mu, Cov))
[docs] def rotate(self, R, inv=None, logdet=None, Q=None):
# TODO/FIXME: Combine and refactor all these rotation transformations
# into _GaussianTemplate
if self._moments.ndim != 1:
raise NotImplementedError("Not implemented for ndim!=1 yet")
if inv is not None:
invR = inv
else:
invR = np.linalg.inv(R)
if logdet is not None:
logdetR = logdet
else:
logdetR = np.linalg.slogdet(R)[1]
# It would be more efficient and simpler, if you just rotated the
# moments and didn't touch phi. However, then you would need to call
# update() before lower_bound_contribution. This is more error-safe.
# Rotate plates, if plate rotation matrix is given. Assume that there's
# only one plate-axis
if Q is not None:
# Rotate moments using Q
self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
sumQ = np.sum(Q, axis=0)
# Rotate natural parameters using Q
self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
self.phi[0] = np.einsum('dij,dj->di', -2*self.phi[1], self.u[0])
# Transform parameters using R
self.phi[0] = mvdot(invR.T, self.phi[0])
self.phi[1] = dot(invR.T, self.phi[1], invR)
if Q is not None:
self._update_moments_and_cgf()
else:
# Transform moments and g using R
self.u[0] = mvdot(R, self.u[0])
self.u[1] = dot(R, self.u[1], R.T)
self.g -= logdetR
[docs] def rotate_matrix(self, R1, R2, inv1=None, logdet1=None, inv2=None, logdet2=None, Q=None):
r"""
The vector is reshaped into a matrix by stacking the row vectors.
Computes R1*X*R2', which is identical to kron(R1,R2)*x (??)
Note that this is slightly different from the standard Kronecker product
definition because Numpy stacks row vectors instead of column vectors.
Parameters
----------
R1 : ndarray
A matrix from the left
R2 : ndarray
A matrix from the right
"""
if self._moments.ndim != 1:
raise NotImplementedError("Not implemented for ndim!=1 yet")
if Q is not None:
# Rotate moments using Q
self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
sumQ = np.sum(Q, axis=0)
# Rotate natural parameters using Q
self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
self.phi[0] = np.einsum('dij,dj->di', -2*self.phi[1], self.u[0])
if inv1 is None:
inv1 = np.linalg.inv(R1)
if logdet1 is None:
logdet1 = np.linalg.slogdet(R1)[1]
if inv2 is None:
inv2 = np.linalg.inv(R2)
if logdet2 is None:
logdet2 = np.linalg.slogdet(R2)[1]
D1 = np.shape(R1)[0]
D2 = np.shape(R2)[0]
# Reshape into matrices
sh0 = np.shape(self.phi[0])[:-1] + (D1,D2)
sh1 = np.shape(self.phi[1])[:-2] + (D1,D2,D1,D2)
phi0 = np.reshape(self.phi[0], sh0)
phi1 = np.reshape(self.phi[1], sh1)
# Apply rotations to phi
#phi0 = dot(inv1, phi0, inv2.T)
phi0 = dot(inv1.T, phi0, inv2)
phi1 = np.einsum('...ia,...abcd->...ibcd', inv1.T, phi1)
phi1 = np.einsum('...ic,...abcd->...abid', inv1.T, phi1)
phi1 = np.einsum('...ib,...abcd->...aicd', inv2.T, phi1)
phi1 = np.einsum('...id,...abcd->...abci', inv2.T, phi1)
# Reshape back into vectors
self.phi[0] = np.reshape(phi0, self.phi[0].shape)
self.phi[1] = np.reshape(phi1, self.phi[1].shape)
# It'd be better to rotate the moments too..
self._update_moments_and_cgf()
[docs]class GaussianARD(_GaussianTemplate):
r"""
Node for Gaussian variables with ARD prior.
The node represents a :math:`D`-dimensional vector from the Gaussian
distribution:
.. math::
\mathbf{x} &\sim \mathcal{N}(\boldsymbol{\mu}, \mathrm{diag}(\boldsymbol{\alpha})),
where :math:`\boldsymbol{\mu}` is the mean vector and
:math:`\mathrm{diag}(\boldsymbol{\alpha})` is the diagonal precision matrix
(i.e., inverse of the covariance matrix).
.. math::
\mathbf{x},\boldsymbol{\mu} \in \mathbb{R}^{D}, \quad \alpha_d > 0 \text{
for } d=0,\ldots,D-1
*Note:* The form of the posterior approximation is a Gaussian distribution with full
covariance matrix instead of a diagonal matrix.
Parameters
----------
mu : Gaussian-like node or GaussianGamma-like node or array Mean vector
alpha : gamma-like node or array
Diagonal elements of the precision matrix
See also
--------
Gamma, Gaussian, GaussianGamma, GaussianWishart
"""
[docs] def __init__(self, mu, alpha, ndim=None, shape=None, **kwargs):
r"""
Create GaussianARD node.
"""
super().__init__(mu, alpha, ndim=ndim, shape=shape, **kwargs)
@classmethod
def _constructor(cls, mu, alpha, ndim=None, shape=None, **kwargs):
r"""
Constructs distribution and moments objects.
If __init__ uses useconstructor decorator, this method is called to
construct distribution and moments objects.
The method is given the same inputs as __init__. For some nodes, some of
these can't be "static" class attributes, then the node class must
overwrite this method to construct the objects manually.
The point of distribution class is to move general distribution but
not-node specific code. The point of moments class is to define the
messaging protocols.
"""
mu_alpha = WrapToGaussianGamma(mu, alpha, ndim=0)
if ndim is None:
if shape is not None:
ndim = len(shape)
else:
shape = ()
ndim = 0
else:
if shape is not None:
if ndim != len(shape):
raise ValueError("Given shape and ndim inconsistent")
else:
if ndim == 0:
shape = ()
else:
if ndim > len(mu_alpha.plates):
raise ValueError(
"Cannot determine shape for ndim={0} because parent "
"full shape has ndim={1}."
.format(ndim, len(mu_alpha.plates))
)
shape = mu_alpha.plates[-ndim:]
moments = GaussianMoments(shape)
parent_moments = [GaussianGammaMoments(())]
distribution = GaussianARDDistribution(shape)
plates = cls._total_plates(kwargs.get('plates'),
distribution.plates_from_parent(0, mu_alpha.plates))
parents = [mu_alpha]
return (parents,
kwargs,
moments.dims,
plates,
distribution,
moments,
parent_moments)
[docs] def initialize_from_parameters(self, mu, alpha):
# Explicit broadcasting so the shapes match
mu = mu * np.ones(np.shape(alpha))
alpha = alpha * np.ones(np.shape(mu))
# Compute parent moments
u = self._parent_moments[0].compute_fixed_moments([mu, alpha])
# Initialize distribution
self._initialize_from_parent_moments(u)
[docs] def initialize_from_mean_and_covariance(self, mu, Cov):
ndim = len(self._distribution.shape)
u = [mu, Cov + linalg.outer(mu, mu, ndim=ndim)]
mask = np.logical_not(self.observed)
# TODO: You could compute the CGF but it requires Cholesky of
# Cov. Do it later.
self._set_moments_and_cgf(u, np.nan, mask=mask)
return
def __str__(self):
mu = self.u[0]
Cov = self.u[1] - linalg.outer(mu, mu)
return ("%s ~ Gaussian(mu, Cov)\n"
" mu = \n"
"%s\n"
" Cov = \n"
"%s\n"
% (self.name, mu, Cov))
[docs] def rotate(self, R, inv=None, logdet=None, axis=-1, Q=None, subset=None, debug=False):
if Q is not None:
raise NotImplementedError()
if subset is not None:
raise NotImplementedError()
# TODO/FIXME: Combine and refactor all these rotation transformations
# into _GaussianTemplate
ndim = len(self._distribution.shape)
if inv is not None:
invR = inv
else:
invR = np.linalg.inv(R)
if logdet is not None:
logdetR = logdet
else:
logdetR = np.linalg.slogdet(R)[1]
self.phi[0] = rotate_mean(self.phi[0], invR.T,
axis=axis,
ndim=ndim)
self.phi[1] = rotate_covariance(self.phi[1], invR.T,
axis=axis,
ndim=ndim)
self.u[0] = rotate_mean(self.u[0], R,
axis=axis,
ndim=ndim)
self.u[1] = rotate_covariance(self.u[1], R,
axis=axis,
ndim=ndim)
s = list(self.dims[0])
s.pop(axis)
self.g -= logdetR * np.prod(s)
# TODO: This is all just debugging stuff and can be removed
if debug:
uh = [ui.copy() for ui in self.u]
gh = self.g.copy()
self._update_moments_and_cgf()
if any(not np.allclose(uih, ui, atol=1e-6) for (uih, ui) in zip(uh, self.u)):
raise RuntimeError("BUG")
if not np.allclose(self.g, gh, atol=1e-6):
raise RuntimeError("BUG")
return
[docs] def rotate_plates(self, Q, plate_axis=-1):
r"""
Approximate rotation of a plate axis.
Mean is rotated exactly but covariance/precision matrix is rotated
approximately.
"""
ndim = len(self._distribution.shape)
# Rotate moments using Q
if not isinstance(plate_axis, int):
raise ValueError("Plate axis must be integer")
if plate_axis >= 0:
plate_axis -= len(self.plates)
if plate_axis < -len(self.plates) or plate_axis >= 0:
raise ValueError("Axis out of bounds")
u0 = rotate_mean(self.u[0], Q,
ndim=ndim+(-plate_axis),
axis=0)
sumQ = misc.add_trailing_axes(np.sum(Q, axis=0),
2*ndim-plate_axis-1)
phi1 = sumQ**(-2) * self.phi[1]
phi0 = -2 * matrix_dot_vector(phi1, u0, ndim=ndim)
self.phi[0] = phi0
self.phi[1] = phi1
self._update_moments_and_cgf()
return
[docs]class GaussianGamma(ExponentialFamily):
r"""
Node for Gaussian-gamma (isotropic) random variables.
The prior:
.. math::
p(x, \alpha| \mu, \Lambda, a, b)
p(x|\alpha, \mu, \Lambda) = \mathcal{N}(x | \mu, \alpha Lambda)
p(\alpha|a, b) = \mathcal{G}(\alpha | a, b)
The posterior approximation :math:`q(x, \alpha)` has the same Gaussian-gamma
form.
Currently, supports only vector variables.
"""
@classmethod
def _constructor(cls, mu, Lambda, a, b, ndim=1, **kwargs):
r"""
Constructs distribution and moments objects.
This method is called if useconstructor decorator is used for __init__.
`mu` is the mean/location vector
`alpha` is the scale
`V` is the scale matrix
`n` is the degrees of freedom
"""
# Convert parent nodes
mu_Lambda = WrapToGaussianWishart(mu, Lambda, ndim=ndim)
a = cls._ensure_moments(a, GammaPriorMoments)
b = cls._ensure_moments(b, GammaMoments)
shape = mu_Lambda.dims[0]
distribution = GaussianGammaDistribution(shape)
moments = GaussianGammaMoments(shape)
parent_moments = (
mu_Lambda._moments,
a._moments,
b._moments,
)
# Check shapes
if mu_Lambda.dims != ( shape, (), 2*shape, () ):
raise ValueError("mu and Lambda have wrong shape")
if a.dims != ( (), () ):
raise ValueError("a has wrong shape")
if b.dims != ( (), () ):
raise ValueError("b has wrong shape")
# List of parent nodes
parents = [mu_Lambda, a, b]
return (parents,
kwargs,
moments.dims,
cls._total_plates(kwargs.get('plates'),
distribution.plates_from_parent(0, mu_Lambda.plates),
distribution.plates_from_parent(1, a.plates),
distribution.plates_from_parent(2, b.plates)),
distribution,
moments,
parent_moments)
[docs] def translate(self, b, debug=False):
if self._moments.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
tau = self.u[2]
x = self.u[0] / tau[...,None]
xb = linalg.outer(x, b, ndim=1)
bx = linalg.transpose(xb, ndim=1)
bb = linalg.outer(b, b, ndim=1)
uh = [
self.u[0] + tau[...,None] * b,
self.u[1] + tau[...,None,None] * (xb + bx + bb),
self.u[2],
self.u[3]
]
Lambda = -2 * self.phi[1]
dtau = -0.5 * (
np.einsum('...ij,...i,...j->...', Lambda, b, b)
+ 2 * np.einsum('...ij,...i,...j->...', Lambda, b, x)
)
phih = [
self.phi[0] + np.einsum('...ij,...j->...i', Lambda, b),
self.phi[1],
self.phi[2] + dtau,
self.phi[3]
]
self._check_shape(uh)
self._check_shape(phih)
self.phi = phih
self.u = uh
# TODO: This is all just debugging stuff and can be removed
if debug:
uh = [ui.copy() for ui in uh]
gh = self.g.copy()
self._update_moments_and_cgf()
if any(not np.allclose(uih, ui, atol=1e-6) for (uih, ui) in zip(uh, self.u)):
raise RuntimeError("BUG")
if not np.allclose(self.g, gh, atol=1e-6):
raise RuntimeError("BUG")
return
[docs] def rotate(self, R, inv=None, logdet=None, debug=False):
if self._moments.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
if inv is None:
inv = np.linalg.inv(R)
if logdet is None:
logdet = np.linalg.slogdet(R)[1]
uh = [
rotate_mean(self.u[0], R),
rotate_covariance(self.u[1], R),
self.u[2],
self.u[3]
]
phih = [
rotate_mean(self.phi[0], inv.T),
rotate_covariance(self.phi[1], inv.T),
self.phi[2],
self.phi[3]
]
self._check_shape(uh)
self._check_shape(phih)
self.phi = phih
self.u = uh
self.g = self.g - logdet
# TODO: This is all just debugging stuff and can be removed
if debug:
uh = [ui.copy() for ui in uh]
gh = self.g.copy()
self._update_moments_and_cgf()
if any(not np.allclose(uih, ui, atol=1e-6) for (uih, ui) in zip(uh, self.u)):
raise RuntimeError("BUG")
if not np.allclose(self.g, gh, atol=1e-6):
raise RuntimeError("BUG")
return
[docs] def plotmatrix(self):
r"""
Creates a matrix of marginal plots.
On diagonal, are marginal plots of each variable. Off-diagonal plot
(i,j) shows the joint marginal density of x_i and x_j.
"""
import bayespy.plot as bpplt
if self.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
if np.prod(self.plates) != 1:
raise ValueError("Currently, does not support plates in the node.")
if len(self.dims[0]) != 1:
raise ValueError("Currently, supports only vector variables")
# Dimensionality of the Gaussian
D = self.dims[0][0]
# Compute standard parameters
tau = self.u[2]
mu = self.u[0]
mu = mu / misc.add_trailing_axes(tau, 1)
Cov = self.u[1] - linalg.outer(self.u[0], mu, ndim=1)
Cov = Cov / misc.add_trailing_axes(tau, 2)
a = self.phi[3]
b = -self.phi[2] - 0.5*linalg.inner(self.phi[0], mu, ndim=1)
# Create subplots
(fig, axes) = bpplt.pyplot.subplots(D+1, D+1)
# Plot marginal Student t distributions
for i in range(D):
for j in range(i+1):
if i == j:
bpplt._pdf_t(*(random.gaussian_gamma_to_t(mu[i],
Cov[i,i],
a,
b,
ndim=0)),
axes=axes[i,i])
else:
S = Cov[np.ix_([i,j],[i,j])]
(m, S, nu) = random.gaussian_gamma_to_t(mu[[i,j]],
S,
a,
b)
bpplt._contour_t(m, S, nu, axes=axes[i,j])
bpplt._contour_t(m, S, nu, axes=axes[j,i], transpose=True)
# Plot Gaussian-gamma marginal distributions
for k in range(D):
bpplt._contour_gaussian_gamma(mu[k], Cov[k,k], a, b,
axes=axes[D,k])
bpplt._contour_gaussian_gamma(mu[k], Cov[k,k], a, b,
axes=axes[k,D],
transpose=True)
# Plot gamma marginal distribution
bpplt._pdf_gamma(a, b, axes=axes[D,D])
return axes
[docs] def get_gaussian_location(self):
r"""
Return the mean and variance of the distribution
"""
if self._moments.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
tau = self.u[2]
tau_mu = self.u[0]
return tau_mu / tau[...,None]
[docs] def get_gaussian_mean_and_variance(self):
r"""
Return the mean and variance of the distribution
"""
if self.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
a = self.phi[3]
nu = 2*a
if np.any(nu <= 1):
raise ValueError("Mean not defined for degrees of freedom <= 1")
if np.any(nu <= 2):
raise ValueError("Variance not defined if degrees of freedom <= 2")
tau = self.u[2]
tau_mu = self.u[0]
mu = tau_mu / misc.add_trailing_axes(tau, 1)
var = misc.get_diag(self.u[1], ndim=1) - tau_mu*mu
var = var / misc.add_trailing_axes(tau, 1)
var = nu / (nu-2) * var
return (mu, var)
[docs] def get_marginal_logpdf(self, gaussian=None, gamma=None):
r"""
Get the (marginal) log pdf of a subset of the variables
Parameters
----------
gaussian : list or None
Indices of the Gaussian variables to keep or None
gamma : bool or None
True if keep the gamma variable, otherwise False or None
Returns
-------
function
A function which computes log-pdf
"""
if self.ndim != 1:
raise NotImplementedError("Only ndim=1 supported at the moment")
if gaussian is None and not gamma:
raise ValueError("Must give some variables")
# Compute standard parameters
tau = self.u[2]
mu = self.u[0]
mu = mu / misc.add_trailing_axes(tau, 1)
Cov = np.linalg.inv(-2*self.phi[1])
if not np.allclose(Cov,
self.u[1] - linalg.outer(self.u[0], mu, ndim=1)):
raise Exception("WAAAT")
#Cov = Cov / misc.add_trailing_axes(tau, 2)
a = self.phi[3]
b = -self.phi[2] - 0.5*linalg.inner(self.phi[0], mu, ndim=1)
if not gamma:
# Student t distributions
inds = list(gaussian)
mu = mu[inds]
Cov = Cov[np.ix_(inds, inds)]
(mu, Cov, nu) = random.gaussian_gamma_to_t(mu,
Cov,
a,
b,
ndim=1)
L = linalg.chol(Cov)
logdet_Cov = linalg.chol_logdet(L)
D = len(inds)
def logpdf(x):
y = x - mu
v = linalg.chol_solve(L, y)
z2 = linalg.inner(y, v, ndim=1)
return random.t_logpdf(z2, logdet_Cov, nu, D)
return logpdf
elif gaussian is None:
# Gamma distribution
def logpdf(x):
logx = np.log(x)
return random.gamma_logpdf(b*x,
logx,
a*logx,
a*np.log(b),
special.gammaln(a))
return logpdf
else:
# Gaussian-gamma distribution
inds = list(gaussian)
mu = mu[inds]
Cov = Cov[np.ix_(inds, inds)]
D = len(inds)
L = linalg.chol(Cov)
logdet_Cov = linalg.chol_logdet(L)
def logpdf(x):
tau = x[...,-1]
logtau = np.log(tau)
x = x[...,:-1]
y = x - mu
v = linalg.chol_solve(L, y) * tau[...,None]
z2 = linalg.inner(y, v, ndim=1)
return (random.gaussian_logpdf(z2,
0,
0,
logdet_Cov + D*logtau,
D) +
random.gamma_logpdf(b*tau,
logtau,
a*logtau,
a*np.log(b),
special.gammaln(a)))
return logpdf
[docs]class GaussianWishart(ExponentialFamily):
r"""
Node for Gaussian-Wishart random variables.
The prior:
.. math::
p(x, \Lambda| \mu, \alpha, V, n)
p(x|\Lambda, \mu, \alpha) = \mathcal(N)(x | \mu, \alpha^{-1} Lambda^{-1})
p(\Lambda|V, n) = \mathcal(W)(\Lambda | n, V)
The posterior approximation :math:`q(x, \Lambda)` has the same Gaussian-Wishart form.
Currently, supports only vector variables.
"""
_distribution = GaussianWishartDistribution()
@classmethod
def _constructor(cls, mu, alpha, n, V, **kwargs):
r"""
Constructs distribution and moments objects.
This method is called if useconstructor decorator is used for __init__.
`mu` is the mean/location vector
`alpha` is the scale
`n` is the degrees of freedom
`V` is the scale matrix
"""
# Convert parent nodes
mu_alpha = WrapToGaussianGamma(mu, alpha, ndim=1)
D = mu_alpha.dims[0][0]
shape = mu_alpha._moments.shape
moments = GaussianWishartMoments(shape)
n = cls._ensure_moments(n, WishartPriorMoments, d=D)
V = cls._ensure_moments(V, WishartMoments, ndim=1)
parent_moments = (
mu_alpha._moments,
n._moments,
V._moments
)
# Check shapes
if mu_alpha.dims != ( (D,), (D,D), (), () ):
raise ValueError("mu and alpha have wrong shape")
if V.dims != ( (D,D), () ):
raise ValueError("Precision matrix has wrong shape")
if n.dims != ( (), () ):
raise ValueError("Degrees of freedom has wrong shape")
parents = [mu_alpha, n, V]
return (parents,
kwargs,
moments.dims,
cls._total_plates(kwargs.get('plates'),
cls._distribution.plates_from_parent(0, mu_alpha.plates),
cls._distribution.plates_from_parent(1, n.plates),
cls._distribution.plates_from_parent(2, V.plates)),
cls._distribution,
moments,
parent_moments)
#
# CONVERTERS
#
[docs]class GaussianToGaussianGamma(Deterministic):
r"""
Converter for Gaussian moments to Gaussian-gamma isotropic moments
Combines the Gaussian moments with gamma moments for a fixed value 1.
"""
[docs] def __init__(self, X, **kwargs):
r"""
"""
if not isinstance(X._moments, GaussianMoments):
raise ValueError("Wrong moments, should be Gaussian")
shape = X._moments.shape
self.ndim = X._moments.ndim
self._moments = GaussianGammaMoments(shape)
self._parent_moments = [GaussianMoments(shape)]
shape = X.dims[0]
dims = ( shape, 2*shape, (), () )
super().__init__(X, dims=dims, **kwargs)
def _compute_moments(self, u_X):
r"""
"""
x = u_X[0]
xx = u_X[1]
u = [x, xx, 1, 0]
return u
def _compute_message_to_parent(self, index, m_child, u_X):
r"""
"""
if index == 0:
m = m_child[:2]
return m
else:
raise ValueError("Invalid parent index")
def _compute_function(self, x):
return (x, 1)
GaussianMoments.add_converter(GaussianGammaMoments,
GaussianToGaussianGamma)
[docs]class GaussianGammaToGaussianWishart(Deterministic):
r"""
"""
[docs] def __init__(self, X_alpha, **kwargs):
raise NotImplementedError()
GaussianGammaMoments.add_converter(GaussianWishartMoments,
GaussianGammaToGaussianWishart)
#
# WRAPPERS
#
# These wrappers form a single node from two nodes for messaging purposes.
#
[docs]class WrapToGaussianGamma(Deterministic):
r"""
"""
[docs] def __init__(self, X, alpha, ndim=None, **kwargs):
r"""
"""
# In case X is a numerical array, convert it to Gaussian first
try:
X = self._ensure_moments(X, GaussianMoments, ndim=ndim)
except Moments.NoConverterError:
pass
try:
ndim = X._moments.ndim
except AttributeError as err:
raise TypeError("ndim needs to be given explicitly") from err
X = self._ensure_moments(X, GaussianGammaMoments, ndim=ndim)
if len(X.dims[0]) != ndim:
raise RuntimeError("Conversion failed ndim.")
shape = X.dims[0]
dims = ( shape, 2 * shape, (), () )
self.shape = shape
self.ndim = len(shape)
self._moments = GaussianGammaMoments(shape)
self._parent_moments = [
GaussianGammaMoments(shape),
GammaMoments()
]
super().__init__(X, alpha, dims=dims, **kwargs)
def _compute_moments(self, u_X, u_alpha):
r"""
"""
(tau_x, tau_xx, tau, logtau) = u_X
(alpha, logalpha) = u_alpha
u0 = tau_x * misc.add_trailing_axes(alpha, self.ndim)
u1 = tau_xx * misc.add_trailing_axes(alpha, 2 * self.ndim)
u2 = tau * alpha
u3 = logtau + logalpha
return [u0, u1, u2, u3]
def _compute_message_to_parent(self, index, m_child, u_X, u_alpha):
r"""
"""
if index == 0:
alpha = u_alpha[0]
m0 = m_child[0] * misc.add_trailing_axes(alpha, self.ndim)
m1 = m_child[1] * misc.add_trailing_axes(alpha, 2 * self.ndim)
m2 = m_child[2] * alpha
m3 = m_child[3]
return [m0, m1, m2, m3]
elif index == 1:
(tau_x, tau_xx, tau, logtau) = u_X
m0 = (
linalg.inner(m_child[0], tau_x, ndim=self.ndim)
+ linalg.inner(m_child[1], tau_xx, ndim=2*self.ndim)
+ m_child[2] * tau
)
m1 = m_child[3]
return [m0, m1]
else:
raise ValueError("Invalid parent index")
[docs]class WrapToGaussianWishart(Deterministic):
r"""
Wraps Gaussian and Wishart nodes into a Gaussian-Wishart node.
The following node combinations can be wrapped:
* Gaussian and Wishart
* Gaussian-gamma and Wishart
* Gaussian-Wishart and gamma
"""
[docs] def __init__(self, X, Lambda, ndim=1, **kwargs):
r"""
"""
# Just in case X is an array, convert it to a Gaussian node first.
try:
X = self._ensure_moments(X, GaussianMoments, ndim=ndim)
except Moments.NoConverterError:
pass
try:
# Try combo Gaussian-Gamma and Wishart
X = self._ensure_moments(X, GaussianGammaMoments, ndim=ndim)
except Moments.NoConverterError:
# Have to use Gaussian-Wishart and Gamma
X = self._ensure_moments(X, GaussianWishartMoments, ndim=ndim)
Lambda = self._ensure_moments(Lambda, GammaMoments, ndim=ndim)
shape = X.dims[0]
if Lambda.dims != ((), ()):
raise ValueError(
"Mean and precision have inconsistent shapes: {0} and {1}"
.format(
X.dims,
Lambda.dims
)
)
self.wishart = False
else:
# Gaussian-Gamma and Wishart
shape = X.dims[0]
Lambda = self._ensure_moments(Lambda, WishartMoments, ndim=ndim)
if Lambda.dims != (2 * shape, ()):
raise ValueError(
"Mean and precision have inconsistent shapes: {0} and {1}"
.format(
X.dims,
Lambda.dims
)
)
self.wishart = True
self.ndim = len(shape)
self._parent_moments = (
X._moments,
Lambda._moments,
)
self._moments = GaussianWishartMoments(shape)
super().__init__(X, Lambda, dims=self._moments.dims, **kwargs)
def _compute_moments(self, u_X_alpha, u_Lambda):
r"""
"""
if self.wishart:
alpha_x = u_X_alpha[0]
alpha_xx = u_X_alpha[1]
alpha = u_X_alpha[2]
log_alpha = u_X_alpha[3]
Lambda = u_Lambda[0]
logdet_Lambda = u_Lambda[1]
D = np.prod(self.dims[0])
u0 = linalg.mvdot(Lambda, alpha_x, ndim=self.ndim)
u1 = linalg.inner(Lambda, alpha_xx, ndim=2*self.ndim)
u2 = Lambda * misc.add_trailing_axes(alpha, 2*self.ndim)
u3 = logdet_Lambda + D * log_alpha
u = [u0, u1, u2, u3]
return u
else:
raise NotImplementedError()
def _compute_message_to_parent(self, index, m_child, u_X_alpha, u_Lambda):
r"""
...
Message from the child is :math:`[m_0, m_1, m_2, m_3]`:
.. math::
\alpha m_0^T \Lambda x + m_1 \alpha x^T \Lambda x
+ \mathrm{tr}(\alpha m_2 \Lambda) + m_3 (\log | \alpha \Lambda |)
In case of Gaussian-gamma and Wishart parents:
Message to the first parent (x, alpha):
.. math::
\tilde{m_0} &= \Lambda m_0
\\
\tilde{m_1} &= m_1 \Lambda
\\
\tilde{m_2} &= \mathrm{tr}(m_2 \Lambda)
\\
\tilde{m_3} &= m_3 \cdot D
Message to the second parent (Lambda):
.. math::
\tilde{m_0} &= \alpha (\frac{1}{2} m_0 x^T + \frac{1}{2} x m_0^T +
m_1 xx^T + m_2)
\\
\tilde{m_1} &= m_3
"""
if index == 0:
if self.wishart:
# Message to Gaussian-gamma (isotropic)
Lambda = u_Lambda[0]
D = np.prod(self.dims[0])
m0 = linalg.mvdot(Lambda, m_child[0], ndim=self.ndim)
m1 = Lambda * misc.add_trailing_axes(m_child[1], 2*self.ndim)
m2 = linalg.inner(Lambda, m_child[2], ndim=2*self.ndim)
m3 = D * m_child[3]
m = [m0, m1, m2, m3]
return m
else:
# Message to Gaussian-Wishart
raise NotImplementedError()
elif index == 1:
if self.wishart:
# Message to Wishart
alpha_x = u_X_alpha[0]
alpha_xx = u_X_alpha[1]
alpha = u_X_alpha[2]
m0 = (0.5*linalg.outer(alpha_x, m_child[0], ndim=self.ndim) +
0.5*linalg.outer(m_child[0], alpha_x, ndim=self.ndim) +
alpha_xx * misc.add_trailing_axes(m_child[1], 2*self.ndim) +
misc.add_trailing_axes(alpha, 2*self.ndim) * m_child[2])
m1 = m_child[3]
m = [m0, m1]
return m
else:
# Message to gamma (isotropic)
raise NotImplementedError()
else:
raise ValueError("Invalid parent index")
def reshape_gaussian_array(dims_from, dims_to, x0, x1):
r"""
Reshape the moments Gaussian array variable.
The plates remain unaffected.
"""
num_dims_from = len(dims_from)
num_dims_to = len(dims_to)
# Reshape the first moment / mean
num_plates_from = np.ndim(x0) - num_dims_from
plates_from = np.shape(x0)[:num_plates_from]
shape = (
plates_from
+ (1,)*(num_dims_to-num_dims_from) + dims_from
)
x0 = np.ones(dims_to) * np.reshape(x0, shape)
# Reshape the second moment / covariance / precision
num_plates_from = np.ndim(x1) - 2*num_dims_from
plates_from = np.shape(x1)[:num_plates_from]
shape = (
plates_from
+ (1,)*(num_dims_to-num_dims_from) + dims_from
+ (1,)*(num_dims_to-num_dims_from) + dims_from
)
x1 = np.ones(dims_to+dims_to) * np.reshape(x1, shape)
return (x0, x1)
def transpose_covariance(Cov, ndim=1):
r"""
Transpose the covariance array of Gaussian array variable.
That is, swap the last ndim axes with the ndim axes before them. This makes
transposing easy for array variables when the covariance is not a matrix but
a multidimensional array.
"""
axes_in = [Ellipsis] + list(range(2*ndim,0,-1))
axes_out = [Ellipsis] + list(range(ndim,0,-1)) + list(range(2*ndim,ndim,-1))
return np.einsum(Cov, axes_in, axes_out)
def left_rotate_covariance(Cov, R, axis=-1, ndim=1):
r"""
Rotate the covariance array of Gaussian array variable.
ndim is the number of axes for the Gaussian variable.
For vector variable, ndim=1 and covariance is a matrix.
"""
if not isinstance(axis, int):
raise ValueError("Axis must be an integer")
if axis < -ndim or axis >= ndim:
raise ValueError("Axis out of range")
# Force negative axis
if axis >= 0:
axis -= ndim
# Rotation from left
axes_R = [Ellipsis, ndim+abs(axis)+1, ndim+abs(axis)]
axes_Cov = [Ellipsis] + list(range(ndim+abs(axis),
0,
-1))
axes_out = [Ellipsis, ndim+abs(axis)+1] + list(range(ndim+abs(axis)-1,
0,
-1))
Cov = np.einsum(R, axes_R, Cov, axes_Cov, axes_out)
return Cov
def right_rotate_covariance(Cov, R, axis=-1, ndim=1):
r"""
Rotate the covariance array of Gaussian array variable.
ndim is the number of axes for the Gaussian variable.
For vector variable, ndim=1 and covariance is a matrix.
"""
if not isinstance(axis, int):
raise ValueError("Axis must be an integer")
if axis < -ndim or axis >= ndim:
raise ValueError("Axis out of range")
# Force negative axis
if axis >= 0:
axis -= ndim
# Rotation from right
axes_R = [Ellipsis, abs(axis)+1, abs(axis)]
axes_Cov = [Ellipsis] + list(range(abs(axis),
0,
-1))
axes_out = [Ellipsis, abs(axis)+1] + list(range(abs(axis)-1,
0,
-1))
Cov = np.einsum(R, axes_R, Cov, axes_Cov, axes_out)
return Cov
def rotate_covariance(Cov, R, axis=-1, ndim=1):
r"""
Rotate the covariance array of Gaussian array variable.
ndim is the number of axes for the Gaussian variable.
For vector variable, ndim=1 and covariance is a matrix.
"""
# Rotate from left and right
Cov = left_rotate_covariance(Cov, R, ndim=ndim, axis=axis)
Cov = right_rotate_covariance(Cov, R, ndim=ndim, axis=axis)
return Cov
def rotate_mean(mu, R, axis=-1, ndim=1):
r"""
Rotate the mean array of Gaussian array variable.
ndim is the number of axes for the Gaussian variable.
For vector variable, ndim=1 and mu is a vector.
"""
if not isinstance(axis, int):
raise ValueError("Axis must be an integer")
if axis < -ndim or axis >= ndim:
raise ValueError("Axis out of range")
# Force negative axis
if axis >= 0:
axis -= ndim
# Rotation from right
axes_R = [Ellipsis, abs(axis)+1, abs(axis)]
axes_mu = [Ellipsis] + list(range(abs(axis),
0,
-1))
axes_out = [Ellipsis, abs(axis)+1] + list(range(abs(axis)-1,
0,
-1))
mu = np.einsum(R, axes_R, mu, axes_mu, axes_out)
return mu
def array_to_vector(x, ndim=1):
if ndim == 0:
return x
shape_x = np.shape(x)
D = np.prod(shape_x[-ndim:])
return np.reshape(x, shape_x[:-ndim] + (D,))
def array_to_matrix(A, ndim=1):
if ndim == 0:
return A
shape_A = np.shape(A)
D = np.prod(shape_A[-ndim:])
return np.reshape(A, shape_A[:-2*ndim] + (D,D))
def vector_to_array(x, shape):
shape_x = np.shape(x)
return np.reshape(x, np.shape(x)[:-1] + tuple(shape))
def matrix_dot_vector(A, x, ndim=1):
if ndim < 0:
raise ValueError("ndim must be non-negative integer")
if ndim == 0:
return A*x
dims_x = np.shape(x)[-ndim:]
A = array_to_matrix(A, ndim=ndim)
x = array_to_vector(x, ndim=ndim)
y = np.einsum('...ik,...k->...i', A, x)
return vector_to_array(y, dims_x)