diff --git a/edward/__init__.py b/edward/__init__.py
index 55fa17df8..fb5cb5d14 100644
--- a/edward/__init__.py
+++ b/edward/__init__.py
@@ -16,7 +16,7 @@
     KLpq, KLqp, ReparameterizationKLqp, ReparameterizationKLKLqp,
     ReparameterizationEntropyKLqp, ReplicaExchangeMC, ScoreKLqp, ScoreKLKLqp,
     ScoreEntropyKLqp, ScoreRBKLqp, WakeSleep, GANInference, BiGANInference,
-    WGANInference, ImplicitKLqp, MAP, Laplace, complete_conditional, Gibbs)
+    WGANInference, ImplicitKLqp, MAP, Laplace, complete_conditional, Gibbs, BBAlpha)
 from edward.models import RandomVariable
 from edward.util import (
     check_data, check_latent_vars, copy, dot,
@@ -45,6 +45,7 @@
     'MetropolisHastings',
     'SGLD',
     'SGHMC',
+    'BBAlpha',
     'KLpq',
     'KLqp',
     'ReparameterizationKLqp',
diff --git a/edward/inferences/__init__.py b/edward/inferences/__init__.py
index ed1600b93..c5fa39b96 100644
--- a/edward/inferences/__init__.py
+++ b/edward/inferences/__init__.py
@@ -4,6 +4,7 @@
 from __future__ import division
 from __future__ import print_function
 
+from edward.inferences.bb_alpha import *
 from edward.inferences.bigan_inference import *
 from edward.inferences.conjugacy import *
 from edward.inferences.gan_inference import *
@@ -27,6 +28,7 @@
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
+    'BBAlpha',
     'BiGANInference',
     'complete_conditional',
     'GANInference',
diff --git a/edward/inferences/bb_alpha.py b/edward/inferences/bb_alpha.py
new file mode 100644
index 000000000..268826d53
--- /dev/null
+++ b/edward/inferences/bb_alpha.py
@@ -0,0 +1,168 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import six
+import tensorflow as tf
+
+from edward.inferences.variational_inference import VariationalInference
+from edward.models import RandomVariable
+from edward.util import copy, get_descendants
+
+try:
+  from edward.models import Normal
+  from tensorflow.contrib.distributions import kl_divergence
+except Exception as e:
+  raise ImportError("{0}. Your TensorFlow version is not supported.".format(e))
+
+
+class BBAlpha(VariationalInference):
+  """Variational inference with the Black Box alpha Divergence
+  It minimizes the reparameterized ELBO
+
+  $\\text{KL}( q(z; \lambda) \| p(z \mid x) )
+  - \frac{1}{\alpha} \sum_n \log \mathbb{E}_{q(z; \lambda)} [f_n(z)^{\alpha}]$
+  This class minimizes the objective by automatically selecting from
+  black box inference techniques. Alpha close to zero is equivalent to KLqp
+  minimization.
+  """
+
+  def __init__(self, *args, **kwargs):
+    super(BBAlpha, self).__init__(*args, **kwargs)
+
+  def initialize(self, n_samples=1, alpha=0.5, kl_scaling=None, *args, **kwargs):
+    """Initialize inference algorithm. It initializes hyperparameters
+    and builds ops for the algorithm's computation graph.
+    Args:
+        n_samples: int, optional.
+            Number of samples from variational model for calculating
+            stochastic gradients.
+        alpha: float, optional.
+            Black box alpha divergence coefficient.
+        kl_scaling: dict of RandomVariable to tf.Tensor.
+            Provides option to scale terms when using ELBO with KL divergence.
+            If the KL divergence terms are
+    """
+    if kl_scaling is None:
+      kl_scaling = {}
+
+    self.n_samples = n_samples
+    self.alpha = alpha
+    self.kl_scaling = kl_scaling
+    return super(BBAlpha, self).initialize(*args, **kwargs)
+
+  def build_loss_and_gradients(self, var_list):
+    """Wrapper for the `BB_alpha` loss function.
+    $-\\text{ELBO} =
+    -\mathbb{E}_{q(z; \lambda)} [ \log p(x, z) - \log q(z; \lambda) ]
+    - \frac{1}{\alpha} \sum_n \log \mathbb{E}_{q(z; \lambda)} [ f_n(\z)^{\alpha} ]$
+    BB_alpha supports reparameterization gradients [@kingma2014auto]
+    of the loss function.
+    If the KL divergence between the variational model and the prior
+    is tractable, then the loss function can be written as
+    $\\text{KL}( q(z; \lambda) \| p(z) )
+    - \frac{1}{\alpha} \sum_n \log \mathbb{E}_{q(z; \lambda)} [ f_n(\z)^{\alpha} ]$
+    where the KL term is computed analytically [@kingma2014auto]. We
+    compute this automatically when $p(z)$ and $q(z; \lambda)$ are
+    Normal.
+
+    Notes
+        + If the model is not reparameterizable, it returns a
+        NotImplementedError.
+    """
+    is_reparameterizable = all([
+      rv.reparameterization_type ==
+      tf.contrib.distributions.FULLY_REPARAMETERIZED
+      for rv in six.itervalues(self.latent_vars)])
+    if is_reparameterizable:
+      return build_reparam_bb_alpha_loss_and_gradients(self, var_list, self.alpha)
+    else:
+      raise NotImplementedError(
+        "Black box alpha divergence inference only works with reparameterizable"
+        " models.")
+
+
+def build_reparam_bb_alpha_loss_and_gradients(inference, var_list, alpha):
+  """Build loss function. Its automatic differentiation
+  is a stochastic gradient of
+  $-\\text{ELBO} =
+    -\mathbb{E}_{q(z; \lambda)} [ \log p(x, z) - \log q(z; \lambda) ]
+    - \frac{1}{\alpha} \sum_n \log \mathbb{E}_{q(z; \lambda)} [ f_n(\z)^{\alpha} ]$
+  based on the reparameterization trick [@kingma2014auto].
+  Computed by sampling from $q(z;\lambda)$ and evaluating the
+  expectation using Monte Carlo sampling.
+  """
+  p_log_prob = [0.0] * inference.n_samples
+  q_log_prob = [0.0] * inference.n_samples
+  p_log_lik = [0.0] * inference.n_samples
+
+  base_scope = tf.get_default_graph().unique_name("inference") + '/'
+  for s in range(inference.n_samples):
+    # Form dictionary in order to replace conditioning on prior or
+    # observed variable with conditioning on a specific value.
+    scope = base_scope + tf.get_default_graph().unique_name("sample")
+    dict_swap = {}
+    for x, qx in six.iteritems(inference.data):
+      if isinstance(x, RandomVariable):
+        if isinstance(qx, RandomVariable):
+          qx_copy = copy(qx, scope=scope)
+          dict_swap[x] = qx_copy.value()
+        else:
+          dict_swap[x] = qx
+
+    for z, qz in six.iteritems(inference.latent_vars):
+      # Copy q(z) to obtain new set of posterior samples.
+      qz_copy = copy(qz, scope=scope)
+      dict_swap[z] = qz_copy.value()
+      q_log_prob[s] += tf.reduce_sum(
+        inference.scale.get(z, 1.0) * qz_copy.log_prob(dict_swap[z]))
+
+    for z in six.iterkeys(inference.latent_vars):
+      z_copy = copy(z, dict_swap, scope=scope)
+      p_log_prob[s] += tf.reduce_sum(
+        inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))
+
+    for x in six.iterkeys(inference.data):
+      if isinstance(x, RandomVariable):
+        x_copy = copy(x, dict_swap, scope=scope)
+        p_log_prob[s] += tf.reduce_sum(
+          inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
+
+    for x in six.iterkeys(inference.data):
+      if isinstance(x, RandomVariable):
+        x_copy = copy(x, dict_swap, scope=scope)
+        p_log_lik[s] += tf.reduce_sum(
+          inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
+
+  p_log_prior = [p1 - p2 for p1, p2 in zip(p_log_prob, p_log_lik)]
+  p_log_prob = tf.reduce_mean(p_log_prob)
+  p_log_prior = tf.reduce_mean(p_log_prior)
+  q_log_prob = tf.reduce_mean(q_log_prob)
+
+  if np.abs(alpha - 0.0) < 10e-4:
+    loss = p_log_prob - q_log_prob
+
+  else:
+    p_log_lik = tf.stack(p_log_lik)
+    p_log_lik = p_log_lik * alpha
+    p_log_lik_max = tf.reduce_max(p_log_lik, 0)
+    p_log_lik = tf.log(
+      tf.maximum(1e-9,
+                 tf.reduce_mean(tf.exp(p_log_lik - p_log_lik_max), 0)))
+    p_log_lik = (p_log_lik + p_log_lik_max)
+    p_log_lik = tf.reduce_sum(p_log_lik) / alpha
+
+    loss = - q_log_prob + p_log_prior + p_log_lik
+
+  loss = -loss
+
+  if inference.logging:
+    tf.summary.scalar("loss/p_log_prob", p_log_prob,
+                      collections=[inference._summary_key])
+    tf.summary.scalar("loss/q_log_prob", q_log_prob,
+                      collections=[inference._summary_key])
+
+  grads = tf.gradients(loss, var_list)
+  grads_and_vars = list(zip(grads, var_list))
+  return loss, grads_and_vars
diff --git a/tests/inferences/bb_alpha_test.py b/tests/inferences/bb_alpha_test.py
new file mode 100644
index 000000000..ec4d4fcb1
--- /dev/null
+++ b/tests/inferences/bb_alpha_test.py
@@ -0,0 +1,70 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import edward as ed
+import numpy as np
+import tensorflow as tf
+
+from edward.models import Bernoulli, Normal
+
+
+class test_bb_alpha_class(tf.test.TestCase):
+
+  def _test_normal_normal(self, *args, **kwargs):
+    with self.test_session() as sess:
+      x_data = np.array([0.0] * 50, dtype=np.float32)
+
+      mu = Normal(loc=0.0, scale=1.0)
+      x = Normal(loc=mu, scale=1.0, sample_shape=50)
+
+      qmu_loc = tf.Variable(tf.random_normal([]))
+      qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
+      qmu = Normal(loc=qmu_loc, scale=qmu_scale)
+
+      # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
+      inference = ed.BBAlpha({mu: qmu}, data={x: x_data})
+      inference.run(*args, **kwargs)
+
+      self.assertAllClose(qmu.mean().eval(), 0, rtol=0.1, atol=0.6)
+      self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
+                          rtol=0.15, atol=0.5)
+
+      variables = tf.get_collection(
+        tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer')
+      old_t, old_variables = sess.run([inference.t, variables])
+      self.assertEqual(old_t, inference.n_iter)
+      sess.run(inference.reset)
+      new_t, new_variables = sess.run([inference.t, variables])
+      self.assertEqual(new_t, 0)
+      self.assertNotEqual(old_variables, new_variables)
+
+  def _test_model_parameter(self, *args, **kwargs):
+    with self.test_session() as sess:
+      x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])
+
+      p = tf.sigmoid(tf.Variable(0.5))
+      x = Bernoulli(probs=p, sample_shape=10)
+
+      inference = ed.BBAlpha({}, data={x: x_data})
+      inference.run(*args, **kwargs)
+
+      self.assertAllClose(p.eval(), 0.2, rtol=5e-2, atol=5e-2)
+
+  def test_bb_alpha(self):
+    # model parameter - special case - KLqp:
+    self._test_normal_normal(n_samples=10, alpha=0.01)
+
+    # model parameter - special case - KLqp:
+    self._test_model_parameter(n_samples=10, alpha=0.01)
+    # model parameter - case where alpha = 0.5:
+    self._test_model_parameter(n_samples=10, alpha=0.5)
+    # model parameter - case where alpha > 0:
+    self._test_model_parameter(n_samples=10, alpha=5.0)
+    # model parameter - case where alpha < 0:
+    self._test_model_parameter(n_samples=10, alpha=-5.0)
+
+
+if __name__ == '__main__':
+  ed.set_seed(42)
+  tf.test.main()