Merge pull request #3101 from stan-dev/sum-to-zero-constraint-ilr

Add sum_to_zero constraint, free, and check based on ILR transform
stan-dev · Aug 7, 2024 · 9b1f08b · 9b1f08b
2 parents f120c5e + 5ae902d
commit 9b1f08b
Show file tree

Hide file tree

Showing 10 changed files with 640 additions and 0 deletions.
diff --git a/stan/math/prim/constraint.hpp b/stan/math/prim/constraint.hpp
@@ -37,6 +37,8 @@
 #include <stan/math/prim/constraint/stochastic_column_free.hpp>
 #include <stan/math/prim/constraint/stochastic_row_constrain.hpp>
 #include <stan/math/prim/constraint/stochastic_row_free.hpp>
+#include <stan/math/prim/constraint/sum_to_zero_constrain.hpp>
+#include <stan/math/prim/constraint/sum_to_zero_free.hpp>
 #include <stan/math/prim/constraint/ub_constrain.hpp>
 #include <stan/math/prim/constraint/ub_free.hpp>
 #include <stan/math/prim/constraint/unit_vector_constrain.hpp>

diff --git a/stan/math/prim/constraint/sum_to_zero_constrain.hpp b/stan/math/prim/constraint/sum_to_zero_constrain.hpp
@@ -0,0 +1,166 @@
+#ifndef STAN_MATH_PRIM_CONSTRAINT_SUM_TO_ZERO_CONSTRAIN_HPP
+#define STAN_MATH_PRIM_CONSTRAINT_SUM_TO_ZERO_CONSTRAIN_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/prim/fun/to_ref.hpp>
+#include <stan/math/prim/fun/inv_sqrt.hpp>
+#include <stan/math/prim/functor/apply_vector_unary.hpp>
+#include <cmath>
+
+namespace stan {
+namespace math {
+
+/**
+ * Return a vector with sum zero corresponding to the specified
+ * free vector.
+ *
+ * The sum-to-zero transform is defined using a modified version of the
+ * the inverse of the isometric log ratio transform (ILR).
+ * See:
+ * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
+ * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
+ * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
+ * doi:10.1023/A:1023818214614, S2CID 122844634
+ *
+ * This implementation is closer to the description of the same using "pivot
+ * coordinates" in
+ * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
+ * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
+ * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ *
+ * This is a linear transform, with no Jacobian.
+ *
+ * @tparam Vec type of the vector
+ * @param y Free vector input of dimensionality K - 1.
+ * @return Zero-sum vector of dimensionality K.
+ */
+template <typename Vec, require_eigen_col_vector_t<Vec>* = nullptr,
+          require_not_st_var<Vec>* = nullptr>
+inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y) {
+  const auto N = y.size();
+
+  plain_type_t<Vec> z = Eigen::VectorXd::Zero(N + 1);
+  if (unlikely(N == 0)) {
+    return z;
+  }
+
+  auto&& y_ref = to_ref(y);
+
+  value_type_t<Vec> sum_w(0);
+  for (int i = N; i > 0; --i) {
+    double n = static_cast<double>(i);
+    auto w = y_ref(i - 1) * inv_sqrt(n * (n + 1));
+    sum_w += w;
+
+    z.coeffRef(i - 1) += sum_w;
+    z.coeffRef(i) -= w * n;
+  }
+
+  return z;
+}
+
+/**
+ * Return a vector with sum zero corresponding to the specified
+ * free vector.
+ *
+ * The sum-to-zero transform is defined using a modified version of the
+ * the inverse of the isometric log ratio transform (ILR).
+ * See:
+ * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
+ * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
+ * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
+ * doi:10.1023/A:1023818214614, S2CID 122844634
+ *
+ * This implementation is closer to the description of the same using "pivot
+ * coordinates" in
+ * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
+ * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
+ * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ *
+ * This is a linear transform, with no Jacobian.
+ *
+ * @tparam Vec type of the vector
+ * @param y Free vector input of dimensionality K - 1.
+ * @param lp unused
+ * @return Zero-sum vector of dimensionality K.
+ */
+template <typename Vec, require_eigen_col_vector_t<Vec>* = nullptr,
+          require_not_st_var<Vec>* = nullptr>
+inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y,
+                                               value_type_t<Vec>& lp) {
+  return sum_to_zero_constrain(y);
+}
+
+/**
+ * Return a vector with sum zero corresponding to the specified
+ * free vector.
+ *
+ * The sum-to-zero transform is defined using a modified version of
+ * the inverse of the isometric log ratio transform (ILR).
+ * See:
+ * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
+ * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
+ * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
+ * doi:10.1023/A:1023818214614, S2CID 122844634
+ *
+ * This implementation is closer to the description of the same using "pivot
+ * coordinates" in
+ * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
+ * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
+ * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ *
+ * This is a linear transform, with no Jacobian.
+ *
+ * @tparam Jacobian unused
+ * @tparam Vec A type inheriting from `Eigen::DenseBase` or a `var_value` with
+ *  inner type inheriting from `Eigen::DenseBase` with compile time dynamic rows
+ *  and 1 column
+ * @param[in] y free vector
+ * @param[in, out] lp unused
+ * @return Zero-sum vector of dimensionality one greater than `y`
+ */
+template <bool Jacobian, typename Vec, require_not_std_vector_t<Vec>* = nullptr>
+inline plain_type_t<Vec> sum_to_zero_constrain(const Vec& y,
+                                               return_type_t<Vec>& lp) {
+  return sum_to_zero_constrain(y);
+}
+
+/**
+ * Return a vector with sum zero corresponding to the specified
+ * free vector.
+ *
+ * The sum-to-zero transform is defined using a modified version of
+ * the inverse of the isometric log ratio transform (ILR).
+ * See:
+ * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
+ * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
+ * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
+ * doi:10.1023/A:1023818214614, S2CID 122844634
+ *
+ * This implementation is closer to the description of the same using "pivot
+ * coordinates" in
+ * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
+ * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
+ * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ *
+ * This is a linear transform, with no Jacobian.
+ *
+ * @tparam Jacobian unused
+ * @tparam Vec A standard vector with inner type inheriting from
+ * `Eigen::DenseBase` or a `var_value` with inner type inheriting from
+ * `Eigen::DenseBase` with compile time dynamic rows and 1 column
+ * @param[in] y free vector
+ * @param[in, out] lp unused
+ * @return Zero-sum vectors of dimensionality one greater than `y`
+ */
+template <bool Jacobian, typename T, require_std_vector_t<T>* = nullptr>
+inline auto sum_to_zero_constrain(const T& y, return_type_t<T>& lp) {
+  return apply_vector_unary<T>::apply(
+      y, [](auto&& v) { return sum_to_zero_constrain(v); });
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/prim/constraint/sum_to_zero_free.hpp b/stan/math/prim/constraint/sum_to_zero_free.hpp
@@ -0,0 +1,80 @@
+#ifndef STAN_MATH_PRIM_CONSTRAINT_SUM_TO_ZERO_FREE_HPP
+#define STAN_MATH_PRIM_CONSTRAINT_SUM_TO_ZERO_FREE_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/prim/fun/to_ref.hpp>
+#include <stan/math/prim/fun/sqrt.hpp>
+#include <stan/math/prim/functor/apply_vector_unary.hpp>
+#include <cmath>
+
+namespace stan {
+namespace math {
+
+/**
+ * Return an unconstrained vector.
+ *
+ * The sum-to-zero transform is defined using a modified version of the
+ * isometric log ratio transform (ILR).
+ * See:
+ * Egozcue, Juan Jose; Pawlowsky-Glahn, Vera; Mateu-Figueras, Gloria;
+ * Barcelo-Vidal, Carles (2003), "Isometric logratio transformations for
+ * compositional data analysis", Mathematical Geology, 35 (3): 279–300,
+ * doi:10.1023/A:1023818214614, S2CID 122844634
+ *
+ * This implementation is closer to the description of the same using "pivot
+ * coordinates" in
+ * Filzmoser, P., Hron, K., Templ, M. (2018). Geometrical Properties of
+ * Compositional Data. In: Applied Compositional Data Analysis. Springer Series
+ * in Statistics. Springer, Cham. https://doi.org/10.1007/978-3-319-96422-5_3
+ *
+ * @tparam ColVec a column vector type
+ * @param z Vector of length K.
+ * @return Free vector of length (K-1).
+ * @throw std::domain_error if z does not sum to zero
+ */
+template <typename Vec, require_eigen_vector_t<Vec>* = nullptr>
+inline plain_type_t<Vec> sum_to_zero_free(const Vec& z) {
+  const auto& z_ref = to_ref(z);
+  check_sum_to_zero("stan::math::sum_to_zero_free", "sum_to_zero variable",
+                    z_ref);
+
+  const auto N = z.size() - 1;
+
+  plain_type_t<Vec> y = Eigen::VectorXd::Zero(N);
+  if (unlikely(N == 0)) {
+    return y;
+  }
+
+  y.coeffRef(N - 1) = -z_ref(N) * sqrt(N * (N + 1)) / N;
+
+  value_type_t<Vec> sum_w(0);
+
+  for (int i = N - 2; i >= 0; --i) {
+    double n = static_cast<double>(i + 1);
+    auto w = y(i + 1) / sqrt((n + 1) * (n + 2));
+    sum_w += w;
+    y.coeffRef(i) = (sum_w - z_ref(i + 1)) * sqrt(n * (n + 1)) / n;
+  }
+
+  return y;
+}
+
+/**
+ * Overload of `sum_to_zero_free()` to untransform each Eigen vector
+ * in a standard vector.
+ * @tparam T A standard vector with with a `value_type` which inherits from
+ *  `Eigen::MatrixBase` with compile time rows or columns equal to 1.
+ * @param z The standard vector to untransform.
+ */
+template <typename T, require_std_vector_t<T>* = nullptr>
+auto sum_to_zero_free(const T& z) {
+  return apply_vector_unary<T>::apply(
+      z, [](auto&& v) { return sum_to_zero_free(v); });
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/prim/err.hpp b/stan/math/prim/err.hpp
@@ -41,6 +41,7 @@
 #include <stan/math/prim/err/check_std_vector_index.hpp>
 #include <stan/math/prim/err/check_stochastic_column.hpp>
 #include <stan/math/prim/err/check_stochastic_row.hpp>
+#include <stan/math/prim/err/check_sum_to_zero.hpp>
 #include <stan/math/prim/err/check_symmetric.hpp>
 #include <stan/math/prim/err/check_unit_vector.hpp>
 #include <stan/math/prim/err/check_vector.hpp>

diff --git a/stan/math/prim/err/check_sum_to_zero.hpp b/stan/math/prim/err/check_sum_to_zero.hpp
@@ -0,0 +1,72 @@
+#ifndef STAN_MATH_PRIM_ERR_CHECK_SUM_TO_ZERO_HPP
+#define STAN_MATH_PRIM_ERR_CHECK_SUM_TO_ZERO_HPP
+
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err/constraint_tolerance.hpp>
+#include <stan/math/prim/err/make_iter_name.hpp>
+#include <stan/math/prim/err/throw_domain_error.hpp>
+#include <stan/math/prim/fun/to_ref.hpp>
+#include <stan/math/prim/fun/value_of_rec.hpp>
+#include <sstream>
+#include <string>
+
+namespace stan {
+namespace math {
+
+/**
+ * Throw an exception if the specified vector does not sum to 0.
+ * This function tests that the sum is within the tolerance specified by
+ * `CONSTRAINT_TOLERANCE`.
+ * This function only accepts Eigen vectors, statically
+ * typed vectors, not general matrices with 1 column.
+ * @tparam T A type inheriting from `Eigen::EigenBase`
+ * @param function Function name (for error messages)
+ * @param name Variable name (for error messages)
+ * @param theta Vector to test
+ * @throw `std::invalid_argument` if `theta` is a 0-vector
+ * @throw `std::domain_error` if the vector does not sum to zero
+ */
+template <typename T, require_matrix_t<T>* = nullptr>
+void check_sum_to_zero(const char* function, const char* name, const T& theta) {
+  using std::fabs;
+  // the size-zero case is technically a valid sum-to-zero vector,
+  // but it cannot be unconstrained to anything
+  check_nonzero_size(function, name, theta);
+  auto&& theta_ref = to_ref(value_of_rec(theta));
+  if (unlikely(!(fabs(theta_ref.sum()) <= CONSTRAINT_TOLERANCE))) {
+    [&]() STAN_COLD_PATH {
+      std::stringstream msg;
+      scalar_type_t<T> sum = theta_ref.sum();
+      msg << "does not sum to zero.";
+      msg.precision(10);
+      msg << " sum(" << name << ") = " << sum << ", but should be ";
+      std::string msg_str(msg.str());
+      throw_domain_error(function, name, 0.0, msg_str.c_str());
+    }();
+  }
+}
+
+/**
+ * Throw an exception if any vector in a standard vector does not sum to 0.
+ * This function tests that the sum is within the tolerance specified by
+ * `CONSTRAINT_TOLERANCE`.
+ * @tparam T A standard vector with inner type inheriting from
+ * `Eigen::EigenBase`
+ * @param function Function name (for error messages)
+ * @param name Variable name (for error messages)
+ * @param theta Vector to test.
+ * @throw `std::invalid_argument` if `theta` is a 0-vector
+ * @throw `std::domain_error` if the vector does not sum to zero
+ */
+template <typename T, require_std_vector_t<T>* = nullptr>
+void check_sum_to_zero(const char* function, const char* name, const T& theta) {
+  for (size_t i = 0; i < theta.size(); ++i) {
+    check_sum_to_zero(function, internal::make_iter_name(name, i).c_str(),
+                      theta[i]);
+  }
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/rev/constraint.hpp b/stan/math/rev/constraint.hpp
@@ -17,6 +17,7 @@
 #include <stan/math/rev/constraint/simplex_constrain.hpp>
 #include <stan/math/rev/constraint/stochastic_column_constrain.hpp>
 #include <stan/math/rev/constraint/stochastic_row_constrain.hpp>
+#include <stan/math/rev/constraint/sum_to_zero_constrain.hpp>
 #include <stan/math/rev/constraint/unit_vector_constrain.hpp>
 #include <stan/math/rev/constraint/ub_constrain.hpp>