From 0920b69af2700638c9faaf764634e442ea589129 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Tue, 27 Jul 2021 20:23:32 -0700
Subject: [PATCH 01/29] draft MvNormal

---
 Project.toml                  |   4 +
 src/parameterized/mvnormal.jl | 172 +++++++++++++++++++++++++++-------
 2 files changed, 143 insertions(+), 33 deletions(-)

diff --git a/Project.toml b/Project.toml
index 56cb70f2..68b45cee 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,6 +14,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 InfiniteArrays = "4858937d-0d70-526a-a4dd-2d5cb5dd786c"
 KeywordCalls = "4d827475-d3e4-43d6-abe3-9688362ede9f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900"
@@ -24,8 +25,11 @@ PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SimpleTraits = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+StrideArrays = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b"
+StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
 TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff"
 Tricks = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775"
 Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index ce7cb090..a2e0d6f4 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -7,57 +7,163 @@ export MvNormal
 using Random
 import Base
 using Tullio
+using LoopVectorization
 
+@parameterized MvNormal(μ,Σ)
 
-struct MvNormal{N, T, I, J} <: ParameterizedMeasure{N}
-    par::NamedTuple{N, T}
-end
+@kwstruct MvNormal(Σ)
 
+@kwstruct MvNormal(μ,Σ)
 
-function MvNormal(nt::NamedTuple{N,T}) where {N,T}
-    I,J = mvnormaldims(nt)
+@kwstruct MvNormal(L)
 
-    cache = Vector{Float64}(undef, max(I,J))
-    MvNormal{N,T,I,J}(cache, nt)
-end
+@kwstruct MvNormal(μ,L)
 
-function Base.size(d::MvNormal{N, T, I, J}) where {N,T,I,J}
-    return (I,)
-end
+@kwstruct MvNormal(μ,Ω)
 
-mvnormaldims(nt::NamedTuple{(:A, :b)}) = size(nt.A)
+@kwstruct MvNormal(Ω)
 
-function MeasureTheory.basemeasure(μ::MvNormal{N, T, I,I}) where {N, T, I}
-    return (1 / sqrt2π)^I * Lebesgue(ℝ)^I
-end
 
-sampletype(d::MvNormal{N, T, I, J}) where {N,T,I,J} = Vector{Float64}
+function logdensity(d::MvNormal{(:L2,)}, y::AbstractVector{T}) where {T}
+    L = d.L2
+    k = first(size(L))
+    @assert length(y) == k
+
+    # if z = Lᵗy, the logdensity depends on `det(L)` and `z ⋅ z`. So we find `z`
+    z_dot_z = zero(T)
+    @simd for j ∈ 1:k
+        zj = zero(T)
+        for i ∈ j:k
+            @inbounds zj += L[i,j] * y[i]
+        end
+        z_dot_z += zj ^ 2
+    end
 
-MvNormal(; kwargs...) = begin
-    MvNormal((; kwargs...))
+    -k/2 * log2π + logdet_pos(L) - z_dot_z/2
 end
 
+@kwstruct MvNormal(L2)
 
+using StrideArrays
+using StaticArrays
 
-function Random.rand!(rng::AbstractRNG, d::MvNormal{(:A, :b),T,I,J}, x::AbstractArray) where {T,I,J}
-    z = getcache(d, J)
-    rand!(rng, Normal()^J, z)
-    copyto!(x, d.b)
-    mul!(x, d.A, z, 1.0, 1.0)
-    return x
+
+@inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T
+  abs_det = zero(real(T))
+  @turbo for i in 1:size(A,1)
+    diag_i = A.data[i,i]
+    abs_det += log(diag_i)
+  end
+  return abs_det
 end
 
-function logdensity(d::MvNormal{(:A,:b)}, x)
-    cache = getcache(d, size(d))
-    z = d.A \ (x - d.b)
-    return logdensity(MvNormal(), z) - logabsdet(d.A)
+function logdensity(d::MvNormal{(:μ,:L)}, y::AbstractArray{T}) where {T}
+    x = StrideArray{T}(undef, size(y))
+    @inbounds for j in eachindex(y)
+        x[j] = y[j] - d.μ[j]
+    end
+    GC.@preserve x logdensity(MvNormal(L=d.L), x)
 end
 
-≪(::MvNormal, ::Lebesgue{ℝ}) = true
+@inline function logdensity(d::MvNormal{(:L,)}, y::AbstractVector{T}) where {T}
+    L = d.L
+    k = length(y)
+    z = StrideArray{T}(undef, (k,))
+
+    # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+    z_dot_z = zero(T)
+    @inbounds for i ∈ 1:k
+        tmp = zero(T)
+        for j in 1:(i-1)
+            tmp += L[i,j] * z[j]
+        end
+        zi = (y[i] - tmp) / L[i,i]
+        z_dot_z += zi ^ 2
+        z[i] = zi
+    end
+
+    -k/2 * log2π - logdet_pos(L) - z_dot_z/2
+end
 
-function logdensity(d::MvNormal{(:Σ⁻¹,)}, x)
-    @tullio ℓ = -0.5 * x[i] * d.Σ⁻¹[i,j] * x[j]
-    return ℓ
+@generated function logdensity(d::MvNormal{(:L,)}, y::SizedVector{K,T}) where {K,T}
+  log2π = log(big(2)*π)
+  if K > 16
+    quote
+      L = parent(d.L)
+      k = StaticInt($K)
+
+      z = StrideArray{$T}(undef, (k,))
+
+      # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+      z_dot_z = zero($T)
+      @inbounds @fastmath for i ∈ 1:k
+        tmp = zero($T)
+        for j in 1:(i-1)
+          tmp += L[i,j] * z[j]
+        end
+        zi = (y[i] - tmp) / L[i,i]
+        z_dot_z += zi ^ 2
+        z[i] = zi
+      end
+
+      $(T(-K/2 * log2π)) - logdet_pos(L) - z_dot_z/2
+    end
+  else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
+    quote
+      L = d.L
+      # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+      z_dot_z = zero($T)
+
+      @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
+        Base.Cartesian.@nexprs $K i -> begin
+          tmp_i = zero($T)
+          Base.Cartesian.@nexprs i-1 j -> begin
+            @fastmath tmp_i += L[i,j] * z_j
+          end
+          @fastmath z_i = (y[i] - tmp_i) / L[i,i]
+          @fastmath z_dot_z += z_i ^ 2
+        end
+      end
+
+      $(T(-K/2 * log2π)) - logdet_pos(L) - z_dot_z/2
+    end
+  end
 end
 
-mvnormaldims(nt::NamedTuple{(:Σ⁻¹,)}) = size(nt.Σ⁻¹)
+# S = @MMatrix(randn(10,15)) |> x -> Symmetric(x * x',:L);
+# d = (L = cholesky(S).L,)
+# y = SizedVector{10}(randn(10));
+
+# z = StrideArray{Float64}(undef, (StaticInt(3),))
+# b = z.data
+
+# function MeasureTheory.basemeasure(μ::MvNormal{N, T, I,I}) where {N, T, I}
+#     return (1 / sqrt2π)^I * Lebesgue(ℝ)^I
+# end
+
+# sampletype(d::MvNormal{N, T, I, J}) where {N,T,I,J} = Vector{Float64}
+
+
+
+# function Random.rand!(rng::AbstractRNG, d::MvNormal{(:A, :b),T,I,J}, x::AbstractArray) where {T,I,J}
+#     z = getcache(d, J)
+#     rand!(rng, Normal()^J, z)
+#     copyto!(x, d.b)
+#     mul!(x, d.A, z, 1.0, 1.0)
+#     return x
+# end
+
+# function logdensity(d::MvNormal{(:A,:b)}, x)
+#     cache = getcache(d, size(d))
+#     z = d.A \ (x - d.b)
+#     return logdensity(MvNormal(), z) - logabsdet(d.A)
+# end
+
+# ≪(::MvNormal, ::Lebesgue{ℝ}) = true
+
+# function logdensity(d::MvNormal{(:Σ⁻¹,)}, x)
+#     @tullio ℓ = -0.5 * x[i] * d.Σ⁻¹[i,j] * x[j]
+#     return ℓ
+# end
+
+# mvnormaldims(nt::NamedTuple{(:Σ⁻¹,)}) = size(nt.Σ⁻¹)

From 0541c76422dda07c19f56714c8ba8bda0e05699c Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Wed, 28 Jul 2021 19:29:56 -0700
Subject: [PATCH 02/29] make sizes static

---
 src/MeasureTheory.jl           |   4 +
 src/parameterized/mvnormal.jl  | 143 +++++++++++++++------------------
 src/transforms/corrcholesky.jl |   6 +-
 3 files changed, 74 insertions(+), 79 deletions(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index 84018674..e914bc88 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -1,5 +1,8 @@
 module MeasureTheory
 
+using LoopVectorization: ArrayInterface
+using InlineTest
+
 using Random
 
 using ConcreteStructs
@@ -22,6 +25,7 @@ using DynamicIterators
 using KeywordCalls
 using ConstructionBase
 using Accessors
+using ArrayInterface
 
 const ∞ = InfiniteArrays.∞
 
diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index a2e0d6f4..0f9fc6ba 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -9,17 +9,17 @@ import Base
 using Tullio
 using LoopVectorization
 
-@parameterized MvNormal(μ,Σ)
+@parameterized MvNormal(μ, Σ)
 
 @kwstruct MvNormal(Σ)
 
-@kwstruct MvNormal(μ,Σ)
+@kwstruct MvNormal(μ, Σ)
 
 @kwstruct MvNormal(L)
 
-@kwstruct MvNormal(μ,L)
+@kwstruct MvNormal(μ, L)
 
-@kwstruct MvNormal(μ,Ω)
+@kwstruct MvNormal(μ, Ω)
 
 @kwstruct MvNormal(Ω)
 
@@ -34,12 +34,12 @@ function logdensity(d::MvNormal{(:L2,)}, y::AbstractVector{T}) where {T}
     @simd for j ∈ 1:k
         zj = zero(T)
         for i ∈ j:k
-            @inbounds zj += L[i,j] * y[i]
+            @inbounds zj += L[i, j] * y[i]
         end
-        z_dot_z += zj ^ 2
+        z_dot_z += zj^2
     end
 
-    -k/2 * log2π + logdet_pos(L) - z_dot_z/2
+    -k / 2 * log2π + logdet_pos(L) - z_dot_z / 2
 end
 
 @kwstruct MvNormal(L2)
@@ -48,86 +48,66 @@ using StrideArrays
 using StaticArrays
 
 
-@inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T
-  abs_det = zero(real(T))
-  @turbo for i in 1:size(A,1)
-    diag_i = A.data[i,i]
-    abs_det += log(diag_i)
-  end
-  return abs_det
+@inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where {T}
+    result = zero(real(T))
+    @turbo for i = 1:ArrayInterface.size(A, 1)
+        diag_i = A.data[i, i]
+        result += log(diag_i)
+    end
+    return result
 end
 
-function logdensity(d::MvNormal{(:μ,:L)}, y::AbstractArray{T}) where {T}
+function logdensity(d::MvNormal{(:μ, :L)}, y::AbstractArray{T}) where {T}
     x = StrideArray{T}(undef, size(y))
     @inbounds for j in eachindex(y)
         x[j] = y[j] - d.μ[j]
     end
-    GC.@preserve x logdensity(MvNormal(L=d.L), x)
-end
-
-@inline function logdensity(d::MvNormal{(:L,)}, y::AbstractVector{T}) where {T}
-    L = d.L
-    k = length(y)
-    z = StrideArray{T}(undef, (k,))
-
-    # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
-    z_dot_z = zero(T)
-    @inbounds for i ∈ 1:k
-        tmp = zero(T)
-        for j in 1:(i-1)
-            tmp += L[i,j] * z[j]
-        end
-        zi = (y[i] - tmp) / L[i,i]
-        z_dot_z += zi ^ 2
-        z[i] = zi
-    end
-
-    -k/2 * log2π - logdet_pos(L) - z_dot_z/2
+    GC.@preserve x logdensity(MvNormal(L = d.L), x)
 end
-
-@generated function logdensity(d::MvNormal{(:L,)}, y::SizedVector{K,T}) where {K,T}
-  log2π = log(big(2)*π)
-  if K > 16
-    quote
-      L = parent(d.L)
-      k = StaticInt($K)
-
-      z = StrideArray{$T}(undef, (k,))
-
-      # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
-      z_dot_z = zero($T)
-      @inbounds @fastmath for i ∈ 1:k
-        tmp = zero($T)
-        for j in 1:(i-1)
-          tmp += L[i,j] * z[j]
+using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
+
+@generated function logdensity(d::MvNormal{(:L,), Tuple{LowerTriangular{T2, S}}}, y::AbstractArray{T}) where {k,T,T2, S<:SizedMatrix{k,k}}
+    log2π = log(big(2) * π)
+    if k > 16
+        quote
+            # k = StaticInt($K)
+
+            z = StrideArray{$T}(undef, (k,))
+
+            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            z_dot_z = zero($T)
+            @inbounds @fastmath for i ∈ 1:k
+                tmp = zero($T)
+                for j = 1:(i-1)
+                    tmp += L[i, j] * z[j]
+                end
+                zi = (y[i] - tmp) / L[i, i]
+                z_dot_z += zi^2
+                z[i] = zi
+            end
+
+            $(T(-k / 2 * log2π)) + logdet_pos(L) - z_dot_z / 2
         end
-        zi = (y[i] - tmp) / L[i,i]
-        z_dot_z += zi ^ 2
-        z[i] = zi
-      end
-
-      $(T(-K/2 * log2π)) - logdet_pos(L) - z_dot_z/2
-    end
-  else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
-    quote
-      L = d.L
-      # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
-      z_dot_z = zero($T)
-
-      @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
-        Base.Cartesian.@nexprs $K i -> begin
-          tmp_i = zero($T)
-          Base.Cartesian.@nexprs i-1 j -> begin
-            @fastmath tmp_i += L[i,j] * z_j
-          end
-          @fastmath z_i = (y[i] - tmp_i) / L[i,i]
-          @fastmath z_dot_z += z_i ^ 2
+    else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
+        quote
+            L = d.L
+            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            z_dot_z = zero($T)
+
+            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
+                Base.Cartesian.@nexprs $k i -> begin
+                    tmp_i = zero($T)
+                    Base.Cartesian.@nexprs i - 1 j -> begin
+                        @fastmath tmp_i += L[i, j] * z_j
+                    end
+                    @fastmath z_i = (y[i] - tmp_i) / L[i, i]
+                    @fastmath z_dot_z += z_i^2
+                end
+            end
+
+            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
         end
-      end
-
-      $(T(-K/2 * log2π)) - logdet_pos(L) - z_dot_z/2
     end
-  end
 end
 
 # S = @MMatrix(randn(10,15)) |> x -> Symmetric(x * x',:L);
@@ -167,3 +147,12 @@ end
 # end
 
 # mvnormaldims(nt::NamedTuple{(:Σ⁻¹,)}) = size(nt.Σ⁻¹)
+
+@testset "MvNormal" begin
+    @testset "MvNormal(L)" begin
+        n = 5
+        t = CorrCholesky(5)
+        L = transform(t, randn(dimension(t)))
+
+    end
+end
diff --git a/src/transforms/corrcholesky.jl b/src/transforms/corrcholesky.jl
index 8f66bc0c..2a70b517 100644
--- a/src/transforms/corrcholesky.jl
+++ b/src/transforms/corrcholesky.jl
@@ -27,8 +27,10 @@ TV.dimension(t::CorrCholesky) = TV.dimension(CorrCholeskyUpper(t.n))
 # consider whether it can help performance to implement this directly for the
 # lower triangular case
 function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky, x::AbstractVector, index)
-    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(t.n), x, index)
-    return Cholesky(U, 'U', 0), ℓ, index
+    n = t.n
+    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(n), x, index)
+    L = LowerTriangular(SizedMatrix{n,n}(transpose!(U).data))
+    return Cholesky(L,'L', 0), ℓ, index
 end
 
 TV.inverse_eltype(t::CorrCholesky, x::AbstractMatrix) = TV.extended_eltype(x)

From e2ac0eae3d7638b5b58d267c6f8b0c97b8994de3 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 29 Jul 2021 07:06:17 -0700
Subject: [PATCH 03/29] bugfixes

---
 src/parameterized/mvnormal.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 0f9fc6ba..3d77877d 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -58,7 +58,7 @@ using StaticArrays
 end
 
 function logdensity(d::MvNormal{(:μ, :L)}, y::AbstractArray{T}) where {T}
-    x = StrideArray{T}(undef, size(y))
+    x = StrideArray{T}(undef, ArrayInterface.size(y))
     @inbounds for j in eachindex(y)
         x[j] = y[j] - d.μ[j]
     end
@@ -71,7 +71,7 @@ using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
     if k > 16
         quote
             # k = StaticInt($K)
-
+            L = d.L
             z = StrideArray{$T}(undef, (k,))
 
             # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
@@ -86,7 +86,7 @@ using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
                 z[i] = zi
             end
 
-            $(T(-k / 2 * log2π)) + logdet_pos(L) - z_dot_z / 2
+            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
         end
     else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
         quote

From d6889eb75442cf8c6f7999293fc62965780d1832 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 29 Jul 2021 07:52:57 -0700
Subject: [PATCH 04/29] zero allocations!

---
 src/parameterized/mvnormal.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 3d77877d..7a8e068b 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -68,15 +68,15 @@ using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
 
 @generated function logdensity(d::MvNormal{(:L,), Tuple{LowerTriangular{T2, S}}}, y::AbstractArray{T}) where {k,T,T2, S<:SizedMatrix{k,k}}
     log2π = log(big(2) * π)
+    k = StaticInt(k)
     if k > 16
         quote
-            # k = StaticInt($K)
             L = d.L
-            z = StrideArray{$T}(undef, (k,))
+            z = StrideArray{$T}(undef, ($k,))
 
             # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
             z_dot_z = zero($T)
-            @inbounds @fastmath for i ∈ 1:k
+            @inbounds @fastmath for i ∈ 1:$k
                 tmp = zero($T)
                 for j = 1:(i-1)
                     tmp += L[i, j] * z[j]

From 3f5e714907b9fe5337d4e4fa0a158d887e0264bd Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 29 Jul 2021 13:44:01 -0700
Subject: [PATCH 05/29] version from @chriselrod with column-major traversal

---
 src/parameterized/mvnormal.jl | 56 ++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 7a8e068b..15a9baaf 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -66,7 +66,10 @@ function logdensity(d::MvNormal{(:μ, :L)}, y::AbstractArray{T}) where {T}
 end
 using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
 
-@generated function logdensity(d::MvNormal{(:L,), Tuple{LowerTriangular{T2, S}}}, y::AbstractArray{T}) where {k,T,T2, S<:SizedMatrix{k,k}}
+@generated function logdensity(
+    d::MvNormal{(:L,),Tuple{LowerTriangular{T2,S}}},
+    y::AbstractArray{T},
+) where {k,T,T2,S<:SizedMatrix{k,k}}
     log2π = log(big(2) * π)
     k = StaticInt(k)
     if k > 16
@@ -110,6 +113,57 @@ using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
     end
 end
 
+@generated function logdensity2(
+    d::MvNormal{(:L,),Tuple{LowerTriangular{T2,S}}},
+    y::AbstractArray{T},
+) where {k,T,T2,S<:StaticMatrix{k,k}}
+    log2π = log(big(2) * π)
+    if k > 16
+        quote
+            # k = StaticInt($K)
+            L = d.L
+            P = parent(L)
+            tmp = StrideArray{$T}(undef, (StaticInt{$k}(),))
+            @turbo for i ∈ eachindex(tmp)
+                tmp[i] = zero($T)
+            end
+            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            z_dot_z = zero($T)
+            @inbounds for i ∈ 1:$k
+                @fastmath z_i = (y[i] - tmp[i]) / L[i, i]
+                @fastmath z_dot_z += z_i * z_i
+                @turbo check_empty = true for j ∈ i+1:$k
+                    tmp[j] += P[j, i] * z_i
+                end
+            end
+            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+        end
+    else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
+        quote
+            L = d.L
+            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            z_dot_z = zero($T)
+
+            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
+                Base.Cartesian.@nexprs $k i -> tmp_i = zero($T)
+                Base.Cartesian.@nexprs $k i -> begin
+                    # tmp_i = zero($T)
+                    # Base.Cartesian.@nexprs i - 1 j -> begin
+                    # @fastmath tmp_i += L[i, j] * z_j
+                    # end
+                    @fastmath z_i = (y[i] - tmp_i) / L[i, i]
+                    Base.Cartesian.@nexprs $k - i j -> begin
+                        @fastmath tmp_{j + i} += L[j+i, i] * z_i
+                    end
+                    @fastmath z_dot_z += z_i^2
+                end
+            end
+
+            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+        end
+    end
+end
+
 # S = @MMatrix(randn(10,15)) |> x -> Symmetric(x * x',:L);
 # d = (L = cholesky(S).L,)
 # y = SizedVector{10}(randn(10));

From 6fb62ed607c033b7be78fbed59cbc5be99c7499c Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 29 Jul 2021 16:09:30 -0700
Subject: [PATCH 06/29] CorrCholesky update

---
 src/transforms/corrcholesky.jl | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/transforms/corrcholesky.jl b/src/transforms/corrcholesky.jl
index 2a70b517..a0fb22d1 100644
--- a/src/transforms/corrcholesky.jl
+++ b/src/transforms/corrcholesky.jl
@@ -17,20 +17,28 @@ If
 then `Diagonal(σ) * C.L * z` is a zero-centered multivariate normal variate with the standard deviations `σ` and
 correlation matrix `C.L * C.U`.
 """
-struct CorrCholesky <: TV.VectorTransform
-    n::Int
+struct CorrCholesky{N} <: TV.VectorTransform 
+    n::N
 end
 
-TV.dimension(t::CorrCholesky) = TV.dimension(CorrCholeskyUpper(t.n))
+TV.dimension(t::CorrCholesky{Int}) = TV.dimension(CorrCholeskyUpper(t.n))
+
+TV.dimension(t::CorrCholesky{StaticInt{n}}) where {n} = TV.dimension(CorrCholeskyUpper(t.n))
 
 # TODO: For now we just transpose the CorrCholeskyUpper result. We should
 # consider whether it can help performance to implement this directly for the
 # lower triangular case
+function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky{StaticInt{n}}, x::AbstractVector, index) where {n}
+    @nospecialize t
+    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(n), x, index)
+    U = UpperTriangular(SizedMatrix{n,n}(U.data))
+    return Cholesky(U,'U', 0), ℓ, index
+end
+
 function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky, x::AbstractVector, index)
     n = t.n
     U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(n), x, index)
-    L = LowerTriangular(SizedMatrix{n,n}(transpose!(U).data))
-    return Cholesky(L,'L', 0), ℓ, index
+    return Cholesky(U,'U', 0), ℓ, index
 end
 
 TV.inverse_eltype(t::CorrCholesky, x::AbstractMatrix) = TV.extended_eltype(x)

From 78c59b75ade8028c7831d3aee644af8ed598a9f4 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 29 Jul 2021 16:09:51 -0700
Subject: [PATCH 07/29] add InlineTest and ReTest

---
 Project.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Project.toml b/Project.toml
index 68b45cee..304776ef 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicIterators = "6c76993d-992e-5bf1-9e63-34920a5a5a38"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 InfiniteArrays = "4858937d-0d70-526a-a4dd-2d5cb5dd786c"
+InlineTest = "bd334432-b1e7-49c7-a2dc-dd9149e4ebd6"
 KeywordCalls = "4d827475-d3e4-43d6-abe3-9688362ede9f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
@@ -23,6 +24,7 @@ NamedTupleTools = "d9ec5142-1e00-5aa0-9d6a-321866360f50"
 NestedTuples = "a734d2a7-8d68-409b-9419-626914d4061d"
 PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
 SimpleTraits = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"

From 3fce1839f744563eed53e61d093e17d7f8e3f690 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Tue, 3 Aug 2021 07:35:00 -0700
Subject: [PATCH 08/29] mucking about

---
 src/parameterized/mvnormal.jl | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 15a9baaf..32722ce2 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -15,34 +15,39 @@ using LoopVectorization
 
 @kwstruct MvNormal(μ, Σ)
 
-@kwstruct MvNormal(L)
+@kwstruct MvNormal(σ)
 
-@kwstruct MvNormal(μ, L)
+@kwstruct MvNormal(μ, σ)
 
 @kwstruct MvNormal(μ, Ω)
 
 @kwstruct MvNormal(Ω)
 
+@kwstruct MvNormal(ω)
 
-function logdensity(d::MvNormal{(:L2,)}, y::AbstractVector{T}) where {T}
-    L = d.L2
-    k = first(size(L))
+const LowerCholesky{T} = Cholesky{T, <:LowerTriangular} 
+const UpperCholesky{T} = Cholesky{T, <:UpperTriangular} 
+
+logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(d.ω.UL, y)
+
+function logdensity_mvnormal_ω(U::UpperTriangular, y::AbstractVector{T}) where {T}
+    k = first(size(U))
     @assert length(y) == k
 
-    # if z = Lᵗy, the logdensity depends on `det(L)` and `z ⋅ z`. So we find `z`
+    # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
     z_dot_z = zero(T)
-    @simd for j ∈ 1:k
+    for j ∈ 1:k
         zj = zero(T)
-        for i ∈ j:k
-            @inbounds zj += L[i, j] * y[i]
+        for i ∈ 1:j
+            @inbounds zj += U[i, j] * y[i]
         end
         z_dot_z += zj^2
     end
 
-    -k / 2 * log2π + logdet_pos(L) - z_dot_z / 2
+    -k / 2 * log2π + logdet_pos(U) - z_dot_z / 2
 end
 
-@kwstruct MvNormal(L2)
+
 
 using StrideArrays
 using StaticArrays

From aa8af50ce4d621ba9c5bc061a0b8f571d08f6689 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Tue, 3 Aug 2021 15:17:39 -0700
Subject: [PATCH 09/29] updates

---
 src/parameterized/mvnormal.jl | 138 ++++++++++++++++++++--------------
 src/utils.jl                  |  10 +++
 2 files changed, 90 insertions(+), 58 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 32722ce2..2a84a1e5 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -9,6 +9,9 @@ import Base
 using Tullio
 using LoopVectorization
 
+using StrideArrays
+using StaticArrays
+
 @parameterized MvNormal(μ, Σ)
 
 @kwstruct MvNormal(Σ)
@@ -28,125 +31,101 @@ using LoopVectorization
 const LowerCholesky{T} = Cholesky{T, <:LowerTriangular} 
 const UpperCholesky{T} = Cholesky{T, <:UpperTriangular} 
 
-logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(d.ω.UL, y)
-
-function logdensity_mvnormal_ω(U::UpperTriangular, y::AbstractVector{T}) where {T}
-    k = first(size(U))
-    @assert length(y) == k
-
-    # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
-    z_dot_z = zero(T)
-    for j ∈ 1:k
-        zj = zero(T)
-        for i ∈ 1:j
-            @inbounds zj += U[i, j] * y[i]
-        end
-        z_dot_z += zj^2
-    end
-
-    -k / 2 * log2π + logdet_pos(U) - z_dot_z / 2
+@inline function logdet_pos(C::Cholesky)
+    logdet_pos(getfield(C, :factors))
 end
 
-
-
-using StrideArrays
-using StaticArrays
-
-
 @inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where {T}
-    result = zero(real(T))
-    @turbo for i = 1:ArrayInterface.size(A, 1)
+    exp_result = one(real(T))                   # result = zero(real(T))
+    @turbo for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
-        result += log(diag_i)
+        exp_result *= diag_i                    # result += log(diag_i)
     end
-    return result
+    return log(exp_result)                      # return result
 end
 
-function logdensity(d::MvNormal{(:μ, :L)}, y::AbstractArray{T}) where {T}
-    x = StrideArray{T}(undef, ArrayInterface.size(y))
-    @inbounds for j in eachindex(y)
-        x[j] = y[j] - d.μ[j]
-    end
-    GC.@preserve x logdensity(MvNormal(L = d.L), x)
-end
-using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
+
+###############################################################################
+# MvNormal(σ)
+
+logdensity(d::MvNormal{(:σ,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_σ(getfield(d.σ, :factors), y)
 
 @generated function logdensity(
-    d::MvNormal{(:L,),Tuple{LowerTriangular{T2,S}}},
+    d::MvNormal{(:σ,),Tuple{LowerTriangular{T2,S}}},
     y::AbstractArray{T},
 ) where {k,T,T2,S<:SizedMatrix{k,k}}
     log2π = log(big(2) * π)
     k = StaticInt(k)
     if k > 16
         quote
-            L = d.L
+            σ = d.σ
             z = StrideArray{$T}(undef, ($k,))
 
-            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
             z_dot_z = zero($T)
             @inbounds @fastmath for i ∈ 1:$k
                 tmp = zero($T)
                 for j = 1:(i-1)
-                    tmp += L[i, j] * z[j]
+                    tmp += σ[i, j] * z[j]
                 end
-                zi = (y[i] - tmp) / L[i, i]
+                zi = (y[i] - tmp) / σ[i, i]
                 z_dot_z += zi^2
                 z[i] = zi
             end
 
-            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
     else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
         quote
-            L = d.L
-            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            σ = d.σ
+            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
             z_dot_z = zero($T)
 
             @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
                 Base.Cartesian.@nexprs $k i -> begin
                     tmp_i = zero($T)
                     Base.Cartesian.@nexprs i - 1 j -> begin
-                        @fastmath tmp_i += L[i, j] * z_j
+                        @fastmath tmp_i += σ[i, j] * z_j
                     end
-                    @fastmath z_i = (y[i] - tmp_i) / L[i, i]
+                    @fastmath z_i = (y[i] - tmp_i) / σ[i, i]
                     @fastmath z_dot_z += z_i^2
                 end
             end
 
-            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
     end
 end
 
 @generated function logdensity2(
-    d::MvNormal{(:L,),Tuple{LowerTriangular{T2,S}}},
+    d::MvNormal{(:σ,),Tuple{LowerTriangular{T2,S}}},
     y::AbstractArray{T},
 ) where {k,T,T2,S<:StaticMatrix{k,k}}
     log2π = log(big(2) * π)
     if k > 16
         quote
             # k = StaticInt($K)
-            L = d.L
-            P = parent(L)
+            σ = d.σ
+            P = parent(σ)
             tmp = StrideArray{$T}(undef, (StaticInt{$k}(),))
             @turbo for i ∈ eachindex(tmp)
                 tmp[i] = zero($T)
             end
-            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
             z_dot_z = zero($T)
             @inbounds for i ∈ 1:$k
-                @fastmath z_i = (y[i] - tmp[i]) / L[i, i]
+                @fastmath z_i = (y[i] - tmp[i]) / σ[i, i]
                 @fastmath z_dot_z += z_i * z_i
                 @turbo check_empty = true for j ∈ i+1:$k
                     tmp[j] += P[j, i] * z_i
                 end
             end
-            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
     else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
         quote
-            L = d.L
-            # Solve `y = Lz` for `z`. We need this only as a way to calculate `z ⋅ z`
+            σ = d.σ
+            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
             z_dot_z = zero($T)
 
             @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
@@ -154,21 +133,64 @@ end
                 Base.Cartesian.@nexprs $k i -> begin
                     # tmp_i = zero($T)
                     # Base.Cartesian.@nexprs i - 1 j -> begin
-                    # @fastmath tmp_i += L[i, j] * z_j
+                    # @fastmath tmp_i += σ[i, j] * z_j
                     # end
-                    @fastmath z_i = (y[i] - tmp_i) / L[i, i]
+                    @fastmath z_i = (y[i] - tmp_i) / σ[i, i]
                     Base.Cartesian.@nexprs $k - i j -> begin
-                        @fastmath tmp_{j + i} += L[j+i, i] * z_i
+                        @fastmath tmp_{j + i} += σ[j+i, i] * z_i
                     end
                     @fastmath z_dot_z += z_i^2
                 end
             end
 
-            $(T(-k / 2 * log2π)) - logdet_pos(L) - z_dot_z / 2
+            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
     end
 end
 
+
+###############################################################################
+# MvNormal(ω)
+
+
+logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(d.ω.UL, y)
+
+
+function logdensity_mvnormal_ω(U::UpperTriangular, y::AbstractVector{T}) where {T}
+    k = first(size(U))
+    @assert length(y) == k
+
+    # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
+    z_dot_z = zero(T)
+    for j ∈ 1:k
+        zj = zero(T)
+        for i ∈ 1:j
+            @inbounds zj += U[i, j] * y[i]
+        end
+        z_dot_z += zj^2
+    end
+
+    -k / 2 * log2π + logdet_pos(U) - z_dot_z / 2
+end
+
+
+
+
+
+
+function logdensity(d::MvNormal{(:μ, :σ)}, y::AbstractArray{T}) where {T}
+    x = StrideArray{T}(undef, ArrayInterface.size(y))
+    @inbounds for j in eachindex(y)
+        x[j] = y[j] - d.μ[j]
+    end
+    GC.@preserve x logdensity(MvNormal(σ = d.σ), x)
+end
+using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
+
+
+
+
+
 # S = @MMatrix(randn(10,15)) |> x -> Symmetric(x * x',:L);
 # d = (L = cholesky(S).L,)
 # y = SizedVector{10}(randn(10));
diff --git a/src/utils.jl b/src/utils.jl
index a193dabc..a1b92b50 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -61,3 +61,13 @@ struct NonIterable end
 isiterable(::Type{T}) where T = static_hasmethod(iterate, Tuple{T}) ? Iterable() : NonIterable()
 
 functioninstance(::Type{F}) where {F<:Function} = F.instance
+
+using Static
+using ArrayInterface
+using StaticArrays
+
+struct KnownSize{S, T}
+    value::T
+end
+
+KnownSize(x::T) where {T} = KnownSize{Tuple{ArrayInterface.known_size(T)...}, T}(x)

From 25337c59d7e9bb6f9614e3ca1a7a0d57cbee1bfc Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Tue, 3 Aug 2021 20:41:47 -0700
Subject: [PATCH 10/29] more MvNormal work

---
 src/parameterized/mvnormal.jl | 44 ++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 2a84a1e5..17d5737f 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -35,13 +35,24 @@ const UpperCholesky{T} = Cholesky{T, <:UpperTriangular}
     logdet_pos(getfield(C, :factors))
 end
 
+
 @inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where {T}
-    exp_result = one(real(T))                   # result = zero(real(T))
-    @turbo for i = 1:ArrayInterface.size(A,1)
+    # ∑ᵢ log(aᵢ * 2^bᵢ) = log(∏ᵢ aᵢ) + log2 * ∑ᵢ bᵢ
+
+    prod_ai = one(real(T))
+    sum_bi = zero(real(T))
+
+    @inbounds @fastmath for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
-        exp_result *= diag_i                    # result += log(diag_i)
+        ai = significand(diag_i)
+        prod_ai *= ai
     end
-    return log(exp_result)                      # return result
+    @inbounds @fastmath for i = 1:ArrayInterface.size(A,1)
+        diag_i = A.data[i, i]
+        bi = exponent(diag_i)
+        sum_bi += bi
+    end
+    return log(prod_ai) + logtwo * sum_bi      
 end
 
 
@@ -153,10 +164,31 @@ end
 # MvNormal(ω)
 
 
-logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(d.ω.UL, y)
 
+logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(getfield(d.ω, :factors), y)
+
+logdensity_mvnormal_ω(UL::Union{UpperTriangular, LowerTriangular}, y::AbstractVector) = logdensity_mvnormal_ω(KnownSize(UL), y)
+
+@inline @generated function logdensity_mvnormal_ω(U::KnownSize{Tuple{k,k}, <:UpperTriangular}, y::AbstractVector{T}) where {k,T}
+    log2π = log(big(2) * π)
+    quote
+        U = U.value
+        Udata = U.data
+        # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
+        z_dot_z = zero(T)
+        @fastmath for j ∈ 1:$k
+            zj = zero(T)
+            for i ∈ 1:j
+                @inbounds zj += Udata[i, j] * y[i]
+            end
+            z_dot_z += zj^2
+        end
+
+        $(T(-k / 2 * log2π)) + logdet_pos(U) - z_dot_z / 2
+    end
+end
 
-function logdensity_mvnormal_ω(U::UpperTriangular, y::AbstractVector{T}) where {T}
+@inline function logdensity_mvnormal_ω(U::KnownSize{Tuple{nothing, nothing}, <:UpperTriangular}, y::AbstractVector{T}) where {T}
     k = first(size(U))
     @assert length(y) == k
 

From 4c10ea58e56cc379d2103d88cf7c21c6690202f4 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Wed, 4 Aug 2021 12:46:17 -0700
Subject: [PATCH 11/29] moar turbo

---
 src/parameterized/mvnormal.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 17d5737f..cf13bc8a 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -42,12 +42,12 @@ end
     prod_ai = one(real(T))
     sum_bi = zero(real(T))
 
-    @inbounds @fastmath for i = 1:ArrayInterface.size(A,1)
+    @turbo for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
         ai = significand(diag_i)
         prod_ai *= ai
     end
-    @inbounds @fastmath for i = 1:ArrayInterface.size(A,1)
+    @turbo for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
         bi = exponent(diag_i)
         sum_bi += bi

From dd479072c737085c25dc1cec5758e39cb204ed15 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Wed, 4 Aug 2021 15:47:14 -0700
Subject: [PATCH 12/29] combine loops

---
 src/parameterized/mvnormal.jl | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index cf13bc8a..252551a7 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -42,15 +42,12 @@ end
     prod_ai = one(real(T))
     sum_bi = zero(real(T))
 
-    @turbo for i = 1:ArrayInterface.size(A,1)
-        diag_i = A.data[i, i]
-        ai = significand(diag_i)
-        prod_ai *= ai
-    end
     @turbo for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
         bi = exponent(diag_i)
         sum_bi += bi
+        ai = significand(diag_i)
+        prod_ai *= ai
     end
     return log(prod_ai) + logtwo * sum_bi      
 end

From f0fa99aa2eb1c2bac8d6a4faa04fa222d54d0369 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Wed, 4 Aug 2021 20:28:33 -0700
Subject: [PATCH 13/29] drop InlineTest

---
 src/MeasureTheory.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index 18a8aa18..46a7cd54 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -1,7 +1,6 @@
 module MeasureTheory
 
 using LoopVectorization: ArrayInterface
-using InlineTest
 
 using Random
 

From 1459cc6ec4bb27a9c6f47600827a47152106ed71 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 5 Aug 2021 08:32:43 -0700
Subject: [PATCH 14/29] add Static

---
 Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Project.toml b/Project.toml
index d047819e..94984555 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,6 +23,7 @@ NestedTuples = "a734d2a7-8d68-409b-9419-626914d4061d"
 PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 StrideArrays = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b"

From 26659c368904f8c11629ed02ee650354e8a7f574 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 5 Aug 2021 10:20:00 -0700
Subject: [PATCH 15/29] version bound for Static.jl

---
 Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Project.toml b/Project.toml
index 94984555..4a232d21 100644
--- a/Project.toml
+++ b/Project.toml
@@ -49,6 +49,7 @@ NamedTupleTools = "0.13"
 NestedTuples = "0.3"
 PositiveFactorizations = "0.2"
 SpecialFunctions = "0.10, 1"
+Static = "0.3"
 StaticArrays = "1"
 StatsFuns = "0.9"
 StrideArrays = "0.1"

From 33c945239384fe5757b6e4e3a288c3883008ba08 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 5 Aug 2021 18:30:33 -0700
Subject: [PATCH 16/29] reorg

---
 benchmarks/benchmarks.jl      |  74 +++++++++++++++
 src/parameterized/mvnormal.jl | 163 +++++++++++++++-------------------
 2 files changed, 148 insertions(+), 89 deletions(-)
 create mode 100644 benchmarks/benchmarks.jl

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
new file mode 100644
index 00000000..15b67788
--- /dev/null
+++ b/benchmarks/benchmarks.jl
@@ -0,0 +1,74 @@
+using MeasureTheory, Static, BenchmarkTools, LinearAlgebra, StaticArrays
+
+x = randn(200,200);
+Σ = x' * x;
+
+CΣ = cholesky(Positive, Σ)
+
+using PositiveFactorizations
+
+function benchmark()
+    σtimes = Float64[]
+    ωtimes = Float64[]
+    y = randn(200);
+    for k in 1:100
+        U = UpperTriangular(SizedMatrix{k,k}(CΣ.U.data[1:k,1:k]));
+        C = Cholesky(U, 'U', 0);
+        yk = y[1:k]
+        push!(σtimes, @belapsed logdensity(MvNormal(σ=$C), $yk))
+        push!(ωtimes, @belapsed logdensity(MvNormal(ω=$C), $yk))
+    end
+
+    (σtimes, ωtimes)
+end
+
+(σtimes, ωtimes) = benchmark()
+
+
+plot(σtimes[1:end] .* 1e6; dpi=300, legend=150, label="σ parameterization")
+plot!(ωtimes[1:end] .* 1e6, label="ω parameterization")
+xlabel!("Dimensions")
+ylabel!("Time (μs)")
+title!("MeasureTheory.jl Log-density for MvNormal")
+savefig("mvnormal.svg")
+savefig("mvnormal.png")
+
+# C_unsized = Cholesky(C.UL.data.data, 'U', )
+
+# using PDMats
+# PD = PDMat(C_unsized)
+
+# julia> @btime Dists.logpdf(Dists.MvNormalCanon($PD), $y)
+#   4.944 μs (6 allocations: 1.97 KiB)
+# -124.71152557023147
+
+# julia> @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C_unsized)), $y)
+#   16.552 μs (10 allocations: 27.25 KiB)
+# -124.71152557023147
+
+# julia> @btime logdensity(MvNormal(ω=$C), $y)
+#   214.948 ns (0 allocations: 0 bytes)
+# -124.53160601776467
+
+
+# L = inv(C.L)
+# Σ = L' * L
+
+
+
+# @btime Dists.logpdf(Dists.MvNormalCanon($PD), $y)
+# @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C_unsized)), $y)
+
+
+
+Dists.logpdf(Dists.MvNormal(Σ), y)
+logdensity(MvNormal(ω=C), y)
+
+
+
+# n = 40;
+# t = CorrCholesky(StaticInt(n));
+# C = transform(t, randn(dimension(t)));
+@btime MeasureTheory.logdet_pos($C)
+
+# @btime logdensity(MvNormal(ω=$C), $(randn(n)))
diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index f75bfbcd..8548067b 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -43,11 +43,12 @@ end
 
     @turbo for i = 1:ArrayInterface.size(A,1)
         diag_i = A.data[i, i]
-        bi = exponent(diag_i)
-        sum_bi += bi
         ai = significand(diag_i)
+        bi = exponent(diag_i)
         prod_ai *= ai
+        sum_bi += bi
     end
+
     return log(prod_ai) + logtwo * sum_bi      
 end
 
@@ -55,106 +56,64 @@ end
 ###############################################################################
 # MvNormal(σ)
 
+
 logdensity(d::MvNormal{(:σ,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_σ(getfield(d.σ, :factors), y)
 
-@generated function logdensity(
-    d::MvNormal{(:σ,),Tuple{LowerTriangular{T2,S}}},
-    y::AbstractArray{T},
-) where {k,T,T2,S<:SizedMatrix{k,k}}
+logdensity_mvnormal_σ(UL::Union{UpperTriangular, LowerTriangular}, y::AbstractVector) = logdensity_mvnormal_σ(KnownSize(UL), y)
+
+
+@inline @generated function logdensity_mvnormal_σ(U::KnownSize{Tuple{k,k}, <:UpperTriangular}, y::AbstractVector{T}) where {k,T}
     log2π = log(big(2) * π)
-    k = StaticInt(k)
-    if k > 16
+
+    # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
+
+    header = quote
+        U = U.value
+        Udata = U.data
+        z_dot_z = zero($T)
+    end
+
+    body = if k > 20
         quote
-            σ = d.σ
-            z = StrideArray{$T}(undef, ($k,))
+            z = StrideArray{$T}(undef, ($(StaticInt(k)),))
 
-            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
-            z_dot_z = zero($T)
-            @inbounds @fastmath for i ∈ 1:$k
+            @inbounds @fastmath for j ∈ 1:$k
                 tmp = zero($T)
-                for j = 1:(i-1)
-                    tmp += σ[i, j] * z[j]
+                for i = 1:(j-1)
+                    tmp += Udata[i, j] * z[i]
                 end
-                zi = (y[i] - tmp) / σ[i, i]
-                z_dot_z += zi^2
-                z[i] = zi
+                zj = (y[j] - tmp) / Udata[j, j]
+                z_dot_z += zj^2
+                z[j] = zj
             end
-
-            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
-    else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
+    else
         quote
-            σ = d.σ
-            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
-            z_dot_z = zero($T)
-
-            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
-                Base.Cartesian.@nexprs $k i -> begin
-                    tmp_i = zero($T)
-                    Base.Cartesian.@nexprs i - 1 j -> begin
-                        @fastmath tmp_i += σ[i, j] * z_j
+            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`          
+                Base.Cartesian.@nexprs $k j -> begin
+                    tmp = zero($T)
+                    z_j = zero($T)
+                    Base.Cartesian.@nexprs j - 1 i -> begin
+                        @fastmath tmp += Udata[i, j] * z_i
                     end
-                    @fastmath z_i = (y[i] - tmp_i) / σ[i, i]
-                    @fastmath z_dot_z += z_i^2
+                    @fastmath z_j = (y[j] - tmp) / Udata[j, j]
+                    @fastmath z_dot_z += z_j^2
                 end
             end
-
-            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
         end
     end
-end
 
-@generated function logdensity2(
-    d::MvNormal{(:σ,),Tuple{LowerTriangular{T2,S}}},
-    y::AbstractArray{T},
-) where {k,T,T2,S<:StaticMatrix{k,k}}
-    log2π = log(big(2) * π)
-    if k > 16
-        quote
-            # k = StaticInt($K)
-            σ = d.σ
-            P = parent(σ)
-            tmp = StrideArray{$T}(undef, (StaticInt{$k}(),))
-            @turbo for i ∈ eachindex(tmp)
-                tmp[i] = zero($T)
-            end
-            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
-            z_dot_z = zero($T)
-            @inbounds for i ∈ 1:$k
-                @fastmath z_i = (y[i] - tmp[i]) / σ[i, i]
-                @fastmath z_dot_z += z_i * z_i
-                @turbo check_empty = true for j ∈ i+1:$k
-                    tmp[j] += P[j, i] * z_i
-                end
-            end
-            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
-        end
-    else # replace `for i in 1:K` with `Base.Cartesian.@nexprs $K i -> begin`
-        quote
-            σ = d.σ
-            # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
-            z_dot_z = zero($T)
-
-            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`
-                Base.Cartesian.@nexprs $k i -> tmp_i = zero($T)
-                Base.Cartesian.@nexprs $k i -> begin
-                    # tmp_i = zero($T)
-                    # Base.Cartesian.@nexprs i - 1 j -> begin
-                    # @fastmath tmp_i += σ[i, j] * z_j
-                    # end
-                    @fastmath z_i = (y[i] - tmp_i) / σ[i, i]
-                    Base.Cartesian.@nexprs $k - i j -> begin
-                        @fastmath tmp_{j + i} += σ[j+i, i] * z_i
-                    end
-                    @fastmath z_dot_z += z_i^2
-                end
-            end
+    footer = quote
+        $(T(-k / 2 * log2π)) - logdet_pos(U) - z_dot_z / 2
+    end
 
-            $(T(-k / 2 * log2π)) - logdet_pos(σ) - z_dot_z / 2
-        end
+    return quote
+        $(header.args...)
+        $(body.args...)
+        $(footer.args...)
     end
-end
 
+end
 
 ###############################################################################
 # MvNormal(ω)
@@ -167,21 +126,47 @@ logdensity_mvnormal_ω(UL::Union{UpperTriangular, LowerTriangular}, y::AbstractV
 
 @inline @generated function logdensity_mvnormal_ω(U::KnownSize{Tuple{k,k}, <:UpperTriangular}, y::AbstractVector{T}) where {k,T}
     log2π = log(big(2) * π)
-    quote
+
+    header = quote
         U = U.value
         Udata = U.data
         # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
         z_dot_z = zero(T)
-        @fastmath for j ∈ 1:$k
-            zj = zero(T)
-            for i ∈ 1:j
-                @inbounds zj += Udata[i, j] * y[i]
+    end
+
+    body = if k > 20
+        quote
+            @fastmath for j ∈ 1:$k
+                zj = zero(T)
+                for i ∈ 1:j
+                    @inbounds zj += Udata[i, j] * y[i]
+                end
+                z_dot_z += zj^2
             end
-            z_dot_z += zj^2
         end
+    else
+        quote
+            @inbounds begin
+                Base.Cartesian.@nexprs $k j -> begin
+                    zj = zero(T)
+                    Base.Cartesian.@nexprs j i -> begin
+                        zj += Udata[i, j] * y[i]
+                    end
+                    z_dot_z += zj^2
+                end
+            end
+        end
+    end
 
+    footer = quote
         $(T(-k / 2 * log2π)) + logdet_pos(U) - z_dot_z / 2
     end
+
+    return quote
+        $(header.args...)
+        $(body.args...)
+        $(footer.args...)
+    end
 end
 
 @inline function logdensity_mvnormal_ω(U::KnownSize{Tuple{nothing, nothing}, <:UpperTriangular}, y::AbstractVector{T}) where {T}

From bc30b338739ab9e526e5bdeb91097dec48af6785 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Thu, 5 Aug 2021 20:11:32 -0700
Subject: [PATCH 17/29] update

---
 benchmarks/benchmarks.jl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 15b67788..35d460ea 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -3,9 +3,9 @@ using MeasureTheory, Static, BenchmarkTools, LinearAlgebra, StaticArrays
 x = randn(200,200);
 Σ = x' * x;
 
+using PositiveFactorizations
 CΣ = cholesky(Positive, Σ)
 
-using PositiveFactorizations
 
 function benchmark()
     σtimes = Float64[]
@@ -24,6 +24,12 @@ end
 
 (σtimes, ωtimes) = benchmark()
 
+# using PDMats
+# C = cholesky(Σ[1:20,1:20])
+# Dists.MvNormal(PDMat(C))
+
+# @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C)), $y)
+# @btime Dists.logpdf(Dists.MvNormal(PDMat($C)), $y)
 
 plot(σtimes[1:end] .* 1e6; dpi=300, legend=150, label="σ parameterization")
 plot!(ωtimes[1:end] .* 1e6, label="ω parameterization")

From ea11d25cc64f2892a2fe4ed53adc13393ba7105a Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 15:17:35 -0700
Subject: [PATCH 18/29] Move code to MultivariateMeasures.jl

---
 Project.toml                  |  12 --
 src/parameterized/mvnormal.jl | 218 ----------------------------------
 2 files changed, 230 deletions(-)

diff --git a/Project.toml b/Project.toml
index 4a232d21..26c8db88 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,7 +5,6 @@ version = "0.10.0"
 
 [deps]
 Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
-ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -14,7 +13,6 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 InfiniteArrays = "4858937d-0d70-526a-a4dd-2d5cb5dd786c"
 KeywordCalls = "4d827475-d3e4-43d6-abe3-9688362ede9f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900"
@@ -23,17 +21,12 @@ NestedTuples = "a734d2a7-8d68-409b-9419-626914d4061d"
 PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
-Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
-StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
-StrideArrays = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b"
-StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
 TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff"
 Tricks = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775"
 
 [compat]
 Accessors = "0.1"
-ArrayInterface = "3.1"
 ConcreteStructs = "0.2"
 ConstructionBase = "1.3"
 Distributions = "0.23, 0.24, 0.25"
@@ -41,7 +34,6 @@ DynamicIterators = "0.4.2"
 FillArrays = "0.11, 0.12"
 InfiniteArrays = "0.11"
 KeywordCalls = "0.1, 0.2"
-LoopVectorization = "0.12"
 MLStyle = "0.4"
 MacroTools = "0.5"
 MappedArrays = "0.3, 0.4"
@@ -49,11 +41,7 @@ NamedTupleTools = "0.13"
 NestedTuples = "0.3"
 PositiveFactorizations = "0.2"
 SpecialFunctions = "0.10, 1"
-Static = "0.3"
-StaticArrays = "1"
 StatsFuns = "0.9"
-StrideArrays = "0.1"
-StrideArraysCore = "0.1"
 TransformVariables = "0.4"
 Tricks = "0.1.4"
 julia = "1.5"
diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl
index 8548067b..a8bd95d4 100644
--- a/src/parameterized/mvnormal.jl
+++ b/src/parameterized/mvnormal.jl
@@ -6,10 +6,7 @@ using LinearAlgebra
 export MvNormal
 using Random
 import Base
-using LoopVectorization
 
-using StrideArrays
-using StaticArrays
 
 @parameterized MvNormal(μ, Σ)
 
@@ -26,218 +23,3 @@ using StaticArrays
 @kwstruct MvNormal(Ω)
 
 @kwstruct MvNormal(ω)
-
-const LowerCholesky{T} = Cholesky{T, <:LowerTriangular} 
-const UpperCholesky{T} = Cholesky{T, <:UpperTriangular} 
-
-@inline function logdet_pos(C::Cholesky)
-    logdet_pos(getfield(C, :factors))
-end
-
-
-@inline function logdet_pos(A::Union{UpperTriangular{T},LowerTriangular{T}}) where {T}
-    # ∑ᵢ log(aᵢ * 2^bᵢ) = log(∏ᵢ aᵢ) + log2 * ∑ᵢ bᵢ
-
-    prod_ai = one(real(T))
-    sum_bi = zero(real(T))
-
-    @turbo for i = 1:ArrayInterface.size(A,1)
-        diag_i = A.data[i, i]
-        ai = significand(diag_i)
-        bi = exponent(diag_i)
-        prod_ai *= ai
-        sum_bi += bi
-    end
-
-    return log(prod_ai) + logtwo * sum_bi      
-end
-
-
-###############################################################################
-# MvNormal(σ)
-
-
-logdensity(d::MvNormal{(:σ,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_σ(getfield(d.σ, :factors), y)
-
-logdensity_mvnormal_σ(UL::Union{UpperTriangular, LowerTriangular}, y::AbstractVector) = logdensity_mvnormal_σ(KnownSize(UL), y)
-
-
-@inline @generated function logdensity_mvnormal_σ(U::KnownSize{Tuple{k,k}, <:UpperTriangular}, y::AbstractVector{T}) where {k,T}
-    log2π = log(big(2) * π)
-
-    # Solve `y = σz` for `z`. We need this only as a way to calculate `z ⋅ z`
-
-    header = quote
-        U = U.value
-        Udata = U.data
-        z_dot_z = zero($T)
-    end
-
-    body = if k > 20
-        quote
-            z = StrideArray{$T}(undef, ($(StaticInt(k)),))
-
-            @inbounds @fastmath for j ∈ 1:$k
-                tmp = zero($T)
-                for i = 1:(j-1)
-                    tmp += Udata[i, j] * z[i]
-                end
-                zj = (y[j] - tmp) / Udata[j, j]
-                z_dot_z += zj^2
-                z[j] = zj
-            end
-        end
-    else
-        quote
-            @inbounds begin # `@fastmath` needs to be moved inside the `@nexprs`          
-                Base.Cartesian.@nexprs $k j -> begin
-                    tmp = zero($T)
-                    z_j = zero($T)
-                    Base.Cartesian.@nexprs j - 1 i -> begin
-                        @fastmath tmp += Udata[i, j] * z_i
-                    end
-                    @fastmath z_j = (y[j] - tmp) / Udata[j, j]
-                    @fastmath z_dot_z += z_j^2
-                end
-            end
-        end
-    end
-
-    footer = quote
-        $(T(-k / 2 * log2π)) - logdet_pos(U) - z_dot_z / 2
-    end
-
-    return quote
-        $(header.args...)
-        $(body.args...)
-        $(footer.args...)
-    end
-
-end
-
-###############################################################################
-# MvNormal(ω)
-
-
-
-logdensity(d::MvNormal{(:ω,), <:Tuple{<:Cholesky}}, y) = logdensity_mvnormal_ω(getfield(d.ω, :factors), y)
-
-logdensity_mvnormal_ω(UL::Union{UpperTriangular, LowerTriangular}, y::AbstractVector) = logdensity_mvnormal_ω(KnownSize(UL), y)
-
-@inline @generated function logdensity_mvnormal_ω(U::KnownSize{Tuple{k,k}, <:UpperTriangular}, y::AbstractVector{T}) where {k,T}
-    log2π = log(big(2) * π)
-
-    header = quote
-        U = U.value
-        Udata = U.data
-        # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
-        z_dot_z = zero(T)
-    end
-
-    body = if k > 20
-        quote
-            @fastmath for j ∈ 1:$k
-                zj = zero(T)
-                for i ∈ 1:j
-                    @inbounds zj += Udata[i, j] * y[i]
-                end
-                z_dot_z += zj^2
-            end
-        end
-    else
-        quote
-            @inbounds begin
-                Base.Cartesian.@nexprs $k j -> begin
-                    zj = zero(T)
-                    Base.Cartesian.@nexprs j i -> begin
-                        zj += Udata[i, j] * y[i]
-                    end
-                    z_dot_z += zj^2
-                end
-            end
-        end
-    end
-
-    footer = quote
-        $(T(-k / 2 * log2π)) + logdet_pos(U) - z_dot_z / 2
-    end
-
-    return quote
-        $(header.args...)
-        $(body.args...)
-        $(footer.args...)
-    end
-end
-
-@inline function logdensity_mvnormal_ω(U::KnownSize{Tuple{nothing, nothing}, <:UpperTriangular}, y::AbstractVector{T}) where {T}
-    k = first(size(U))
-    @assert length(y) == k
-
-    # if z = Lᵗy, the logdensity depends on `det(U)` and `z ⋅ z`. So we find `z`
-    z_dot_z = zero(T)
-    for j ∈ 1:k
-        zj = zero(T)
-        for i ∈ 1:j
-            @inbounds zj += U[i, j] * y[i]
-        end
-        z_dot_z += zj^2
-    end
-
-    -k / 2 * log2π + logdet_pos(U) - z_dot_z / 2
-end
-
-
-
-
-
-
-function logdensity(d::MvNormal{(:μ, :σ)}, y::AbstractArray{T}) where {T}
-    x = StrideArray{T}(undef, ArrayInterface.size(y))
-    @inbounds for j in eachindex(y)
-        x[j] = y[j] - d.μ[j]
-    end
-    GC.@preserve x logdensity(MvNormal(σ = d.σ), x)
-end
-using StrideArrays, StaticArrays, LoopVectorization, LinearAlgebra
-
-
-
-
-
-# S = @MMatrix(randn(10,15)) |> x -> Symmetric(x * x',:L);
-# d = (L = cholesky(S).L,)
-# y = SizedVector{10}(randn(10));
-
-# z = StrideArray{Float64}(undef, (StaticInt(3),))
-# b = z.data
-
-# function MeasureTheory.basemeasure(μ::MvNormal{N, T, I,I}) where {N, T, I}
-#     return (1 / sqrt2π)^I * Lebesgue(ℝ)^I
-# end
-
-# sampletype(d::MvNormal{N, T, I, J}) where {N,T,I,J} = Vector{Float64}
-
-
-
-# function Random.rand!(rng::AbstractRNG, d::MvNormal{(:A, :b),T,I,J}, x::AbstractArray) where {T,I,J}
-#     z = getcache(d, J)
-#     rand!(rng, Normal()^J, z)
-#     copyto!(x, d.b)
-#     mul!(x, d.A, z, 1.0, 1.0)
-#     return x
-# end
-
-# function logdensity(d::MvNormal{(:A,:b)}, x)
-#     cache = getcache(d, size(d))
-#     z = d.A \ (x - d.b)
-#     return logdensity(MvNormal(), z) - logabsdet(d.A)
-# end
-
-# ≪(::MvNormal, ::Lebesgue{ℝ}) = true
-
-# function logdensity(d::MvNormal{(:Σ⁻¹,)}, x)
-#     @tullio ℓ = -0.5 * x[i] * d.Σ⁻¹[i,j] * x[j]
-#     return ℓ
-# end
-
-# mvnormaldims(nt::NamedTuple{(:Σ⁻¹,)}) = size(nt.Σ⁻¹)

From a86716e4211da08f3fa8063c8fd9fe159269b0de Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 15:20:09 -0700
Subject: [PATCH 19/29] drop `using LoopVectorization`

---
 src/MeasureTheory.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index 46a7cd54..aed386d1 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -1,7 +1,5 @@
 module MeasureTheory
 
-using LoopVectorization: ArrayInterface
-
 using Random
 
 using ConcreteStructs

From e62d41957de943cf76a8db1982d8f4899bd41c03 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 15:54:10 -0700
Subject: [PATCH 20/29] delete benchmarks (moved to MultivariateMeasures)

---
 benchmarks/benchmarks.jl | 80 ----------------------------------------
 1 file changed, 80 deletions(-)
 delete mode 100644 benchmarks/benchmarks.jl

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
deleted file mode 100644
index 35d460ea..00000000
--- a/benchmarks/benchmarks.jl
+++ /dev/null
@@ -1,80 +0,0 @@
-using MeasureTheory, Static, BenchmarkTools, LinearAlgebra, StaticArrays
-
-x = randn(200,200);
-Σ = x' * x;
-
-using PositiveFactorizations
-CΣ = cholesky(Positive, Σ)
-
-
-function benchmark()
-    σtimes = Float64[]
-    ωtimes = Float64[]
-    y = randn(200);
-    for k in 1:100
-        U = UpperTriangular(SizedMatrix{k,k}(CΣ.U.data[1:k,1:k]));
-        C = Cholesky(U, 'U', 0);
-        yk = y[1:k]
-        push!(σtimes, @belapsed logdensity(MvNormal(σ=$C), $yk))
-        push!(ωtimes, @belapsed logdensity(MvNormal(ω=$C), $yk))
-    end
-
-    (σtimes, ωtimes)
-end
-
-(σtimes, ωtimes) = benchmark()
-
-# using PDMats
-# C = cholesky(Σ[1:20,1:20])
-# Dists.MvNormal(PDMat(C))
-
-# @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C)), $y)
-# @btime Dists.logpdf(Dists.MvNormal(PDMat($C)), $y)
-
-plot(σtimes[1:end] .* 1e6; dpi=300, legend=150, label="σ parameterization")
-plot!(ωtimes[1:end] .* 1e6, label="ω parameterization")
-xlabel!("Dimensions")
-ylabel!("Time (μs)")
-title!("MeasureTheory.jl Log-density for MvNormal")
-savefig("mvnormal.svg")
-savefig("mvnormal.png")
-
-# C_unsized = Cholesky(C.UL.data.data, 'U', )
-
-# using PDMats
-# PD = PDMat(C_unsized)
-
-# julia> @btime Dists.logpdf(Dists.MvNormalCanon($PD), $y)
-#   4.944 μs (6 allocations: 1.97 KiB)
-# -124.71152557023147
-
-# julia> @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C_unsized)), $y)
-#   16.552 μs (10 allocations: 27.25 KiB)
-# -124.71152557023147
-
-# julia> @btime logdensity(MvNormal(ω=$C), $y)
-#   214.948 ns (0 allocations: 0 bytes)
-# -124.53160601776467
-
-
-# L = inv(C.L)
-# Σ = L' * L
-
-
-
-# @btime Dists.logpdf(Dists.MvNormalCanon($PD), $y)
-# @btime Dists.logpdf(Dists.MvNormalCanon(PDMat($C_unsized)), $y)
-
-
-
-Dists.logpdf(Dists.MvNormal(Σ), y)
-logdensity(MvNormal(ω=C), y)
-
-
-
-# n = 40;
-# t = CorrCholesky(StaticInt(n));
-# C = transform(t, randn(dimension(t)));
-@btime MeasureTheory.logdet_pos($C)
-
-# @btime logdensity(MvNormal(ω=$C), $(randn(n)))

From 3893d54f0e61213afb024d5a937b546235683778 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 15:55:14 -0700
Subject: [PATCH 21/29] drop old array code (moved to MultivariateMeasures)

---
 src/MeasureTheory.jl | 1 -
 src/utils.jl         | 9 ---------
 2 files changed, 10 deletions(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index aed386d1..7b9a7a02 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -22,7 +22,6 @@ using DynamicIterators
 using KeywordCalls
 using ConstructionBase
 using Accessors
-using ArrayInterface
 
 const ∞ = InfiniteArrays.∞
 
diff --git a/src/utils.jl b/src/utils.jl
index 0028b34c..ce565a90 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -56,12 +56,3 @@ isiterable(::Type{T}) where T = static_hasmethod(iterate, Tuple{T}) ? Iterable()
 
 functioninstance(::Type{F}) where {F<:Function} = F.instance
 
-using Static
-using ArrayInterface
-using StaticArrays
-
-struct KnownSize{S, T}
-    value::T
-end
-
-KnownSize(x::T) where {T} = KnownSize{Tuple{ArrayInterface.known_size(T)...}, T}(x)

From e974c427a58c258877bb57310e7f4944b7ac2a25 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 16:01:31 -0700
Subject: [PATCH 22/29] Move multivariate transforms to MultivariateMeasures

---
 src/transforms/corrcholesky.jl      |  56 ---------------
 src/transforms/corrcholeskylower.jl |  40 -----------
 src/transforms/ordered.jl           | 107 ----------------------------
 3 files changed, 203 deletions(-)
 delete mode 100644 src/transforms/corrcholesky.jl
 delete mode 100644 src/transforms/corrcholeskylower.jl
 delete mode 100644 src/transforms/ordered.jl

diff --git a/src/transforms/corrcholesky.jl b/src/transforms/corrcholesky.jl
deleted file mode 100644
index a0fb22d1..00000000
--- a/src/transforms/corrcholesky.jl
+++ /dev/null
@@ -1,56 +0,0 @@
-# Adapted from https://github.com/tpapp/TransformVariables.jl/blob/master/src/special_arrays.jl
-
-export CorrCholesky
-
-
-"""
-    CorrCholesky(n)
-Cholesky factor of a correlation matrix of size `n`.
-Transforms ``n(n-1)/2`` real numbers to an ``n×n`` lower-triangular matrix `L`, such that
-`L*L'` is a correlation matrix (positive definite, with unit diagonal).
-
-# Notes
-If
-- `z` is a vector of `n` IID standard normal variates,
-- `σ` is an `n`-element vector of standard deviations,
-- `C` is obtained from `CorrCholesky(n)`,
-then `Diagonal(σ) * C.L * z` is a zero-centered multivariate normal variate with the standard deviations `σ` and
-correlation matrix `C.L * C.U`.
-"""
-struct CorrCholesky{N} <: TV.VectorTransform 
-    n::N
-end
-
-TV.dimension(t::CorrCholesky{Int}) = TV.dimension(CorrCholeskyUpper(t.n))
-
-TV.dimension(t::CorrCholesky{StaticInt{n}}) where {n} = TV.dimension(CorrCholeskyUpper(t.n))
-
-# TODO: For now we just transpose the CorrCholeskyUpper result. We should
-# consider whether it can help performance to implement this directly for the
-# lower triangular case
-function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky{StaticInt{n}}, x::AbstractVector, index) where {n}
-    @nospecialize t
-    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(n), x, index)
-    U = UpperTriangular(SizedMatrix{n,n}(U.data))
-    return Cholesky(U,'U', 0), ℓ, index
-end
-
-function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky, x::AbstractVector, index)
-    n = t.n
-    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(n), x, index)
-    return Cholesky(U,'U', 0), ℓ, index
-end
-
-TV.inverse_eltype(t::CorrCholesky, x::AbstractMatrix) = TV.extended_eltype(x)
-
-function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, L::LowerTriangular)
-    return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), L')
-end
-
-function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, U::UpperTriangular)
-    return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), U)
-end
-
-function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, C::Cholesky)
-    return TV.inverse_at!(x, index, t, C.UL)
-end
diff --git a/src/transforms/corrcholeskylower.jl b/src/transforms/corrcholeskylower.jl
deleted file mode 100644
index 772ee49e..00000000
--- a/src/transforms/corrcholeskylower.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-# Adapted from https://github.com/tpapp/TransformVariables.jl/blob/master/src/special_arrays.jl
-
-using LinearAlgebra
-const CorrCholeskyUpper = CorrCholeskyFactor
-
-export CorrCholeskyLower
-
-"""
-    CorrCholeskyLower(n)
-Lower Cholesky factor of a correlation matrix of size `n`.
-Transforms ``n(n-1)/2`` real numbers to an ``n×n`` lower-triangular matrix `L`, such that
-`L*L'` is a correlation matrix (positive definite, with unit diagonal).
-
-# Notes
-If
-- `z` is a vector of `n` IID standard normal variates,
-- `σ` is an `n`-element vector of standard deviations,
-- `L` is obtained from `CorrCholeskyLower(n)`,
-then `Diagonal(σ) * L * z` is a zero-centered multivariate normal variate with the standard deviations `σ` and
-correlation matrix `L * L'`.
-"""
-struct CorrCholeskyLower <: TV.VectorTransform
-    n::Int
-end
-
-TV.dimension(t::CorrCholeskyLower) = TV.dimension(CorrCholeskyUpper(t.n))
-
-# TODO: For now we just transpose the CorrCholeskyUpper result. We should
-# consider whether it can help performance to implement this directly for the
-# lower triangular case
-function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholeskyLower, x::AbstractVector, index)
-    U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(t.n), x, index)
-    return U', ℓ, index
-end
-
-TV.inverse_eltype(t::CorrCholeskyLower, L::LowerTriangular) = TV.extended_eltype(L)
-
-function TV.inverse_at!(x::AbstractVector, index, t::CorrCholeskyLower, L::LowerTriangular)
-    return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), L')
-end
diff --git a/src/transforms/ordered.jl b/src/transforms/ordered.jl
deleted file mode 100644
index 9eb44f43..00000000
--- a/src/transforms/ordered.jl
+++ /dev/null
@@ -1,107 +0,0 @@
-# abstract type AbstractOrderedVector{T} <: AbstractVector{T} end
-
-# struct OrderedVector{T} <: AbstractOrderedVector{T}
-#     data::Vector{T}
-# end
-
-# as(::Type{OrderedVector}, transformation::TV.AbstractTransform, dim::Int) =
-#     Ordered(transformation, dim)
-
-using MeasureTheory
-
-export Ordered
-
-using TransformVariables
-const TV = TransformVariables
-
-struct Ordered{T <: TV.AbstractTransform} <: TV.VectorTransform
-    transformation::T
-    dim::Int
-end
-
-TV.dimension(t::Ordered) = t.dim
-
-addlogjac(ℓ, Δℓ) = ℓ + Δℓ
-addlogjac(::TV.NoLogJac, Δℓ) = TV.NoLogJac()
-
-using MappedArrays
-
-bounds(t::TV.ShiftedExp{true}) = (t.shift, TV.∞)
-bounds(t::TV.ShiftedExp{false}) = (-TV.∞, t.shift)
-bounds(t::TV.ScaledShiftedLogistic) = (t.shift, t.scale + t.shift)
-bounds(::TV.Identity) = (-TV.∞, TV.∞)
-
-const OrderedΔx = -8.0
-
-# See https://mc-stan.org/docs/2_27/reference-manual/ordered-vector.html
-function TV.transform_with(flag::TV.LogJacFlag, t::Ordered, x, index::T) where {T}
-    transformation, len = (t.transformation, t.dim)
-    @assert dimension(transformation) == 1
-    y = similar(x, len)
-        
-    (lo,hi) = bounds(transformation)
-
-
-    x = mappedarray(xj -> xj + OrderedΔx, x)
-
-    @inbounds (y[1], ℓ, _) = TV.transform_with(flag, as(Real, lo, hi), x, index)
-    index += 1
-
-    @inbounds for i in 2:len
-        (y[i], Δℓ, _) =  TV.transform_with(flag, as(Real, y[i-1], hi), x, index)
-        ℓ = addlogjac(ℓ, Δℓ)
-        index += 1
-    end
-
-    return (y, ℓ, index)
-end
-
-TV.inverse_eltype(t::Ordered, y::AbstractVector) = TV.extended_eltype(y)
-
-Ordered(n::Int) = Ordered(asℝ, n)
-
-function TV.inverse_at!(x::AbstractVector, index, t::Ordered, y::AbstractVector)
-    (lo,hi) = bounds(t.transformation)
-
-    @inbounds x[index] = inverse(as(Real, lo, hi), y[1]) - OrderedΔx
-    index += 1
-
-    @inbounds for i in 2:length(y)
-        x[index] = inverse(as(Real, y[i-1], hi), y[i]) - OrderedΔx
-        index += 1
-    end
-    return index
-end
-
-export Sorted
-struct Sorted{M} <: AbstractMeasure
-    μ::M
-    n::Int
-end
-
-logdensity(s::Sorted, x) = logdensity(s.μ ^ s.n, x)
-
-TV.as(s::Sorted) = Ordered(as(s.μ), s.n)
-
-function Random.rand!(rng::AbstractRNG, d::Sorted, x::AbstractArray)
-    rand!(rng, d.μ ^ d.n, x)
-    sort!(x)
-    return x
-end
-
-function Base.rand(rng::AbstractRNG, T::Type, d::Sorted)
-    # TODO: Use `sampletype` for this
-    elT = typeof(rand(rng, T, d.μ))
-    x = Vector{elT}(undef, d.n)
-    rand!(rng, d, x)
-end
-
-# logdensity(d, rand(d))
-
-
-
-# TV.transform_with(TV.LogJac(), Ordered(asℝ, 4), zeros(4), 1)
-# TV.transform_with(TV.LogJac(), Ordered(asℝ, 4), randn(4), 1)
-
-# d = Pushforward(Ordered(10), Normal()^10, false)
-# logdensity(Lebesgue(ℝ)^10, rand(d))

From 7344016b552c5093da6dbbcd4ad0dcfbd248097d Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 16:03:12 -0700
Subject: [PATCH 23/29] moved CorrCholesky test

---
 test/runtests.jl | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 32ee80f8..400b5a2d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -85,20 +85,6 @@ end
         @test ℓ ≈ logdensity(Normal(;μ,logσ), y)
     end
 
-    @testset "LKJCholesky" begin
-        D = LKJCholesky{(:k,:η)}
-        par = transform(asparams(D, (k=4,)), randn(1))
-        d = D(merge((k=4,),par))
-        # @test params(d) == par
-
-        η  = par.η
-        logη = log(η)
-
-        y = rand(d)
-        η = par.η
-        ℓ = logdensity(LKJCholesky(4,η), y)
-        @test ℓ ≈ logdensity(LKJCholesky(k=4,logη=logη), y)
-    end
 end
 
 @testset "Kernel" begin

From fa5b18e0764b8cb4b54bfd15bf426073247e705c Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Fri, 6 Aug 2021 16:04:52 -0700
Subject: [PATCH 24/29] move LKJCholesky to MultivariateMeasures

---
 src/parameterized/lkj-cholesky.jl | 109 ------------------------------
 1 file changed, 109 deletions(-)
 delete mode 100644 src/parameterized/lkj-cholesky.jl

diff --git a/src/parameterized/lkj-cholesky.jl b/src/parameterized/lkj-cholesky.jl
deleted file mode 100644
index 2ae2ee1e..00000000
--- a/src/parameterized/lkj-cholesky.jl
+++ /dev/null
@@ -1,109 +0,0 @@
-# Modified from
-# https://github.com/tpapp/AltDistributions.jl
-
-# See also the Stan manual (the "Stanual", though nobody calls it that)
-# https://mc-stan.org/docs/2_27/reference-manual/cholesky-factors-of-correlation-matrices-1.html
-
-export LKJCholesky
-using PositiveFactorizations
-
-
-const CorrCholeskyUpper = TV.CorrCholeskyFactor
-
-@doc """
-    LKJCholesky(k=3, η=1.0)
-    LKJCholesky(k=3, logη=0.0)
-
-`LKJCholesky(k, ...)` gives the `k×k` LKJ distribution (Lewandowski et al 2009)
-expressed as a Cholesky decomposition. As a special case, for
-`C = rand(LKJCholesky(k=K, η=1.0))` (or equivalently
-`C=rand(LKJCholesky{k}(k=K, logη=0.0))`), `C.L * C.U` is uniform over the set of
-all `K×K` correlation matrices. Note, however, that in this case `C.L` and `C.U`
-are **not** sampled uniformly (because the multiplication is nonlinear).
-
-The `logdensity` method for this measure applies for `LowerTriangular`,
-`UpperTriangular`, or `Diagonal` matrices, and will "do the right thing". The
-`logdensity` **does not check if `L*U` yields a valid correlation matrix**.
-
-Valid values are ``η > 0``. When ``η > 1``, the distribution is unimodal with a
-peak at `I`, while ``0 < η < 1`` yields a trough. ``η = 2`` is recommended as a vague prior.
-
-Adapted from
-https://github.com/tpapp/AltDistributions.jl
-""" LKJCholesky
-
-@parameterized LKJCholesky(k,η)
-
-
-@kwstruct LKJCholesky(k,η)
-@kwstruct LKJCholesky(k,logη)
-
-LKJCholesky(k::Integer) = LKJCholesky(k, 1.0)
-
-asparams(::Type{<:LKJCholesky}, ::Val{:η}) = asℝ₊
-asparams(::Type{<:LKJCholesky}, ::Val{:logη}) = asℝ
-
-# Modified from
-# https://github.com/tpapp/AltDistributions.jl
-
-
-using LinearAlgebra
-
-logdensity(d::LKJCholesky, C::Cholesky) = logdensity(d, C.UL)
-
-function logdensity(d::LKJCholesky{(:k, :η,)}, L::Union{LinearAlgebra.AbstractTriangular, Diagonal})
-    η = d.η
-    # z = diag(L)
-    # sum(log.(z) .* ((k:-1:1) .+ 2*(η-1)))
-
-    # Note: https://github.com/cscherrer/MeasureTheory.jl/issues/100#issuecomment-852428192
-    c = d.k + 2(η - 1)
-    n = size(L,1)
-    s = sum(1:n) do i 
-        (c - i) * @inbounds log(L[i,i]) 
-    end
-    return s
-end
-
-function logdensity(d::LKJCholesky{(:k, :logη)}, L::Union{LinearAlgebra.AbstractTriangular, Diagonal})
-    c = d.k + 2 * expm1(d.logη)
-    n = size(L,1)
-    s = sum(1:n) do i
-        (c - i) * @inbounds log(L[i,i])
-    end
-    return s
-end
-
-
-TV.as(d::LKJCholesky) = CorrCholesky(d.k)
-
-function basemeasure(μ::LKJCholesky{(:k,:η)})
-    t = as(μ)
-    f(par) = Dists.lkj_logc0(par.k, par.η)
-    base = Pushforward(t, Lebesgue(ℝ)^dimension(t), false)
-    ParamWeightedMeasure(f, (k= μ.k, η= μ.η), base)
-end
-
-function basemeasure(μ::LKJCholesky{(:k,:logη)})
-    t = as(μ)
-    η = exp(μ.logη)
-    f(par) = Dists.lkj_logc0(par.k, exp(par.logη))
-    base = Pushforward(t, Lebesgue(ℝ)^dimension(t), false)
-    ParamWeightedMeasure(f, (k= μ.k, logη= logη), base)
-end
-
-# Note (from @sethaxen): this can be done without the cholesky decomposition, by randomly
-# sampling the canonical partial correlations, as in the LKJ paper, and then
-# mapping them to the cholesky factor instead of the correlation matrix. Stan
-# implements* this. But that can be for a future PR. 
-#
-# * https://github.com/stan-dev/math/blob/develop/stan/math/prim/prob/lkj_corr_cholesky_rng.hpp
-function Base.rand(rng::AbstractRNG, ::Type, d::LKJCholesky{(:k, :η,)})
-    return cholesky(Positive, rand(rng, Dists.LKJ(d.k, d.η)))
-end;
-
-function Base.rand(rng::AbstractRNG, ::Type, d::LKJCholesky{(:k, :logη)})
-    return cholesky(Positive, rand(rng, Dists.LKJ(d.k, exp(d.logη))))
-end;
-
-ConstructionBase.constructorof(::Type{L}) where {L<:LKJCholesky} = LKJCholesky

From 5a82db46c0a88f48988780aed586c2a1bb7e90b0 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Sun, 8 Aug 2021 13:06:36 -0700
Subject: [PATCH 25/29] drop refernce to lkj-cholesky.jl (moved to
 MultivariateMeasures

---
 src/MeasureTheory.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index 7b9a7a02..aef2bf92 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -91,7 +91,7 @@ include("parameterized/bernoulli.jl")
 include("parameterized/poisson.jl")
 include("parameterized/binomial.jl")
 include("parameterized/multinomial.jl")
-include("parameterized/lkj-cholesky.jl")
+# include("parameterized/lkj-cholesky.jl")
 include("parameterized/negativebinomial.jl")
 
 include("transforms/corrcholesky.jl")

From 882ad05fff7ad4914bd78599c6847614b515df1c Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Sun, 8 Aug 2021 13:12:02 -0700
Subject: [PATCH 26/29] drop corrcholesky reference

---
 src/MeasureTheory.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index aef2bf92..882bff13 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -94,7 +94,7 @@ include("parameterized/multinomial.jl")
 # include("parameterized/lkj-cholesky.jl")
 include("parameterized/negativebinomial.jl")
 
-include("transforms/corrcholesky.jl")
+# include("transforms/corrcholesky.jl")
 include("transforms/ordered.jl")
 
 include("density.jl")

From 09d96049b09d7ba7cbca10aea3b3ed2f89aa4894 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Sun, 8 Aug 2021 13:12:55 -0700
Subject: [PATCH 27/29] drop ordered reference

---
 src/MeasureTheory.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl
index 882bff13..9546a780 100644
--- a/src/MeasureTheory.jl
+++ b/src/MeasureTheory.jl
@@ -95,7 +95,7 @@ include("parameterized/multinomial.jl")
 include("parameterized/negativebinomial.jl")
 
 # include("transforms/corrcholesky.jl")
-include("transforms/ordered.jl")
+# include("transforms/ordered.jl")
 
 include("density.jl")
 # include("pushforward.jl")

From f36b8c5afd6cf67fb6f33aa14819ae42c442e4e8 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Mon, 9 Aug 2021 16:43:14 -0700
Subject: [PATCH 28/29] drop PositiveFactorizations dependency (not used)

---
 Project.toml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 26c8db88..e7382d34 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,7 +18,6 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900"
 NamedTupleTools = "d9ec5142-1e00-5aa0-9d6a-321866360f50"
 NestedTuples = "a734d2a7-8d68-409b-9419-626914d4061d"
-PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
@@ -39,7 +38,6 @@ MacroTools = "0.5"
 MappedArrays = "0.3, 0.4"
 NamedTupleTools = "0.13"
 NestedTuples = "0.3"
-PositiveFactorizations = "0.2"
 SpecialFunctions = "0.10, 1"
 StatsFuns = "0.9"
 TransformVariables = "0.4"

From 6e1fca0bedbf8ad5c2b07499c503d526d41ea9b2 Mon Sep 17 00:00:00 2001
From: Chad Scherrer <chad.scherrer@gmail.com>
Date: Mon, 9 Aug 2021 16:43:24 -0700
Subject: [PATCH 29/29] drop old LKJCholesky test

---
 test/runtests.jl | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 400b5a2d..3d44e937 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -254,9 +254,6 @@ end
         @test repro(Laplace, (:μ,:σ))
     end
 
-    @testset "LKJCholesky" begin
-        @test repro(LKJCholesky, (:k,:η,), (k=3,))
-    end
 
     @testset "Multinomial" begin
         @test_broken repro(Multinomial, (:n,:p,))