Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rules for BLAS.dot, BLAS.dotc, and BLAS.dotu #739

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b4bc4a7
Add initial commit of blasdot
sethaxen Apr 18, 2023
eb96aaf
Generalize to support dotu and dotc also
sethaxen Apr 19, 2023
f0929d1
Format more nicely
sethaxen Apr 19, 2023
be409e4
Define overloads in a module
sethaxen Apr 20, 2023
86c9e8b
Add missing imports
sethaxen Apr 20, 2023
b5a7c85
Support pointer arguments
sethaxen Apr 20, 2023
9c1fd09
Simplify with utility function
sethaxen Apr 20, 2023
0a67b2d
Make more concise
sethaxen Apr 20, 2023
851c240
Only compute primal if necessary
sethaxen Apr 20, 2023
fcac5c6
Simplify rules with utility functions
sethaxen Apr 20, 2023
744e32e
Rename utility function
sethaxen Apr 20, 2023
846fe3a
Call `stride`
sethaxen Apr 20, 2023
6133de8
Fix bugs
sethaxen Apr 20, 2023
36c20ca
Add dot tests
sethaxen Apr 20, 2023
6edeb12
Fix stray comma
sethaxen Apr 20, 2023
3707f08
Remove Duplicated as allowed activity
sethaxen Apr 23, 2023
b002e35
Simplify logic
sethaxen Apr 24, 2023
255f28c
Return primal if return type is Const
sethaxen Apr 24, 2023
9bac361
Test overwriting functions
sethaxen Apr 24, 2023
40ff30e
Support batched forward mode
sethaxen Apr 24, 2023
2130a26
Reorganize tests
sethaxen Apr 24, 2023
b623cf0
Add tests for BatchDuplicated
sethaxen Apr 24, 2023
fb91389
Only tape if absolutely necessary
sethaxen Apr 25, 2023
57b43ab
Simplify code with utility
sethaxen Apr 25, 2023
3f248cf
Correctly compute dim
sethaxen Apr 25, 2023
8e4ba1b
Refactor to reduce whitespace
sethaxen Apr 25, 2023
ced987e
Fix rules for negative increments and subarrays
sethaxen Apr 25, 2023
d6443d3
Correctly check overwrites
sethaxen Apr 27, 2023
7bb09f8
Removed unused functions
sethaxen Apr 27, 2023
640169c
More rigorously check pullback
sethaxen Apr 27, 2023
be426c2
Fix pullback for dotc
sethaxen Apr 27, 2023
627a5b1
Revert "Correctly check overwrites"
sethaxen Apr 27, 2023
f075240
Test tape mechanism
sethaxen Apr 27, 2023
87b217f
Test 2-arg versions
sethaxen Apr 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Enzyme.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ include("gradientutils.jl")
include("utils.jl")
include("compiler.jl")
include("internal_rules.jl")
include("rules/LinearAlgebra/blas.jl")

import .Compiler: CompilationException

Expand Down
149 changes: 149 additions & 0 deletions src/rules/LinearAlgebra/blas.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
module BLASRules

using ..Enzyme
using LinearAlgebra.BLAS

const ConstOrDuplicated{T} = Union{Const{T},Duplicated{T}}
const ConstOrBatchDuplicated{T} = Union{ConstOrDuplicated{T},BatchDuplicated{T}}

"""
    _safe_similar(x::AbstractArray, n::Integer)

Allocate uninitialized storage of length `n` by delegating to `similar(x, n)`.
"""
function _safe_similar(x::AbstractArray, n::Integer)
    return similar(x, n)
end
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These utilities should be reusable for all other Level 1 BLAS functions and should greatly simplify their rules.

"""
    _safe_similar(x::Ptr, n::Integer)

Allocate a new uninitialized `Array` of length `n` whose element type is `eltype(x)`.
The memory `x` points to is not read.
"""
function _safe_similar(x::Ptr, n::Integer)
    T = eltype(x)
    return Array{T}(undef, n)
end

"""
    _strided_tape(n::Integer, x::Union{AbstractArray,Ptr}, incx::Integer)

Copy `n` elements of `x`, read with increment `incx`, into newly allocated storage
that is written with unit increment.

Returns the tuple `(xtape, 1)`: the copy together with the increment (always `1`)
to use when reading it back.
"""
function _strided_tape(n::Integer, x::Union{AbstractArray,Ptr}, incx::Integer)
    tape = _safe_similar(x, n)
    # Gather the strided source elements into the unit-stride buffer.
    BLAS.blascopy!(n, x, incx, tape, 1)
    return tape, 1
end

"""
    _strided_range(n, x, incx)

Return the indices `1, 1 + |incx|, 1 + 2|incx|, …` (`n` of them) as a range.
When `incx` is negative the same indices are returned in reverse order.

The argument `x` is accepted but not used.
"""
function _strided_range(n, x, incx)
    forward = range(1; step=abs(incx), length=n)
    return incx < 0 ? reverse(forward) : forward
end

"""
    _strided_view(n::Integer, x::AbstractArray, incx::Integer)

Return a `view` of `x` restricted to the indices given by
`_strided_range(n, x, incx)`. No data is copied, so writes to the view modify `x`.
"""
function _strided_view(n::Integer, x::AbstractArray, incx::Integer)
    return view(x, _strided_range(n, x, incx))
end
"""
    _strided_view(n::Integer, x::Ptr, incx::Integer)

Wrap the memory at `x` as an `Array` (via `Base.unsafe_wrap`) just long enough to
cover the indices from `_strided_range(n, x, incx)`, and return a `view` of that
array at those indices.
"""
function _strided_view(n::Integer, x::Ptr, incx::Integer)
    idx = _strided_range(n, x, incx)
    # Number of elements between the two ends of the index range, inclusive.
    extent = abs(last(idx) - first(idx)) + 1
    wrapped = Base.unsafe_wrap(Array, x, extent)
    return view(wrapped, idx)
end

"""
    _maybe_primal_shadow(config, func, args)

Call `func(args...)` only if `config` requests the primal and/or the shadow, as
reported by `EnzymeRules.needs_primal` and `EnzymeRules.needs_shadow`.

Returns `(primal, shadow)`:
- `primal` is the result of the call when the primal is needed, otherwise `nothing`;
- `shadow` is `zero` of the result when the shadow is needed, otherwise `nothing`.
"""
function _maybe_primal_shadow(config, func, args)
    want_primal = EnzymeRules.needs_primal(config)
    want_shadow = EnzymeRules.needs_shadow(config)
    # Skip the primal computation entirely when neither output is requested.
    if !(want_primal || want_shadow)
        return (nothing, nothing)
    end
    result = func(args...)
    primal = want_primal ? result : nothing
    shadow = want_shadow ? zero(result) : nothing
    return (primal, shadow)
end

# When every argument is a tuple, apply `f` across the tuples elementwise via `map`.
function _map_tuple(f, xs::Tuple...)
    return map(f, xs...)
end

# Otherwise apply `f` once, directly to the arguments.
function _map_tuple(f, xs...)
    return f(xs...)
end

"""
    _aconjxpy!(n, a, x, incx, y, incy)

In-place update equivalent to `axpy!(a, conj.(x), y)` on strided data: for the `n`
elements selected by `_strided_view(n, x, incx)` and `_strided_view(n, y, incy)`,
add `a * conj(x_i)` to the matching `y_i`. Returns `y`.
"""
function _aconjxpy!(n, a, x, incx, y, incy)
    xs = _strided_view(n, x, incx)
    ys = _strided_view(n, y, incy)
    # Fused broadcast: writes through the view into `y` without a temporary.
    @. ys += a * conj(xs)
    return y
end

for (fname, Ttype) in ((:dot, :BlasReal), (:dotu, :BlasComplex), (:dotc, :BlasComplex))
@eval begin
function EnzymeRules.forward(
func::Const{typeof(BLAS.$fname)},
RT::Type{
<:Union{
Const,DuplicatedNoNeed,Duplicated,BatchDuplicatedNoNeed,BatchDuplicated
},
},
n::Const{<:Integer},
X::ConstOrBatchDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
incx::Const{<:Integer},
Y::ConstOrBatchDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
incy::Const{<:Integer},
) where {T<:BLAS.$Ttype}
RT <: Const && return func.val(n.val, X.val, incx.val, Y.val, incy.val)
Copy link
Collaborator Author

@sethaxen sethaxen Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some reason calling the 2-arg dot, which forwards to the 5-arg dot, now errors with Const return type:

using Enzyme, LinearAlgebra
x, y, ∂x, ∂y = ntuple(_ -> randn(5), 4);
autodiff(Forward, BLAS.dot, Duplicated, Duplicated(x, ∂x), Duplicated(y, ∂y))  # fine
autodiff(Forward, BLAS.dot, Duplicated, Const(x), Duplicated(y, ∂y))  # fine
autodiff(Forward, BLAS.dot, Const, Duplicated(x, ∂x), Duplicated(y, ∂y))  # errors, see below
┌ Warning: Using fallback BLAS replacements, performance may be degraded
└ @ Enzyme.Compiler ~/.julia/packages/GPUCompiler/BxfIW/src/utils.jl:56
mod:; ModuleID = 'start'
source_filename = "start"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-linux-gnu"

@_j_str1 = private unnamed_addr constant [11 x i8] c"typeassert\00"

; Function Attrs: noinline nosync readonly
define dso_local fastcc double @julia_dot_2279(i64 signext %0, i64 zeroext %1, i64 signext %2, i64 zeroext %3, i64 signext %4) unnamed_addr #0 !dbg !11 {
top:
  %5 = call {}*** @julia.get_pgcstack()
  %6 = inttoptr i64 %1 to double*, !dbg !14
  %7 = inttoptr i64 %3 to double*, !dbg !14
  %8 = sub i64 1, %0, !dbg !14
  %9 = icmp sgt i64 %0, 0, !dbg !14
  br i1 %9, label %10, label %cblas_ddot64_.exit, !dbg !14

10:                                               ; preds = %top
  %11 = icmp sgt i64 %4, 0, !dbg !14
  %12 = mul i64 %8, %4, !dbg !14
  %13 = select i1 %11, i64 0, i64 %12, !dbg !14
  %14 = icmp sgt i64 %2, 0, !dbg !14
  %15 = mul i64 %8, %2, !dbg !14
  %16 = select i1 %14, i64 0, i64 %15, !dbg !14
  br label %17, !dbg !14

17:                                               ; preds = %17, %10
  %18 = phi i64 [ 0, %10 ], [ %33, %17 ], !dbg !14
  %19 = phi i64 [ %13, %10 ], [ %32, %17 ], !dbg !14
  %20 = phi i64 [ %16, %10 ], [ %31, %17 ], !dbg !14
  %21 = phi double [ 0.000000e+00, %10 ], [ %30, %17 ], !dbg !14
  %22 = shl i64 %20, 32, !dbg !14
  %23 = ashr exact i64 %22, 32, !dbg !14
  %24 = getelementptr inbounds double, double* %6, i64 %23, !dbg !14
  %25 = load double, double* %24, align 8, !dbg !14, !tbaa !15
  %26 = shl i64 %19, 32, !dbg !14
  %27 = ashr exact i64 %26, 32, !dbg !14
  %28 = getelementptr inbounds double, double* %7, i64 %27, !dbg !14
  %29 = load double, double* %28, align 8, !dbg !14, !tbaa !15
  %30 = call double @llvm.fmuladd.f64(double %25, double %29, double %21) #17, !dbg !14
  %31 = add nsw i64 %23, %2, !dbg !14
  %32 = add nsw i64 %27, %4, !dbg !14
  %33 = add nuw nsw i64 %18, 1, !dbg !14
  %34 = icmp eq i64 %33, %0, !dbg !14
  br i1 %34, label %cblas_ddot64_.exit, label %17, !dbg !14, !llvm.loop !19

cblas_ddot64_.exit:                               ; preds = %17, %top
  %35 = phi double [ 0.000000e+00, %top ], [ %30, %17 ], !dbg !14
  ret double %35, !dbg !14
}

; Function Attrs: nofree readnone
declare {}*** @julia.get_pgcstack() #1

; Function Attrs: inaccessiblememonly allocsize(1)
declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #2

; Function Attrs: inaccessiblememonly nofree
declare token @llvm.julia.gc_preserve_begin(...) #3

; Function Attrs: nofree nounwind readnone
declare nonnull {}* @julia.pointer_from_objref({} addrspace(11)*) local_unnamed_addr #4

; Function Attrs: inaccessiblememonly nofree
declare void @llvm.julia.gc_preserve_end(token) #3

; Function Attrs: inaccessiblememonly nofree norecurse nounwind
declare void @julia.write_barrier({} addrspace(10)* readonly, ...) local_unnamed_addr #5

; Function Attrs: nofree
declare nonnull {} addrspace(10)* @ijl_invoke({} addrspace(10)*, {} addrspace(10)** nocapture readonly, i32, {} addrspace(10)*) #6

declare nonnull {} addrspace(10)* @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) local_unnamed_addr #7

; Function Attrs: noreturn
declare void @ijl_throw({} addrspace(12)*) local_unnamed_addr #8

; Function Attrs: nofree norecurse nounwind readnone
declare nonnull {} addrspace(10)* @julia.typeof({} addrspace(10)*) local_unnamed_addr #9

; Function Attrs: noreturn
declare void @ijl_type_error(i8*, {} addrspace(10)*, {} addrspace(12)*) local_unnamed_addr #8

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare double @llvm.fmuladd.f64(double, double, double) #10

define double @julia_dot_2276_inner.3({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #11 !dbg !22 {
entry:
  %2 = call {}*** @julia.get_pgcstack()
  %3 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !23
  %4 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %3 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !23
  %5 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %4, i64 0, i32 1, !dbg !23
  %6 = load i64, i64 addrspace(11)* %5, align 8, !dbg !23, !range !28, !alias.scope !29, !noalias !32
  %7 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !23
  %8 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %7 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !23
  %9 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %8, i64 0, i32 1, !dbg !23
  %10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !23, !range !28, !alias.scope !29, !noalias !32
  %.not.i = icmp eq i64 %6, %10, !dbg !37
  br i1 %.not.i, label %julia_dot_2276_inner.exit, label %L12.i, !dbg !41

L12.i:                                            ; preds = %entry
  %current_task15.i = getelementptr inbounds {}**, {}*** %2, i64 -13, !dbg !42
  %current_task1.i = bitcast {}*** %current_task15.i to {}**, !dbg !42
  %11 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195699359008 to {}*) to {} addrspace(10)*)) #18, !dbg !42
  %12 = bitcast {} addrspace(10)* %11 to {} addrspace(10)* addrspace(10)*, !dbg !42
  %13 = addrspacecast {} addrspace(10)* addrspace(10)* %12 to {} addrspace(10)* addrspace(11)*, !dbg !42
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %13, align 8, !dbg !42, !tbaa !45, !alias.scope !51, !noalias !52
  %14 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %13, i64 1, !dbg !42
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %14, align 8, !dbg !42, !tbaa !45, !alias.scope !51, !noalias !52
  %15 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195667121680 to {}*) to {} addrspace(10)*)) #18, !dbg !42
  %16 = bitcast {} addrspace(10)* %15 to { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)*, !dbg !42
  %.repack.i = bitcast {} addrspace(10)* %15 to {} addrspace(10)* addrspace(10)*, !dbg !42
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619104 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack.i, align 8, !dbg !42, !tbaa !55, !alias.scope !51, !noalias !52
  %.repack7.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 1, !dbg !42
  store i64 %6, i64 addrspace(10)* %.repack7.i, align 8, !dbg !42, !tbaa !55, !alias.scope !51, !noalias !52
  %.repack9.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 2, !dbg !42
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619072 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack9.i, align 8, !dbg !42, !tbaa !55, !alias.scope !51, !noalias !52
  %.repack11.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 3, !dbg !42
  store i64 %10, i64 addrspace(10)* %.repack11.i, align 8, !dbg !42, !tbaa !55, !alias.scope !51, !noalias !52
  store atomic {} addrspace(10)* %15, {} addrspace(10)* addrspace(11)* %13 release, align 8, !dbg !42, !tbaa !45, !alias.scope !51, !noalias !52
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %15) #17, !dbg !42
  %17 = bitcast {} addrspace(10)* %11 to i8 addrspace(10)*, !dbg !42
  %18 = addrspacecast i8 addrspace(10)* %17 to i8 addrspace(11)*, !dbg !42
  %19 = getelementptr inbounds i8, i8 addrspace(11)* %18, i64 8, !dbg !42
  %20 = bitcast i8 addrspace(11)* %19 to {} addrspace(10)* addrspace(11)*, !dbg !42
  store atomic {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(11)* %20 release, align 8, !dbg !42, !tbaa !45, !alias.scope !51, !noalias !52
  %21 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %20 acquire, align 8, !dbg !57, !tbaa !45, !alias.scope !51, !noalias !68, !nonnull !13
  %22 = addrspacecast {} addrspace(10)* %21 to {} addrspace(11)*, !dbg !69
  %.not13.i = icmp eq {} addrspace(11)* %22, addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(11)*), !dbg !69
  br i1 %.not13.i, label %L17.i, label %L32.i, !dbg !69

L17.i:                                            ; preds = %L12.i
  %23 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195691323952 to {}*) to {} addrspace(10)*)) #18, !dbg !70
  %24 = bitcast {} addrspace(10)* %23 to {} addrspace(10)* addrspace(10)*, !dbg !70
  store {} addrspace(10)* %11, {} addrspace(10)* addrspace(10)* %24, align 8, !dbg !70, !tbaa !55, !alias.scope !51, !noalias !52
  %25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195718475744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195662589696 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888394272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195672369408 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %23) #19, !dbg !70
  %26 = cmpxchg {} addrspace(10)* addrspace(11)* %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* %25 acq_rel acquire, align 8, !dbg !74, !tbaa !45, !alias.scope !51, !noalias !68
  %27 = extractvalue { {} addrspace(10)*, i1 } %26, 0, !dbg !74
  %28 = extractvalue { {} addrspace(10)*, i1 } %26, 1, !dbg !74
  br i1 %28, label %xchg_wb.i, label %L27.i, !dbg !74

L27.i:                                            ; preds = %L17.i
  %29 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %27) #20, !dbg !77
  %30 = icmp eq {} addrspace(10)* %29, addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), !dbg !77
  br i1 %30, label %L32.i, label %fail.i, !dbg !77

L32.i:                                            ; preds = %xchg_wb.i, %L27.i, %L12.i
  %value_phi.i = phi {} addrspace(10)* [ %25, %xchg_wb.i ], [ %21, %L12.i ], [ %27, %L27.i ]
  %31 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195706238240 to {}*) to {} addrspace(10)*)) #18, !dbg !41
  %32 = bitcast {} addrspace(10)* %31 to {} addrspace(10)* addrspace(10)*, !dbg !41
  store {} addrspace(10)* %value_phi.i, {} addrspace(10)* addrspace(10)* %32, align 8, !dbg !41, !tbaa !55, !alias.scope !51, !noalias !52
  %33 = addrspacecast {} addrspace(10)* %31 to {} addrspace(12)*, !dbg !41
  call void @ijl_throw({} addrspace(12)* %33) #21, !dbg !41
  unreachable, !dbg !41

xchg_wb.i:                                        ; preds = %L17.i
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %25) #17, !dbg !74
  br label %L32.i, !dbg !77

fail.i:                                           ; preds = %L27.i
  %34 = addrspacecast {} addrspace(10)* %27 to {} addrspace(12)*, !dbg !77
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @_j_str1, i64 0, i64 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), {} addrspace(12)* %34) #21, !dbg !77
  unreachable, !dbg !77

julia_dot_2276_inner.exit:                        ; preds = %entry
  %35 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %0, {} addrspace(10)* nonnull %1), !dbg !78
  %36 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !79
  %37 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %36) #20, !dbg !79
  %38 = bitcast {}* %37 to i8**, !dbg !79
  %39 = load i8*, i8** %38, align 8, !dbg !79, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %40 = ptrtoint i8* %39 to i64, !dbg !79
  %41 = addrspacecast {} addrspace(10)* %1 to {} addrspace(11)*, !dbg !79
  %42 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %41) #20, !dbg !79
  %43 = bitcast {}* %42 to i8**, !dbg !79
  %44 = load i8*, i8** %43, align 8, !dbg !79, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %45 = ptrtoint i8* %44 to i64, !dbg !79
  %46 = call fastcc double @julia_dot_2279(i64 signext %6, i64 zeroext %40, i64 noundef signext 1, i64 zeroext %45, i64 noundef signext 1) #16, !dbg !78
  call void @llvm.julia.gc_preserve_end(token %35), !dbg !78
  ret double %46, !dbg !92
}

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #12

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #12

; Function Attrs: readnone
declare void @llvm.enzymefakeuse(...) #13

; Function Attrs: mustprogress willreturn
define double @preprocess_julia_dot_2276_inner.3({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #14 !dbg !93 {
entry:
  %2 = call {}*** @julia.get_pgcstack() #22
  %3 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !94
  %4 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %3 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !94
  %5 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %4, i64 0, i32 1, !dbg !94
  %6 = load i64, i64 addrspace(11)* %5, align 8, !dbg !94, !range !28, !alias.scope !29, !noalias !32
  %7 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !94
  %8 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %7 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !94
  %9 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %8, i64 0, i32 1, !dbg !94
  %10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !94, !range !28, !alias.scope !29, !noalias !32
  %.not.i = icmp eq i64 %6, %10, !dbg !97
  br i1 %.not.i, label %julia_dot_2276_inner.exit, label %L12.i, !dbg !99

L12.i:                                            ; preds = %entry
  %current_task15.i = getelementptr inbounds {}**, {}*** %2, i64 -13, !dbg !100
  %current_task1.i = bitcast {}*** %current_task15.i to {}**, !dbg !100
  %11 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195699359008 to {}*) to {} addrspace(10)*)) #23, !dbg !100
  %12 = bitcast {} addrspace(10)* %11 to {} addrspace(10)* addrspace(10)*, !dbg !100
  %13 = addrspacecast {} addrspace(10)* addrspace(10)* %12 to {} addrspace(10)* addrspace(11)*, !dbg !100
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %13, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %14 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %13, i64 1, !dbg !100
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %14, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %15 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195667121680 to {}*) to {} addrspace(10)*)) #23, !dbg !100
  %16 = bitcast {} addrspace(10)* %15 to { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)*, !dbg !100
  %.repack.i = bitcast {} addrspace(10)* %15 to {} addrspace(10)* addrspace(10)*, !dbg !100
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619104 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack7.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 1, !dbg !100
  store i64 %6, i64 addrspace(10)* %.repack7.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack9.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 2, !dbg !100
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619072 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack9.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack11.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 3, !dbg !100
  store i64 %10, i64 addrspace(10)* %.repack11.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  store atomic {} addrspace(10)* %15, {} addrspace(10)* addrspace(11)* %13 release, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %15) #24, !dbg !100
  %17 = bitcast {} addrspace(10)* %11 to i8 addrspace(10)*, !dbg !100
  %18 = addrspacecast i8 addrspace(10)* %17 to i8 addrspace(11)*, !dbg !100
  %19 = getelementptr inbounds i8, i8 addrspace(11)* %18, i64 8, !dbg !100
  %20 = bitcast i8 addrspace(11)* %19 to {} addrspace(10)* addrspace(11)*, !dbg !100
  store atomic {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(11)* %20 release, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %21 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %20 acquire, align 8, !dbg !104, !tbaa !45, !alias.scope !51, !noalias !68, !nonnull !13
  %22 = addrspacecast {} addrspace(10)* %21 to {} addrspace(11)*, !dbg !108
  %.not13.i = icmp eq {} addrspace(11)* %22, addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(11)*), !dbg !108
  br i1 %.not13.i, label %L17.i, label %L32.i, !dbg !108

L17.i:                                            ; preds = %L12.i
  %23 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195691323952 to {}*) to {} addrspace(10)*)) #23, !dbg !109
  %24 = bitcast {} addrspace(10)* %23 to {} addrspace(10)* addrspace(10)*, !dbg !109
  store {} addrspace(10)* %11, {} addrspace(10)* addrspace(10)* %24, align 8, !dbg !109, !tbaa !55, !alias.scope !51, !noalias !101
  %25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195718475744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195662589696 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888394272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195672369408 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %23) #25, !dbg !109
  %26 = cmpxchg {} addrspace(10)* addrspace(11)* %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* %25 acq_rel acquire, align 8, !dbg !111, !tbaa !45, !alias.scope !51, !noalias !68
  %27 = extractvalue { {} addrspace(10)*, i1 } %26, 0, !dbg !111
  %28 = extractvalue { {} addrspace(10)*, i1 } %26, 1, !dbg !111
  br i1 %28, label %xchg_wb.i, label %L27.i, !dbg !111

L27.i:                                            ; preds = %L17.i
  %29 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %27) #26, !dbg !113
  %30 = icmp eq {} addrspace(10)* %29, addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), !dbg !113
  br i1 %30, label %L32.i, label %fail.i, !dbg !113

L32.i:                                            ; preds = %xchg_wb.i, %L27.i, %L12.i
  %value_phi.i = phi {} addrspace(10)* [ %25, %xchg_wb.i ], [ %21, %L12.i ], [ %27, %L27.i ]
  %31 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195706238240 to {}*) to {} addrspace(10)*)) #23, !dbg !99
  %32 = bitcast {} addrspace(10)* %31 to {} addrspace(10)* addrspace(10)*, !dbg !99
  store {} addrspace(10)* %value_phi.i, {} addrspace(10)* addrspace(10)* %32, align 8, !dbg !99, !tbaa !55, !alias.scope !51, !noalias !101
  %33 = addrspacecast {} addrspace(10)* %31 to {} addrspace(12)*, !dbg !99
  call void @ijl_throw({} addrspace(12)* %33) #27, !dbg !99
  unreachable, !dbg !99

xchg_wb.i:                                        ; preds = %L17.i
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %25) #24, !dbg !111
  br label %L32.i, !dbg !113

fail.i:                                           ; preds = %L27.i
  %34 = addrspacecast {} addrspace(10)* %27 to {} addrspace(12)*, !dbg !113
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @_j_str1, i64 0, i64 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), {} addrspace(12)* %34) #27, !dbg !113
  unreachable, !dbg !113

julia_dot_2276_inner.exit:                        ; preds = %entry
  %35 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %0, {} addrspace(10)* nonnull %1) #22, !dbg !114
  %36 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !115
  %37 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %36) #26, !dbg !115
  %38 = bitcast {}* %37 to i8**, !dbg !115
  %39 = load i8*, i8** %38, align 8, !dbg !115, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %40 = ptrtoint i8* %39 to i64, !dbg !115
  %41 = addrspacecast {} addrspace(10)* %1 to {} addrspace(11)*, !dbg !115
  %42 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %41) #26, !dbg !115
  %43 = bitcast {}* %42 to i8**, !dbg !115
  %44 = load i8*, i8** %43, align 8, !dbg !115, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %45 = ptrtoint i8* %44 to i64, !dbg !115
  %46 = call fastcc double @julia_dot_2279(i64 signext %6, i64 zeroext %40, i64 noundef signext 1, i64 zeroext %45, i64 noundef signext 1) #28, !dbg !114
  call void @llvm.julia.gc_preserve_end(token %35) #22, !dbg !114
  ret double %46, !dbg !120
}

; Function Attrs: mustprogress willreturn
; Enzyme-generated derivative wrapper for the Julia `dot` method `julia_dot_2276`
; (the `fwddiffe` prefix presumably denotes forward mode -- confirm against Enzyme docs).
; Arguments: %0 and %1 are the two original array objects; %"'" and %"'1" are their
; shadow counterparts (loads through them below carry the "shadow_0" alias scope).
; NOTE(review): this is a dump of IR that is not valid as printed: the exit block
; contains phi nodes with no incoming values and the last block has no terminator.
define internal void @fwddiffejulia_dot_2276_inner.3({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* %"'", {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1, {} addrspace(10)* %"'1") local_unnamed_addr #14 !dbg !121 {
entry:
  ; Six separate pgcstack lookups: %2..%6 each seed one allocation in the exit
  ; block, %7 seeds the allocations on the error path.
  %2 = call {}*** @julia.get_pgcstack()
  %3 = call {}*** @julia.get_pgcstack()
  %4 = call {}*** @julia.get_pgcstack()
  %5 = call {}*** @julia.get_pgcstack()
  %6 = call {}*** @julia.get_pgcstack()
  %7 = call {}*** @julia.get_pgcstack() #22
  ; Load the i64 at field index 1 of each array header (debug info attributes
  ; these loads to `length` in essentials.jl) and branch on equality.
  %8 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !122
  %9 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !122
  %10 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i64 0, i32 1, !dbg !122
  %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !122, !range !28, !alias.scope !125, !noalias !128
  %12 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !122
  %13 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %12 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !122
  %14 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %13, i64 0, i32 1, !dbg !122
  %15 = load i64, i64 addrspace(11)* %14, align 8, !dbg !122, !range !28, !alias.scope !130, !noalias !133
  %.not.i = icmp eq i64 %11, %15, !dbg !135
  br i1 %.not.i, label %julia_dot_2276_inner.exit, label %L12.i, !dbg !137

; Length-mismatch path. Debug metadata places the instructions from here through
; fail.i in LazyString / String / DimensionMismatch code; every route ends in
; @ijl_throw or @ijl_type_error followed by `unreachable`.
L12.i:                                            ; preds = %entry
  %current_task15.i = getelementptr inbounds {}**, {}*** %7, i64 -13, !dbg !138
  %current_task1.i = bitcast {}*** %current_task15.i to {}**, !dbg !138
  ; 16-byte object %16 with two pointer slots, both initialised to null.
  %16 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195699359008 to {}*) to {} addrspace(10)*)) #23, !dbg !138
  %17 = bitcast {} addrspace(10)* %16 to {} addrspace(10)* addrspace(10)*, !dbg !138
  %18 = addrspacecast {} addrspace(10)* addrspace(10)* %17 to {} addrspace(10)* addrspace(11)*, !dbg !138
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %18, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  %19 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %18, i64 1, !dbg !138
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %19, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  ; 32-byte object %20 laid out as { constant object, %11, constant object, %15 },
  ; i.e. the two mismatching lengths interleaved with two fixed objects.
  %20 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195667121680 to {}*) to {} addrspace(10)*)) #23, !dbg !138
  %21 = bitcast {} addrspace(10)* %20 to { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)*, !dbg !138
  %.repack.i = bitcast {} addrspace(10)* %20 to {} addrspace(10)* addrspace(10)*, !dbg !138
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619104 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack7.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 1, !dbg !138
  store i64 %11, i64 addrspace(10)* %.repack7.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack9.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 2, !dbg !138
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619072 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack9.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack11.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 3, !dbg !138
  store i64 %15, i64 addrspace(10)* %.repack11.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  ; Publish %20 into slot 0 of %16, then store a fixed object into slot 1 and
  ; immediately reload slot 1 to compare it against that same fixed object.
  store atomic {} addrspace(10)* %20, {} addrspace(10)* addrspace(11)* %18 release, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %16, {} addrspace(10)* nonnull %20) #24, !dbg !138
  %22 = bitcast {} addrspace(10)* %16 to i8 addrspace(10)*, !dbg !138
  %23 = addrspacecast i8 addrspace(10)* %22 to i8 addrspace(11)*, !dbg !138
  %24 = getelementptr inbounds i8, i8 addrspace(11)* %23, i64 8, !dbg !138
  %25 = bitcast i8 addrspace(11)* %24 to {} addrspace(10)* addrspace(11)*, !dbg !138
  store atomic {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(11)* %25 release, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  %26 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %25 acquire, align 8, !dbg !142, !tbaa !45, !alias.scope !51, !noalias !68, !nonnull !13
  %27 = addrspacecast {} addrspace(10)* %26 to {} addrspace(11)*, !dbg !146
  %.not13.i = icmp eq {} addrspace(11)* %27, addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(11)*), !dbg !146
  br i1 %.not13.i, label %L17.i, label %L32.i, !dbg !146

L17.i:                                            ; preds = %L12.i
  ; Box %16, invoke a Julia method on it via @ijl_invoke (debug info: `sprint`),
  ; then try to swap the result into slot 1 of %16 with a cmpxchg.
  %28 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195691323952 to {}*) to {} addrspace(10)*)) #23, !dbg !147
  %29 = bitcast {} addrspace(10)* %28 to {} addrspace(10)* addrspace(10)*, !dbg !147
  store {} addrspace(10)* %16, {} addrspace(10)* addrspace(10)* %29, align 8, !dbg !147, !tbaa !55, !alias.scope !51, !noalias !139
  %30 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195718475744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195662589696 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888394272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195672369408 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %28) #25, !dbg !147
  %31 = cmpxchg {} addrspace(10)* addrspace(11)* %25, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* %30 acq_rel acquire, align 8, !dbg !149, !tbaa !45, !alias.scope !51, !noalias !68
  %32 = extractvalue { {} addrspace(10)*, i1 } %31, 0, !dbg !149
  %33 = extractvalue { {} addrspace(10)*, i1 } %31, 1, !dbg !149
  br i1 %33, label %xchg_wb.i, label %L27.i, !dbg !149

L27.i:                                            ; preds = %L17.i
  ; cmpxchg failed: type-check the value that was found in the slot instead.
  %34 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %32) #26, !dbg !151
  %35 = icmp eq {} addrspace(10)* %34, addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), !dbg !151
  br i1 %35, label %L32.i, label %fail.i, !dbg !151

L32.i:                                            ; preds = %xchg_wb.i, %L27.i, %L12.i
  ; Wrap the selected value in a fresh 8-byte object and throw it.
  %value_phi.i = phi {} addrspace(10)* [ %30, %xchg_wb.i ], [ %26, %L12.i ], [ %32, %L27.i ]
  %36 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195706238240 to {}*) to {} addrspace(10)*)) #23, !dbg !137
  %37 = bitcast {} addrspace(10)* %36 to {} addrspace(10)* addrspace(10)*, !dbg !137
  store {} addrspace(10)* %value_phi.i, {} addrspace(10)* addrspace(10)* %37, align 8, !dbg !137, !tbaa !55, !alias.scope !51, !noalias !139
  %38 = addrspacecast {} addrspace(10)* %36 to {} addrspace(12)*, !dbg !137
  call void @ijl_throw({} addrspace(12)* %38) #27, !dbg !137
  unreachable, !dbg !137

xchg_wb.i:                                        ; preds = %L17.i
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %16, {} addrspace(10)* nonnull %30) #24, !dbg !149
  br label %L32.i, !dbg !151

fail.i:                                           ; preds = %L27.i
  %39 = addrspacecast {} addrspace(10)* %32 to {} addrspace(12)*, !dbg !151
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @_j_str1, i64 0, i64 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), {} addrspace(12)* %39) #27, !dbg !151
  unreachable, !dbg !151

; Equal-length path: gather arguments and call the custom rule @julia_forward_2281.
julia_dot_2276_inner.exit:                        ; preds = %entry
  ; gc_preserve_begin/end bracket the region in which raw data pointers of the
  ; four arrays (both primals and both shadows) are in use.
  %40 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %0, {} addrspace(10)* %"'", {} addrspace(10)* %1, {} addrspace(10)* %"'1"), !dbg !152
  ; First array: the shadow's object pointer is taken, its first pointer-sized
  ; field (TBAA tag jtbaa_arrayptr) is loaded and converted to an integer.
  ; The `%_replacementA*` phis below have NO incoming values, which is not valid
  ; IR. They sit where the matching primal instructions would be (the primal cast
  ; %41 is otherwise unused) -- presumably Enzyme placeholders; confirm.
  %"'ipc" = addrspacecast {} addrspace(10)* %"'" to {} addrspace(11)*, !dbg !153
  %41 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !153
  %42 = call {}* @julia.pointer_from_objref({} addrspace(11)* %"'ipc"), !dbg !153
  %_replacementA = phi {}* , !dbg !153
  %"'ipc25" = bitcast {}* %42 to i8**, !dbg !153
  %_replacementA17 = phi i8** , !dbg !153
  %"'ipl" = load i8*, i8** %"'ipc25", align 8, !dbg !153, !tbaa !89, !alias.scope !158, !noalias !159, !nonnull !13
  %"'ipc26" = ptrtoint i8* %"'ipl" to i64, !dbg !153
  %_replacementA19 = phi i64 , !dbg !153
  ; Second array: same sequence on %"'1"; the primal cast %43 is likewise unused.
  %"'ipc20" = addrspacecast {} addrspace(10)* %"'1" to {} addrspace(11)*, !dbg !153
  %43 = addrspacecast {} addrspace(10)* %1 to {} addrspace(11)*, !dbg !153
  %44 = call {}* @julia.pointer_from_objref({} addrspace(11)* %"'ipc20"), !dbg !153
  %_replacementA21 = phi {}* , !dbg !153
  %"'ipc27" = bitcast {}* %44 to i8**, !dbg !153
  %_replacementA22 = phi i8** , !dbg !153
  %"'ipl28" = load i8*, i8** %"'ipc27", align 8, !dbg !153, !tbaa !89, !alias.scope !160, !noalias !161, !nonnull !13
  %_replacementA23 = phi i8* , !dbg !153
  %"'ipc29" = ptrtoint i8* %"'ipl28" to i64, !dbg !153
  %_replacementA24 = phi i64 , !dbg !153
  ; Argument 1 of the rule: 8-byte box holding %11 (the common length).
  %45 = bitcast {}*** %6 to {}**, !dbg !152
  %46 = getelementptr inbounds {}*, {}** %45, i64 -13, !dbg !152
  %47 = getelementptr inbounds {}*, {}** %46, i64 15, !dbg !152
  %48 = bitcast {}** %47 to i8**, !dbg !152
  %49 = load i8*, i8** %48, align 8, !dbg !152
  %50 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %46, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %51 = bitcast {} addrspace(10)* %50 to [1 x i64] addrspace(10)*, !dbg !152
  %52 = addrspacecast [1 x i64] addrspace(10)* %51 to [1 x i64] addrspace(11)*, !dbg !152
  %53 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %52, i64 0, i32 0, !dbg !152
  store i64 %11, i64 addrspace(11)* %53, align 8, !dbg !152
  ; Argument 2: 16-byte pair { %_replacementA19, shadow data pointer of array 1 }.
  %54 = bitcast {}*** %5 to {}**, !dbg !152
  %55 = getelementptr inbounds {}*, {}** %54, i64 -13, !dbg !152
  %56 = getelementptr inbounds {}*, {}** %55, i64 15, !dbg !152
  %57 = bitcast {}** %56 to i8**, !dbg !152
  %58 = load i8*, i8** %57, align 8, !dbg !152
  %59 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %55, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195885311824 to {}*) to {} addrspace(10)*)), !dbg !152
  %60 = bitcast {} addrspace(10)* %59 to [2 x i64] addrspace(10)*, !dbg !152
  %61 = addrspacecast [2 x i64] addrspace(10)* %60 to [2 x i64] addrspace(11)*, !dbg !152
  %62 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %61, i64 0, i32 0, !dbg !152
  store i64 %_replacementA19, i64 addrspace(11)* %62, align 8, !dbg !152
  %63 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %61, i64 0, i32 1, !dbg !152
  store i64 %"'ipc26", i64 addrspace(11)* %63, align 8, !dbg !152
  ; Argument 3: 8-byte box holding the constant 1 (the primal call to
  ; @julia_dot_2279 passes a constant 1 in the same position).
  %64 = bitcast {}*** %4 to {}**, !dbg !152
  %65 = getelementptr inbounds {}*, {}** %64, i64 -13, !dbg !152
  %66 = getelementptr inbounds {}*, {}** %65, i64 15, !dbg !152
  %67 = bitcast {}** %66 to i8**, !dbg !152
  %68 = load i8*, i8** %67, align 8, !dbg !152
  %69 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %65, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %70 = bitcast {} addrspace(10)* %69 to [1 x i64] addrspace(10)*, !dbg !152
  %71 = addrspacecast [1 x i64] addrspace(10)* %70 to [1 x i64] addrspace(11)*, !dbg !152
  %72 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %71, i64 0, i32 0, !dbg !152
  store i64 1, i64 addrspace(11)* %72, align 8, !dbg !152
  ; Argument 4: 16-byte pair { %_replacementA24, shadow data pointer of array 2 }.
  %73 = bitcast {}*** %3 to {}**, !dbg !152
  %74 = getelementptr inbounds {}*, {}** %73, i64 -13, !dbg !152
  %75 = getelementptr inbounds {}*, {}** %74, i64 15, !dbg !152
  %76 = bitcast {}** %75 to i8**, !dbg !152
  %77 = load i8*, i8** %76, align 8, !dbg !152
  %78 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %74, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195885311824 to {}*) to {} addrspace(10)*)), !dbg !152
  %79 = bitcast {} addrspace(10)* %78 to [2 x i64] addrspace(10)*, !dbg !152
  %80 = addrspacecast [2 x i64] addrspace(10)* %79 to [2 x i64] addrspace(11)*, !dbg !152
  %81 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %80, i64 0, i32 0, !dbg !152
  store i64 %_replacementA24, i64 addrspace(11)* %81, align 8, !dbg !152
  %82 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %80, i64 0, i32 1, !dbg !152
  store i64 %"'ipc29", i64 addrspace(11)* %82, align 8, !dbg !152
  ; Argument 5: 8-byte box holding the constant 1.
  %83 = bitcast {}*** %2 to {}**, !dbg !152
  %84 = getelementptr inbounds {}*, {}** %83, i64 -13, !dbg !152
  %85 = getelementptr inbounds {}*, {}** %84, i64 15, !dbg !152
  %86 = bitcast {}** %85 to i8**, !dbg !152
  %87 = load i8*, i8** %86, align 8, !dbg !152
  %88 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %84, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %89 = bitcast {} addrspace(10)* %88 to [1 x i64] addrspace(10)*, !dbg !152
  %90 = addrspacecast [1 x i64] addrspace(10)* %89 to [1 x i64] addrspace(11)*, !dbg !152
  %91 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %90, i64 0, i32 0, !dbg !152
  store i64 1, i64 addrspace(11)* %91, align 8, !dbg !152
  ; Call the rule. Its double result %92 is not used and this function returns void.
  %92 = call fast double @julia_forward_2281([1 x i64] addrspace(11)* %52, [2 x i64] addrspace(11)* %61, [1 x i64] addrspace(11)* %71, [2 x i64] addrspace(11)* %80, [1 x i64] addrspace(11)* %90), !dbg !152
  call void @llvm.julia.gc_preserve_end(token %40) #22, !dbg !152
  ret void

; Empty block: no predecessors, no instructions and no terminator.
allocsForInversion:                               ; No predecessors!
}

; Function Attrs: alwaysinline
; Compiled `forward` method (debug info: src/rules/LinearAlgebra/blas.jl, line 62).
; Takes five by-reference arguments -- [1 x i64], [2 x i64], [1 x i64], [2 x i64],
; [1 x i64] -- loads the FIRST i64 of each, and forwards those five integers to
; @julia_dot_2284, returning its double result unchanged.
; Note that for the two [2 x i64] arguments only element 0 is read here; element 1
; is never accessed in this function.
define double @julia_forward_2281([1 x i64] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %0, [2 x i64] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(16) %1, [1 x i64] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %2, [2 x i64] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(16) %3, [1 x i64] addrspace(11)* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) %4) #15 !dbg !162 {
top:
  ; %5 is not used afterwards in this function.
  %5 = call {}*** @julia.get_pgcstack()
  ; Address of element 0 of each argument (debug info: `getproperty`).
  %6 = getelementptr inbounds [1 x i64], [1 x i64] addrspace(11)* %0, i64 0, i64 0, !dbg !163
  %7 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(11)* %1, i64 0, i64 0, !dbg !163
  %8 = getelementptr inbounds [1 x i64], [1 x i64] addrspace(11)* %2, i64 0, i64 0, !dbg !163
  %9 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(11)* %3, i64 0, i64 0, !dbg !163
  %10 = getelementptr inbounds [1 x i64], [1 x i64] addrspace(11)* %4, i64 0, i64 0, !dbg !163
  %11 = load i64, i64 addrspace(11)* %6, align 8, !dbg !165, !tbaa !166, !alias.scope !168, !noalias !169
  %12 = load i64, i64 addrspace(11)* %7, align 8, !dbg !165, !tbaa !166, !alias.scope !168, !noalias !169
  %13 = load i64, i64 addrspace(11)* %8, align 8, !dbg !165, !tbaa !166, !alias.scope !168, !noalias !169
  %14 = load i64, i64 addrspace(11)* %9, align 8, !dbg !165, !tbaa !166, !alias.scope !168, !noalias !169
  %15 = load i64, i64 addrspace(11)* %10, align 8, !dbg !165, !tbaa !166, !alias.scope !168, !noalias !169
  ; Argument order matches the loads above: (%0[0], %1[0], %2[0], %3[0], %4[0]).
  %16 = call double @julia_dot_2284(i64 signext %11, i64 zeroext %12, i64 signext %13, i64 zeroext %14, i64 signext %15) #16, !dbg !165
  ret double %16, !dbg !165
}

; Compiled `dot` method (debug info: LinearAlgebra/src/blas.jl, line 344) taking
; five i64 arguments. It passes them, in order, to a function at a hard-coded
; absolute address and returns that function's double result.
; NOTE(review): the callee is presumably the BLAS dot routine, with the arguments
; being (n, x pointer, x stride, y pointer, y stride) -- not visible from this IR;
; confirm against blas.jl.
define internal double @julia_dot_2284(i64 signext %0, i64 zeroext %1, i64 signext %2, i64 zeroext %3, i64 signext %4) #16 !dbg !170 {
top:
  ; %5 is not used afterwards in this function.
  %5 = call {}*** @julia.get_pgcstack()
  ; Indirect call through an integer constant cast to a function pointer.
  %6 = call double inttoptr (i64 140194943493221 to double (i64, i64, i64, i64, i64)*)(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4), !dbg !171
  ret double %6, !dbg !171
}

; Attribute groups. Each `#N` defined here is what a `#N` suffix on a function
; definition or call site in this dump refers to (e.g. #14 on
; @fwddiffejulia_dot_2276_inner.3, #15 on @julia_forward_2281, #16 on
; @julia_dot_2284, #27 on the @ijl_throw / @ijl_type_error calls).
; Groups #0 through #14 all carry "enzymejl_world"="33467"; groups #15 onward do not.
attributes #0 = { noinline nosync readonly "enzyme_math"="enzyme_custom" "enzyme_preserve_primal"="*" "enzymejl_job"="140194381763984" "enzymejl_mi"="140193926607856" "enzymejl_world"="33467" "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #1 = { nofree readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33467" }
attributes #2 = { inaccessiblememonly allocsize(1) "enzymejl_world"="33467" }
attributes #3 = { inaccessiblememonly nofree "enzyme_inactive" "enzymejl_world"="33467" }
attributes #4 = { nofree nounwind readnone "enzymejl_world"="33467" }
attributes #5 = { inaccessiblememonly nofree norecurse nounwind "enzyme_inactive" "enzymejl_world"="33467" }
attributes #6 = { nofree "enzymejl_world"="33467" }
attributes #7 = { "enzymejl_world"="33467" }
attributes #8 = { noreturn "enzymejl_world"="33467" }
attributes #9 = { nofree norecurse nounwind readnone "enzyme_inactive" "enzyme_shouldrecompute" "enzymejl_world"="33467" }
attributes #10 = { nofree nosync nounwind readnone speculatable willreturn "enzymejl_world"="33467" }
attributes #11 = { "enzymejl_world"="33467" "probe-stack"="inline-asm" }
attributes #12 = { argmemonly nofree nosync nounwind willreturn "enzymejl_world"="33467" }
attributes #13 = { readnone "enzymejl_world"="33467" }
attributes #14 = { mustprogress willreturn "enzymejl_world"="33467" "probe-stack"="inline-asm" }
attributes #15 = { alwaysinline "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #16 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #17 = { nounwind }
attributes #18 = { allocsize(1) }
attributes #19 = { nofree }
attributes #20 = { nounwind readnone }
attributes #21 = { noreturn }
attributes #22 = { mustprogress willreturn }
attributes #23 = { mustprogress willreturn allocsize(1) }
attributes #24 = { mustprogress nounwind willreturn }
attributes #25 = { mustprogress nofree willreturn }
attributes #26 = { mustprogress nounwind readnone willreturn }
attributes #27 = { mustprogress noreturn willreturn }
attributes #28 = { mustprogress willreturn "frame-pointer"="all" "probe-stack"="inline-asm" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.dbg.cu = !{!4, !6, !7, !9}
!llvm.ident = !{!10}

!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"uwtable", i32 1}
!4 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!5 = !DIFile(filename: "/cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl", directory: ".")
!6 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!7 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !8, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!8 = !DIFile(filename: "/home/sethaxen/projects/Enzyme.jl/src/rules/LinearAlgebra/blas.jl", directory: ".")
!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !5, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None)
!10 = !{!"clang version 14.0.3 (/depot/downloads/clones/llvm-project.git-5a9787eb535c2edc5dea030cc221c1d60f38c9f42344f410e425ea2139e233aa 465c166c5422079185c3289cdc2613420d8d6c51)"}
!11 = distinct !DISubprogram(name: "dot", linkageName: "julia_dot_2279", scope: null, file: !5, line: 344, type: !12, scopeLine: 344, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !13)
!12 = !DISubroutineType(types: !13)
!13 = !{}
!14 = !DILocation(line: 345, scope: !11)
!15 = !{!16, !16, i64 0}
!16 = !{!"double", !17, i64 0}
!17 = !{!"omnipotent char", !18, i64 0}
!18 = !{!"Simple C/C++ TBAA"}
!19 = distinct !{!19, !20, !21}
!20 = !{!"llvm.loop.mustprogress"}
!21 = !{!"llvm.loop.unroll.disable"}
!22 = distinct !DISubprogram(name: "dot", linkageName: "julia_dot_2276", scope: null, file: !5, line: 392, type: !12, scopeLine: 392, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!23 = !DILocation(line: 10, scope: !24, inlinedAt: !26)
!24 = distinct !DISubprogram(name: "length;", linkageName: "length", scope: !25, file: !25, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!25 = !DIFile(filename: "essentials.jl", directory: ".")
!26 = distinct !DILocation(line: 393, scope: !22, inlinedAt: !27)
!27 = distinct !DILocation(line: 0, scope: !22)
!28 = !{i64 0, i64 9223372036854775807}
!29 = !{!30}
!30 = !{!"jnoalias_typemd", !31}
!31 = !{!"jnoalias"}
!32 = !{!33, !34, !35, !36}
!33 = !{!"jnoalias_gcframe", !31}
!34 = !{!"jnoalias_stack", !31}
!35 = !{!"jnoalias_data", !31}
!36 = !{!"jnoalias_const", !31}
!37 = !DILocation(line: 499, scope: !38, inlinedAt: !40)
!38 = distinct !DISubprogram(name: "==;", linkageName: "==", scope: !39, file: !39, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!39 = !DIFile(filename: "promotion.jl", directory: ".")
!40 = distinct !DILocation(line: 394, scope: !22, inlinedAt: !27)
!41 = !DILocation(line: 394, scope: !22, inlinedAt: !27)
!42 = !DILocation(line: 41, scope: !43, inlinedAt: !40)
!43 = distinct !DISubprogram(name: "LazyString;", linkageName: "LazyString", scope: !44, file: !44, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!44 = !DIFile(filename: "strings/lazy.jl", directory: ".")
!45 = !{!46, !46, i64 0}
!46 = !{!"jtbaa_mutab", !47, i64 0}
!47 = !{!"jtbaa_value", !48, i64 0}
!48 = !{!"jtbaa_data", !49, i64 0}
!49 = !{!"jtbaa", !50, i64 0}
!50 = !{!"jtbaa"}
!51 = !{!35}
!52 = !{!53, !33, !34, !30, !36}
!53 = distinct !{!53, !54, !"na_addr13"}
!54 = distinct !{!54, !"addr13"}
!55 = !{!56, !56, i64 0}
!56 = !{!"jtbaa_immut", !47, i64 0}
!57 = !DILocation(line: 53, scope: !58, inlinedAt: !60)
!58 = distinct !DISubprogram(name: "getproperty;", linkageName: "getproperty", scope: !59, file: !59, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!59 = !DIFile(filename: "Base.jl", directory: ".")
!60 = distinct !DILocation(line: 81, scope: !61, inlinedAt: !62)
!61 = distinct !DISubprogram(name: "String;", linkageName: "String", scope: !44, file: !44, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!62 = distinct !DILocation(line: 232, scope: !63, inlinedAt: !65)
!63 = distinct !DISubprogram(name: "convert;", linkageName: "convert", scope: !64, file: !64, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!64 = !DIFile(filename: "strings/basic.jl", directory: ".")
!65 = distinct !DILocation(line: 12, scope: !66, inlinedAt: !40)
!66 = distinct !DISubprogram(name: "DimensionMismatch;", linkageName: "DimensionMismatch", scope: !67, file: !67, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!67 = !DIFile(filename: "array.jl", directory: ".")
!68 = !{!33, !34, !30, !36}
!69 = !DILocation(line: 82, scope: !61, inlinedAt: !62)
!70 = !DILocation(line: 107, scope: !71, inlinedAt: !73)
!71 = distinct !DISubprogram(name: "sprint;", linkageName: "sprint", scope: !72, file: !72, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!72 = !DIFile(filename: "strings/io.jl", directory: ".")
!73 = distinct !DILocation(line: 83, scope: !61, inlinedAt: !62)
!74 = !DILocation(line: 61, scope: !75, inlinedAt: !76)
!75 = distinct !DISubprogram(name: "replaceproperty!;", linkageName: "replaceproperty!", scope: !59, file: !59, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!76 = distinct !DILocation(line: 88, scope: !61, inlinedAt: !62)
!77 = !DILocation(line: 89, scope: !61, inlinedAt: !62)
!78 = !DILocation(line: 395, scope: !22, inlinedAt: !27)
!79 = !DILocation(line: 65, scope: !80, inlinedAt: !82)
!80 = distinct !DISubprogram(name: "unsafe_convert;", linkageName: "unsafe_convert", scope: !81, file: !81, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!81 = !DIFile(filename: "pointer.jl", directory: ".")
!82 = distinct !DILocation(line: 1240, scope: !83, inlinedAt: !85)
!83 = distinct !DISubprogram(name: "pointer;", linkageName: "pointer", scope: !84, file: !84, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!84 = !DIFile(filename: "abstractarray.jl", directory: ".")
!85 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !87)
!86 = distinct !DISubprogram(name: "vec_pointer_stride;", linkageName: "vec_pointer_stride", scope: !5, file: !5, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!87 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !88)
!88 = distinct !DILocation(line: 395, scope: !22, inlinedAt: !27)
!89 = !{!90, !90, i64 0}
!90 = !{!"jtbaa_arrayptr", !91, i64 0}
!91 = !{!"jtbaa_array", !49, i64 0}
!92 = !DILocation(line: 0, scope: !22)
!93 = distinct !DISubprogram(name: "dot", linkageName: "julia_dot_2276", scope: null, file: !5, line: 392, type: !12, scopeLine: 392, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!94 = !DILocation(line: 10, scope: !24, inlinedAt: !95)
!95 = distinct !DILocation(line: 393, scope: !93, inlinedAt: !96)
!96 = distinct !DILocation(line: 0, scope: !93)
!97 = !DILocation(line: 499, scope: !38, inlinedAt: !98)
!98 = distinct !DILocation(line: 394, scope: !93, inlinedAt: !96)
!99 = !DILocation(line: 394, scope: !93, inlinedAt: !96)
!100 = !DILocation(line: 41, scope: !43, inlinedAt: !98)
!101 = !{!102, !33, !34, !30, !36}
!102 = distinct !{!102, !103, !"na_addr13"}
!103 = distinct !{!103, !"addr13"}
!104 = !DILocation(line: 53, scope: !58, inlinedAt: !105)
!105 = distinct !DILocation(line: 81, scope: !61, inlinedAt: !106)
!106 = distinct !DILocation(line: 232, scope: !63, inlinedAt: !107)
!107 = distinct !DILocation(line: 12, scope: !66, inlinedAt: !98)
!108 = !DILocation(line: 82, scope: !61, inlinedAt: !106)
!109 = !DILocation(line: 107, scope: !71, inlinedAt: !110)
!110 = distinct !DILocation(line: 83, scope: !61, inlinedAt: !106)
!111 = !DILocation(line: 61, scope: !75, inlinedAt: !112)
!112 = distinct !DILocation(line: 88, scope: !61, inlinedAt: !106)
!113 = !DILocation(line: 89, scope: !61, inlinedAt: !106)
!114 = !DILocation(line: 395, scope: !93, inlinedAt: !96)
!115 = !DILocation(line: 65, scope: !80, inlinedAt: !116)
!116 = distinct !DILocation(line: 1240, scope: !83, inlinedAt: !117)
!117 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !118)
!118 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !119)
!119 = distinct !DILocation(line: 395, scope: !93, inlinedAt: !96)
!120 = !DILocation(line: 0, scope: !93)
!121 = distinct !DISubprogram(name: "dot", linkageName: "julia_dot_2276", scope: null, file: !5, line: 392, type: !12, scopeLine: 392, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !6, retainedNodes: !13)
!122 = !DILocation(line: 10, scope: !24, inlinedAt: !123)
!123 = distinct !DILocation(line: 393, scope: !121, inlinedAt: !124)
!124 = distinct !DILocation(line: 0, scope: !121)
!125 = !{!126, !30}
!126 = distinct !{!126, !127, !"primal"}
!127 = distinct !{!127, !" diff: %"}
!128 = !{!129, !33, !34, !35, !36}
!129 = distinct !{!129, !127, !"shadow_0"}
!130 = !{!131, !30}
!131 = distinct !{!131, !132, !"primal"}
!132 = distinct !{!132, !" diff: %"}
!133 = !{!134, !33, !34, !35, !36}
!134 = distinct !{!134, !132, !"shadow_0"}
!135 = !DILocation(line: 499, scope: !38, inlinedAt: !136)
!136 = distinct !DILocation(line: 394, scope: !121, inlinedAt: !124)
!137 = !DILocation(line: 394, scope: !121, inlinedAt: !124)
!138 = !DILocation(line: 41, scope: !43, inlinedAt: !136)
!139 = !{!140, !33, !34, !30, !36}
!140 = distinct !{!140, !141, !"na_addr13"}
!141 = distinct !{!141, !"addr13"}
!142 = !DILocation(line: 53, scope: !58, inlinedAt: !143)
!143 = distinct !DILocation(line: 81, scope: !61, inlinedAt: !144)
!144 = distinct !DILocation(line: 232, scope: !63, inlinedAt: !145)
!145 = distinct !DILocation(line: 12, scope: !66, inlinedAt: !136)
!146 = !DILocation(line: 82, scope: !61, inlinedAt: !144)
!147 = !DILocation(line: 107, scope: !71, inlinedAt: !148)
!148 = distinct !DILocation(line: 83, scope: !61, inlinedAt: !144)
!149 = !DILocation(line: 61, scope: !75, inlinedAt: !150)
!150 = distinct !DILocation(line: 88, scope: !61, inlinedAt: !144)
!151 = !DILocation(line: 89, scope: !61, inlinedAt: !144)
!152 = !DILocation(line: 395, scope: !121, inlinedAt: !124)
!153 = !DILocation(line: 65, scope: !80, inlinedAt: !154)
!154 = distinct !DILocation(line: 1240, scope: !83, inlinedAt: !155)
!155 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !156)
!156 = distinct !DILocation(line: 177, scope: !86, inlinedAt: !157)
!157 = distinct !DILocation(line: 395, scope: !121, inlinedAt: !124)
!158 = !{!129, !30}
!159 = !{!126, !33, !34, !35, !36}
!160 = !{!134, !30}
!161 = !{!131, !33, !34, !35, !36}
!162 = distinct !DISubprogram(name: "forward", linkageName: "julia_forward_2281", scope: null, file: !8, line: 62, type: !12, scopeLine: 62, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !13)
!163 = !DILocation(line: 37, scope: !164, inlinedAt: !165)
!164 = distinct !DISubprogram(name: "getproperty;", linkageName: "getproperty", scope: !59, file: !59, type: !12, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !13)
!165 = !DILocation(line: 75, scope: !162)
!166 = !{!167, !167, i64 0, i64 0}
!167 = !{!"jtbaa_const", !49, i64 0}
!168 = !{!36}
!169 = !{!33, !34, !35, !30}
!170 = distinct !DISubprogram(name: "dot", linkageName: "julia_dot_2284", scope: null, file: !5, line: 344, type: !12, scopeLine: 344, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !13)
!171 = !DILocation(line: 345, scope: !170)

oldFunc:; Function Attrs: mustprogress willreturn
define double @preprocess_julia_dot_2276_inner.3({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1) local_unnamed_addr #14 !dbg !93 {
entry:
  %2 = call {}*** @julia.get_pgcstack() #17
  %3 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !94
  %4 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %3 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !94
  %5 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %4, i64 0, i32 1, !dbg !94
  %6 = load i64, i64 addrspace(11)* %5, align 8, !dbg !94, !range !28, !alias.scope !29, !noalias !32
  %7 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !94
  %8 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %7 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !94
  %9 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %8, i64 0, i32 1, !dbg !94
  %10 = load i64, i64 addrspace(11)* %9, align 8, !dbg !94, !range !28, !alias.scope !29, !noalias !32
  %.not.i = icmp eq i64 %6, %10, !dbg !97
  br i1 %.not.i, label %julia_dot_2276_inner.exit, label %L12.i, !dbg !99

L12.i:                                            ; preds = %entry
  %current_task15.i = getelementptr inbounds {}**, {}*** %2, i64 -13, !dbg !100
  %current_task1.i = bitcast {}*** %current_task15.i to {}**, !dbg !100
  %11 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195699359008 to {}*) to {} addrspace(10)*)) #18, !dbg !100
  %12 = bitcast {} addrspace(10)* %11 to {} addrspace(10)* addrspace(10)*, !dbg !100
  %13 = addrspacecast {} addrspace(10)* addrspace(10)* %12 to {} addrspace(10)* addrspace(11)*, !dbg !100
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %13, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %14 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %13, i64 1, !dbg !100
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %14, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %15 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195667121680 to {}*) to {} addrspace(10)*)) #18, !dbg !100
  %16 = bitcast {} addrspace(10)* %15 to { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)*, !dbg !100
  %.repack.i = bitcast {} addrspace(10)* %15 to {} addrspace(10)* addrspace(10)*, !dbg !100
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619104 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack7.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 1, !dbg !100
  store i64 %6, i64 addrspace(10)* %.repack7.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack9.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 2, !dbg !100
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619072 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack9.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  %.repack11.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %16, i64 0, i32 3, !dbg !100
  store i64 %10, i64 addrspace(10)* %.repack11.i, align 8, !dbg !100, !tbaa !55, !alias.scope !51, !noalias !101
  store atomic {} addrspace(10)* %15, {} addrspace(10)* addrspace(11)* %13 release, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %15) #19, !dbg !100
  %17 = bitcast {} addrspace(10)* %11 to i8 addrspace(10)*, !dbg !100
  %18 = addrspacecast i8 addrspace(10)* %17 to i8 addrspace(11)*, !dbg !100
  %19 = getelementptr inbounds i8, i8 addrspace(11)* %18, i64 8, !dbg !100
  %20 = bitcast i8 addrspace(11)* %19 to {} addrspace(10)* addrspace(11)*, !dbg !100
  store atomic {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(11)* %20 release, align 8, !dbg !100, !tbaa !45, !alias.scope !51, !noalias !101
  %21 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %20 acquire, align 8, !dbg !104, !tbaa !45, !alias.scope !51, !noalias !68, !nonnull !13
  %22 = addrspacecast {} addrspace(10)* %21 to {} addrspace(11)*, !dbg !108
  %.not13.i = icmp eq {} addrspace(11)* %22, addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(11)*), !dbg !108
  br i1 %.not13.i, label %L17.i, label %L32.i, !dbg !108

L17.i:                                            ; preds = %L12.i
  %23 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195691323952 to {}*) to {} addrspace(10)*)) #18, !dbg !109
  %24 = bitcast {} addrspace(10)* %23 to {} addrspace(10)* addrspace(10)*, !dbg !109
  store {} addrspace(10)* %11, {} addrspace(10)* addrspace(10)* %24, align 8, !dbg !109, !tbaa !55, !alias.scope !51, !noalias !101
  %25 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195718475744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195662589696 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888394272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195672369408 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %23) #20, !dbg !109
  %26 = cmpxchg {} addrspace(10)* addrspace(11)* %20, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* %25 acq_rel acquire, align 8, !dbg !111, !tbaa !45, !alias.scope !51, !noalias !68
  %27 = extractvalue { {} addrspace(10)*, i1 } %26, 0, !dbg !111
  %28 = extractvalue { {} addrspace(10)*, i1 } %26, 1, !dbg !111
  br i1 %28, label %xchg_wb.i, label %L27.i, !dbg !111

L27.i:                                            ; preds = %L17.i
  %29 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %27) #21, !dbg !113
  %30 = icmp eq {} addrspace(10)* %29, addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), !dbg !113
  br i1 %30, label %L32.i, label %fail.i, !dbg !113

L32.i:                                            ; preds = %xchg_wb.i, %L27.i, %L12.i
  %value_phi.i = phi {} addrspace(10)* [ %25, %xchg_wb.i ], [ %21, %L12.i ], [ %27, %L27.i ]
  %31 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195706238240 to {}*) to {} addrspace(10)*)) #18, !dbg !99
  %32 = bitcast {} addrspace(10)* %31 to {} addrspace(10)* addrspace(10)*, !dbg !99
  store {} addrspace(10)* %value_phi.i, {} addrspace(10)* addrspace(10)* %32, align 8, !dbg !99, !tbaa !55, !alias.scope !51, !noalias !101
  %33 = addrspacecast {} addrspace(10)* %31 to {} addrspace(12)*, !dbg !99
  call void @ijl_throw({} addrspace(12)* %33) #22, !dbg !99
  unreachable, !dbg !99

xchg_wb.i:                                        ; preds = %L17.i
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %11, {} addrspace(10)* nonnull %25) #19, !dbg !111
  br label %L32.i, !dbg !113

fail.i:                                           ; preds = %L27.i
  %34 = addrspacecast {} addrspace(10)* %27 to {} addrspace(12)*, !dbg !113
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @_j_str1, i64 0, i64 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), {} addrspace(12)* %34) #22, !dbg !113
  unreachable, !dbg !113

julia_dot_2276_inner.exit:                        ; preds = %entry
  %35 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %0, {} addrspace(10)* nonnull %1) #17, !dbg !114
  %36 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !115
  %37 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %36) #21, !dbg !115
  %38 = bitcast {}* %37 to i8**, !dbg !115
  %39 = load i8*, i8** %38, align 8, !dbg !115, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %40 = ptrtoint i8* %39 to i64, !dbg !115
  %41 = addrspacecast {} addrspace(10)* %1 to {} addrspace(11)*, !dbg !115
  %42 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %41) #21, !dbg !115
  %43 = bitcast {}* %42 to i8**, !dbg !115
  %44 = load i8*, i8** %43, align 8, !dbg !115, !tbaa !89, !alias.scope !29, !noalias !32, !nonnull !13
  %45 = ptrtoint i8* %44 to i64, !dbg !115
  %46 = call fastcc double @julia_dot_2279(i64 signext %6, i64 zeroext %40, i64 noundef signext 1, i64 zeroext %45, i64 noundef signext 1) #23, !dbg !114
  call void @llvm.julia.gc_preserve_end(token %35) #17, !dbg !114
  ret double %46, !dbg !120
}

newFunc:; Function Attrs: mustprogress willreturn
define internal void @fwddiffejulia_dot_2276_inner.3({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0, {} addrspace(10)* %"'", {} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %1, {} addrspace(10)* %"'1") local_unnamed_addr #14 !dbg !121 {
entry:
  %2 = call {}*** @julia.get_pgcstack()
  %3 = call {}*** @julia.get_pgcstack()
  %4 = call {}*** @julia.get_pgcstack()
  %5 = call {}*** @julia.get_pgcstack()
  %6 = call {}*** @julia.get_pgcstack()
  %7 = call {}*** @julia.get_pgcstack() #17
  %8 = bitcast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !122
  %9 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !122
  %10 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i64 0, i32 1, !dbg !122
  %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !122, !range !28, !alias.scope !125, !noalias !128
  %12 = bitcast {} addrspace(10)* %1 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)*, !dbg !122
  %13 = addrspacecast { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(10)* %12 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !122
  %14 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %13, i64 0, i32 1, !dbg !122
  %15 = load i64, i64 addrspace(11)* %14, align 8, !dbg !122, !range !28, !alias.scope !130, !noalias !133
  %.not.i = icmp eq i64 %11, %15, !dbg !135
  br i1 %.not.i, label %julia_dot_2276_inner.exit, label %L12.i, !dbg !137

L12.i:                                            ; preds = %entry
  %current_task15.i = getelementptr inbounds {}**, {}*** %7, i64 -13, !dbg !138
  %current_task1.i = bitcast {}*** %current_task15.i to {}**, !dbg !138
  %16 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 16, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195699359008 to {}*) to {} addrspace(10)*)) #18, !dbg !138
  %17 = bitcast {} addrspace(10)* %16 to {} addrspace(10)* addrspace(10)*, !dbg !138
  %18 = addrspacecast {} addrspace(10)* addrspace(10)* %17 to {} addrspace(10)* addrspace(11)*, !dbg !138
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %18, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  %19 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %18, i64 1, !dbg !138
  store {} addrspace(10)* null, {} addrspace(10)* addrspace(11)* %19, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  %20 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 32, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195667121680 to {}*) to {} addrspace(10)*)) #18, !dbg !138
  %21 = bitcast {} addrspace(10)* %20 to { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)*, !dbg !138
  %.repack.i = bitcast {} addrspace(10)* %20 to {} addrspace(10)* addrspace(10)*, !dbg !138
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619104 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack7.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 1, !dbg !138
  store i64 %11, i64 addrspace(10)* %.repack7.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack9.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 2, !dbg !138
  store {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195729619072 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(10)* %.repack9.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  %.repack11.i = getelementptr inbounds { {} addrspace(10)*, i64, {} addrspace(10)*, i64 }, { {} addrspace(10)*, i64, {} addrspace(10)*, i64 } addrspace(10)* %21, i64 0, i32 3, !dbg !138
  store i64 %15, i64 addrspace(10)* %.repack11.i, align 8, !dbg !138, !tbaa !55, !alias.scope !51, !noalias !139
  store atomic {} addrspace(10)* %20, {} addrspace(10)* addrspace(11)* %18 release, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %16, {} addrspace(10)* nonnull %20) #19, !dbg !138
  %22 = bitcast {} addrspace(10)* %16 to i8 addrspace(10)*, !dbg !138
  %23 = addrspacecast i8 addrspace(10)* %22 to i8 addrspace(11)*, !dbg !138
  %24 = getelementptr inbounds i8, i8 addrspace(11)* %23, i64 8, !dbg !138
  %25 = bitcast i8 addrspace(11)* %24 to {} addrspace(10)* addrspace(11)*, !dbg !138
  store atomic {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspace(11)* %25 release, align 8, !dbg !138, !tbaa !45, !alias.scope !51, !noalias !139
  %26 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %25 acquire, align 8, !dbg !142, !tbaa !45, !alias.scope !51, !noalias !68, !nonnull !13
  %27 = addrspacecast {} addrspace(10)* %26 to {} addrspace(11)*, !dbg !146
  %.not13.i = icmp eq {} addrspace(11)* %27, addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(11)*), !dbg !146
  br i1 %.not13.i, label %L17.i, label %L32.i, !dbg !146

L17.i:                                            ; preds = %L12.i
  %28 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195691323952 to {}*) to {} addrspace(10)*)) #18, !dbg !147
  %29 = bitcast {} addrspace(10)* %28 to {} addrspace(10)* addrspace(10)*, !dbg !147
  store {} addrspace(10)* %16, {} addrspace(10)* addrspace(10)* %29, align 8, !dbg !147, !tbaa !55, !alias.scope !51, !noalias !139
  %30 = call nonnull {} addrspace(10)* ({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)*, {} addrspace(10)*, {} addrspace(10)*, ...) @julia.call2({} addrspace(10)* ({} addrspace(10)*, {} addrspace(10)**, i32, {} addrspace(10)*)* noundef nonnull @ijl_invoke, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195718475744 to {}*) to {} addrspace(10)*), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195662589696 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888394272 to {}*) to {} addrspace(10)*), {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195672369408 to {}*) to {} addrspace(10)*), {} addrspace(10)* nonnull %28) #20, !dbg !147
  %31 = cmpxchg {} addrspace(10)* addrspace(11)* %25, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195888361480 to {}*) to {} addrspace(10)*), {} addrspace(10)* %30 acq_rel acquire, align 8, !dbg !149, !tbaa !45, !alias.scope !51, !noalias !68
  %32 = extractvalue { {} addrspace(10)*, i1 } %31, 0, !dbg !149
  %33 = extractvalue { {} addrspace(10)*, i1 } %31, 1, !dbg !149
  br i1 %33, label %xchg_wb.i, label %L27.i, !dbg !149

L27.i:                                            ; preds = %L17.i
  %34 = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %32) #21, !dbg !151
  %35 = icmp eq {} addrspace(10)* %34, addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), !dbg !151
  br i1 %35, label %L32.i, label %fail.i, !dbg !151

L32.i:                                            ; preds = %xchg_wb.i, %L27.i, %L12.i
  %value_phi.i = phi {} addrspace(10)* [ %30, %xchg_wb.i ], [ %26, %L12.i ], [ %32, %L27.i ]
  %36 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1.i, i64 noundef 8, {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195706238240 to {}*) to {} addrspace(10)*)) #18, !dbg !137
  %37 = bitcast {} addrspace(10)* %36 to {} addrspace(10)* addrspace(10)*, !dbg !137
  store {} addrspace(10)* %value_phi.i, {} addrspace(10)* addrspace(10)* %37, align 8, !dbg !137, !tbaa !55, !alias.scope !51, !noalias !139
  %38 = addrspacecast {} addrspace(10)* %36 to {} addrspace(12)*, !dbg !137
  call void @ijl_throw({} addrspace(12)* %38) #22, !dbg !137
  unreachable, !dbg !137

xchg_wb.i:                                        ; preds = %L17.i
  call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* noundef nonnull %16, {} addrspace(10)* nonnull %30) #19, !dbg !149
  br label %L32.i, !dbg !151

fail.i:                                           ; preds = %L27.i
  %39 = addrspacecast {} addrspace(10)* %32 to {} addrspace(12)*, !dbg !151
  call void @ijl_type_error(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @_j_str1, i64 0, i64 0), {} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 140195745050864 to {}*) to {} addrspace(10)*), {} addrspace(12)* %39) #22, !dbg !151
  unreachable, !dbg !151

julia_dot_2276_inner.exit:                        ; preds = %entry
  %40 = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %0, {} addrspace(10)* %"'", {} addrspace(10)* %1, {} addrspace(10)* %"'1"), !dbg !152
  %"'ipc" = addrspacecast {} addrspace(10)* %"'" to {} addrspace(11)*, !dbg !153
  %41 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !153
  %42 = call {}* @julia.pointer_from_objref({} addrspace(11)* %"'ipc"), !dbg !153
  %_replacementA = phi {}* , !dbg !153
  %"'ipc25" = bitcast {}* %42 to i8**, !dbg !153
  %_replacementA17 = phi i8** , !dbg !153
  %"'ipl" = load i8*, i8** %"'ipc25", align 8, !dbg !153, !tbaa !89, !alias.scope !158, !noalias !159, !nonnull !13
  %"'ipc26" = ptrtoint i8* %"'ipl" to i64, !dbg !153
  %_replacementA19 = phi i64 , !dbg !153
  %"'ipc20" = addrspacecast {} addrspace(10)* %"'1" to {} addrspace(11)*, !dbg !153
  %43 = addrspacecast {} addrspace(10)* %1 to {} addrspace(11)*, !dbg !153
  %44 = call {}* @julia.pointer_from_objref({} addrspace(11)* %"'ipc20"), !dbg !153
  %_replacementA21 = phi {}* , !dbg !153
  %"'ipc27" = bitcast {}* %44 to i8**, !dbg !153
  %_replacementA22 = phi i8** , !dbg !153
  %"'ipl28" = load i8*, i8** %"'ipc27", align 8, !dbg !153, !tbaa !89, !alias.scope !160, !noalias !161, !nonnull !13
  %_replacementA23 = phi i8* , !dbg !153
  %"'ipc29" = ptrtoint i8* %"'ipl28" to i64, !dbg !153
  %_replacementA24 = phi i64 , !dbg !153
  %45 = bitcast {}*** %6 to {}**, !dbg !152
  %46 = getelementptr inbounds {}*, {}** %45, i64 -13, !dbg !152
  %47 = getelementptr inbounds {}*, {}** %46, i64 15, !dbg !152
  %48 = bitcast {}** %47 to i8**, !dbg !152
  %49 = load i8*, i8** %48, align 8, !dbg !152
  %50 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %46, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %51 = bitcast {} addrspace(10)* %50 to [1 x i64] addrspace(10)*, !dbg !152
  %52 = addrspacecast [1 x i64] addrspace(10)* %51 to [1 x i64] addrspace(11)*, !dbg !152
  %53 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %52, i64 0, i32 0, !dbg !152
  store i64 %11, i64 addrspace(11)* %53, align 8, !dbg !152
  %54 = bitcast {}*** %5 to {}**, !dbg !152
  %55 = getelementptr inbounds {}*, {}** %54, i64 -13, !dbg !152
  %56 = getelementptr inbounds {}*, {}** %55, i64 15, !dbg !152
  %57 = bitcast {}** %56 to i8**, !dbg !152
  %58 = load i8*, i8** %57, align 8, !dbg !152
  %59 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %55, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195885311824 to {}*) to {} addrspace(10)*)), !dbg !152
  %60 = bitcast {} addrspace(10)* %59 to [2 x i64] addrspace(10)*, !dbg !152
  %61 = addrspacecast [2 x i64] addrspace(10)* %60 to [2 x i64] addrspace(11)*, !dbg !152
  %62 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %61, i64 0, i32 0, !dbg !152
  store i64 %_replacementA19, i64 addrspace(11)* %62, align 8, !dbg !152
  %63 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %61, i64 0, i32 1, !dbg !152
  store i64 %"'ipc26", i64 addrspace(11)* %63, align 8, !dbg !152
  %64 = bitcast {}*** %4 to {}**, !dbg !152
  %65 = getelementptr inbounds {}*, {}** %64, i64 -13, !dbg !152
  %66 = getelementptr inbounds {}*, {}** %65, i64 15, !dbg !152
  %67 = bitcast {}** %66 to i8**, !dbg !152
  %68 = load i8*, i8** %67, align 8, !dbg !152
  %69 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %65, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %70 = bitcast {} addrspace(10)* %69 to [1 x i64] addrspace(10)*, !dbg !152
  %71 = addrspacecast [1 x i64] addrspace(10)* %70 to [1 x i64] addrspace(11)*, !dbg !152
  %72 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %71, i64 0, i32 0, !dbg !152
  store i64 1, i64 addrspace(11)* %72, align 8, !dbg !152
  %73 = bitcast {}*** %3 to {}**, !dbg !152
  %74 = getelementptr inbounds {}*, {}** %73, i64 -13, !dbg !152
  %75 = getelementptr inbounds {}*, {}** %74, i64 15, !dbg !152
  %76 = bitcast {}** %75 to i8**, !dbg !152
  %77 = load i8*, i8** %76, align 8, !dbg !152
  %78 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %74, i64 16, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195885311824 to {}*) to {} addrspace(10)*)), !dbg !152
  %79 = bitcast {} addrspace(10)* %78 to [2 x i64] addrspace(10)*, !dbg !152
  %80 = addrspacecast [2 x i64] addrspace(10)* %79 to [2 x i64] addrspace(11)*, !dbg !152
  %81 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %80, i64 0, i32 0, !dbg !152
  store i64 %_replacementA24, i64 addrspace(11)* %81, align 8, !dbg !152
  %82 = getelementptr [2 x i64], [2 x i64] addrspace(11)* %80, i64 0, i32 1, !dbg !152
  store i64 %"'ipc29", i64 addrspace(11)* %82, align 8, !dbg !152
  %83 = bitcast {}*** %2 to {}**, !dbg !152
  %84 = getelementptr inbounds {}*, {}** %83, i64 -13, !dbg !152
  %85 = getelementptr inbounds {}*, {}** %84, i64 15, !dbg !152
  %86 = bitcast {}** %85 to i8**, !dbg !152
  %87 = load i8*, i8** %86, align 8, !dbg !152
  %88 = call {} addrspace(10)* @julia.gc_alloc_obj({}** %84, i64 8, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 140195837729488 to {}*) to {} addrspace(10)*)), !dbg !152
  %89 = bitcast {} addrspace(10)* %88 to [1 x i64] addrspace(10)*, !dbg !152
  %90 = addrspacecast [1 x i64] addrspace(10)* %89 to [1 x i64] addrspace(11)*, !dbg !152
  %91 = getelementptr [1 x i64], [1 x i64] addrspace(11)* %90, i64 0, i32 0, !dbg !152
  store i64 1, i64 addrspace(11)* %91, align 8, !dbg !152
  %92 = call fast double @julia_forward_2281([1 x i64] addrspace(11)* %52, [2 x i64] addrspace(11)* %61, [1 x i64] addrspace(11)* %71, [2 x i64] addrspace(11)* %80, [1 x i64] addrspace(11)* %90), !dbg !152
  call void @llvm.julia.gc_preserve_end(token %40) #17, !dbg !152
  ret void

allocsForInversion:                               ; No predecessors!
}

 pp:   %_replacementA19 = phi i64 , !dbg !78 of   %40 = ptrtoint i8* %39 to i64, !dbg !70
julia: /workspace/srcdir/Enzyme/enzyme/Enzyme/GradientUtils.cpp:7903: void GradientUtils::eraseFictiousPHIs(): Assertion `pp->getNumUses() == 0' failed.

[107420] signal (6.-6): Aborted
in expression starting at REPL[13]:1
pthread_kill at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
raise at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x7f81ede2871a)
__assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
eraseFictiousPHIs at /workspace/srcdir/Enzyme/enzyme/Enzyme/GradientUtils.cpp:7903
CreateForwardDiff at /workspace/srcdir/Enzyme/enzyme/Enzyme/EnzymeLogic.cpp:4648
EnzymeCreateForwardDiff at /workspace/srcdir/Enzyme/enzyme/Enzyme/CApi.cpp:502
EnzymeCreateForwardDiff at /home/sethaxen/projects/Enzyme.jl/src/api.jl:138
enzyme! at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:6956
unknown function (ip: 0x7f81c93d23b9)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
#codegen#162 at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:8194
codegen at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:7820
unknown function (ip: 0x7f81c93aa5fd)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
_thunk at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:8707
_thunk at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:8704 [inlined]
cached_compilation at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:8742 [inlined]
#s287#191 at /home/sethaxen/projects/Enzyme.jl/src/compiler.jl:8800 [inlined]
#s287#191 at ./none:0
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
GeneratedFunctionStub at ./boot.jl:602
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
jl_call_staged at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/method.c:530
ijl_code_for_staged at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/method.c:581
get_staged at ./compiler/utilities.jl:115
retrieve_code_info at ./compiler/utilities.jl:127 [inlined]
InferenceState at ./compiler/inferencestate.jl:354
typeinf_edge at ./compiler/typeinfer.jl:922
abstract_call_method at ./compiler/abstractinterpretation.jl:611
abstract_call_gf_by_type at ./compiler/abstractinterpretation.jl:152
abstract_call_known at ./compiler/abstractinterpretation.jl:1949
jfptr_abstract_call_known_12792.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
tojlinvoke21381.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
j_abstract_call_known_12333.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
abstract_call at ./compiler/abstractinterpretation.jl:2020
abstract_call at ./compiler/abstractinterpretation.jl:1999
abstract_eval_statement_expr at ./compiler/abstractinterpretation.jl:2183
abstract_eval_statement at ./compiler/abstractinterpretation.jl:2396
abstract_eval_basic_statement at ./compiler/abstractinterpretation.jl:2684
typeinf_local at ./compiler/abstractinterpretation.jl:2869
typeinf_nocycle at ./compiler/abstractinterpretation.jl:2957
_typeinf at ./compiler/typeinfer.jl:244
typeinf at ./compiler/typeinfer.jl:215
typeinf_ext at ./compiler/typeinfer.jl:1056
typeinf_ext_toplevel at ./compiler/typeinfer.jl:1089
typeinf_ext_toplevel at ./compiler/typeinfer.jl:1085
jfptr_typeinf_ext_toplevel_16333.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
jl_type_infer at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:320
jl_generate_fptr_impl at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jitlayers.cpp:444
jl_compile_method_internal at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2348 [inlined]
jl_compile_method_internal at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2237
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2750 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
autodiff at /home/sethaxen/projects/Enzyme.jl/src/Enzyme.jl:321
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
do_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/builtins.c:730
autodiff at /home/sethaxen/projects/Enzyme.jl/src/Enzyme.jl:215
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
do_call at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/interpreter.c:126
eval_value at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/interpreter.c:226
eval_stmt_value at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/interpreter.c:177 [inlined]
eval_body at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/interpreter.c:624
jl_interpret_toplevel_thunk at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/interpreter.c:762
jl_toplevel_eval_flex at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/toplevel.c:912
jl_toplevel_eval_flex at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/toplevel.c:856
ijl_toplevel_eval_in at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/toplevel.c:971
eval at ./boot.jl:370 [inlined]
eval_user_input at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:153
repl_backend_loop at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:249
#start_repl_backend#46 at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:234
start_repl_backend at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:231
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
#run_repl#59 at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:377
run_repl at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/usr/share/julia/stdlib/v1.9/REPL/src/REPL.jl:363
jfptr_run_repl_61794.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
#1019 at ./client.jl:421
jfptr_YY.1019_49540.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
jl_f__call_latest at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/builtins.c:774
#invokelatest#2 at ./essentials.jl:816 [inlined]
invokelatest at ./essentials.jl:813 [inlined]
run_main_repl at ./client.jl:405
exec_options at ./client.jl:322
_start at ./client.jl:522
jfptr__start_37296.clone_1 at /home/sethaxen/.julia/juliaup/julia-1.9.0-rc2+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1879 [inlined]
true_main at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jlapi.c:573
jl_repl_entrypoint at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jlapi.c:717
main at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/cli/loader_exe.c:59
unknown function (ip: 0x7f81ede29d8f)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x401098)
Allocations: 20156377 (Pool: 20133683; Big: 22694); GC: 28
Aborted (core dumped)

This is strange because only this line of code should be hit in this case, and all it does is call the primal function.

Copy link
Collaborator Author

@sethaxen sethaxen Apr 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 2-arg methods also error for reverse-mode. These are the only remaining failures in the test suite.

Edit: also, this only happens with dot and real inputs, not with dotc or dotu.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Open an issue?


dval = if !(X isa Const) && !(Y isa Const)
_map_tuple(X.dval, Y.dval) do dX, dY
func.val(n.val, dX, incx.val, Y.val, incy.val) +
func.val(n.val, X.val, incx.val, dY, incy.val)
end
elseif !(X isa Const)
_map_tuple(dX -> func.val(n.val, dX, incx.val, Y.val, incy.val), X.dval)
elseif !(Y isa Const)
_map_tuple(dY -> func.val(n.val, X.val, incx.val, dY, incy.val), Y.dval)
else
zero(T)
end

if RT <: Union{DuplicatedNoNeed,BatchDuplicatedNoNeed}
return dval
else
val = func.val(n.val, X.val, incx.val, Y.val, incy.val)
return RT(val, dval)
end
end

# Augmented forward pass of the width-1 reverse-mode rule for the 5-argument call
# `(n, X, incx, Y, incy)`.
#
# The primal and shadow results come from `_maybe_primal_shadow`. The tape handed to
# `EnzymeRules.reverse` is `((Xdata, Xinc), (Ydata, Yinc))`. An operand goes through
# `_strided_tape` (presumably a snapshot of its `n` strided entries -- confirm against the
# helper) only when all of the following hold:
#   * the return activity is not `Const`,
#   * the *other* operand is not `Const` (X is read to update Y's shadow, and vice versa),
#   * Enzyme reports the operand as overwritten.
# Otherwise the original value and increment are stored unchanged.
function EnzymeRules.augmented_primal(
    config::EnzymeRules.ConfigWidth{1},
    func::Const{typeof(BLAS.$fname)},
    RT::Type{<:Union{Const,Active}},
    n::Const{<:Integer},
    X::ConstOrDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
    incx::Const{<:Integer},
    Y::ConstOrDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
    incy::Const{<:Integer},
) where {T<:BLAS.$Ttype}
    primal, shadow = _maybe_primal_shadow(
        config, func.val, (n.val, X.val, incx.val, Y.val, incy.val)
    )

    # The third and fifth overwritten flags are the ones belonging to X and Y.
    _, _, X_overwritten, _, Y_overwritten = EnzymeRules.overwritten(config)
    return_is_active = !(RT <: Const)

    Xtape = if return_is_active && !(Y isa Const) && X_overwritten
        _strided_tape(n.val, X.val, incx.val)
    else
        (X.val, incx.val)
    end
    Ytape = if return_is_active && !(X isa Const) && Y_overwritten
        _strided_tape(n.val, Y.val, incy.val)
    else
        (Y.val, incy.val)
    end

    return EnzymeRules.AugmentedReturn(primal, shadow, (Xtape, Ytape))
end

# Reverse pass of the width-1 rule for the 5-argument call `(n, X, incx, Y, incy)`.
#
# `tape` is the `((Xdata, Xinc), (Ydata, Yinc))` pair produced by the augmented primal.
# Nothing is returned for any argument (all five slots are `nothing`); gradients are
# accumulated in place into `X.dval` / `Y.dval` for non-`Const` operands.
#
# For `BLAS.dotu` the accumulation uses `_aconjxpy!` with the incoming seed as-is; for the
# other functions it uses `BLAS.axpy!`, with the seed conjugated for the X update only.
function EnzymeRules.reverse(
    config::EnzymeRules.ConfigWidth{1},
    fun::Const{typeof(BLAS.$fname)},
    dret::Union{Active,Type{<:Const}},
    tape,
    n::Const{<:Integer},
    X::ConstOrDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
    incx::Const{<:Integer},
    Y::ConstOrDuplicated{<:Union{Ptr{T},AbstractArray{T}}},
    incy::Const{<:Integer},
) where {T<:BLAS.$Ttype}
    no_grads = (nothing, nothing, nothing, nothing, nothing)

    # A `Const` return carries no seed, so there is nothing to propagate.
    if dret isa Type{<:Const}
        return no_grads
    end

    (Xval, incxval), (Yval, incyval) = tape

    # Select the accumulation kernel and the seed used for the X update together.
    if fun.val === BLAS.dotu
        accumulate! = _aconjxpy!
        seed_X = dret.val
    else
        accumulate! = BLAS.axpy!
        seed_X = conj(dret.val)
    end

    if !(X isa Const)
        accumulate!(n.val, seed_X, Yval, incyval, X.dval, incx.val)
    end
    if !(Y isa Const)
        accumulate!(n.val, dret.val, Xval, incxval, Y.dval, incy.val)
    end

    return no_grads
end
end
end

end # module
190 changes: 190 additions & 0 deletions test/rules/LinearAlgebra/blas.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
using Enzyme
using FiniteDifferences
using LinearAlgebra
using Test

@testset "BLAS rules" begin
fdm = central_fdm(5, 1)
RTs = (Float32, Float64)
RCs = (ComplexF32, ComplexF64)
n = 10

@testset for fun in (BLAS.dot, BLAS.dotu, BLAS.dotc)
@testset "forward" begin
    # Width-1 forward mode: sweep return/argument activities, array vs. pointer
    # arguments, element types and (size, stride) pairs, comparing against a
    # finite-difference JVP.
    @testset for Tret in (Const, Duplicated, DuplicatedNoNeed),
        Tx in (Const, Duplicated),
        Ty in (Const, Duplicated),
        pfun in (identity, pointer),
        T in (fun == BLAS.dot ? RTs : RCs),
        (sz, inc) in ((10, 1), ((2, 20), -2))

        # All-Const inputs are only exercised together with a Const return.
        if Tx <: Const && Ty <: Const && !(Tret <: Const)
            continue
        end

        x, ∂x = ntuple(_ -> randn(T, sz), 2)
        y, ∂y = ntuple(_ -> randn(T, sz), 2)

        x_annot = if Tx <: Const
            Const(pfun(x))
        else
            Duplicated(pfun(x), pfun(∂x))
        end
        y_annot = if Ty <: Const
            Const(pfun(y))
        else
            Duplicated(pfun(y), pfun(∂y))
        end

        # Reference value and directional derivative; Const arguments get a zero tangent.
        primal = (a, b) -> fun(n, a, inc, b, inc)
        vexp = primal(x, y)
        x_tangent = Tx <: Const ? zero(x) : ∂x
        y_tangent = Ty <: Const ? zero(y) : ∂y
        dexp = FiniteDifferences.jvp(fdm, primal, (x, x_tangent), (y, y_tangent))[1]

        ret = autodiff(
            Forward, fun, Tret, Const(n), x_annot, Const(inc), y_annot, Const(inc)
        )

        if Tret <: Const
            @test ret === ()
        elseif Tret <: Duplicated
            v, d = ret
            @test v ≈ vexp
            @test d ≈ dexp
        elseif Tret <: DuplicatedNoNeed
            @test only(ret) ≈ dexp
        end

        # The 2-argument method is only comparable for plain, contiguous vectors.
        if pfun === identity && sz == n && inc == 1
            @testset "consistency of 2-arg version" begin
                ret2 = autodiff(Forward, fun, Tret, x_annot, y_annot)
                @test ret2 == ret
            end
        end
    end

    # Batched forward mode: several tangents are pushed through in one call.
    @testset for Tret in (BatchDuplicated, BatchDuplicatedNoNeed),
        T in (fun == BLAS.dot ? RTs : RCs)

        batch_size = 3
        inc = 1
        x = randn(T, n)
        y = randn(T, n)
        ∂xs = ntuple(_ -> randn(T, n), batch_size)
        ∂ys = ntuple(_ -> randn(T, n), batch_size)

        primal = (a, b) -> fun(n, a, inc, b, inc)
        vexp = primal(x, y)
        dexp = map(∂xs, ∂ys) do ∂x, ∂y
            FiniteDifferences.jvp(fdm, primal, (x, ∂x), (y, ∂y))[1]
        end

        ret = autodiff(
            Forward,
            fun,
            Tret,
            Const(n),
            BatchDuplicated(x, ∂xs),
            Const(inc),
            BatchDuplicated(y, ∂ys),
            Const(inc),
        )

        if Tret <: BatchDuplicatedNoNeed
            ds = only(ret)
        else
            v, ds = ret
            @test v ≈ vexp
        end
        @test all(map(≈, values(ds), dexp))
    end
end

@testset "reverse" begin
    # Variants of `fun` that zero the first entry of both inputs after the primal call.
    # They are used to check the rule when its inputs are overwritten before the
    # reverse pass runs.
    function fun_overwrite!(n, x, incx, y, incy)
        d = fun(n, x, incx, y, incy)
        x[1] = 0
        y[1] = 0
        return d
    end
    function fun_overwrite!(x, y)
        d = fun(x, y)
        x[1] = 0
        y[1] = 0
        return d
    end

    # Split reverse mode: sweep return/argument activities, array vs. pointer arguments,
    # element types, (size, stride) pairs and, for array arguments, the overwriting
    # variant; compare against a finite-difference pullback.
    @testset for Tret in (Const, Active),
        Tx in (Const, Duplicated),
        Ty in (Const, Duplicated),
        pfun in (identity, pointer),
        T in (fun == BLAS.dot ? RTs : RCs),
        (sz, inc) in ((10, 1), ((2, 20), -2)),
        f in (pfun === identity ? (fun, fun_overwrite!) : (fun,))

        # All-Const inputs are only exercised together with a Const return.
        Tx <: Const && Ty <: Const && !(Tret <: Const) && continue

        x, ∂x = ntuple(_ -> randn(T, sz), 2)
        y, ∂y = ntuple(_ -> randn(T, sz), 2)
        # Enzyme works on copies; the originals are kept for the finite-difference
        # reference and to detect which entries `f` overwrote.
        xcopy, ycopy, ∂xcopy, ∂ycopy = map(copy, (x, y, ∂x, ∂y))

        x_annot =
            Tx <: Const ? Const(pfun(xcopy)) : Duplicated(pfun(xcopy), pfun(∂xcopy))
        y_annot =
            Ty <: Const ? Const(pfun(ycopy)) : Duplicated(pfun(ycopy), pfun(∂ycopy))
        activities = (Const(n), x_annot, Const(inc), y_annot, Const(inc))

        vexp = fun(n, x, inc, y, inc)
        dret = randn(typeof(vexp))

        dexp = FiniteDifferences.j′vp(
            fdm, (x, y) -> fun(n, x, inc, y, inc), dret, x, y
        )
        fwd, rev = autodiff_thunk(
            ReverseSplitWithPrimal,
            Const{typeof(f)},
            Tret,
            map(typeof, activities)...,
        )
        tape, val, shadow_val = fwd(Const(f), activities...)
        # The reverse thunk only takes a seed when the return is Active.
        if Tret <: Const
            dval, = rev(Const(f), activities..., tape)
        else
            dval, = rev(Const(f), activities..., dret, tape)
        end

        @test all(isnothing, dval)
        @test val ≈ vexp
        # Expected shadow = pullback contribution (only when neither the argument nor
        # the return is Const) + the initial shadow wherever it is expected to
        # survive: everywhere for Const arguments, otherwise only at entries `f`
        # left unchanged.
        @test ∂xcopy ≈
            dexp[1] .* !(Tx <: Const || Tret <: Const) .+
              ∂x .* ((Tx <: Const) .| (x .== xcopy))
        @test ∂ycopy ≈
            dexp[2] .* !(Ty <: Const || Tret <: Const) .+
              ∂y .* ((Ty <: Const) .| (y .== ycopy))

        # The 2-argument method is only comparable for plain, contiguous vectors.
        if pfun === identity && sz == n && inc == 1
            @testset "consistency of 2-arg version" begin
                xcopy2, ycopy2, ∂xcopy2, ∂ycopy2 = map(copy, (x, y, ∂x, ∂y))
                # Distinct names keep this check from rebinding the annotations,
                # thunks and tape of the enclosing test.
                x_annot2 = if Tx <: Const
                    Const(pfun(xcopy2))
                else
                    Duplicated(pfun(xcopy2), pfun(∂xcopy2))
                end
                y_annot2 = if Ty <: Const
                    Const(pfun(ycopy2))
                else
                    Duplicated(pfun(ycopy2), pfun(∂ycopy2))
                end
                activities2 = (x_annot2, y_annot2)
                fwd2, rev2 = autodiff_thunk(
                    ReverseSplitWithPrimal,
                    Const{typeof(f)},
                    Tret,
                    map(typeof, activities2)...,
                )
                tape2, val2, shadow_val2 = fwd2(Const(f), activities2...)
                if Tret <: Const
                    dval2, = rev2(Const(f), activities2..., tape2)
                else
                    dval2, = rev2(Const(f), activities2..., dret, tape2)
                end
                @test all(isnothing, dval2)
                @test val2 == val
                @test ∂xcopy2 == ∂xcopy
                @test ∂ycopy2 == ∂ycopy
            end
        end
    end
end
end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ include("typetree.jl")
# XXX invalidation does not work on Julia 1.8
include("ruleinvalidation.jl")
end
include("rules/LinearAlgebra/blas.jl")
end
@static if VERSION ≥ v"1.7-" || !Sys.iswindows()
include("blas.jl")
Expand Down
Loading