Rewrite Index Tree Dialect #79

Draft
wants to merge 77 commits into base: master

Commits (77)
4dac7bc
Begin working on IndexTree transformations
AK2000 Nov 1, 2023
20f916a
Fixing some of the problems introduced on merge
AK2000 Nov 1, 2023
864c9f8
Resolved including of device mapping attribute
AK2000 Nov 1, 2023
0447561
Fixed type inclusion, parsing and printing
AK2000 Nov 1, 2023
cf505bd
V1 - Lower TA to new IndexTree ops, but removed everything else
AK2000 Nov 6, 2023
3f4e849
Fixes to TA to change how file is included
AK2000 Nov 6, 2023
d6bc7fd
Creating new block for index tree
AK2000 Nov 8, 2023
48fe5fd
Implement domain inference pass, fix to index ordering
AK2000 Nov 10, 2023
2d6ba90
[WIP] Fragile version of index tree to SCF lowering
AK2000 Nov 30, 2023
972ee7e
Fix carrying tensors inside loop, refactor domain concretization
AK2000 Dec 12, 2023
c29ee1f
Adding TA to index tree patterns for elementwise operations
AK2000 Dec 13, 2023
8d44653
[WIP] Trying to implement intersection op lowering
AK2000 Dec 19, 2023
f25fb52
[WIP] Got domain intersection working, but only with dense output
AK2000 Jan 3, 2024
35cf1c5
[WIP] Minor fix to ordering of reduce args
AK2000 Jan 3, 2024
04b31ba
[WIP] Beginning support for sparse output tensors with new index tree
AK2000 Jan 10, 2024
1c06a26
[WIP] Inlined itree op, got hacky version of removing set op working
AK2000 Jan 11, 2024
00801b1
[WIP] Included lowering to LLVM, lowering print op does not work
AK2000 Jan 12, 2024
352da6a
[WIP] Almost got print op lowering working
AK2000 Jan 15, 2024
9436939
[WIP] Fixed bufferization
AK2000 Jan 18, 2024
ac98adf
[WIP] Generate symbolic pass for sparse tensor declarations
AK2000 Jan 24, 2024
935079d
[WIP] Lots of changes for first try at symbolic domain pass and works…
AK2000 Feb 13, 2024
0c2d98d
[WIP] Broke everything trying to redo tensor conversion infrastructure
AK2000 Feb 21, 2024
bb32d70
Changing a lot to create new sparse tensor types, and appropriate lowe…
AK2000 May 15, 2024
ac27cfb
Fixing some problems with tests, add pure ops
AK2000 Jun 5, 2024
52495d4
Fixed inconsistencies in test suite
AK2000 Jun 5, 2024
27dd373
Fixing more of the test cases
AK2000 Jun 13, 2024
8bc6187
Fixed dense transpose and print elapsed time
AK2000 Jun 17, 2024
3f777dd
Fixing errors in typing
AK2000 Jun 19, 2024
8cb0d2a
Fixing errors in typing and set op
AK2000 Jun 19, 2024
dc0e445
Adding back ttgt pass
AK2000 Jun 20, 2024
ffe69f2
Fixed delete before use errors
AK2000 Oct 14, 2024
1a7eb01
Another bug found with asan
AK2000 Oct 14, 2024
c32a1cd
Rebased pull request with master
pthomadakis Oct 15, 2024
2153358
1) Fixed minor bugs in IndexTreeToSCF conversion
pthomadakis Oct 16, 2024
e0c95d7
Fixed another use-after-erase bug
pthomadakis Oct 17, 2024
ebc2efc
Fixed some more bugs coming from merging
pthomadakis Oct 17, 2024
5ac3316
Enforce tensor semantics on scalar op
AK2000 Oct 17, 2024
cdfa968
Added semiring operations to work correct assumption of multiplicativ…
AK2000 Oct 29, 2024
83660c7
Merge pull request #67 from AK2000/fix-scalar-op
AK2000 Oct 29, 2024
2705351
Merge pull request #68 from AK2000/fix-semiring-tests
AK2000 Oct 29, 2024
aad9afc
[WIP] Enabling function support
pthomadakis Oct 25, 2024
870b0e6
[WIP] Enabling function support. Dense transpose compound expressions…
pthomadakis Oct 29, 2024
8dd185f
[WIP] Enabling function support. Made transpose operation a little mo…
pthomadakis Oct 29, 2024
27eacee
Refactored SpConstructOp
pthomadakis Oct 30, 2024
cc73ecd
Fixed func.return when returning SparseTensorType
pthomadakis Oct 30, 2024
aaca5f9
Changed ta.sum to return a scalar instead of a memref.
pthomadakis Nov 1, 2024
e565571
Decouple semiring attribute from domain inference
AK2000 Nov 5, 2024
5b66b30
"fixed" triangle counting test :)
AK2000 Nov 5, 2024
6efa6e3
Merge pull request #72 from AK2000/fix-semiring-tests
gkestor Nov 6, 2024
e4a987a
Merge pull request #73 from AK2000/fix-triangle-counting
gkestor Nov 6, 2024
880c9cc
[COMETPY] Fixed issue with COO formats
pthomadakis Nov 7, 2024
de6469d
[COMETPY] Update tests to make use of pytest and removed custom test …
pthomadakis Nov 7, 2024
5cfd397
[COMETPY] Added data directory to tests
pthomadakis Nov 7, 2024
bfd5993
[COMETPY] Added missing file
pthomadakis Nov 7, 2024
f4fe43a
Added support for f32 data #69
pthomadakis Nov 1, 2024
836f8ce
Updated numpy-scipy side to support f32 #69
pthomadakis Nov 2, 2024
664a7df
Updated SparseTensor type to use the respective enum instead of raw n…
pthomadakis Nov 7, 2024
878c711
Sparse tensor indices crd(_tile),pos(_tile) now have an explicit inte…
pthomadakis Nov 9, 2024
89ee8c4
[COMETPY][WIP] Enabling i32 indices for sparse matrices
pthomadakis Nov 10, 2024
001cf29
Fixed bugs in Index to Integer casts and vice-versa
pthomadakis Nov 10, 2024
6aacaa5
[COMETPY] Finished support for i32,i64 sparse matrix indices.
pthomadakis Nov 10, 2024
978b77c
[WIP] reviewing new index tree dialect and related test cases (added …
gkestor Oct 25, 2024
ac744a9
blis_interface is updated - debug prints removed
gkestor Oct 29, 2024
304f20e
Bug is fixed in the dense transpose optimization. Testcases are organ…
gkestor Nov 13, 2024
4433b75
Fixed path in CMakeLists.txt
pthomadakis Nov 15, 2024
33621c7
Changing how index tree to SCF conversion pass is structured
AK2000 Aug 19, 2024
5557cb3
[WIP] adding semiring zero
AK2000 Nov 7, 2024
4176e61
[WIP] Symbolic pass and workspace transform not working
AK2000 Nov 11, 2024
814240f
Fixed issues with workspace transformation and symbolic pass
AK2000 Nov 11, 2024
5112041
Resolved conflicts with restructure SCF conversion
pthomadakis Nov 15, 2024
a25befd
[COMETPY] Updated sparse tensor representation
pthomadakis Nov 17, 2024
4253d33
[WIP] Got masking partially working but broke other things
AK2000 Nov 18, 2024
b93b044
[WIP] Fixed other problems, but still issues with Sandia_LL with masking
AK2000 Nov 18, 2024
d3551f4
Helped compilation for SandiaLL w Masking
AK2000 Nov 18, 2024
28f5cf7
Merge pull request #76 from AK2000/rewrite_masking
AK2000 Nov 20, 2024
b0fa0ae
Rearranging comet.cpp to emit it at a different stage
AK2000 Dec 3, 2024
3ed0cf5
Initial implementation of parallel loops (#77)
AK2000 Dec 10, 2024
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -107,6 +107,8 @@ add_custom_target(comet-headers)
set_target_properties(comet-headers PROPERTIES FOLDER "Misc")
add_custom_target(comet-doc)

set(CMAKE_INCLUDE_CURRENT_DIR ON)

# Add MLIR, LLVM and BLIS headers to the include path
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${MLIR_INCLUDE_DIRS})
@@ -141,7 +143,7 @@ endif()
add_subdirectory(include/comet)
add_subdirectory(lib)
add_subdirectory(frontends/comet_dsl)
add_subdirectory(integration_test)
add_subdirectory(test/integration)


option(COMET_INCLUDE_DOCS "Generate build targets for the COMET docs.")
@@ -202,3 +204,9 @@ if (STANDALONE_INSTALL)
message(STATUS "Setting an $ORIGIN-based RPATH on all executables")
set_rpath_all_targets(${CMAKE_CURRENT_SOURCE_DIR})
endif()

option(DEBUG_MODE "Create an installation with debug information" off)
if (DEBUG_MODE)
message(STATUS "Building comet in debug mode")
add_compile_options(-DCOMET_DEBUG_MODE)
endif()
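
Note on the DEBUG_MODE option above: it only adds a -DCOMET_DEBUG_MODE compile definition and does not change CMAKE_BUILD_TYPE. Below is a minimal sketch of how code might consume that definition; the COMET_DEBUG macro and the example program are hypothetical illustrations, not part of this PR.

// Minimal sketch (hypothetical, not from this PR): gating debug diagnostics
// on the COMET_DEBUG_MODE definition that the DEBUG_MODE option adds.
#include <iostream>

#ifdef COMET_DEBUG_MODE
#define COMET_DEBUG(msg) (std::cerr << "[comet-debug] " << (msg) << "\n")
#else
#define COMET_DEBUG(msg) ((void)0)
#endif

int main() {
  COMET_DEBUG("extra diagnostics are enabled in this build");
  return 0;
}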
18 changes: 9 additions & 9 deletions frontends/comet_dsl/CMakeLists.txt
@@ -22,15 +22,15 @@ get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
llvm_update_compile_flags(comet-opt)

set(LIBS
MLIRAnalysis
MLIRIR
MLIRParser
MLIRPass
MLIRTransforms
COMETUtils
COMETTensorAlgebraDialect
COMETIndexTreeDialect
COMETIndexTreeToSCF
MLIRAnalysis
MLIRIR
MLIRParser
MLIRPass
MLIRTransforms
COMETUtils
COMETTensorAlgebraDialect
COMETIndexTreeDialect
# COMETIndexTreeToSCF
)

if(ENABLE_GPU_TARGET)
112 changes: 65 additions & 47 deletions frontends/comet_dsl/comet.cpp
@@ -44,6 +44,9 @@
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/Func/Transforms/Passes.h"
#include "mlir/Dialect/Tensor/Transforms/Passes.h"
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"


#include "mlir/Conversion/Passes.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
@@ -339,6 +342,14 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,

mlir::OpPassManager &optPM = pm.nest<mlir::func::FuncOp>();

/// Check to see if we are dumping to TA dialect.
if (emitTA)
{
if (mlir::failed(pm.run(*module)))
return 4;
return 0;
}

/// =============================================================================
/// High-level optimization at the TA dialect
/// Such as finding the optimal ordering of dense tensor contractions, or reformulating tensor contractions
@@ -375,25 +386,14 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
/// Generate the index tree IR
optPM.addPass(mlir::comet::createLowerTensorAlgebraToIndexTreePass(CodegenTarget));

if (OptKernelFusion)
{
/// Apply partial fusion on index tree dialect for some compound expressions.
optPM.addPass(mlir::comet::createIndexTreeKernelFusionPass());
}
// Create new pass manager to optimize the index tree dialect
optPM.addPass(mlir::comet::createIndexTreeDomainInferencePass());

if (OptWorkspace)
{
/// Optimized workspace transformations, reduce iteration space for nonzero elements
optPM.addPass(mlir::comet::createIndexTreeWorkspaceTransformationsPass());
}

/// Dump index tree dialect.
if (emitIT)
{
if (mlir::failed(pm.run(*module)))
return 4;
return 0;
}
// if (OptKernelFusion)
// {
// /// Apply partial fusion on index tree dialect for some compound expressions.
// optPM.addPass(mlir::comet::createIndexTreeKernelFusionPass());
// }
}

/// =============================================================================
Expand All @@ -408,7 +408,10 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
/// input and output sparse tensor declaration lowering are distant and need different information
optPM.addPass(mlir::comet::createSparseTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createDenseTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createSparseTempOutputTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createSparseOutputTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createTensorFillLoweringPass());
optPM.addPass(mlir::comet::createDimOpLoweringPass());

/// =============================================================================

@@ -419,9 +422,9 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
optPM.addPass(mlir::comet::createLoweringTTGTPass(IsSelectBestPermTTGT, selectedPermNum, IsPrintFlops));
}

/// =============================================================================
/// Operation based optimizations
/// =============================================================================
// /// =============================================================================
// /// Operation based optimizations
// /// =============================================================================
if (OptMatmulTiling)
{
optPM.addPass(mlir::comet::createLinAlgMatmulTilingPass());
@@ -435,34 +438,39 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
/// =============================================================================
/// Lowering all the operations to loops
/// =============================================================================
if (IsLoweringtoSCF || emitLoops || emitTriton_ || emitLLVM )
{

/// Workspace transformations will create new dense tensor declarations, so we need to call createDenseTensorDeclLoweringPass
optPM.addPass(mlir::comet::createDenseTensorDeclLoweringPass()); /// lowers dense input/output tensor declaration
optPM.addPass(mlir::comet::createSparseTempOutputTensorDeclLoweringPass()); /// Temporary sparse output tensor declarations introduced by compound expressions
/// should be lowered before sparse output tensor declarations
optPM.addPass(mlir::comet::createSparseOutputTensorDeclLoweringPass()); /// lowering for sparse output tensor declarations
//(sparse_output_tensor_decl and temp_sparse_output_tensor_decl)

optPM.addPass(mlir::comet::createDimOpLoweringPass());

/// The partial Fusion pass might add new tensor.fill operations
optPM.addPass(mlir::comet::createTensorFillLoweringPass());
optPM.addPass(mlir::comet::createPCToLoopsLoweringPass());

if (IsLoweringtoSCF || emitLoops || emitLLVM)
{
/// =============================================================================
/// Lowering of other operations such as transpose, sum, etc. to SCF dialect
/// =============================================================================
/// If it is a transpose of dense tensor, the rewrites rules replaces ta.transpose with linalg.copy.
/// If it is a transpose of sparse tensor, it lowers the code to make a runtime call to specific sorting algorithm
optPM.addPass(mlir::comet::createLowerTensorAlgebraToSCFPass());

/// Concretize the domains of all the index variables
optPM.addPass(mlir::comet::createIndexTreeDomainConcretizationPass());

if (OptWorkspace) {
/// Optimized workspace transformations, reduce iteration space for nonzero elements
optPM.addPass(mlir::comet::createIndexTreeWorkspaceTransformationsPass());
}

optPM.addPass(mlir::comet::createIndexTreeSymbolicComputePass());

/// Dump index tree dialect.
if (emitIT)
{
if (mlir::failed(pm.run(*module)))
return 4;
return 0;
}

/// Finally lowering index tree to SCF dialect
optPM.addPass(mlir::comet::createLowerIndexTreeToSCFPass());
optPM.addPass(mlir::tensor::createTensorBufferizePass());
pm.addPass(mlir::func::createFuncBufferizePass()); /// Needed for func
pm.addPass(mlir::createConvertLinalgToLoopsPass());
optPM.addPass(mlir::comet::createConvertSymbolicDomainsPass());
optPM.addPass(mlir::comet::createSparseTensorConversionPass());
optPM.addPass(mlir::comet::createIndexTreeInliningPass());
optPM.addPass(mlir::createCanonicalizerPass());

if (OptDenseTransposeOp) /// Optimize Dense Transpose operation
{
@@ -487,14 +495,23 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,



/// =============================================================================
/// Late lowering passes
/// =============================================================================
// /// =============================================================================
// /// Late lowering passes
// /// =============================================================================
// pm.addPass(mlir::bufferization::createEmptyTensorToAllocTensorPass());
pm.addPass(mlir::comet::createTABufferizeFunc());
pm.addPass(mlir::createCanonicalizerPass());

mlir::bufferization::OneShotBufferizationOptions opts;
opts.allowUnknownOps = true;
pm.addPass(mlir::bufferization::createOneShotBufferizePass(opts));

optPM.addPass(mlir::comet::createSTCRemoveDeadOpsPass());
optPM.addPass(mlir::comet::createLateLoweringPass());
// pm.addPass(mlir::createCanonicalizerPass());
optPM.addPass(mlir::createCSEPass());
mlir::OpPassManager &late_lowering_pm = pm.nest<mlir::func::FuncOp>();
late_lowering_pm.addPass(mlir::comet::createSTCRemoveDeadOpsPass());
late_lowering_pm.addPass(mlir::comet::createLateLoweringPass());

pm.addPass(mlir::createCanonicalizerPass());
pm.addPass(mlir::createCSEPass());

#ifdef ENABLE_GPU_TARGET
if (CodegenTarget == TargetDevice::GPU && (emitTriton_ || emitLLVM || IsLoweringtoTriton))
@@ -616,6 +633,7 @@ int main(int argc, char **argv)
context.loadDialect<mlir::linalg::LinalgDialect>();
context.loadDialect<mlir::scf::SCFDialect>();
context.loadDialect<mlir::bufferization::BufferizationDialect>();
context.loadDialect<mlir::index::IndexDialect>();

mlir::OwningOpRef<mlir::ModuleOp> module;

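
For orientation, the comet.cpp hunks above restructure the lowering flow so that index-tree domain inference, domain concretization, the workspace transformation, and the symbolic-domain passes all run before the index tree is lowered to SCF, and module-level bufferization now goes through one-shot bufferize. The sketch below condenses that ordering into one helper; it is a reading of the diff rather than verbatim code, the COMET pass declarations are assumed to come from headers comet.cpp already includes (paths elided), and option handling (emitTA/emitIT dumps, TTGT, tiling, GPU paths) is omitted.

// Condensed sketch of the pass ordering introduced by this PR (assumption:
// derived from the diff above, not verbatim from comet.cpp).
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"
// COMET pass headers as included by comet.cpp (exact paths assumed, elided here).

static void buildIndexTreePipeline(mlir::PassManager &pm, bool optWorkspace) {
  mlir::OpPassManager &optPM = pm.nest<mlir::func::FuncOp>();

  // After TA is lowered to the index tree dialect, infer and then
  // concretize the domains of all index variables.
  optPM.addPass(mlir::comet::createIndexTreeDomainInferencePass());
  optPM.addPass(mlir::comet::createIndexTreeDomainConcretizationPass());

  // Workspace transformation now runs on concretized domains.
  if (optWorkspace)
    optPM.addPass(mlir::comet::createIndexTreeWorkspaceTransformationsPass());

  // Symbolic phase, then index tree -> SCF, symbolic-domain and sparse
  // tensor conversion, and inlining of the itree op.
  optPM.addPass(mlir::comet::createIndexTreeSymbolicComputePass());
  optPM.addPass(mlir::comet::createLowerIndexTreeToSCFPass());
  optPM.addPass(mlir::comet::createConvertSymbolicDomainsPass());
  optPM.addPass(mlir::comet::createSparseTensorConversionPass());
  optPM.addPass(mlir::comet::createIndexTreeInliningPass());
  optPM.addPass(mlir::createCanonicalizerPass());

  // Module-level bufferization goes through one-shot bufferize.
  pm.addPass(mlir::comet::createTABufferizeFunc());
  pm.addPass(mlir::createCanonicalizerPass());
  mlir::bufferization::OneShotBufferizationOptions opts;
  opts.allowUnknownOps = true;
  pm.addPass(mlir::bufferization::createOneShotBufferizePass(opts));

  // Late lowering runs in a fresh func-level pass manager.
  mlir::OpPassManager &latePM = pm.nest<mlir::func::FuncOp>();
  latePM.addPass(mlir::comet::createSTCRemoveDeadOpsPass());
  latePM.addPass(mlir::comet::createLateLoweringPass());
  pm.addPass(mlir::createCanonicalizerPass());
  pm.addPass(mlir::createCSEPass());
}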