From aadac1d33ac02e1eeb5023cce9f30fe16b6af10a Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Wed, 18 Sep 2024 08:52:47 +0000
Subject: [PATCH 01/12] Initial commit

---
 make/modules/jdk.incubator.vector/Lib.gmk     | 16 ++++
 src/hotspot/cpu/riscv/assembler_riscv.hpp     |  5 +-
 src/hotspot/cpu/riscv/riscv.ad                | 26 +++++-
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 14 ++-
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 52 +++++++++++
 src/hotspot/cpu/x86/stubGenerator_x86_64.cpp  | 24 ++---
 src/hotspot/share/opto/callnode.cpp           |  2 +-
 src/hotspot/share/opto/library_call.hpp       |  2 +-
 src/hotspot/share/opto/vectorIntrinsics.cpp   | 37 +++++---
 src/hotspot/share/prims/vectorSupport.cpp     |  4 +-
 src/hotspot/share/prims/vectorSupport.hpp     | 13 +--
 src/hotspot/share/runtime/stubRoutines.cpp    |  4 +-
 src/hotspot/share/runtime/stubRoutines.hpp    |  4 +-
 .../native/libsleef/lib/vector_math_rvv.c     | 89 +++++++++++++++++++
 14 files changed, 247 insertions(+), 45 deletions(-)
 create mode 100644 src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index 0620549f05cd7..0ac2075bab0dc 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -37,3 +37,19 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO
 
   TARGETS += $(BUILD_LIBJSVML)
 endif
+
+ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
+  $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
+      NAME := sleef, \
+      SRC := libsleef/lib, \
+      EXTRA_SRC := libsleef/generated, \
+      DISABLED_WARNINGS_gcc := unused-function, \
+      DISABLED_WARNINGS_clang := unused-function, \
+      CFLAGS := $(CFLAGS_JDKLIB) -O3 -march=rv64gcv, \
+      LDFLAGS := $(LDFLAGS_JDKLIB) \
+          $(call SET_SHARED_LIBRARY_ORIGIN), \
+      LIBS := $(JDKLIB_LIBS) \
+  ))
+
+  TARGETS += $(BUILD_LIBSLEEF)
+endif
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 98ab86bf72eb6..1443e2d8a8b04 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -46,8 +46,9 @@
 class Argument {
  public:
   enum {
-    n_int_register_parameters_c   = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
-    n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
+    n_int_register_parameters_c   = 8,   // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
+    n_float_register_parameters_c = 8,   // f10, f11, ... f17 (c_farg0, c_farg1, ... )
+    n_vector_register_parameters_c = 8,  // v8, v9, ... v15
 
     n_int_register_parameters_j   = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
     n_float_register_parameters_j = 8  // f10, f11, ... f17 (j_farg0, j_farg1, ...)
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 05f55fd0da7af..55f98858e3db2 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1972,12 +1972,15 @@ const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length)
 
 // Vector calling convention not yet implemented.
 bool Matcher::supports_vector_calling_convention(void) {
-  return false;
+  return EnableVectorSupport && UseVectorStubs;
 }
 
 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  Unimplemented();
-  return OptoRegPair(0, 0);
+  assert(EnableVectorSupport && UseVectorStubs, "sanity");
+  assert(ideal_reg == Op_VecA, "sanity");
+  int lo = V8_num;
+  int hi = V8_K_num;
+  return OptoRegPair(hi, lo);
 }
 
 // Is this branch offset short enough that a short branch can be used?
@@ -10066,6 +10069,23 @@ instruct CallLeafDirect(method meth, rFlagsReg cr)
   ins_pipe(pipe_class_call);
 %}
 
+// Call Runtime Instruction without safepoint and with vector arguments
+
+instruct CallLeafDirectVector(method meth)
+%{
+  match(CallLeafVector);
+
+  effect(USE meth);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "CALL, runtime leaf vector $meth" %}
+
+  ins_encode(riscv_enc_java_to_runtime(meth));
+
+  ins_pipe(pipe_class_call);
+%}
+
 // Call Runtime Instruction
 
 instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 879fd92272279..03907b7d9b764 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -666,7 +666,19 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                              uint num_bits,
                                              uint total_args_passed) {
-  Unimplemented();
+  // More than 8 argument inputs are not supported now.
+  assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
+  assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
+
+  static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
+    v8, v9, v10, v11, v12, v13, v14, v15
+  };
+
+  const int next_reg_val = 3;
+  for (uint i = 0; i < total_args_passed; i++) {
+    VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
+    regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
+  }
   return 0;
 }
 
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 8792dea7de5eb..84389ae12dc2c 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -6059,6 +6059,56 @@ static const int64_t right_3_bits = right_n_bits(3);
     return start;
   }
 
+  void generate_vector_math_stubs() {
+    if (UseRVV) {
+      // Get native vector math stub routine addresses
+      void* libsleef = nullptr;
+      char ebuf[1024];
+      char dll_name[JVM_MAXPATHLEN];
+      if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
+        libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
+      }
+      if (libsleef != nullptr) {
+        // Method naming convention
+        //   All the methods are named as <OP><T>_<U><suffix>
+        //
+        //   Where:
+        //     <OP>     is the operation name, e.g. sin, cos
+        //     <T>      is to indicate float/double
+        //              "fx/dx" for vector float/double operation
+        //     <U>      is the precision level
+        //              "u10/u05" represents 1.0/0.5 ULP error bounds
+        //               We use "u10" for all operations by default
+        //               But for those functions do not have u10 support, we use "u05" instead
+        //     <suffix> rvv, indicates riscv vector extension
+        //
+        //   e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
+        //
+        log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
+
+        for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+          int vop = VectorSupport::VECTOR_OP_MATH_START + op;
+          if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
+            continue;
+          }
+
+          // The native library does not support u10 level of "hypot".
+          const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
+
+          snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
+          StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
+
+          snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
+          StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
+        }
+      } else {
+        log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
+      }
+    } else {
+      log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
+    }
+  }
+
 #endif // COMPILER2
 
   /**
@@ -6264,6 +6314,8 @@ static const int64_t right_3_bits = right_n_bits(3);
 
     generate_string_indexof_stubs();
 
+    generate_vector_math_stubs();
+
 #endif // COMPILER2
   }
 
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 2bc4a0a9cba94..83d9328f5db71 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -4157,41 +4157,41 @@ void StubGenerator::generate_compiler_stubs() {
 
     log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
     if (UseAVX > 2) {
-      for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
-        int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+      for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+        int vop = VectorSupport::VECTOR_OP_MATH_START + op;
         if ((!VM_Version::supports_avx512dq()) &&
             (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
           continue;
         }
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
+        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
         StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
+        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
         StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
       }
     }
     const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
-    for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
-      int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
       if (vop == VectorSupport::VECTOR_OP_POW) {
         continue;
       }
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
     }
   }
diff --git a/src/hotspot/share/opto/callnode.cpp b/src/hotspot/share/opto/callnode.cpp
index d715e6533432e..e800b3c736bf2 100644
--- a/src/hotspot/share/opto/callnode.cpp
+++ b/src/hotspot/share/opto/callnode.cpp
@@ -755,7 +755,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
 
     if (Opcode() == Op_CallLeafVector) {
       // If the return is in vector, compute appropriate regmask taking into account the whole range
-      if(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ) {
+      if(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ) {
         if(OptoReg::is_valid(regs.second())) {
           for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) {
             rm.Insert(r);
diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp
index dd74734802f65..5f020fc169e6d 100644
--- a/src/hotspot/share/opto/library_call.hpp
+++ b/src/hotspot/share/opto/library_call.hpp
@@ -372,7 +372,7 @@ class LibraryCallKit : public GraphKit {
   bool inline_index_vector();
   bool inline_index_partially_in_upper_range();
 
-  Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
+  Node* gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
 
   enum VectorMaskUseType {
     VecMaskUseLoad  = 1 << 0,
diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index cfcd903e79d95..d6a60788e6b9f 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -468,11 +468,11 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) {
   Node* operation = nullptr;
   if (opc == Op_CallLeafVector) {
     assert(UseVectorStubs, "sanity");
-    operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2);
+    operation = gen_call_to_vector_math(opr->get_con(), elem_bt, num_elem, opd1, opd2);
     if (operation == nullptr) {
-      log_if_needed("  ** svml call failed for %s_%s_%d",
-                         (elem_bt == T_FLOAT)?"float":"double",
-                         VectorSupport::svmlname[opr->get_con() - VectorSupport::VECTOR_OP_SVML_START],
+      log_if_needed("  ** Vector math call failed for %s_%s_%d",
+                         (elem_bt == T_FLOAT) ? "float" : "double",
+                         VectorSupport::mathname[opr->get_con() - VectorSupport::VECTOR_OP_MATH_START],
                          num_elem * type2aelembytes(elem_bt));
       return false;
      }
@@ -2013,12 +2013,12 @@ bool LibraryCallKit::inline_vector_rearrange() {
   return true;
 }
 
-static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
+static address get_vector_math_address(int vop, int bits, BasicType bt, char* name_ptr, int name_len) {
   address addr = nullptr;
   assert(UseVectorStubs, "sanity");
   assert(name_ptr != nullptr, "unexpected");
-  assert((vop >= VectorSupport::VECTOR_OP_SVML_START) && (vop <= VectorSupport::VECTOR_OP_SVML_END), "unexpected");
-  int op = vop - VectorSupport::VECTOR_OP_SVML_START;
+  assert((vop >= VectorSupport::VECTOR_OP_MATH_START) && (vop <= VectorSupport::VECTOR_OP_MATH_END), "unexpected");
+  int op = vop - VectorSupport::VECTOR_OP_MATH_START;
 
   switch(bits) {
     case 64:  //fallthough
@@ -2026,11 +2026,11 @@ static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr,
     case 256: //fallthough
     case 512:
       if (bt == T_FLOAT) {
-        snprintf(name_ptr, name_len, "vector_%s_float%d", VectorSupport::svmlname[op], bits);
+        snprintf(name_ptr, name_len, "vector_%s_float_%dbits_fixed", VectorSupport::mathname[op], bits);
         addr = StubRoutines::_vector_f_math[exact_log2(bits/64)][op];
       } else {
         assert(bt == T_DOUBLE, "must be FP type only");
-        snprintf(name_ptr, name_len, "vector_%s_double%d", VectorSupport::svmlname[op], bits);
+        snprintf(name_ptr, name_len, "vector_%s_double_%dbits_fixed", VectorSupport::mathname[op], bits);
         addr = StubRoutines::_vector_d_math[exact_log2(bits/64)][op];
       }
       break;
@@ -2041,19 +2041,30 @@ static address get_svml_address(int vop, int bits, BasicType bt, char* name_ptr,
       break;
   }
 
+  if (addr == nullptr && Matcher::supports_scalable_vector()) {
+    if (bt == T_FLOAT) {
+      snprintf(name_ptr, name_len, "vector_%s_float_%dbits_scalable", VectorSupport::mathname[op], bits);
+      addr = StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op];
+    } else {
+      assert(bt == T_DOUBLE, "must be FP type only");
+      snprintf(name_ptr, name_len, "vector_%s_double_%dbits_scalable", VectorSupport::mathname[op], bits);
+      addr = StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op];
+    }
+  }
+
   return addr;
 }
 
-Node* LibraryCallKit::gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
+Node* LibraryCallKit::gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2) {
   assert(UseVectorStubs, "sanity");
-  assert(vector_api_op_id >= VectorSupport::VECTOR_OP_SVML_START && vector_api_op_id <= VectorSupport::VECTOR_OP_SVML_END, "need valid op id");
+  assert(vector_api_op_id >= VectorSupport::VECTOR_OP_MATH_START && vector_api_op_id <= VectorSupport::VECTOR_OP_MATH_END, "need valid op id");
   assert(opd1 != nullptr, "must not be null");
   const TypeVect* vt = TypeVect::make(bt, num_elem);
   const TypeFunc* call_type = OptoRuntime::Math_Vector_Vector_Type(opd2 != nullptr ? 2 : 1, vt, vt);
   char name[100] = "";
 
-  // Get address for svml method.
-  address addr = get_svml_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
+  // Get address for vector math method.
+  address addr = get_vector_math_address(vector_api_op_id, vt->length_in_bytes() * BitsPerByte, bt, name, 100);
 
   if (addr == nullptr) {
     return nullptr;
diff --git a/src/hotspot/share/prims/vectorSupport.cpp b/src/hotspot/share/prims/vectorSupport.cpp
index e0517c91e957d..65bc6c48fee7b 100644
--- a/src/hotspot/share/prims/vectorSupport.cpp
+++ b/src/hotspot/share/prims/vectorSupport.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,7 +43,7 @@
 #endif // COMPILER2
 
 #ifdef COMPILER2
-const char* VectorSupport::svmlname[VectorSupport::NUM_SVML_OP] = {
+const char* VectorSupport::mathname[VectorSupport::NUM_VECTOR_OP_MATH] = {
     "tan",
     "tanh",
     "sin",
diff --git a/src/hotspot/share/prims/vectorSupport.hpp b/src/hotspot/share/prims/vectorSupport.hpp
index 7302e0060648b..6f8e52e9ec0c6 100644
--- a/src/hotspot/share/prims/vectorSupport.hpp
+++ b/src/hotspot/share/prims/vectorSupport.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,9 +121,9 @@ class VectorSupport : AllStatic {
     VECTOR_OP_EXPM1 = 117,
     VECTOR_OP_HYPOT = 118,
 
-    VECTOR_OP_SVML_START = VECTOR_OP_TAN,
-    VECTOR_OP_SVML_END   = VECTOR_OP_HYPOT,
-    NUM_SVML_OP = VECTOR_OP_SVML_END - VECTOR_OP_SVML_START + 1
+    VECTOR_OP_MATH_START = VECTOR_OP_TAN,
+    VECTOR_OP_MATH_END   = VECTOR_OP_HYPOT,
+    NUM_VECTOR_OP_MATH   = VECTOR_OP_MATH_END - VECTOR_OP_MATH_START + 1
   };
 
   enum {
@@ -131,7 +131,8 @@ class VectorSupport : AllStatic {
     VEC_SIZE_128 = 1,
     VEC_SIZE_256 = 2,
     VEC_SIZE_512 = 3,
-    NUM_VEC_SIZES = 4
+    VEC_SIZE_SCALABLE = 4,
+    NUM_VEC_SIZES = 5
   };
 
   enum {
@@ -139,7 +140,7 @@ class VectorSupport : AllStatic {
     MODE_BITS_COERCED_LONG_TO_MASK = 1
   };
 
-  static const char* svmlname[VectorSupport::NUM_SVML_OP];
+  static const char* mathname[VectorSupport::NUM_VECTOR_OP_MATH];
 
   static int vop2ideal(jint vop, BasicType bt);
 
diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp
index c13f64fca4bed..3ee9e84a1b54a 100644
--- a/src/hotspot/share/runtime/stubRoutines.cpp
+++ b/src/hotspot/share/runtime/stubRoutines.cpp
@@ -175,8 +175,8 @@ address StubRoutines::_dtan = nullptr;
 address StubRoutines::_f2hf = nullptr;
 address StubRoutines::_hf2f = nullptr;
 
-address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
-address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}};
+address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
+address StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH] = {{nullptr}, {nullptr}};
 
 address StubRoutines::_method_entry_barrier = nullptr;
 address StubRoutines::_array_sort = nullptr;
diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp
index f5b932569be81..8d93b04e1259d 100644
--- a/src/hotspot/share/runtime/stubRoutines.hpp
+++ b/src/hotspot/share/runtime/stubRoutines.hpp
@@ -293,8 +293,8 @@ class StubRoutines: AllStatic {
   static address _cont_returnBarrierExc;
 
   // Vector Math Routines
-  static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP];
-  static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP];
+  static address _vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
+  static address _vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_VECTOR_OP_MATH];
 
   static address _upcall_stub_exception_handler;
 
diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
new file mode 100644
index 0000000000000..c093f5cdeaa94
--- /dev/null
+++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2024, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifdef __riscv_v_intrinsic
+
+#include <stdint.h>
+
+#include <riscv_vector.h>
+
+#include "../generated/misc.h"
+#include "../generated/sleefinline_rvvm1.h"
+
+#include <jni.h>
+
+#define DEFINE_VECTOR_MATH_UNARY_RVV(op, type) \
+JNIEXPORT                                      \
+type op##rvv(type input) {                     \
+  return Sleef_##op##rvvm1(input);             \
+}
+
+#define DEFINE_VECTOR_MATH_BINARY_RVV(op, type) \
+JNIEXPORT                                       \
+type op##rvv(type input1, type input2) {        \
+  return Sleef_##op##rvvm1(input1, input2);     \
+}
+
+DEFINE_VECTOR_MATH_UNARY_RVV(tanfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(sinfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(sinhfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(cosfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(coshfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(asinfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(acosfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(atanfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(cbrtfx_u10,  vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(logfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(log10fx_u10, vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(log1pfx_u10, vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(expfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(expm1fx_u10, vfloat_rvvm1_sleef)
+
+DEFINE_VECTOR_MATH_UNARY_RVV(tandx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(sindx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(sinhdx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(cosdx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(coshdx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(asindx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(acosdx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(atandx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(cbrtdx_u10,  vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(logdx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(log10dx_u10, vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(log1pdx_u10, vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(expdx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_UNARY_RVV(expm1dx_u10, vdouble_rvvm1_sleef)
+
+DEFINE_VECTOR_MATH_BINARY_RVV(atan2fx_u10, vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_BINARY_RVV(powfx_u10,   vfloat_rvvm1_sleef)
+DEFINE_VECTOR_MATH_BINARY_RVV(hypotfx_u05, vfloat_rvvm1_sleef)
+
+DEFINE_VECTOR_MATH_BINARY_RVV(atan2dx_u10, vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_BINARY_RVV(powdx_u10,   vdouble_rvvm1_sleef)
+DEFINE_VECTOR_MATH_BINARY_RVV(hypotdx_u05, vdouble_rvvm1_sleef)
+
+#undef DEFINE_VECTOR_MATH_UNARY_RVV
+
+#undef DEFINE_VECTOR_MATH_BINARY_RVV
+
+#endif /* __riscv_v_intrinsic */

From 304b74a6f860dd56e5625dfefd56dfe0697fd5a7 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Wed, 18 Sep 2024 11:03:14 +0000
Subject: [PATCH 02/12] fix make warning

---
 make/modules/jdk.incubator.vector/Lib.gmk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index 0ac2075bab0dc..5e52277919af2 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -43,8 +43,8 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2
       NAME := sleef, \
       SRC := libsleef/lib, \
       EXTRA_SRC := libsleef/generated, \
-      DISABLED_WARNINGS_gcc := unused-function, \
-      DISABLED_WARNINGS_clang := unused-function, \
+      DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
+      DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
       CFLAGS := $(CFLAGS_JDKLIB) -O3 -march=rv64gcv, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN), \

From 26a6807146a19bf51803409b4af4d96076d6321d Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Mon, 23 Sep 2024 07:25:15 +0000
Subject: [PATCH 03/12] modify cflags style

---
 make/modules/jdk.incubator.vector/Lib.gmk | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index 5e52277919af2..c6c6103a30113 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -41,11 +41,12 @@ endif
 ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
   $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
       NAME := sleef, \
+      OPTIMIZATION := HIGH, \
       SRC := libsleef/lib, \
       EXTRA_SRC := libsleef/generated, \
       DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
       DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
-      CFLAGS := $(CFLAGS_JDKLIB) -O3 -march=rv64gcv, \
+      CFLAGS := $(CFLAGS_JDKLIB) -march=rv64gcv, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN), \
       LIBS := $(JDKLIB_LIBS) \

From f879fa2cfc033958ceccc0126474ec01026a6629 Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Mon, 23 Sep 2024 16:20:28 +0000
Subject: [PATCH 04/12] comment

---
 .../linux/native/libsleef/lib/vector_math_rvv.c          | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
index c093f5cdeaa94..ebdbddb8ab1ab 100644
--- a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
+++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
@@ -21,6 +21,15 @@
  * questions.
  */
 
+// On riscv, sleef vector apis depend on native vector intrinsic, which is supported on
+// some compiler, e.g. gcc 14+.
+// __riscv_v_intrinsic is used to tell if the compiler supports vector intrinsic.
+//
+// At compile time, when current compiler does support vector intrinsic, there will be
+// bridge functions built in the library, otherwise no such fuctions in the library.
+// At runtime, if the library is found and bridge fuctions are found in the library,
+// then the java vector API will call into bridge functions and sleef, otherwise not.
+
 #ifdef __riscv_v_intrinsic
 
 #include <stdint.h>

From 32eb54d5001b0ea1f847cbcf5b134eb3596c86f7 Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Mon, 23 Sep 2024 19:27:52 +0000
Subject: [PATCH 05/12] refine comment

---
 .../linux/native/libsleef/lib/vector_math_rvv.c          | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
index ebdbddb8ab1ab..bc2523fe2957d 100644
--- a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
+++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
@@ -25,10 +25,11 @@
 // some compiler, e.g. gcc 14+.
 // __riscv_v_intrinsic is used to tell if the compiler supports vector intrinsic.
 //
-// At compile time, when current compiler does support vector intrinsic, there will be
-// bridge functions built in the library, otherwise no such fuctions in the library.
-// At runtime, if the library is found and bridge fuctions are found in the library,
-// then the java vector API will call into bridge functions and sleef, otherwise not.
+// At compile-time, if the current compiler does support vector intrinsics, bridge
+// functions will be built in the library. In case the current compiler doesn't support
+// vector intrinsics (gcc < 14), then the bridge functions won't be compiled.
+// At run-time, if the library is found and the bridge functions are available in the
+// library, then the java vector API will call into the bridge functions and sleef.
 
 #ifdef __riscv_v_intrinsic
 

From f190709a92dcd72b303fc739d7329824ac814e42 Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Tue, 24 Sep 2024 14:51:22 +0000
Subject: [PATCH 06/12] misc

---
 src/hotspot/cpu/riscv/assembler_riscv.hpp     |  1 +
 src/hotspot/cpu/riscv/riscv.ad                |  5 +-
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp |  3 +-
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 92 ++++++++++---------
 4 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 1443e2d8a8b04..f0a01ca4b9694 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -46,6 +46,7 @@
 class Argument {
  public:
   enum {
+    // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
     n_int_register_parameters_c   = 8,   // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
     n_float_register_parameters_c = 8,   // f10, f11, ... f17 (c_farg0, c_farg1, ... )
     n_vector_register_parameters_c = 8,  // v8, v9, ... v15
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 55f98858e3db2..5f26f596ec40b 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1978,6 +1978,7 @@ bool Matcher::supports_vector_calling_convention(void) {
 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
   assert(EnableVectorSupport && UseVectorStubs, "sanity");
   assert(ideal_reg == Op_VecA, "sanity");
+  // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
   int lo = V8_num;
   int hi = V8_K_num;
   return OptoRegPair(hi, lo);
@@ -10071,11 +10072,11 @@ instruct CallLeafDirect(method meth, rFlagsReg cr)
 
 // Call Runtime Instruction without safepoint and with vector arguments
 
-instruct CallLeafDirectVector(method meth)
+instruct CallLeafDirectVector(method meth, rFlagsReg cr)
 %{
   match(CallLeafVector);
 
-  effect(USE meth);
+  effect(USE meth, KILL cr);
 
   ins_cost(BRANCH_COST);
 
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 03907b7d9b764..797ed598eb059 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -666,10 +666,11 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                              uint num_bits,
                                              uint total_args_passed) {
-  // More than 8 argument inputs are not supported now.
+  // More than 8 argument inputs are not supported for now.
   assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
   assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
 
+  // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
   static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
     v8, v9, v10, v11, v12, v13, v14, v15
   };
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 84389ae12dc2c..02f0c8dbfa335 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -6060,52 +6060,54 @@ static const int64_t right_3_bits = right_n_bits(3);
   }
 
   void generate_vector_math_stubs() {
-    if (UseRVV) {
-      // Get native vector math stub routine addresses
-      void* libsleef = nullptr;
-      char ebuf[1024];
-      char dll_name[JVM_MAXPATHLEN];
-      if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
-        libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
-      }
-      if (libsleef != nullptr) {
-        // Method naming convention
-        //   All the methods are named as <OP><T>_<U><suffix>
-        //
-        //   Where:
-        //     <OP>     is the operation name, e.g. sin, cos
-        //     <T>      is to indicate float/double
-        //              "fx/dx" for vector float/double operation
-        //     <U>      is the precision level
-        //              "u10/u05" represents 1.0/0.5 ULP error bounds
-        //               We use "u10" for all operations by default
-        //               But for those functions do not have u10 support, we use "u05" instead
-        //     <suffix> rvv, indicates riscv vector extension
-        //
-        //   e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
-        //
-        log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
-
-        for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
-          int vop = VectorSupport::VECTOR_OP_MATH_START + op;
-          if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
-            continue;
-          }
-
-          // The native library does not support u10 level of "hypot".
-          const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
-
-          snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
-          StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-
-          snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
-          StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
-        }
-      } else {
-        log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
-      }
-    } else {
+    if (!UseRVV) {
       log_info(library)("vector is not supported, skip loading vector math (sleef) library!");
+      return;
+    }
+
+    // Get native vector math stub routine addresses
+    void* libsleef = nullptr;
+    char ebuf[1024];
+    char dll_name[JVM_MAXPATHLEN];
+    if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "sleef")) {
+      libsleef = os::dll_load(dll_name, ebuf, sizeof ebuf);
+    }
+    if (libsleef == nullptr) {
+      log_info(library)("Failed to load native vector math (sleef) library, %s!", ebuf);
+      return;
+    }
+
+    // Method naming convention
+    //   All the methods are named as <OP><T>_<U><suffix>
+    //
+    //   Where:
+    //     <OP>     is the operation name, e.g. sin, cos
+    //     <T>      is to indicate float/double
+    //              "fx/dx" for vector float/double operation
+    //     <U>      is the precision level
+    //              "u10/u05" represents 1.0/0.5 ULP error bounds
+    //               We use "u10" for all operations by default
+    //               But for those functions do not have u10 support, we use "u05" instead
+    //     <suffix> rvv, indicates riscv vector extension
+    //
+    //   e.g. sinfx_u10rvv is the method for computing vector float sin using rvv instructions
+    //
+    log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "sleef" JNI_LIB_SUFFIX, p2i(libsleef));
+
+    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
+      if (vop == VectorSupport::VECTOR_OP_TANH) { // skip tanh because of performance regression
+        continue;
+      }
+
+      // The native library does not support u10 level of "hypot".
+      const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
+
+      snprintf(ebuf, sizeof(ebuf), "%sfx_%srvv", VectorSupport::mathname[op], ulf);
+      StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
+
+      snprintf(ebuf, sizeof(ebuf), "%sdx_%srvv", VectorSupport::mathname[op], ulf);
+      StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libsleef, ebuf);
     }
   }
 

From 7719b5cfd24c630bbdfdb8411e88955154540eae Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Tue, 24 Sep 2024 19:07:02 +0000
Subject: [PATCH 07/12] use all arg v regs

---
 src/hotspot/cpu/riscv/assembler_riscv.hpp     | 2 +-
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index f0a01ca4b9694..e6d919b3a3725 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -49,7 +49,7 @@ class Argument {
     // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
     n_int_register_parameters_c   = 8,   // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
     n_float_register_parameters_c = 8,   // f10, f11, ... f17 (c_farg0, c_farg1, ... )
-    n_vector_register_parameters_c = 8,  // v8, v9, ... v15
+    n_vector_register_parameters_c = 16,  // v8, v9, ... v23
 
     n_int_register_parameters_j   = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
     n_float_register_parameters_j = 8  // f10, f11, ... f17 (j_farg0, j_farg1, ...)
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index 797ed598eb059..ec5a751c0500d 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -666,13 +666,13 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                              uint num_bits,
                                              uint total_args_passed) {
-  // More than 8 argument inputs are not supported for now.
   assert(total_args_passed <= Argument::n_vector_register_parameters_c, "unsupported");
   assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
 
   // check more info at https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
   static const VectorRegister VEC_ArgReg[Argument::n_vector_register_parameters_c] = {
-    v8, v9, v10, v11, v12, v13, v14, v15
+    v8, v9, v10, v11, v12, v13, v14, v15,
+    v16, v17, v18, v19, v20, v21, v22, v23
   };
 
   const int next_reg_val = 3;

From 50b6d5291642fa1f5a7c1e96ba3d72957383a259 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Thu, 26 Sep 2024 11:48:59 +0000
Subject: [PATCH 08/12] check frm after sleef call

---
 .../native/libsleef/lib/vector_math_rvv.c     | 25 +++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
index bc2523fe2957d..c46f98ba1bb77 100644
--- a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
+++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
@@ -42,16 +42,37 @@
 
 #include <jni.h>
 
+// We maintain an invariant in java world that default dynamic rounding mode is RNE,
+// please check JDK-8330094, JDK-8330266 for more details.
+// Currently, sleef source on riscv does not change rounding mode to others except
+// of RNE. But we still think it's safer to make sure that after calling into sleef
+// the dynamic rounding mode is always RNE.
+
+#ifdef ASSERT
+#define CHECK_FRM   __asm__ __volatile__ (     \
+    "    frrm   t0              \n\t"          \
+    "    beqz   t0, 2f          \n\t"          \
+    "    csrrw  x0, cycle, x0   \n\t"          \
+    "2:                         \n\t"          \
+    : : : "memory" );
+#else
+#define CHECK_FRM
+#endif
+
 #define DEFINE_VECTOR_MATH_UNARY_RVV(op, type) \
 JNIEXPORT                                      \
 type op##rvv(type input) {                     \
-  return Sleef_##op##rvvm1(input);             \
+  type res = Sleef_##op##rvvm1(input);         \
+  CHECK_FRM                                    \
+  return res;                                  \
 }
 
 #define DEFINE_VECTOR_MATH_BINARY_RVV(op, type) \
 JNIEXPORT                                       \
 type op##rvv(type input1, type input2) {        \
-  return Sleef_##op##rvvm1(input1, input2);     \
+  type res = Sleef_##op##rvvm1(input1, input2); \
+  CHECK_FRM                                     \
+  return res;                                   \
 }
 
 DEFINE_VECTOR_MATH_UNARY_RVV(tanfx_u10,   vfloat_rvvm1_sleef)

From 0bd263d1144788e05b7f7dca7dbe1e3c8876f19b Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Thu, 26 Sep 2024 13:10:31 +0000
Subject: [PATCH 09/12] fix test macro

---
 .../linux/native/libsleef/lib/vector_math_rvv.c                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
index c46f98ba1bb77..4515457fa899a 100644
--- a/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
+++ b/src/jdk.incubator.vector/linux/native/libsleef/lib/vector_math_rvv.c
@@ -48,7 +48,7 @@
 // of RNE. But we still think it's safer to make sure that after calling into sleef
 // the dynamic rounding mode is always RNE.
 
-#ifdef ASSERT
+#ifdef DEBUG
 #define CHECK_FRM   __asm__ __volatile__ (     \
     "    frrm   t0              \n\t"          \
     "    beqz   t0, 2f          \n\t"          \

From 0f9175616c51254a99b74dbda5df7adc9109efd4 Mon Sep 17 00:00:00 2001
From: hamlin <hamlin@hamlin.eu.rivosinc.com>
Date: Mon, 30 Sep 2024 10:26:38 +0000
Subject: [PATCH 10/12] bits > 512

---
 src/hotspot/share/opto/vectorIntrinsics.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index d6a60788e6b9f..ef9275ddd6cfb 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -2035,9 +2035,11 @@ static address get_vector_math_address(int vop, int bits, BasicType bt, char* na
       }
       break;
     default:
-      snprintf(name_ptr, name_len, "invalid");
-      addr = nullptr;
-      Unimplemented();
+      if (!Matcher::supports_scalable_vector() || !Matcher::vector_size_supported(bt, bits/type2aelembytes(bt)) ) {
+        snprintf(name_ptr, name_len, "invalid");
+        addr = nullptr;
+        Unimplemented();
+      }
       break;
   }
 

From f5ec51601a46f5a6977044747e7cb3bddfec67a8 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Tue, 8 Oct 2024 14:01:29 +0000
Subject: [PATCH 11/12] make minor

---
 make/modules/jdk.incubator.vector/Lib.gmk | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index c6c6103a30113..aab30672df3e3 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -38,6 +38,10 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO
   TARGETS += $(BUILD_LIBJSVML)
 endif
 
+################################################################################
+## Build libsleef
+################################################################################
+
 ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2), true+true+true)
   $(eval $(call SetupJdkLibrary, BUILD_LIBSLEEF, \
       NAME := sleef, \
@@ -46,10 +50,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2
       EXTRA_SRC := libsleef/generated, \
       DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
       DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
-      CFLAGS := $(CFLAGS_JDKLIB) -march=rv64gcv, \
-      LDFLAGS := $(LDFLAGS_JDKLIB) \
-          $(call SET_SHARED_LIBRARY_ORIGIN), \
-      LIBS := $(JDKLIB_LIBS) \
+      CFLAGS := -march=rv64gcv \
   ))
 
   TARGETS += $(BUILD_LIBSLEEF)

From 76b8bbc5c595b2e35951303fdfefa94037c0ca1a Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-2-236.us-east-2.compute.internal>
Date: Tue, 8 Oct 2024 14:49:50 +0000
Subject: [PATCH 12/12] keep comma

---
 make/modules/jdk.incubator.vector/Lib.gmk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
index aab30672df3e3..bf6ace6f97f7c 100644
--- a/make/modules/jdk.incubator.vector/Lib.gmk
+++ b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -50,7 +50,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, riscv64)+$(INCLUDE_COMPILER2
       EXTRA_SRC := libsleef/generated, \
       DISABLED_WARNINGS_gcc := unused-function sign-compare tautological-compare ignored-qualifiers, \
       DISABLED_WARNINGS_clang := unused-function sign-compare tautological-compare ignored-qualifiers, \
-      CFLAGS := -march=rv64gcv \
+      CFLAGS := -march=rv64gcv, \
   ))
 
   TARGETS += $(BUILD_LIBSLEEF)