Merge pull request #1803 from amcadmus/master

Merge devel into master
deepmodeling · Jul 1, 2022 · 5a32c49 · 5a32c49
2 parents 03c8742 + e8a57ca
commit 5a32c49
Show file tree

Hide file tree

Showing 107 changed files with 6,552 additions and 246 deletions.
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Build wheels
         env:
           CIBW_BUILD: "cp36-* cp37-* cp38-* cp39-* cp310-*"
-          CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/deepmodeling/manylinux2014_x86_64_tensorflow
+          CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/deepmodeling/manylinux_2_24_x86_64_tensorflow
           CIBW_BEFORE_BUILD: pip install tensorflow
           CIBW_SKIP: "*-win32 *-manylinux_i686 *-musllinux*"
         run: |

diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml
@@ -21,7 +21,7 @@ jobs:
       run: pip install -r requirements.txt
     - uses: marian-code/python-lint-annotate@v2.5.0
       with:
-        python-root-list: "./deepmd/*.py ./deepmd/*/*.py ./source/train/*.py ./source/tests/*.py ./source/op/*.py"
+        python-root-list: "./deepmd/*.py ./deepmd/*/*.py ./deepmd/*/*/*.py ./source/train/*.py ./source/tests/*.py ./source/op/*.py"
         use-black: true
         use-isort: true
         use-mypy: true

diff --git a/README.md b/README.md
@@ -124,6 +124,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Run path-integral MD with i-PI](doc/third-party/ipi.md)
     - [Run MD with GROMACS](doc/third-party/gromacs.md)
     - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md)
+- [Use NVNMD](doc/nvnmd/index.md)
 
 # Code structure
 

diff --git a/deepmd/__init__.py b/deepmd/__init__.py
@@ -6,7 +6,7 @@
     import importlib_metadata as metadata
 import deepmd.utils.network as network
 
-from . import cluster, descriptor, fit, loss, utils
+from . import cluster, descriptor, fit, loss, utils, nvnmd
 from .env import set_mkl
 from .infer import DeepEval, DeepPotential
 from .infer.data_modifier import DipoleChargeModifier
@@ -32,4 +32,5 @@
     "DeepEval",
     "DeepPotential",
     "DipoleChargeModifier",
+    "nvnmd",
 ]
diff --git a/deepmd/common.py b/deepmd/common.py
@@ -34,7 +34,7 @@
         from typing import Literal  # python >3.6
     except ImportError:
         from typing_extensions import Literal  # type: ignore
-    _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu"]
+    _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"]
     _PRECISION = Literal["default", "float16", "float32", "float64"]
 
 # define constants
@@ -49,7 +49,7 @@
 def gelu(x: tf.Tensor) -> tf.Tensor:
     """Gaussian Error Linear Unit.
 
-    This is a smoother version of the RELU.
+    This is a smoother version of the RELU, implemented by custom operator.
 
     Parameters
     ----------
@@ -58,7 +58,31 @@ def gelu(x: tf.Tensor) -> tf.Tensor:
 
     Returns
     -------
-    `x` with the GELU activation applied
+    tf.Tensor
+        `x` with the GELU activation applied
+
+    References
+    ----------
+    Original paper
+    https://arxiv.org/abs/1606.08415
+    """
+    return op_module.gelu(x)
+
+
+def gelu_tf(x: tf.Tensor) -> tf.Tensor:
+    """Gaussian Error Linear Unit.
+
+    This is a smoother version of the RELU, implemented by TF.
+
+    Parameters
+    ----------
+    x : tf.Tensor
+        float Tensor to perform activation
+
+    Returns
+    -------
+    tf.Tensor
+        `x` with the GELU activation applied
 
     References
     ----------
@@ -69,10 +93,10 @@ def gelu_wrapper(x):
         try:
             return tensorflow.nn.gelu(x, approximate=True)
         except AttributeError:
+            warnings.warn("TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator.")
             return op_module.gelu(x)
     return (lambda x: gelu_wrapper(x))(x)
 
-
 # TODO this is not a good way to do things. This is some global variable to which
 # TODO anyone can write and there is no good way to keep track of the changes
 data_requirement = {}
@@ -84,6 +108,7 @@ def gelu_wrapper(x):
     "sigmoid": tf.sigmoid,
     "tanh": tf.nn.tanh,
     "gelu": gelu,
+    "gelu_tf": gelu_tf,
 }
 
 

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
@@ -17,6 +17,9 @@
 from .descriptor import Descriptor
 from .se import DescrptSe
 
+from deepmd.nvnmd.descriptor.se_a import descrpt2r4, build_davg_dstd, build_op_descriptor, filter_lower_R42GR, filter_GR2D
+from deepmd.nvnmd.utils.config import nvnmd_cfg 
+
 @Descriptor.register("se_e2_a")
 @Descriptor.register("se_a")
 class DescrptSeA (DescrptSe):
@@ -412,6 +415,7 @@ def build (self,
         """
         davg = self.davg
         dstd = self.dstd
+        if nvnmd_cfg.enable and nvnmd_cfg.restore_descriptor: davg, dstd = build_davg_dstd()
         with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) :
             if davg is None:
                 davg = np.zeros([self.ntypes, self.ndescrpt]) 
@@ -448,8 +452,9 @@ def build (self,
         box   = tf.reshape (box_, [-1, 9])
         atype = tf.reshape (atype_, [-1, natoms[1]])
 
+        op_descriptor = build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a
         self.descrpt, self.descrpt_deriv, self.rij, self.nlist \
-            = op_module.prod_env_mat_a (coord,
+            = op_descriptor           (coord,
                                        atype,
                                        natoms,
                                        box,
@@ -576,6 +581,8 @@ def _pass_filter(self,
             inputs_i = inputs
             inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt])
             type_i = -1
+            if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: 
+                inputs_i = descrpt2r4(inputs_i, natoms)
             layer, qmat = self._filter(inputs_i, type_i, name='filter_type_all'+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn, type_embedding=type_embedding)
             layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()])
             qmat  = tf.reshape(qmat,  [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3])
@@ -717,6 +724,14 @@ def _filter_lower(
             if self.compress:
                 raise RuntimeError('compression of type embedded descriptor is not supported at the moment')
         # natom x 4 x outputs_size
+        if nvnmd_cfg.enable:
+          return filter_lower_R42GR(
+            type_i, type_input, inputs_i, is_exclude,
+            activation_fn, bavg, stddev, trainable, 
+            suffix, self.seed, self.seed_shift, self.uniform_seed,
+            self.filter_neuron, self.filter_precision, self.filter_resnet_dt,
+            self.embedding_net_variables
+          )
         if self.compress and (not is_exclude):
             if self.type_one_side:
                 net = 'filter_-1_net_' + str(type_i)
@@ -825,6 +840,7 @@ def _filter(
                   stddev = stddev,
                   bavg = bavg,
                   trainable = trainable)
+          if nvnmd_cfg.enable: return filter_GR2D(xyz_scatter_1)
           # natom x nei x outputs_size
           # xyz_scatter = tf.concat(xyz_scatter_total, axis=1)
           # natom x nei x 4

diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py
@@ -19,6 +19,8 @@
 
 from typing import List, Optional
 
+from deepmd.nvnmd.entrypoints.freeze import save_weight
+
 __all__ = ["freeze"]
 
 log = logging.getLogger(__name__)
@@ -160,7 +162,7 @@ def _make_node_names(model_type: str, modifier_type: Optional[str] = None) -> Li
 
 
 def freeze(
-    *, checkpoint_folder: str, output: str, node_names: Optional[str] = None, **kwargs
+    *, checkpoint_folder: str, output: str, node_names: Optional[str] = None, nvnmd_weight: Optional[str] = None, **kwargs
 ):
     """Freeze the graph in supplied folder.
 
@@ -237,6 +239,9 @@ def freeze(
             output_node_list = node_names.split(",")
         log.info(f"The following nodes will be frozen: {output_node_list}")
 
+        if nvnmd_weight is not None:
+            save_weight(sess, nvnmd_weight) # nvnmd
+
         # We use a built-in TF helper to export variables to constants
         output_graph_def = tf.graph_util.convert_variables_to_constants(
             sess,  # The session is used to retrieve the weights

diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
@@ -20,6 +20,8 @@
 )
 from deepmd.loggers import set_log_handles
 
+from deepmd.nvnmd.entrypoints.train import train_nvnmd
+
 __all__ = ["main", "parse_args", "get_ll", "main_parser"]
 
 
@@ -204,6 +206,13 @@ def main_parser() -> argparse.ArgumentParser:
         default=None,
         help="the frozen nodes, if not set, determined from the model type",
     )
+    parser_frz.add_argument(
+        "-w",
+        "--nvnmd-weight",
+        type=str,
+        default=None,
+        help="the name of weight file (.npy), if set, save the model's weight into the file",
+    )
 
     # * test script ********************************************************************
     parser_tst = subparsers.add_parser(
@@ -436,9 +445,28 @@ def main_parser() -> argparse.ArgumentParser:
         required=True,
         help="type map",
     )
-        
+
     # --version
     parser.add_argument('--version', action='version', version='DeePMD-kit v%s' % __version__)
+
+    # * train nvnmd script ******************************************************************
+    parser_train_nvnmd = subparsers.add_parser(
+        "train-nvnmd",
+        parents=[parser_log],
+        help="train nvnmd model",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser_train_nvnmd.add_argument(
+        "INPUT", help="the input parameter file in json format"
+    )
+    parser_train_nvnmd.add_argument(
+        "-s",
+        "--step",
+        default="s1",
+        type=str,
+        choices=['s1', 's2'],
+        help="steps to train model of NVNMD: s1 (train CNN), s2 (train QNN)"
+    )
     return parser
 
 
@@ -504,6 +532,8 @@ def main():
         convert(**dict_args)
     elif args.command == "neighbor-stat":
         neighbor_stat(**dict_args)
+    elif args.command == "train-nvnmd":  # nvnmd
+        train_nvnmd(**dict_args)
     elif args.command is None:
         pass
     else:

diff --git a/deepmd/env.py b/deepmd/env.py
@@ -281,13 +281,21 @@ def get_module(module_name: str) -> "ModuleType":
                         TF_VERSION,
                         tf_py_version,
                     )) from e
-            raise RuntimeError(
+            error_message = (
                 "This deepmd-kit package is inconsitent with TensorFlow "
                 "Runtime, thus an error is raised when loading %s. "
                 "You need to rebuild deepmd-kit against this TensorFlow "
                 "runtime." % (
                     module_name,
-                )) from e
+                )
+            )
+            if TF_CXX11_ABI_FLAG == 1:
+                # #1791
+                error_message += (
+                    "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. "
+                    "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704"
+                )
+            raise RuntimeError(error_message) from e
         return module
 
 

diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
@@ -5,14 +5,18 @@
 
 from deepmd.env import tf
 from deepmd.common import add_data_requirement, get_activation_func, get_precision, cast_precision
-from deepmd.utils.network import one_layer, one_layer_rand_seed_shift
+from deepmd.utils.network import one_layer_rand_seed_shift
+from deepmd.utils.network import one_layer as one_layer_deepmd
 from deepmd.utils.type_embed import embed_atom_type
 from deepmd.utils.graph import get_fitting_net_variables_from_graph_def, load_graph_def, get_tensor_by_name_from_graph
 from deepmd.fit.fitting import Fitting
 
 from deepmd.env import global_cvt_2_tf_float
 from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, TF_VERSION
 
+from deepmd.nvnmd.utils.config import nvnmd_cfg
+from deepmd.nvnmd.fit.ener import one_layer_nvnmd
+
 class EnerFitting (Fitting):
     r"""Fitting the energy of the system. The force and the virial can also be trained.
 
@@ -291,8 +295,12 @@ def _build_lower(
             ext_aparam = tf.cast(ext_aparam,self.fitting_precision)
             layer = tf.concat([layer, ext_aparam], axis = 1)
 
+        if nvnmd_cfg.enable: 
+            one_layer = one_layer_nvnmd
+        else:
+            one_layer = one_layer_deepmd
         for ii in range(0,len(self.n_neuron)) :
-            if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] :
+            if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] and (not nvnmd_cfg.enable):
                 layer+= one_layer(
                     layer,
                     self.n_neuron[ii],

diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
@@ -1,5 +1,6 @@
 import os
 from typing import List, Optional, TYPE_CHECKING, Union
+from functools import lru_cache
 
 import numpy as np
 from deepmd.common import make_default_mesh
@@ -27,8 +28,6 @@ class DeepEval:
         as the initial batch size.
     """
 
-    _model_type: Optional[str] = None
-    _model_version: Optional[str] = None
     load_prefix: str  # set by subclass
 
     def __init__(
@@ -64,19 +63,19 @@ def __init__(
             raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
 
     @property
+    @lru_cache(maxsize=None)
     def model_type(self) -> str:
         """Get type of model.
 
         :type:str
         """
-        if not self._model_type:
-            t_mt = self._get_tensor("model_attr/model_type:0")
-            sess = tf.Session(graph=self.graph, config=default_tf_session_config)
-            [mt] = run_sess(sess, [t_mt], feed_dict={})
-            self._model_type = mt.decode("utf-8")
-        return self._model_type
+        t_mt = self._get_tensor("model_attr/model_type:0")
+        sess = tf.Session(graph=self.graph, config=default_tf_session_config)
+        [mt] = run_sess(sess, [t_mt], feed_dict={})
+        return mt.decode("utf-8")
 
     @property
+    @lru_cache(maxsize=None)
     def model_version(self) -> str:
         """Get version of model.
 
@@ -85,17 +84,15 @@ def model_version(self) -> str:
         str
             version of model
         """
-        if not self._model_version:
-            try:
-                t_mt = self._get_tensor("model_attr/model_version:0")
-            except KeyError:
-                # For deepmd-kit version 0.x - 1.x, set model version to 0.0
-                self._model_version = "0.0"
-            else:
-                sess = tf.Session(graph=self.graph, config=default_tf_session_config)
-                [mt] = run_sess(sess, [t_mt], feed_dict={})
-                self._model_version = mt.decode("utf-8")
-        return self._model_version    
+        try:
+            t_mt = self._get_tensor("model_attr/model_version:0")
+        except KeyError:
+            # For deepmd-kit version 0.x - 1.x, set model version to 0.0
+            return "0.0"
+        else:
+            sess = tf.Session(graph=self.graph, config=default_tf_session_config)
+            [mt] = run_sess(sess, [t_mt], feed_dict={})
+            return mt.decode("utf-8")
 
     def _graph_compatable(
         self