[SYSTEMDS-3694] Python NN Sequence and layer interface

This commit: - Adds a Layer interface for the Python API. - Changes the Affine and ReLU classes to extend this interface. - Fixes some small formatting issues in the modified classes. - Adds a Sequential primitive to the nn Python API, which is able to combine multiple nn layers into one sequential module. - Fixes the Python MultiReturn so that the outputs of an instance can be properly accessed. - Adds the backward pass to the Sequential primitive. - Adds variations of the Sequential tests involving MultiReturns. - Tests that the input gradient is set correctly on the backward pass, and fixes a bug where this was not the case for the affine layer. - Adds tests to verify that the layer gets updated correctly during the forward and backward pass. AMLS project SoSe'24 Closes apache#2025
Baunsgaard · Jul 1, 2024 · 8e1e53b · 8e1e53b
1 parent 8f2a18a
commit 8e1e53b
Show file tree

Hide file tree

Showing 8 changed files with 572 additions and 44 deletions.
diff --git a/src/main/python/systemds/operator/nn/affine.py b/src/main/python/systemds/operator/nn/affine.py
@@ -18,33 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import os
-
 from systemds.context import SystemDSContext
-from systemds.operator import Matrix, Source, MultiReturn
-from systemds.utils.helpers import get_path_to_script_layers
+from systemds.operator import Matrix, MultiReturn
+from systemds.operator.nn.layer import Layer
 
 
-class Affine:
- _source: Source = None
+class Affine(Layer):
  weight: Matrix
  bias: Matrix
 
- def __new__(cls, *args, **kwargs):
- return super().__new__(cls)
-
  def __init__(self, sds_context: SystemDSContext, d, m, seed=-1):
  """
  sds_context: The systemdsContext to construct the layer inside of
  d: The number of features that are input to the affine layer
  m: The number of neurons that are contained in the layer, 
  and the number of features output
  """
- Affine._create_source(sds_context)
-
- # bypassing overload limitation in python
- self.forward = self._instance_forward
- self.backward = self._instance_backward
+ super().__init__(sds_context, 'affine.dml')
+ self._X = None
 
  # init weight and bias
  self.weight = Matrix(sds_context, '')
@@ -64,7 +55,7 @@ def forward(X: Matrix, W: Matrix, b: Matrix):
  b: The bias added in the output.
  return out: An output matrix.
  """
- Affine._create_source(X.sds_context)
+ Affine._create_source(X.sds_context, "affine.dml")
  return Affine._source.forward(X, W, b)
 
  @staticmethod
@@ -77,7 +68,7 @@ def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix):
  return dX, dW, db: The gradients of: input X, weights and bias.
  """
  sds = X.sds_context
- Affine._create_source(sds)
+ Affine._create_source(sds, "affine.dml")
  params_dict = {'dout': dout, 'X': X, 'W': W, 'b': b}
  dX = Matrix(sds, '')
  dW = Matrix(sds, '')
@@ -104,11 +95,6 @@ def _instance_backward(self, dout: Matrix, X: Matrix):
  X: The input to this layer.
  return dX, dW,db: gradient of input, weights and bias, respectively
  """
- return Affine.backward(dout, X, self.weight, self.bias)
-
- @staticmethod
- def _create_source(sds: SystemDSContext):
- if Affine._source is None or Affine._source.sds_context != sds:
- path = get_path_to_script_layers()
- path = os.path.join(path, "affine.dml")
- Affine._source = sds.source(path, "affine")
+ gradients = Affine.backward(dout, X, self.weight, self.bias)
+ self._X = gradients[0]
+ return gradients
diff --git a/src/main/python/systemds/operator/nn/layer.py b/src/main/python/systemds/operator/nn/layer.py
@@ -0,0 +1,69 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import os
+
+from systemds.context import SystemDSContext
+from systemds.operator import Source
+from systemds.utils.helpers import get_path_to_script_layers
+
+
+class Layer:
+ """
+ Interface for neural network layers: caches a DML source per class and routes forward/backward on instances to _instance_forward/_instance_backward
+ """
+
+ _source: Source = None  # DML source cached at class level; assigned by _create_source
+
+ def __init__(self, sds_context: SystemDSContext = None, dml_script: str = None):  # the source is only created when both arguments are given
+ if sds_context is not None and dml_script is not None:
+ self.__class__._create_source(sds_context, dml_script)
+
+ # forward/backward exist as static methods on the class; rebinding them on the instance lets the same names dispatch to the instance variants
+ self.forward = self._instance_forward
+ self.backward = self._instance_backward
+
+ @classmethod
+ def _create_source(cls, sds_context: SystemDSContext, dml_script: str):
+ """
+ Create the SystemDS source for this class, unless one is already cached for the same context
+ :param sds_context: SystemDS context the source is created in
+ :param dml_script: file name of the DML script inside /scripts/nn/layers/
+ :return: nothing; the created source is stored in cls._source
+ """
+ if cls._source is None or cls._source.sds_context != sds_context:
+ script_path = get_path_to_script_layers()
+ path = os.path.join(script_path, dml_script)
+ name = dml_script.split(".")[0]  # source name is the script file name up to the first "."
+ cls._source = sds_context.source(path, name)
+
+ def _instance_forward(self, *args):
+ raise NotImplementedError  # to be provided by the concrete layer
+
+ def _instance_backward(self, *args):
+ raise NotImplementedError  # to be provided by the concrete layer
+
+ @staticmethod
+ def forward(*args):
+ raise NotImplementedError  # static variant; shadowed on instances by __init__
+
+ @staticmethod
+ def backward(*args):
+ raise NotImplementedError  # static variant; shadowed on instances by __init__
diff --git a/src/main/python/systemds/operator/nn/relu.py b/src/main/python/systemds/operator/nn/relu.py
@@ -18,28 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import os.path
-
 from systemds.context import SystemDSContext
 from systemds.operator import Matrix, Source
-from systemds.utils.helpers import get_path_to_script_layers
+from systemds.operator.nn.layer import Layer
 
 
-class ReLU:
+class ReLU(Layer):
  _source: Source = None
 
- def __init__(self, sds: SystemDSContext):
- ReLU._create_source(sds)
- self.forward = self._instance_forward
- self.backward = self._instance_backward
+ def __init__(self, sds_context: SystemDSContext):
+ super().__init__(sds_context, "relu.dml")
 
  @staticmethod
  def forward(X: Matrix):
  """
  X: input matrix
  return out: output matrix
  """
- ReLU._create_source(X.sds_context)
+ ReLU._create_source(X.sds_context, "relu.dml")
  return ReLU._source.forward(X)
 
  @staticmethod
@@ -49,7 +45,7 @@ def backward(dout: Matrix, X: Matrix):
  X: input matrix
  return dX: gradient of input
  """
- ReLU._create_source(dout.sds_context)
+ ReLU._create_source(dout.sds_context, "relu.dml")
  return ReLU._source.backward(dout, X)
 
  def _instance_forward(self, X: Matrix):
@@ -58,11 +54,3 @@ def _instance_forward(self, X: Matrix):
 
  def _instance_backward(self, dout: Matrix, X: Matrix):
  return ReLU.backward(dout, X)
-
- @staticmethod
- def _create_source(sds: SystemDSContext):
- if ReLU._source is None or ReLU._source.sds_context != sds:
- path = get_path_to_script_layers()
- path = os.path.join(path, "relu.dml")
- ReLU._source = sds.source(path, "relu")
-
diff --git a/src/main/python/systemds/operator/nn/sequential.py b/src/main/python/systemds/operator/nn/sequential.py
@@ -0,0 +1,97 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from systemds.operator import MultiReturn
+from systemds.operator.nn.layer import Layer
+
+
+class Sequential(Layer):  # container that chains the forward/backward calls of the layers it holds
+ def __init__(self, *args):  # accepts either a single list of layers or the layers as separate arguments
+ super().__init__()  # no context or script is passed, so no DML source is created here
+
+ self.layers = []
+ if len(args) == 1 and isinstance(args[0], list):
+ self.layers = args[0]  # NOTE(review): the caller's list is stored as-is, not copied, so push/pop also change it - confirm this is intended
+ else:
+ self.layers = list(args)
+
+ def __len__(self):
+ return len(self.layers)
+
+ def __getitem__(self, idx):
+ return self.layers[idx]
+
+ def __setitem__(self, idx, value):
+ self.layers[idx] = value
+
+ def __delitem__(self, idx):
+ del self.layers[idx]
+
+ def __iter__(self):
+ return iter(self.layers)
+
+ def __reversed__(self):
+ return reversed(self.layers)
+
+ def push(self, layer: Layer):
+ """
+ Append a layer to the end of the sequence
+ :param layer: Layer to append
+ :return:
+ """
+ self.layers.append(layer)
+
+ def pop(self):
+ """
+ Remove the last layer from the sequence and return it
+ :return: the removed Layer
+ """
+ return self.layers.pop()
+
+ def _instance_forward(self, X):
+ """
+ Forward pass: calls forward on each layer in order, feeding it the previous result
+ :param X: Input matrix
+ :return: output matrix
+ """
+ out = X
+ for layer in self:
+ out = layer.forward(out)
+
+ # if MultiReturn, take only output matrix
+ if isinstance(out, MultiReturn):
+ out = out[0]
+ return out
+
+ def _instance_backward(self, dout, X):
+ """
+ Backward pass: calls backward on each layer in reverse order, feeding it the previous gradient
+ :param dout: gradient of output, passed from the upstream
+ :param X: input matrix
+ :return: gradient of the input
+ """
+ dx = dout
+ for layer in reversed(self):
+ dx = layer.backward(dx, X)  # NOTE(review): every layer is given the same X that was passed to this method - confirm each layer does not need its own input
+
+ # if MultiReturn, take only gradient of input
+ if isinstance(dx, MultiReturn):
+ dx = dx[0]
+ return dx
diff --git a/src/main/python/systemds/operator/nodes/multi_return.py b/src/main/python/systemds/operator/nodes/multi_return.py
@@ -47,7 +47,7 @@ def __init__(self, sds_context, operation,
  named_input_nodes, OutputType.MULTI_RETURN, False)
 
  def __getitem__(self, key):
- self._outputs[key]
+ return self._outputs[key]
 
  def code_line(self, var_name: str, unnamed_input_vars: Sequence[str],
  named_input_vars: Dict[str, str]) -> str:

diff --git a/src/main/python/tests/nn/test_affine.py b/src/main/python/tests/nn/test_affine.py
@@ -77,6 +77,7 @@ def test_forward(self):
  out = affine.forward(Xm).compute()
  self.assertEqual(len(out), 5)
  self.assertEqual(len(out[0]), 6)
+ assert_almost_equal(affine._X.compute(), Xm.compute())
 
  # test static method
  out = Affine.forward(Xm, Wm, bm).compute()
@@ -91,10 +92,13 @@ def test_backward(self):
 
  # test class method
  affine = Affine(self.sds, dim, m, 10)
- [dx, dw, db] = affine.backward(doutm, Xm).compute()
+ gradients = affine.backward(doutm, Xm)
+ intermediate = affine._X.compute()
+ [dx, dw, db] = gradients.compute()
  assert len(dx) == 5 and len(dx[0]) == 6
  assert len(dw) == 6 and len(dx[0]) == 6
  assert len(db) == 1 and len(db[0]) == 6
+ assert_almost_equal(intermediate, dx)
 
  # test static method
  res = Affine.backward(doutm, Xm, Wm, bm).compute()