Skip to content

Commit

Permalink
[MINOR] add SVD and Eigen to Python API
Browse files Browse the repository at this point in the history
This commit adds SVD and Eigen to the Python API.

Closes apache#2035
  • Loading branch information
Baunsgaard committed Jun 20, 2024
1 parent 6081016 commit 5ad67e8
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/main/python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ tests/lineage/temp

python_venv/
venv
.venv

# Main Jar location for API communication.
systemds/SystemDS.jar
Expand Down
5 changes: 4 additions & 1 deletion src/main/python/systemds/operator/nodes/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
import numpy as np
import pandas as pd
from py4j.java_gateway import JavaObject, JVMView
from systemds.operator import Matrix, MultiReturn, OperationNode, Scalar
from systemds.operator.operation_node import OperationNode
from systemds.operator.nodes.multi_return import MultiReturn
from systemds.operator.nodes.scalar import Scalar
from systemds.operator.nodes.matrix import Matrix
from systemds.script_building.dag import DAGNode, OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
from systemds.utils.converters import (frame_block_to_pandas,
Expand Down
3 changes: 2 additions & 1 deletion src/main/python/systemds/operator/nodes/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@

import numpy as np
from py4j.java_gateway import JavaObject
from systemds.operator import ListAccess, OperationNode
from systemds.operator.operation_node import OperationNode
from systemds.operator.nodes.list_access import ListAccess
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES
from systemds.utils.converters import numpy_to_matrix_block
Expand Down
5 changes: 4 additions & 1 deletion src/main/python/systemds/operator/nodes/list_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
__all__ = ["ListAccess"]

from typing import Dict, Sequence
from systemds.operator import Frame, Matrix, OperationNode, Scalar
from systemds.operator.operation_node import OperationNode
from systemds.operator.nodes.matrix import Matrix
from systemds.operator.nodes.scalar import Scalar
from systemds.operator.nodes.frame import Frame
from systemds.script_building.dag import OutputType


Expand Down
47 changes: 46 additions & 1 deletion src/main/python/systemds/operator/nodes/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@

import numpy as np
from py4j.java_gateway import JavaObject
from systemds.operator import OperationNode, Scalar
from systemds.operator.operation_node import OperationNode
from systemds.operator.nodes.multi_return import MultiReturn
from systemds.operator.nodes.scalar import Scalar
from systemds.script_building.dag import OutputType
from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES,
VALID_INPUT_TYPES)
Expand Down Expand Up @@ -342,6 +344,49 @@ def cholesky(self, safe: bool = False) -> 'Matrix':
:return: the OperationNode representing this operation
"""
return Matrix(self.sds_context, 'cholesky', [self])


def svd(self) -> 'MultiReturn':
    """
    Compute the Singular Value Decomposition (SVD) of this matrix.

    A matrix A (of size m x n) is decomposed into three matrices U, S, and V
    such that A = U %*% S %*% t(V), where U is an m x m unitary matrix
    (i.e., orthogonal), V is an n x n unitary matrix (also orthogonal),
    and S is an m x n matrix with non-negative real numbers on the diagonal.

    :return: The MultiReturn node containing the three Matrices U, S, and V
        (in that order)
    """
    # One empty-operation Matrix node per output; the list order is the order
    # in which the outputs are handed to the MultiReturn node.
    U = Matrix(self.sds_context, '')
    S = Matrix(self.sds_context, '')
    V = Matrix(self.sds_context, '')
    output_nodes = [U, S, V]

    return MultiReturn(self.sds_context, 'svd', output_nodes,
                       unnamed_input_nodes=[self])


def eigen(self) -> 'MultiReturn':
    """
    Compute the Eigen decomposition of this matrix.

    The Eigen decomposition of an input matrix A consists of two matrices
    V and w such that A = V %*% diag(w) %*% t(V). The columns of V are the
    eigenvectors of the original matrix A, and the eigenvalues are given by w.

    It is important to note that this function can operate only on
    small-to-medium sized input matrices that fit in main memory. For larger
    matrices, an out-of-memory exception is raised.

    This function returns two matrices w and V, where w is (m x 1) and V is
    of size (m x m).

    :return: The MultiReturn node containing the two Matrices w and V
        (in that order)
    """
    # One empty-operation Matrix node per output; eigenvalues come first,
    # eigenvectors second, matching the order documented above.
    w = Matrix(self.sds_context, '')
    V = Matrix(self.sds_context, '')
    output_nodes = [w, V]

    return MultiReturn(self.sds_context, 'eigen', output_nodes,
                       unnamed_input_nodes=[self])


def to_one_hot(self, num_classes: int) -> 'Matrix':
""" OneHot encode the matrix.
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/systemds/operator/nodes/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

import numpy as np
from py4j.java_gateway import JavaObject, JVMView
from systemds.operator import OperationNode
from systemds.operator.operation_node import OperationNode
from systemds.script_building.dag import OutputType
from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES,
VALID_INPUT_TYPES)
Expand Down
49 changes: 49 additions & 0 deletions src/main/python/tests/matrix/test_eigen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

import unittest
import numpy as np
from systemds.context import SystemDSContext

class TestEigen(unittest.TestCase):
    """Smoke test for the Eigen decomposition exposed on the Python Matrix API."""

    def setUp(self):
        # A fresh context per test keeps the tests independent of each other.
        self.sds = SystemDSContext()

    def tearDown(self):
        self.sds.close()

    def test_eigen_basic(self):
        """eigen() on a 4 x 4 input yields w of shape (4, 1) and V of shape (4, 4)."""
        input_matrix = np.array([[1, 2, 3, 4],
                                 [5, 6, 7, 8],
                                 [9, 10, 11, 12],
                                 [13, 14, 15, 16]])

        sds_input = self.sds.from_numpy(input_matrix)
        w, V = sds_input.eigen().compute()

        # Shapes follow the contract documented on Matrix.eigen:
        # w is (m x 1) and V is (m x m).
        m = input_matrix.shape[0]
        self.assertEqual(w.shape, (m, 1))
        self.assertEqual(V.shape, (m, m))
        # TODO add a proper numerical verification.
        # Currently the numerical correctness relies on internal testing only.


if __name__ == '__main__':
    unittest.main()
2 changes: 1 addition & 1 deletion src/main/python/tests/matrix/test_fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,4 +330,4 @@ def test_ifft_non_power_of_two_matrix(self):
_ = self.sds.ifft(sds_real_input, sds_imag_input).compute()

if __name__ == '__main__':
unittest.main()
unittest.main()
2 changes: 1 addition & 1 deletion src/main/python/tests/matrix/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,4 @@ def test_slice_row_col_both(self):
self.sds.from_numpy(m)[[1, 2], [0, 3]]

if __name__ == "__main__":
unittest.main(exit=False)
unittest.main(exit=False)
49 changes: 49 additions & 0 deletions src/main/python/tests/matrix/test_svd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

import unittest
import numpy as np
from systemds.context import SystemDSContext

class TestSVD(unittest.TestCase):
    """Smoke test for the Singular Value Decomposition on the Python Matrix API."""

    def setUp(self):
        # A fresh context per test keeps the tests independent of each other.
        self.sds = SystemDSContext()

    def tearDown(self):
        self.sds.close()

    def test_svd_basic(self):
        """svd() on a 4 x 4 input yields U, S, V that reconstruct the input."""
        input_matrix = np.array([[1, 2, 3, 4],
                                 [5, 6, 7, 8],
                                 [9, 10, 11, 12],
                                 [13, 14, 15, 16]])

        sds_input = self.sds.from_numpy(input_matrix)
        U, S, V = sds_input.svd().compute()

        # Shapes follow the contract documented on Matrix.svd:
        # U is (m x m), S is (m x n) and V is (n x n).
        m, n = input_matrix.shape
        self.assertEqual(U.shape, (m, m))
        self.assertEqual(S.shape, (m, n))
        self.assertEqual(V.shape, (n, n))

        # The decomposition must satisfy A = U %*% S %*% t(V).
        self.assertTrue(np.allclose(U @ S @ V.T, input_matrix))


if __name__ == '__main__':
    unittest.main()

0 comments on commit 5ad67e8

Please sign in to comment.