Skip to content

Commit

Permalink
Improve azimuth data cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
axdanbol committed Mar 29, 2024
1 parent 4715cc9 commit b04eea6
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions containers/azimuth/context/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from pathlib import Path

import anndata
import numpy as np
import pandas
import scipy.sparse as sp_sparse

from src.algorithm import Algorithm, RunResult, add_common_arguments
from src.util.layers import set_data_layer
Expand Down Expand Up @@ -41,8 +43,9 @@ def do_run(
# after which the annotations are copied back to the original matrix
temp_index = self.create_temp_obs_index(data)
clean_matrix_path = Path("clean_matrix.h5ad")
clean_matrix = self.create_clean_matrix(data, temp_index)
clean_matrix = set_data_layer(clean_matrix, options["query_layers_key"])
clean_matrix = self.create_clean_matrix(
data, temp_index, options["query_layers_key"]
)
clean_matrix.write_h5ad(clean_matrix_path)

annotated_matrix_path = self.run_azimuth_scripts(
Expand Down Expand Up @@ -71,28 +74,34 @@ def create_temp_obs_index(self, matrix: anndata.AnnData) -> pandas.Index:
return matrix.obs.index.map(lambda name: f"QUERY:{name}")

def create_clean_matrix(
    self, matrix: anndata.AnnData, temp_index: pandas.Index, layer: t.Optional[str]
) -> anndata.AnnData:
    """Creates a copy of the data with all extra attributes removed.

    The input matrix is copied, so the caller's object is not the one
    being stripped. The copy keeps only its main data matrix (taken from
    the requested layer) and an empty observation table indexed by
    `temp_index`.

    Args:
        matrix (anndata.AnnData): Original data
        temp_index (pandas.Index): Temporary index generated by `create_temp_obs_index`
        layer (str | None): Data layer to use

    Returns:
        anndata.AnnData: Cleaned data
    """
    clean_obs = pandas.DataFrame(index=temp_index)
    clean_matrix = matrix.copy()

    # NOTE(review): the layer is applied before `layers` is cleared below;
    # presumably `set_data_layer` reads from `layers`, so keep this order.
    clean_matrix = set_data_layer(clean_matrix, layer)

    # Drop every annotation except the (empty) temporary-indexed obs table.
    clean_matrix.obs = clean_obs
    clean_matrix.obsm = None
    clean_matrix.obsp = None
    clean_matrix.varm = None
    clean_matrix.varp = None
    clean_matrix.raw = None
    clean_matrix.layers = None
    clean_matrix.uns = {}

    # Azimuth cannot properly load dense numpy arrays; it only handles
    # csc and csr sparse matrices, so a dense X is converted to csr.
    if isinstance(clean_matrix.X, np.ndarray):
        clean_matrix.X = sp_sparse.csr_matrix(clean_matrix.X)

    return clean_matrix

def copy_annotations(
Expand Down

0 comments on commit b04eea6

Please sign in to comment.