Skip to content

Commit

Permalink
cleanup log outputs to make more understandable
Browse files Browse the repository at this point in the history
  • Loading branch information
sophiamaedler committed Apr 27, 2024
1 parent eca69c7 commit 3f9ad76
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions src/sparcscore/pipeline/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import matplotlib.pyplot as plt

import _pickle as cPickle
#to perform garbage collection
import gc

class HDF5CellExtraction(ProcessingStep):
"""
Expand Down Expand Up @@ -753,9 +755,6 @@ def _transfer_tempmmap_to_hdf5(self):

self.log("Transferring results to final HDF5 data container.")

self.log(f"number of cells too close to image edges to extract: {len(self.save_index_to_remove)}")
self.log(f"{_tmp_single_cell_data.shape} shape of single-cell data before removing cells to close to image edges")

#generate final index of all of the rows that we wish to keep out of the original array
keep_index = np.setdiff1d(np.arange(_tmp_single_cell_index.shape[0]), self.save_index_to_remove)

Expand All @@ -765,6 +764,7 @@ def _transfer_tempmmap_to_hdf5(self):
self.log("Creating HDF5 file to save results to.")

with h5py.File(self.output_path, 'w') as hf:
self.log(f"Transferring extended labelling to ['single_cell_index_labelled'] container.")
#create special datatype for storing strings
dt = h5py.special_dtype(vlen=str)

Expand All @@ -779,6 +779,7 @@ def _transfer_tempmmap_to_hdf5(self):
hf.create_dataset('single_cell_index_labelled', data = index_labelled, chunks = None, dtype = dt)
del index_labelled #cleanup to free up memory

self.log(f"Transferring extracted single cells to ['single_cell_data'] container.")
_, c, x, y = _tmp_single_cell_data.shape
single_cell_data = hf.create_dataset('single_cell_data',
shape = (len(keep_index), c, x, y),
Expand All @@ -793,37 +794,37 @@ def _transfer_tempmmap_to_hdf5(self):
#this is required to process large datasets to not run into memory issues
for ix, i in enumerate(keep_index):
single_cell_data[ix] = _tmp_single_cell_data[i]

self.log(f"Transferring exracted single cells to .hdf5")


with h5py.File(self.output_path, 'a') as hf:

self.log(f"Transferring simple cell_id index to ['single_cell_index'] container.")
#need to save this index separately since otherwise we get issues with the classification of the extracted cells
cell_ids = _tmp_single_cell_index[keep_index, 1]
index = np.array(list(zip(range(len(cell_ids)), cell_ids)))
index = index.astype("uint64")

hf.create_dataset('single_cell_index', data = index, dtype="uint64")
del index

#delete tempobjects (to cleanup directory)
self.log(f"Tempmmap Folder location {self.TEMP_DIR_NAME} will now be removed.")
shutil.rmtree(self.TEMP_DIR_NAME, ignore_errors=True)

del _tmp_single_cell_data, _tmp_single_cell_index, self.TEMP_DIR_NAME
del _tmp_single_cell_data, _tmp_single_cell_index, keep_index, self.TEMP_DIR_NAME
gc.collect()

def _save_cell_info(self, index, cell_id, image_index, label_info, stack):
global _tmp_single_cell_data, _tmp_single_cell_index
#label info is None so just ignore for the base case

#save single cell images
_tmp_single_cell_data[index] = stack
# print("index:", index)
# import matplotlib.pyplot as plt

# for i in stack:
# plt.figure()
# plt.imshow(i)
# plt.show()
# #perform check to see if stack only contains zeros
# if np.all(stack == 0):
# self.log(f"Cell with the index {index} only contains zeros. Skipping this cell.")
# self.save_index_to_remove.append(index)
# return

#get label information
with h5py.File(self.input_segmentation_path, "r") as hf:
Expand Down Expand Up @@ -974,7 +975,8 @@ def process(self, input_segmentation_path, filtered_classes_path = None):

for centers_index, cell_id in enumerate(_cell_ids):
save_index = lookup_saveindex.index.get_loc(cell_id)
self._extract_classes(input_segmentation_path, px_centers, (centers_index, save_index, cell_id, image_index, label_info))
x = self._extract_classes(input_segmentation_path, px_centers, (centers_index, save_index, cell_id, image_index, label_info))
self.save_index_to_remove.extend(x)
else:
self.log(f"Image with the image_index {image_index} doesn't contain any cells. Skipping this image.")

Expand Down

0 comments on commit 3f9ad76

Please sign in to comment.