diff --git a/example_data/example_5/config_example5.yml b/example_data/example_5/config_example5.yml index 19fc686..dee5aa7 100644 --- a/example_data/example_5/config_example5.yml +++ b/example_data/example_5/config_example5.yml @@ -7,7 +7,7 @@ Cytosol_Cellpose_TimecourseSegmentation: output_masks: 2 shard_size: 4000000 # Average number of pixel per tile. 10.000 * 10.000 pixel are recommended. Can be adapted to memory and computation needs. chunk_size: 50 # chunk size for chunked HDF5 storage. is needed for correct caching and high performance reading. should be left at 50. - cache: "." + cache: "/Users/sophia/Documents/GitHub/SPARCSpy/development/cache" lower_quantile_normalization: 0.001 upper_quantile_normalization: 0.999 median_filter_size: 4 # Size in pixels @@ -23,7 +23,7 @@ Multithreaded_Cytosol_Cellpose_TimecourseSegmentation: shard_size: 4000000 # Average number of pixel per tile. 10.000 * 10.000 pixel are recommended. Can be adapted to memory and computation needs. chunk_size: 50 # chunk size for chunked HDF5 storage. is needed for correct caching and high performance reading. should be left at 50. threads: 5 # number of shards / tiles segmented at the same size. should be adapted to the maximum amount allowed by memory. - cache: "." + cache: "/Users/sophia/Documents/GitHub/SPARCSpy/development/cache" lower_quantile_normalization: 0.001 upper_quantile_normalization: 0.999 median_filter_size: 4 # Size in pixels @@ -37,7 +37,7 @@ TimecourseHDF5CellExtraction: compression: True threads: 80 # threads used in multithreading image_size: 128 # image size in pixel - cache: "." 
+ cache: "/Users/sophia/Documents/GitHub/SPARCSpy/development/cache" hdf5_rdcc_nbytes: 5242880000 # 5gb 1024 * 1024 * 5000 hdf5_rdcc_w0: 1 hdf5_rdcc_nslots: 50000 \ No newline at end of file diff --git a/src/sparcscore/pipeline/base.py b/src/sparcscore/pipeline/base.py index b43fc8c..9e32c0f 100644 --- a/src/sparcscore/pipeline/base.py +++ b/src/sparcscore/pipeline/base.py @@ -2,6 +2,7 @@ import os import warnings import shutil +import tempfile class Logable(object): @@ -102,7 +103,7 @@ def __init__( self.directory = directory self.project_location = project_location self.config = config - + self.create_temp_dir() def __call__( self, *args, debug=None, intermediate_output=None, overwrite=None, **kwargs @@ -140,6 +141,9 @@ def __call__( return x else: warnings.warn("no process method defined") + + #after call is completed empty out temporary directories + self.clear_temp_dir() def __call_empty__( self, *args, debug=None, intermediate_output=None, overwrite=None, **kwargs @@ -176,6 +180,9 @@ def __call_empty__( else: warnings.warn("no return_empty_mask method defined") + #also clear empty temp directory here + self.clear_temp_dir() + def register_parameter(self, key, value): """ Registers a new parameter by updating the configuration dictionary if the key didn't exist. @@ -210,3 +217,30 @@ def get_directory(self): str: Directory path. """ return self.directory + + def create_temp_dir(self): + """ + Create a temporary directory in the cache directory specified in the config for saving all intermediate results. + If "cache" not specified in the config for the method no directory will be created. 
+ """ + global TEMP_DIR_NAME #this is the global variable name used within alphabase.io.tempmmap which is required to initialize a memory mapped temp array using this code + # NOTE(review): this `global` statement binds TEMP_DIR_NAME in this module's namespace - confirm that alphabase.io.tempmmap actually reads the value from here + if "cache" in self.config: + self._tmp_dir_path = os.path.join(self.config["cache"], f"{self.__class__.__name__}_") + self._tmp_dir = tempfile.TemporaryDirectory(prefix = self._tmp_dir_path) + self.log(f"Initialized temporary directory for saving all temp results at {self._tmp_dir_path}") + print(f"Initialized temporary directory for saving all temp results at {self._tmp_dir_path} for {self.__class__.__name__}") + TEMP_DIR_NAME = self._tmp_dir.name + else: + self.log("No cache directory specified in config. Skipping temporary directory creation") + + def clear_temp_dir(self): + """Delete the temporary directory created by create_temp_dir, if one exists.""" + # self._tmp_dir is a tempfile.TemporaryDirectory object, not a path, so it is removed via its own cleanup() rather than shutil.rmtree + if "_tmp_dir" in self.__dict__: + self._tmp_dir.cleanup() + self.log(f"Cleaned up temporary directory at {self._tmp_dir.name}") + # drop both attributes so that a repeated call takes the "not found" branch below + del self._tmp_dir, self._tmp_dir_path + else: + self.log("Temporary directory not found, skipping cleanup") \ No newline at end of file diff --git a/src/sparcscore/pipeline/extraction.py b/src/sparcscore/pipeline/extraction.py index 02e28f4..c4b1949 100644 --- a/src/sparcscore/pipeline/extraction.py +++ b/src/sparcscore/pipeline/extraction.py @@ -28,6 +28,7 @@ import timeit import matplotlib.pyplot as plt +from alphabase.io import tempmmap import _pickle as cPickle #to perform garbage collection @@ -226,10 +227,6 @@ def _initialize_tempmmap_array(self, index_len = 2): self.n_channels_output, self.config["image_size"], self.config["image_size"]) - - #import tempmmap module and reset temp folder location - from alphabase.io import tempmmap - TEMP_DIR_NAME = tempmmap.redefine_temp_location(self.config["cache"]) #generate container for single_cell_data _tmp_single_cell_data = tempmmap.array(shape = self.single_cell_data_shape, dtype = np.float16) @@ -240,8 +237,6 @@ def _initialize_tempmmap_array(self, index_len = 2): else: #use a 
regulary numpy array instead of a tempmmap array to be able to save strings as well as ints _tmp_single_cell_index = np.empty(self.single_cell_index_shape, dtype = "