From 6b260e4a646a6c27849ba1b9cc5b4c27ed44200a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sophia=20M=C3=A4dler?=
 <15019107+sophiamaedler@users.noreply.github.com>
Date: Thu, 25 Apr 2024 17:29:32 +0200
Subject: [PATCH] Fix timecourse extraction code to properly generate labelled
 index accounting for cells that cannot be extracted because they are too
 close to slide edges

Final fix for issues that resulted in "empty" cells being generated without proper labelling.
---
 src/sparcscore/pipeline/extraction.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/sparcscore/pipeline/extraction.py b/src/sparcscore/pipeline/extraction.py
index 3a50391..f99aaa6 100644
--- a/src/sparcscore/pipeline/extraction.py
+++ b/src/sparcscore/pipeline/extraction.py
@@ -781,7 +781,7 @@ def _transfer_tempmmap_to_hdf5(self):
             
             #generate index data container
             index_labelled = _tmp_single_cell_index[keep_index]
-            index_labelled = pd.DataFrame(index_labelled)[1:].reset_index().values #need to reset the lookup index so that it goes up sequentially
+            index_labelled = pd.DataFrame(index_labelled).iloc[:, 1:].reset_index(drop = True).values #need to reset the lookup index so that it goes up sequentially
             index_labelled = np.char.encode(index_labelled.astype(str))
 
             hf.create_dataset('single_cell_index_labelled', data = index_labelled, chunks = None, dtype = dt)
@@ -807,8 +807,7 @@ def _transfer_tempmmap_to_hdf5(self):
         with h5py.File(self.output_path, 'a') as hf:
             
             #need to save this index seperately since otherwise we get issues with the classificaiton of the extracted cells
-            index = _tmp_single_cell_index[keep_index, 0:2]
-            _, cell_ids = index.T
+            cell_ids = _tmp_single_cell_index[keep_index, 1]
             index = np.array(list(zip(range(len(cell_ids)), cell_ids)))
             index = index.astype("uint64")