Skip to content

Commit

Permalink
Update versions of dependencies and constraints (#65)
Browse files Browse the repository at this point in the history
* fixed some linting

* address a warning raised during tests

* update deps

* bump fcs_parser version

* allow also pandas 1.*

* poetry upgrade

* Update pyproject.toml

---------

Co-authored-by: Eugene Yurtsev <[email protected]>
  • Loading branch information
cebasfu93 and eyurtsev committed Oct 17, 2023
1 parent 7b44cae commit 0022ed9
Show file tree
Hide file tree
Showing 5 changed files with 807 additions and 699 deletions.
2 changes: 1 addition & 1 deletion fcsparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

from .version import __version__
from .api import parse
from .version import __version__

test_sample_path = os.path.join(
os.path.abspath(os.path.dirname(__file__)),
Expand Down
69 changes: 45 additions & 24 deletions fcsparser/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
Distributed under the MIT License.
Useful documentation for dtypes in numpy
http://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.byteswap.html?highlight=byteswap#numpy.ndarray.byteswap # noqa
http://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.byteswap.html?highlight=byteswap#numpy.ndarray.byteswap
http://docs.scipy.org/doc/numpy/user/basics.types.html
http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html
"""
from __future__ import division

import contextlib
import logging
from io import BytesIO
import string
import sys
import warnings
from io import BytesIO

import numpy
import pandas as pd
Expand Down Expand Up @@ -132,8 +132,10 @@ def __init__(
Compatible with most FCS 2.0, 3.0, 3.1 files.
self.annotation: a dictionary holding the parsed content of the TEXT segment
In addition, a key called __header__ has been added to this dictionary
It specifies the information parsed from the FCS file HEADER segment.
In addition, a key called __header__
has been added to this dictionary.
It specifies the information parsed
from the FCS file HEADER segment.
(This won't be necessary for most users.)
self.data holds the parsed DATA segment
Expand All @@ -152,7 +154,8 @@ def __init__(
channel_naming: '$PnS' | '$PnN'
Determines which meta data field is used for naming the channels.
The default should be $PnS (even though it is not guaranteed to be unique)
The default should be $PnS
(even though it is not guaranteed to be unique)
$PnN stands for the short name (guaranteed to be unique).
Will look like 'FL1-H'
Expand All @@ -165,7 +168,8 @@ def __init__(
The program attempts to use the alternative field by default.
Note: These names are not flipped in the implementation.
It looks like they were swapped for some reason in the official FCS specification.
It looks like they were swapped
for some reason in the official FCS specification.
data_set: int
Index of retrieved data set in the fcs file.
This value specifies the data set being retrieved from an fcs file with
Expand Down Expand Up @@ -239,12 +243,14 @@ def from_data(cls, data):
def read_header(self, file_handle, nextdata_offset=0):
"""Read the header of the FCS file.
The header specifies where the annotation, data and analysis are located inside the binary
The header specifies where the annotation, data and analysis
are located inside the binary
file.
Args:
file_handle: buffer containing FCS file.
nextdata_offset: byte offset of a set header from file start specified by $NEXTDATA
nextdata_offset: byte offset of a set header
from file start specified by $NEXTDATA
"""
header = {"FCS format": file_handle.read(6)}

Expand Down Expand Up @@ -309,7 +315,8 @@ def _extract_text_dict(raw_text):
raw_text = raw_text.strip()
if raw_text[-1] != delimiter:
msg = (
"The first two characters were:\n {}. The last two characters were: {}\n"
"The first two characters were:\n {}. "
"The last two characters were: {}\n"
"Parser expects the same delimiter character in beginning "
"and end of TEXT segment. "
"This file may be parsed incorrectly!".format(
Expand All @@ -323,18 +330,20 @@ def _extract_text_dict(raw_text):
else:
raw_text = raw_text[1:-1]

# 1:-1 above removes the first and last characters which are reserved for the delimiter.
# 1:-1 above removes the first and last characters which are reserved
# for the delimiter.

# The delimiter is escaped by being repeated (two consecutive delimiters). This code splits
# on the escaped delimiter first, so there is no need for extra logic to distinguish
# The delimiter is escaped by being repeated (two consecutive delimiters).
# This code splits on the escaped delimiter first,
# so there is no need for extra logic to distinguish
# actual delimiters from escaped delimiters.
nested_split_list = [x.split(delimiter) for x in raw_text.split(delimiter * 2)]

# Flatten the nested list to a list of elements (alternating keys and values)
raw_text_elements = nested_split_list[0]
for partial_element_list in nested_split_list[1:]:
# Rejoin two parts of an element that was split by an escaped delimiter (the end and
# start of two successive sub-lists in nested_split_list)
# Rejoin two parts of an element that was split by an escaped delimiter
# (the end and start of two successive sub-lists in nested_split_list)
raw_text_elements[-1] += delimiter + partial_element_list[0]
raw_text_elements.extend(partial_element_list[1:])

Expand Down Expand Up @@ -397,7 +406,9 @@ def read_text(self, file_handle):
self.channel_names_s = tuple(channel_names_s)

# Convert some of the fields into integer values
keys_encoding_bits = [f"$P{channel_number}B" for channel_number in self.channel_numbers]
keys_encoding_bits = [
f"$P{channel_number}B" for channel_number in self.channel_numbers
]

add_keys_to_convert_to_int = ["$NEXTDATA", "$PAR", "$TOT"]

Expand Down Expand Up @@ -544,7 +555,8 @@ def read_data(self, file_handle):
# and a type; i.e.,
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.recarray.html
# The names are assigned automatically.
# In order for this code to work correctly with the pandas DataFrame constructor,
# In order for this code to work correctly
# with the pandas DataFrame constructor,
# we convert the *names* of the dtypes to the channel names we want to use.

names = self.get_channel_names()
Expand Down Expand Up @@ -615,7 +627,8 @@ def analysis(self):
def reformat_meta(self):
"""Collect the meta data information in a more user friendly format.
Function looks through the meta data, collecting the channel related information into a
Function looks through the meta data,
collecting the channel related information into a
dataframe and moving it into the _channels_ key.
"""
meta = self.annotation # For shorthand (passed by reference)
Expand All @@ -626,7 +639,8 @@ def reformat_meta(self):
if key[3] not in string.digits:
channel_properties.append(key[3:])

# Capture all the channel information in a list of lists -- used to create a data frame
# Capture all the channel information in a list of lists --
# used to create a data frame
channel_matrix = [
[meta.get("$P{0}{1}".format(ch, p)) for p in channel_properties]
for ch in self.channel_numbers
Expand Down Expand Up @@ -681,8 +695,11 @@ def parse(
path: str
Path of .fcs file
meta_data_only: bool
If True, the parse_fcs only returns the meta_data (the TEXT segment of the FCS file)
compensate: bool, reserved parameter to indicate whether the FCS data should be compensated, unimplemented.
If True, the parse_fcs only returns the meta_data
(the TEXT segment of the FCS file)
compensate: bool,
reserved parameter to indicate whether the FCS data
should be compensated, unimplemented.
channel_naming: '$PnS' | '$PnN'
Determines which meta data field is used for naming the channels.
The default should be $PnS (even though it is not guaranteed to be unique)
Expand All @@ -695,17 +712,21 @@ def parse(
The chosen field will be used to populate self.channels
Note: These names are not flipped in the implementation.
It looks like they were swapped for some reason in the official FCS specification.
It looks like they were swapped for some reason
in the official FCS specification.
reformat_meta: bool
If true, the meta data is reformatted with the channel information organized
into a DataFrame and moved into the '_channels_' key
data_set: int
Index of retrieved data set in the fcs file.
This value specifies the data set being retrieved from an fcs file with multiple data sets.
This value specifies the data set being retrieved from an fcs file
with multiple data sets.
dtype: str | None
If provided, will force convert all data into this dtype.
This is set by default to auto-convert to float32 to deal with cases in which the original
data has been stored using a smaller data type (e.g., uint8). This modifies the original
This is set by default to auto-convert to float32
to deal with cases in which the original
data has been stored using a smaller data type (e.g., uint8).
This modifies the original
data, but should make follow up analysis safer in basically all cases.
encoding: str
Provide encoding type of the text section.
Expand Down
2 changes: 1 addition & 1 deletion fcsparser/tests/test_fcs_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def test_parsed_into_dataframe_correctly(self):
# ['<u2', '<u2', '<u2', '<u2', '<u2', '<u2', '<u2', '<u2', '<u4', '<u1']
fcsparser = FCSParser(path=fname)
# Make sure that data gets parsed as 2-dimensional
self.assertEquals(fcsparser.dataframe.shape, (725, 10))
self.assertEqual(fcsparser.dataframe.shape, (725, 10))
# Verify that the values are correct.
assert_array_equal(fcsparser.dataframe.values[:2, :], np.array(
[[8, 7, 15, 15, 5, 8, 7, 6, 23, 0], [6, 7, 13, 14, 6, 9, 10, 4, 23, 0]]))
Expand Down

0 comments on commit 0022ed9

Please sign in to comment.