Skip to content

Commit

Permalink
Merge pull request #18 from moka-guys/v1.3.0
Browse files Browse the repository at this point in the history
V1.3.0 (#18)

Co-Authored-By: Graeme <[email protected]>
Co-Authored-By: rebeccahaines1 <[email protected]>
  • Loading branch information
rebeccahaines1 and Graeme-Smith committed Jun 21, 2024
2 parents 872b630 + 694dabf commit 381ddcb
Show file tree
Hide file tree
Showing 11 changed files with 124 additions and 62 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Runs a series of checks on the sample sheet, collects any errors identified. Che
* Pan numbers are in the list of allowed pan numbers supplied to the script
* Samplesheet contains any TSO samples

If samplesheet contains an input dev_panno, the package will skip samplesheet chcks for the samplesheet.
If the samplesheet contains any of the supplied dev_pannos (development pan numbers), the package will skip the samplesheet checks for that samplesheet.

## Usage

Expand All @@ -40,7 +40,7 @@ sscheck_obj = SamplesheetCheck(
sequencer_ids, # list
panels, # list
tso_panels, # list
dev_panno, # str
dev_pannos, # list
logdir, # str
)
sscheck_obj.ss_checks() # Carry out samplesheet validation
Expand Down Expand Up @@ -73,8 +73,8 @@ options:
Comma separated string of allowed panel numbers
-T TSO_PANELS, --tso_panels TSO_PANELS
Comma separated string of tso panels
-D DEV_PANNO, --dev_panno DEV_PANNO
Development pan number
-D DEV_PANNOS, --dev_pannos DEV_PANNOS
Development pan numbers
-L LOGDIR, --logdir LOGDIR
Directory to save the output logfile to
-NSH NO_STREAM_HANDLER, --no_stream_handler NO_STREAM_HANDLER
Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[pytest]
addopts = -v --ignore=test/data/ --ignore=test/temp/ --cov=. --cov-report term-missing --sequencer_ids=NB551068,NB552085,M02353,M02631,A01229 --tso_panels=Pan4969,Pan5085,Pan5112,Pan5114 --dev_panno=Pan5180 --panels=Pan5180,Pan4009,Pan2835,Pan4940,Pan4396,Pan5113,Pan5115,Pan4969,Pan5085,Pan5112,Pan5114,Pan5007,Pan5008,Pan5009,Pan5010,Pan5011,Pan5012,Pan5013,Pan5014,Pan5015,Pan5016,Pan4119,Pan4121,Pan4122,Pan4125,Pan4126,Pan4974,Pan4975,Pan4976,Pan4977,Pan4978,Pan4979,Pan4980,Pan4981,Pan4982,Pan4983,Pan4984,Pan4821,Pan4822,Pan4823,Pan4824,Pan4825,Pan4149,Pan4150,Pan4129,Pan4964,Pan4130,Pan5121,Pan5185,Pan5186,Pan5143,Pan5147,Pan4816,Pan4817,Pan5122,Pan5144,Pan5148,Pan4819,Pan4820,Pan4145,Pan4146,Pan4132,Pan4134,Pan4136,Pan4137,Pan4138,Pan4143,Pan4144,Pan4151,Pan4314,Pan4351,Pan4387,Pan4390,Pan4826,Pan4827,Pan4828,Pan4829,Pan4830,Pan4831,Pan4832,Pan4833,Pan4834,Pan4835,Pan4836 --logdir=.
addopts = -v --ignore=test/data/ --ignore=test/temp/ --cov=. --cov-report term-missing --sequencer_ids=NB551068,NB552085,M02353,M02631,A01229 --tso_panels=Pan5085,Pan5112,Pan5114 --dev_pannos=Pan5180,Pan5227 --panels=Pan5180,Pan4009,Pan2835,Pan4940,Pan4396,Pan5113,Pan5115,Pan5226,Pan5085,Pan5112,Pan5114,Pan5007,Pan5008,Pan5009,Pan5010,Pan5011,Pan5012,Pan5013,Pan5014,Pan5015,Pan5016,Pan4119,Pan4121,Pan4122,Pan4125,Pan4126,Pan4974,Pan4975,Pan4976,Pan4977,Pan4978,Pan4979,Pan4980,Pan4981,Pan4982,Pan4983,Pan4984,Pan4821,Pan4822,Pan4823,Pan4824,Pan4825,Pan4149,Pan4150,Pan4129,Pan4964,Pan4130,Pan5121,Pan5185,Pan5186,Pan5143,Pan5147,Pan4816,Pan4817,Pan5122,Pan5144,Pan5148,Pan4819,Pan4820,Pan4145,Pan4146,Pan4132,Pan4134,Pan4136,Pan4137,Pan4138,Pan4143,Pan4144,Pan4151,Pan4314,Pan4351,Pan4387,Pan4390,Pan4826,Pan4827,Pan4828,Pan4829,Pan4830,Pan4831,Pan4832,Pan4833,Pan4834,Pan4835,Pan4836 --logdir=.
10 changes: 4 additions & 6 deletions samplesheet_validator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def get_arguments():
)
parser.add_argument(
"-D",
"--dev_panno",
"--dev_pannos",
type=str,
required=True,
help="Development pan number",
help="Comma separated string of development pan numbers",
)
parser.add_argument(
"-L",
Expand All @@ -65,7 +65,7 @@ def get_arguments():
parser.add_argument(
"-NSH",
"--no_stream_handler",
action='store_true',
action="store_true",
required=False,
help=(
"Provide flag when we don't want a stream handler (prevents duplication of log messages "
Expand Down Expand Up @@ -101,7 +101,6 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
return dir



if __name__ == "__main__":
parsed_args = get_arguments()
logger = set_root_logger(parsed_args.no_stream_handler)
Expand All @@ -110,8 +109,7 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
parsed_args.sequencer_ids,
parsed_args.panels,
parsed_args.tso_panels,
parsed_args.dev_panno,
parsed_args.dev_pannos,
parsed_args.logdir,
)
sscheck_obj.ss_checks() # Carry out samplesheet validation

7 changes: 5 additions & 2 deletions samplesheet_validator/git_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ def git_tag() -> str:
cmd = f"git -C {filepath} describe --tags"

proc = subprocess.Popen(
[cmd], stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True,
[cmd],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
shell=True,
)
out, _ = proc.communicate()
# Return standard out, removing any new line characters
return out.rstrip().decode("utf-8")
return out.rstrip().decode("utf-8")
45 changes: 30 additions & 15 deletions samplesheet_validator/samplesheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import re
import logging
from typing import Union
from . import config
from .ss_logger import SSLogger
from seglh_naming.sample import Sample
from seglh_naming.samplesheet import Samplesheet
Expand Down Expand Up @@ -45,7 +44,7 @@ class SamplesheetCheck:
development_panels (list): Development pan numbers
runfolder_name (str): Name of runfolder
logfile_path (str): Path to use for logfile
logger (logging.Logger): Logger
logger (logging.Logger): Logger
Methods:
get_logger()
Expand Down Expand Up @@ -91,7 +90,7 @@ def __init__(
sequencer_ids: list,
panels: list,
tso_panels: list,
dev_panno: str,
dev_pannos: list,
logdir: str,
):
"""
Expand All @@ -100,7 +99,7 @@ def __init__(
:param sequencer_ids (list): Allowed sequencer IDs
:param panels (list): Allowed pan numbers
:param tso_panels (list): TSO500 pan numbers
:param dev_panno (str): Development pan number
:param dev_pannos (list): Development pan numbers
:param logdir (str): Log file directory
"""
self.samplesheet_path = samplesheet_path
Expand All @@ -118,20 +117,22 @@ def __init__(
self.sequencer_ids = sequencer_ids
self.panels = panels
self.tso_panels = tso_panels
self.dev_panno = dev_panno
self.runfolder_name = (self.samplesheet_path.split("/")[-1]).split("_SampleSheet.csv")[0]
self.logfile_path = f"{os.path.join(logdir, self.runfolder_name)}_samplesheet_validator.log"
self.dev_pannos = dev_pannos
self.runfolder_name = (self.samplesheet_path.split("/")[-1]).split(
"_SampleSheet.csv"
)[0]
self.logfile_path = (
f"{os.path.join(logdir, self.runfolder_name)}_samplesheet_validator.log"
)
self.logger = self.get_logger()


def get_logger(self) -> logging.Logger:
"""
Get logger for the class
:return (object): Logger
"""
return SSLogger(self.logfile_path, self.runfolder_name).get_logger(__name__)


def ss_checks(self) -> None:
"""
Run checks at samplesheet and sample level. Performs required extra checks for
Expand Down Expand Up @@ -222,7 +223,11 @@ def development_run(self) -> Union[bool, None]:
:return True | None: True if contains dev pan numbers, None if does not
"""
strings_to_check = self.samples["Sample_ID"] + self.samples["Sample_Name"]
if any(self.dev_panno in sample_name for sample_name in strings_to_check):

if any(
any(dev_panno in sample_string for sample_string in strings_to_check)
for dev_panno in self.dev_pannos
):
self.logger.info(
self.logger.log_msgs["dev_run"],
self.samplesheet_path,
Expand Down Expand Up @@ -286,7 +291,9 @@ def get_data_section(self) -> None:
self.extract_headers(line, line_index)
break
elif len(line.split(",")[0]) < 2:
self.logger.info(self.logger.log_msgs["found_empty_line"], line_index)
self.logger.info(
self.logger.log_msgs["found_empty_line"], line_index
)
pass # Skip empty lines
else: # Contains sample
self.extract_sample_name_id(line, line_index)
Expand All @@ -307,7 +314,8 @@ def extract_headers(self, line: str, line_index: int) -> None:
)
self.add_msg_to_error_dict(
"Error extracting headers",
self.logger.log_msgs["error_extracting_headers"] % (line_index, exception),
self.logger.log_msgs["error_extracting_headers"]
% (line_index, exception),
)

def extract_sample_name_id(self, line: str, line_index: int) -> None:
Expand All @@ -324,11 +332,16 @@ def extract_sample_name_id(self, line: str, line_index: int) -> None:
except Exception as exception:
self.errors = True
self.logger.warning(
self.logger.log_msgs["col_extraction_error"], col_name, line_index, line, exception,
self.logger.log_msgs["col_extraction_error"],
col_name,
line_index,
line,
exception,
)
self.add_msg_to_error_dict(
"Error extracting sample name and ID",
self.logger.log_msgs["col_extraction_error"] % (col_name, line_index, line, exception)
self.logger.log_msgs["col_extraction_error"]
% (col_name, line_index, line, exception),
)

def check_expected_headers(self) -> None:
Expand All @@ -340,7 +353,9 @@ def check_expected_headers(self) -> None:
if not all(
header in self.data_headers for header in self.expected_data_headers
):
self.missing_headers = list(set(self.expected_data_headers).difference(self.data_headers))
self.missing_headers = list(
set(self.expected_data_headers).difference(self.data_headers)
)
self.errors = True
self.add_msg_to_error_dict(
"Missing headers",
Expand Down
2 changes: 1 addition & 1 deletion samplesheet_validator/ss_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Class used to create Samplesheet Validator logfiles
"""

import sys
from . import config
import logging
Expand Down Expand Up @@ -30,7 +31,6 @@ def set_root_logger(no_stream_handler: bool):
return logger



class SSLogger:
"""
Creates a python logging object with a file handler and syslog handler
Expand Down
37 changes: 37 additions & 0 deletions settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.envFile": "${workspaceFolder}/.venv",
"python.analysis.extraPaths": [
],
"editor.formatOnSaveMode": "file",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": [
"--profile",
"black"
],
"flake8.args": [
"--max-line-length=120"
],
"pylint.args": [
"--max-line-length=120"
],
"black-formatter.args": [
"--line-length",
"120"
],
"python.analysis.typeCheckingMode": "basic"
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="samplesheet_validator",
version="TEST",
version=git_tag(),
description="Python library for samplesheet validation",
url="https://github.com/moka-guys/samplesheet_validator",
author="Rachel Duffin",
Expand Down
37 changes: 6 additions & 31 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,36 +13,11 @@


def pytest_addoption(parser):
parser.addoption(
"--sequencer_ids",
action="store",
type=str,
required=True,
)
parser.addoption(
"--panels",
action="store",
type=str,
required=True,
)
parser.addoption(
"--tso_panels",
action="store",
type=str,
required=True,
)
parser.addoption(
"--dev_panno",
action="store",
type=str,
required=True
)
parser.addoption(
"--logdir",
action="store",
type=str,
required=True,
)
parser.addoption("--sequencer_ids", action="store", type=str, required=True)
parser.addoption("--panels", action="store", type=str, required=True)
parser.addoption("--tso_panels", action="store", type=str, required=True)
parser.addoption("--dev_pannos", action="store", type=str, required=True)
parser.addoption("--logdir", action="store", type=str, required=True)


def pytest_configure(config):
Expand All @@ -52,7 +27,7 @@ def pytest_configure(config):
os.environ["sequencer_ids"] = config.getoption("sequencer_ids")
os.environ["panels"] = config.getoption("panels")
os.environ["tso_panels"] = config.getoption("tso_panels")
os.environ["dev_panno"] = config.getoption("dev_panno")
os.environ["dev_pannos"] = config.getoption("dev_pannos")
data_dir = os.path.abspath("test/data/")
os.environ["samplesheet_dir"] = f'{os.path.join(data_dir, "samplesheets")}'
# Temporary directories to copy test files into and to contain outputs
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[Header],,,,,,,,,
IEMFileVersion,4,,,,,,,,
Investigator Name,DEV01,,,,,,,,
Experiment Name,DEV01,,,,,,,,
Date,12/10/2023,,,,,,,,
Workflow,GenerateFASTQ,,,,,,,,
Application,FASTQ Only,,,,,,,,
Assay,Nextera XT,,,,,,,,
Description,DEV01,,,,,,,,
Chemistry,Amplicon,,,,,,,,
,,,,,,,,,
[Reads],,,,,,,,,
251,,,,,,,,,
251,,,,,,,,,
,,,,,,,,,
[Settings],,,,,,,,,
ReverseComplement,0,,,,,,,,
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,,,,,,,,
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT,,,,,,,,
,,,,,,,,,
[Data],,,,,,,,,
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
DEV01_01_000000_0000000_Pan5227,DEV01_01_000000_0000000_Pan5227,,,IDT8_UDI_1_2,CTGATCGT,IDT8_UDI_1_1,ATATGCGC,,
DEV01_02_000000_0000000_Pan5227,DEV01_02_000000_0000000_Pan5227,,,IDT8_UDI_2_2,ACTCTCGA,IDT8_UDI_2_1,TGGTACAG,,
DEV01_03_000000_NTC0000_Pan5227,DEV01_03_000000_NTC0000_Pan5227,,,IDT8_UDI_3_2,TGAGCTAG,IDT8_UDI_3_1,AACCGTTC,,
DEV01_04_000000_0000000_Pan5227,DEV01_04_000000_0000000_Pan5227,,,IDT8_UDI_4_2,GAGACGAT,IDT8_UDI_4_1,TAACCGGT,,
DEV01_05_000000_0000000_Pan5227,DEV01_05_000000_0000000_Pan5227,,,IDT8_UDI_5_2,CTTGTCGA,IDT8_UDI_5_1,GAACATCG,,
9 changes: 8 additions & 1 deletion test/test_samplesheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from samplesheet_validator.__main__ import is_valid_dir, is_valid_file


# TODO add second dev pan number in

def shutdown_logs(logger: object) -> None:
"""
To prevent duplicate filehandlers and system handlers close
Expand All @@ -33,7 +35,7 @@ def get_sscheck_obj(samplesheet: str) -> object:
os.getenv("sequencer_ids").split(","),
os.getenv("panels").split(","),
os.getenv("tso_panels").split(","),
os.getenv("dev_panno"),
os.getenv("dev_pannos").split(","),
os.getenv("temp_dir"),
)
sscheck_obj.ss_checks()
Expand Down Expand Up @@ -66,6 +68,11 @@ def valid_dev_samplesheet():
os.getenv("samplesheet_dir"),
"valid",
"231012_M02631_0285_000000000-LBGMH_SampleSheet.csv",
),
os.path.join(
os.getenv("samplesheet_dir"),
"valid",
"231012_M02631_0285_000000000-ERTFB_SampleSheet.csv",
)
]

Expand Down

0 comments on commit 381ddcb

Please sign in to comment.