Skip to content

Commit

Permalink
Merge pull request #17 from moka-guys/development
Browse files Browse the repository at this point in the history
Fix logging (#17)

Co-Authored-By: Graeme <[email protected]>
Co-Authored-By: rebeccahaines1 <[email protected]>
  • Loading branch information
Graeme-Smith and rebeccahaines1 committed May 10, 2024
2 parents 02b1f5b + 872b630 commit 0c7fdcb
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 62 deletions.
8 changes: 6 additions & 2 deletions samplesheet_validator/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
import sys
import logging
import argparse
from .samplesheet_validator import SamplesheetCheck
from .ss_logger import set_root_logger
from .config import LOGGING_FORMATTER


def get_arguments():
Expand Down Expand Up @@ -98,10 +101,10 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
return dir



if __name__ == "__main__":
parsed_args = get_arguments()
if not parsed_args.no_stream_handler:
set_root_logger() # Adds stream handler
logger = set_root_logger(parsed_args.no_stream_handler)
sscheck_obj = SamplesheetCheck(
parsed_args.samplesheet_path,
parsed_args.sequencer_ids,
Expand All @@ -111,3 +114,4 @@ def is_valid_dir(parser: argparse.ArgumentParser, dir: str) -> str:
parsed_args.logdir,
)
sscheck_obj.ss_checks() # Carry out samplesheet validation

12 changes: 6 additions & 6 deletions samplesheet_validator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
TIMESTAMP = str(f"{datetime.datetime.now():%Y%m%d_%H%M%S}")

# Specifies the layout of log records in the final output
LOGGING_FORMATTER = "%(asctime)s - SAMPLESHEET_VALIDATOR - %(levelname)s - %(message)s"
LOGGING_FORMATTER = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

LOG_MSGS = {
"ss_present": "Samplesheet with supplied name exists (%s)",
Expand All @@ -14,11 +14,11 @@
"sequencer_id_invalid": "Sequencer id not in allowed list (%s, %s)",
"ss_not_empty": "Samplesheet is (>10 bytes)",
"ss_empty": "Samplesheet empty (<10 bytes)",
"found_header_line": "Line in samplesheet identified as a header line",
"found_sample_line": "Line in samplesheet identified as containing a sample",
"error_extracting_headers": "An error was encountered when extracting headers from the samplesheet: %s",
"found_empty_line": "Line in samplesheet is an empty line",
"col_extraction_error": "Exception raised while attempting to extract %s from sample line %s: %s",
"found_header_line": "Line %s in samplesheet identified as a header line",
"found_sample_line": "Line %s in samplesheet identified as containing a sample",
"error_extracting_headers": "An error was encountered when extracting headers from the samplesheet, from line %s: %s",
"found_empty_line": "Line %s in samplesheet is an empty line",
"col_extraction_error": "Exception raised while attempting to extract %s from sample line %s, %s: %s",
"headers_as_expected": "Expected headers present in samplesheet",
"headers_err": "Header(/s) missing from [Data] section: '%s'",
"samplenames_match": "All sample names and sample IDS match",
Expand Down
54 changes: 29 additions & 25 deletions samplesheet_validator/samplesheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"""
import os
import re
import logging
from typing import Union
from . import config
from .ss_logger import SSLogger
Expand Down Expand Up @@ -42,6 +43,9 @@ class SamplesheetCheck:
panels (list): Valid pan numbers
tso_panels (list): Valid TSO pan numbers
development_panels (list): Development pan numbers
runfolder_name (str): Name of runfolder
logfile_path (str): Path to use for logfile
logger (logging.Logger): Logger
Methods:
get_logger()
Expand Down Expand Up @@ -115,24 +119,24 @@ def __init__(
self.panels = panels
self.tso_panels = tso_panels
self.dev_panno = dev_panno
self.runfolder_name = (self.samplesheet_path.split("/")[-1]).split("_SampleSheet.csv")[0]
self.logfile_path = f"{os.path.join(logdir, self.runfolder_name)}_samplesheet_validator.log"
self.logger = self.get_logger()


def get_logger(self):
def get_logger(self) -> logging.Logger:
"""
Get logger for the class
:return (object): Logger
:return (object): Logger
"""
runfolder_name = (self.samplesheet_path.split("/")[-1]).split(
"_SampleSheet.csv"
)[0]
logfile_path = f"{os.path.join(self.logdir, runfolder_name)}_{config.TIMESTAMP}_samplesheet_validator.log"
return SSLogger(logfile_path).get_logger()
return SSLogger(self.logfile_path, self.runfolder_name).get_logger(__name__)


def ss_checks(self) -> None:
"""
Run checks at samplesheet and sample level. Performs required extra checks for
checks not included in seglh-naming
"""
self.logger = self.get_logger()
if self.check_ss_present():
setattr(self, "ss_obj", self.check_ss_name())
if self.ss_obj:
Expand Down Expand Up @@ -274,57 +278,57 @@ def get_data_section(self) -> None:
:return None:
"""
with open(self.samplesheet_path, "r") as samplesheet_stream:
for line in reversed(samplesheet_stream.readlines()):
samplesheet_contents = samplesheet_stream.readlines()
for line in reversed(samplesheet_contents):
line_index = samplesheet_contents.index(line)
# If line contains table headers, stop looping through the file
if any(header in line for header in self.expected_data_headers):
self.extract_headers(line)
self.extract_headers(line, line_index)
break
elif len(line.split(",")[0]) < 2:
self.logger.info(self.logger.log_msgs["found_empty_line"])
self.logger.info(self.logger.log_msgs["found_empty_line"], line_index)
pass # Skip empty lines
else: # Contains sample
self.extract_sample_name_id(line)
self.extract_sample_name_id(line, line_index)

def extract_headers(self, line: str) -> None:
def extract_headers(self, line: str, line_index: int) -> None:
"""
Extract headers from line
:param line (str): Line containing samplesheet headers
:param line (str): Line containing samplesheet headers
:param line_index (int): Index of line
"""
try:
self.logger.info(self.logger.log_msgs["found_header_line"])
self.logger.info(self.logger.log_msgs["found_header_line"], line_index)
self.data_headers = line.split(",")
except Exception as exception:
self.errors = True
self.logger.warning(
self.logger.log_msgs["error_extracting_headers"], exception
self.logger.log_msgs["error_extracting_headers"], line_index, exception
)
self.add_msg_to_error_dict(
"Error extracting headers",
self.logger.log_msgs["error_extracting_headers"] % exception,
self.logger.log_msgs["error_extracting_headers"] % (line_index, exception),
)

def extract_sample_name_id(self, line: str) -> None:
def extract_sample_name_id(self, line: str, line_index: int) -> None:
"""
Extract sample name and sample id from samplesheet line
:param line (str): Line containing sample details
:param line_index (int): Index of line
"""
self.logger.info(self.logger.log_msgs["found_sample_line"])
self.logger.info(self.logger.log_msgs["found_sample_line"], line_index)
for column_details in [("Sample_ID", 0), ("Sample_Name", 1)]:
col_name, index = column_details
try:
self.samples[col_name].append(line.split(",")[index])
except Exception as exception:
self.errors = True
self.logger.warning(
self.logger.log_msgs["col_extraction_error"],
col_name,
line,
exception,
self.logger.log_msgs["col_extraction_error"], col_name, line_index, line, exception,
)
self.add_msg_to_error_dict(
"Error extracting sample name and ID",
self.logger.log_msgs["col_extraction_error"]
% (col_name, line, exception),
self.logger.log_msgs["col_extraction_error"] % (col_name, line_index, line, exception)
)

def check_expected_headers(self) -> None:
Expand Down
52 changes: 25 additions & 27 deletions samplesheet_validator/ss_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,68 @@
import logging.handlers


def set_root_logger():
def set_root_logger(no_stream_handler: bool):
"""
Set up root logger and add stream handler - we only want to add stream handler once
else it will duplicate log messages to the terminal
Set up root logger and add stream handler and syslog handler - we only want to add these once,
otherwise log messages will be duplicated in the terminal. All loggers named with the same stem
as the root logger will use the same syslog handler and stream handler
:param no_stream_handler (bool): True if no stream handler specified as command line input
"""
formatter = logging.Formatter(config.LOGGING_FORMATTER)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.DEBUG)
stream_handler.setFormatter(logging.Formatter(config.LOGGING_FORMATTER))
stream_handler.name = "stream_handler"
logger.addHandler(stream_handler)
syslog_handler = logging.handlers.SysLogHandler(address="/dev/log")
syslog_handler.setFormatter(formatter)
syslog_handler.name = "syslog_handler"
logger.addHandler(syslog_handler)
if not no_stream_handler:
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(formatter)
stream_handler.name = "stream_handler"
logger.addHandler(stream_handler)
return logger



class SSLogger:
"""
Creates a python logging object with a file handler and syslog handler
Attributes
timestamp (str): Timestamp from config
logfile_path (str): Name of filepath to provide to _file_handler()
runfolder_name (str): Runfolder name
logging_formatter (logging.Formatter): Specifies the layout of log records in the final output
Methods
get_logger()
get_logger(logger_name)
Returns a Python logging object
_get_file_handler()
Get file handler for the logger
_get_syslog_handler()
Get syslog handler for the logger
"""

def __init__(self, logfile_path: str):
def __init__(self, logfile_path: str, runfolder_name: str):
"""
Constructor for the Logger class
:param logfile_path (str): Path to logfile location
:param runfolder_name (str): Runfolder name
"""
# Timestamp used for naming log files with datetime, format %Y%m%d_%H%M%S
self.timestamp = config.TIMESTAMP
self.logfile_path = logfile_path
self.runfolder_name = runfolder_name
self.logging_formatter = logging.Formatter(config.LOGGING_FORMATTER)

def get_logger(self) -> logging.Logger:
def get_logger(self, logger_name: str) -> logging.Logger:
"""
Returns a Python logging object and gives it a name
:param logger_name (str): Logger name string
:return logger (object): Python logging object with custom attributes
"""
logger = logging.getLogger()
logger = logging.getLogger(f"{logger_name}.{self.runfolder_name}")
logger.filepath = self.logfile_path
logger.setLevel(logging.DEBUG)
logger.addHandler(self._get_file_handler())
logger.addHandler(self._get_syslog_handler())
logger.timestamp = self.timestamp
logger.log_msgs = config.LOG_MSGS
return logger

Expand All @@ -74,14 +83,3 @@ def _get_file_handler(self) -> logging.FileHandler:
file_handler.setFormatter(self.logging_formatter)
file_handler.name = "file_handler"
return file_handler

def _get_syslog_handler(self) -> logging.handlers.SysLogHandler:
"""
Get syslog handler for the logger, and give it a name
:return syslog_handler (logging.SysLogHandler): SysLogHandler
"""
syslog_handler = logging.handlers.SysLogHandler(address="/dev/log")
syslog_handler.setLevel(logging.DEBUG)
syslog_handler.setFormatter(self.logging_formatter)
syslog_handler.name = "syslog_handler"
return syslog_handler
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="samplesheet_validator",
version=git_tag(),
version="TEST",
description="Python library for samplesheet validation",
url="https://github.com/moka-guys/samplesheet_validator",
author="Rachel Duffin",
Expand Down
2 changes: 1 addition & 1 deletion test/test_samplesheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ def test_extract_sample_name_id_pass(self, valid_samplesheets_with_dev, caplog):
for samplesheet in valid_samplesheets_with_dev:
sscheck_obj = get_sscheck_obj(samplesheet)
assert (
"Line in samplesheet identified as containing a sample" in caplog.text
"samplesheet identified as containing a sample" in caplog.text
)
shutdown_logs(sscheck_obj.logger)

Expand Down

0 comments on commit 0c7fdcb

Please sign in to comment.