Skip to content

Commit

Permalink
fix(dashboard): Handle encoding errors (#3847)
Browse files Browse the repository at this point in the history
  • Loading branch information
pedrooot committed May 8, 2024
1 parent 6f0dc44 commit c197aa8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 16 deletions.
10 changes: 8 additions & 2 deletions dashboard/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import sys

# Emojis to be used in the compliance table
pass_emoji = "✅"
Expand Down Expand Up @@ -28,5 +29,10 @@
folder_path_overview = os.getcwd() + "/output"
folder_path_compliance = os.getcwd() + "/output/compliance"

# Encoding used when reading the CSV outputs.
# On Windows (os.name == "nt"), when the program was not started from a ".py"
# script (no ".py" in sys.argv[0]), read with cp1252; otherwise read with utf-8.
# NOTE(review): presumably the non-".py" case is the installed launcher
# executable rather than "python script.py" -- confirm.
encoding_format = (
    "cp1252" if os.name == "nt" and ".py" not in sys.argv[0].lower() else "utf-8"
)
# Error action, it is recommended to use "ignore" or "replace"
error_action = "ignore"
23 changes: 16 additions & 7 deletions dashboard/pages/compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Config import
from dashboard.config import (
encoding_format,
error_action,
fail_color,
folder_path_compliance,
info_color,
Expand All @@ -29,6 +30,7 @@
create_region_dropdown_compliance,
)
from dashboard.lib.layouts import create_layout_compliance
from prowler.lib.logger import logger

# Suppress warnings
warnings.filterwarnings("ignore")
Expand All @@ -38,19 +40,24 @@

# Collect the compliance CSV files that contain more than one row.
# NOTE(review): presumably the first row is the header, so this keeps only
# files with at least one data row -- confirm.
csv_files = []
for file in glob.glob(os.path.join(folder_path_compliance, "*.csv")):
    try:
        # newline="" is the mode the csv module expects; errors=error_action
        # controls what happens to bytes that cannot be decoded.
        with open(
            file, "r", newline="", encoding=encoding_format, errors=error_action
        ) as csvfile:
            num_rows = sum(1 for _ in csv.reader(csvfile))
    except UnicodeDecodeError:
        # Not raised with the "ignore" or "replace" error actions; kept so a
        # stricter error_action logs and skips the file instead of aborting.
        logger.error(f"Error decoding file: {file}")
        continue
    if num_rows > 1:
        csv_files.append(file)


def load_csv_files(csv_files):
# Load CSV files into a single pandas DataFrame.
dfs = []
results = []
for file in csv_files:
df = pd.read_csv(file, sep=";", on_bad_lines="skip")
df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
if "CHECKID" in df.columns:
dfs.append(df)
result = file
def load_csv_files(files):
    """Load CSV files into a single pandas DataFrame.

    Each file is read once as a ";"-separated CSV, malformed lines are
    skipped, every column is cast to str, and the frames are concatenated
    with a fresh index.

    Raises:
        ValueError: from pd.concat when ``files`` is empty.
    """
    dfs = []
    for file in files:
        df = pd.read_csv(
            file,
            sep=";",
            on_bad_lines="skip",
            encoding=encoding_format,
            # Use the same error action as the file pre-scan, so a file that
            # passed the scan does not fail to decode here.
            encoding_errors=error_action,
        )
        dfs.append(df.astype(str))
    return pd.concat(dfs, ignore_index=True)

Expand Down
21 changes: 14 additions & 7 deletions dashboard/pages/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from dashboard.config import (
critical_color,
encoding_format,
error_action,
fail_color,
folder_path_overview,
high_color,
Expand All @@ -42,6 +43,7 @@
create_table_row_dropdown,
)
from dashboard.lib.layouts import create_layout_overview
from prowler.lib.logger import logger

# Suppress warnings
warnings.filterwarnings("ignore")
Expand All @@ -51,11 +53,16 @@
# Collect the overview CSV files that contain more than one row.
# NOTE(review): presumably the first row is the header, so this keeps only
# files with at least one data row -- confirm.
csv_files = []

for file in glob.glob(os.path.join(folder_path_overview, "*.csv")):
    try:
        # newline="" is the mode the csv module expects; errors=error_action
        # controls what happens to bytes that cannot be decoded.
        with open(
            file, "r", newline="", encoding=encoding_format, errors=error_action
        ) as csvfile:
            num_rows = sum(1 for _ in csv.reader(csvfile))
    except UnicodeDecodeError:
        # Not raised with the "ignore" or "replace" error actions; kept so a
        # stricter error_action logs and skips the file instead of aborting.
        logger.error(f"Error decoding file: {file}")
        continue
    if num_rows > 1:
        csv_files.append(file)


# Import logos providers
Expand All @@ -77,7 +84,7 @@ def load_csv_files(csv_files):
"""Load CSV files into a single pandas DataFrame."""
dfs = []
for file in csv_files:
df = pd.read_csv(file, sep=";", on_bad_lines="skip")
df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
if "CHECK_ID" in df.columns:
if "TIMESTAMP" in df.columns or df["PROVIDER"].unique() == "aws":
dfs.append(df.astype(str))
Expand Down Expand Up @@ -456,7 +463,7 @@ def filter_data(
# Select the files in the list_files that have the same date as the selected date
list_files = []
for file in csv_files:
df = pd.read_csv(file, sep=";", on_bad_lines="skip")
df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
if "CHECK_ID" in df.columns:
if "TIMESTAMP" in df.columns or df["PROVIDER"].unique() == "aws":
# This handles the case where we are using v3 outputs
Expand Down

0 comments on commit c197aa8

Please sign in to comment.