fix(dashboard): Handle encoding errors (#3847)

prowler-cloud · May 8, 2024 · c197aa8 · c197aa8
1 parent 6f0dc44
commit c197aa8
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 16 deletions.
diff --git a/dashboard/config.py b/dashboard/config.py
@@ -1,4 +1,5 @@
 import os
+import sys
 
 # Emojis to be used in the compliance table
 pass_emoji = "✅"
@@ -28,5 +29,10 @@
 folder_path_overview = os.getcwd() + "/output"
 folder_path_compliance = os.getcwd() + "/output/compliance"
 
-# Encoding
-encoding_format = "utf-8"
+# Encoding: use cp1252 on Windows when the entry point (sys.argv[0]) is not a .py script; otherwise use utf-8
+if os.name == "nt" and ".py" not in sys.argv[0].lower():
+ encoding_format = "cp1252"
+else:
+ encoding_format = "utf-8"
+# Error handler passed as `errors=` when opening files; "ignore" or "replace" is recommended
+error_action = "ignore"
diff --git a/dashboard/pages/compliance.py b/dashboard/pages/compliance.py
@@ -16,6 +16,7 @@
 # Config import
 from dashboard.config import (
  encoding_format,
+ error_action,
  fail_color,
  folder_path_compliance,
  info_color,
@@ -29,6 +30,7 @@
  create_region_dropdown_compliance,
 )
 from dashboard.lib.layouts import create_layout_compliance
+from prowler.lib.logger import logger
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -38,19 +40,24 @@
 
 csv_files = []
 for file in glob.glob(os.path.join(folder_path_compliance, "*.csv")):
- with open(file, "r", newline="", encoding=encoding_format) as csvfile:
- reader = csv.reader(csvfile)
- num_rows = sum(1 for row in reader)
- if num_rows > 1:
- csv_files.append(file)
+ try:
+ with open(
+ file, "r", newline="", encoding=encoding_format, errors=error_action
+ ) as csvfile:
+ reader = csv.reader(csvfile)
+ num_rows = sum(1 for row in reader)
+ if num_rows > 1:
+ csv_files.append(file)
+ except UnicodeDecodeError:
+ logger.error(f"Error decoding file: {file}")
 
 
 def load_csv_files(csv_files):
  # Load CSV files into a single pandas DataFrame.
  dfs = []
  results = []
  for file in csv_files:
- df = pd.read_csv(file, sep=";", on_bad_lines="skip")
+ df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
  if "CHECKID" in df.columns:
  dfs.append(df)
  result = file
@@ -238,7 +245,9 @@ def load_csv_files(files):
  """Load CSV files into a single pandas DataFrame."""
  dfs = []
  for file in files:
- df = pd.read_csv(file, sep=";", on_bad_lines="skip")
+ df = pd.read_csv(
+ file, sep=";", on_bad_lines="skip", encoding=encoding_format
+ )
  dfs.append(df.astype(str))
  return pd.concat(dfs, ignore_index=True)
 

diff --git a/dashboard/pages/overview.py b/dashboard/pages/overview.py
@@ -18,6 +18,7 @@
 from dashboard.config import (
  critical_color,
  encoding_format,
+ error_action,
  fail_color,
  folder_path_overview,
  high_color,
@@ -42,6 +43,7 @@
  create_table_row_dropdown,
 )
 from dashboard.lib.layouts import create_layout_overview
+from prowler.lib.logger import logger
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -51,11 +53,16 @@
 csv_files = []
 
 for file in glob.glob(os.path.join(folder_path_overview, "*.csv")):
- with open(file, "r", newline="", encoding=encoding_format) as csvfile:
- reader = csv.reader(csvfile)
- num_rows = sum(1 for row in reader)
- if num_rows > 1:
- csv_files.append(file)
+ with open(
+ file, "r", newline="", encoding=encoding_format, errors=error_action
+ ) as csvfile:
+ try:
+ reader = csv.reader(csvfile)
+ num_rows = sum(1 for row in reader)
+ if num_rows > 1:
+ csv_files.append(file)
+ except UnicodeDecodeError:
+ logger.error(f"Error decoding file: {file}")
 
 
 # Import logos providers
@@ -77,7 +84,7 @@ def load_csv_files(csv_files):
  """Load CSV files into a single pandas DataFrame."""
  dfs = []
  for file in csv_files:
- df = pd.read_csv(file, sep=";", on_bad_lines="skip")
+ df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
  if "CHECK_ID" in df.columns:
  if "TIMESTAMP" in df.columns or df["PROVIDER"].unique() == "aws":
  dfs.append(df.astype(str))
@@ -456,7 +463,7 @@ def filter_data(
  # Select the files in the list_files that have the same date as the selected date
  list_files = []
  for file in csv_files:
- df = pd.read_csv(file, sep=";", on_bad_lines="skip")
+ df = pd.read_csv(file, sep=";", on_bad_lines="skip", encoding=encoding_format)
  if "CHECK_ID" in df.columns:
  if "TIMESTAMP" in df.columns or df["PROVIDER"].unique() == "aws":
  # This handles the case where we are using v3 outputs