Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Npath with DFS and multiprocessing #39

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/config/pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ ignore-comments=yes
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no
ignore-imports=yes

# Minimum lines number of a similarity.
min-similarity-lines=4
Expand Down Expand Up @@ -275,7 +275,7 @@ indent-string=' '
max-line-length=120

# Maximum number of lines in a module.
max-module-lines=1000
max-module-lines=1048

# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
Expand Down Expand Up @@ -565,7 +565,7 @@ max-bool-expr=5
max-branches=13

# Maximum number of locals for function / method body.
max-locals=16
max-locals=18

# Maximum number of parents for a class (see R0901).
max-parents=8
Expand All @@ -577,7 +577,7 @@ max-public-methods=20
max-returns=7

# Maximum number of statements in function / method body.
max-statements=52
max-statements=60

# Minimum number of public methods for a class (see R0903).
min-public-methods=2
Expand Down
213 changes: 132 additions & 81 deletions src/core/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,26 @@

import copy
import os.path

import re
import readline
import subprocess
import tempfile
import time
from collections import defaultdict
from enum import Enum
from functools import partial
from multiprocessing import Manager, Pool
from multiprocessing import Manager, Pool, Lock # pylint: disable=unused-import
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove disable unused imports

from os import listdir
from pathlib import Path
from typing import Iterable, Optional, Union, cast
from queue import Queue
from typing import Iterable, Optional, Union, cast, Tuple, Callable

import numpy # type: ignore
from rich.console import Console
from rich.table import Table

from core.command_data import AnyDict, Data, ObjTypes, PathComplexityRes
from core.command_data import AnyDict, Data, MetricRes, ObjTypes, PathComplexityRes
from core.env import KnownExtensions
from core.error_messages import (EXTENSION, MISSING_FILENAME, MISSING_NAME, MISSING_TYPE_AND_NAME,
NO_FILE_EXT, ReplErrors)
Expand Down Expand Up @@ -84,7 +87,7 @@ def get_graph_generator(self, file_extension: str) -> converter.ConverterAbstrac
return self.graph_generators[file_extension]


def worker_main(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
def multiprocess_import(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
"""Handle the multiprocessing of import."""
graph = ControlFlowGraph.from_file(file)
if isinstance(graph, dict):
Expand All @@ -94,23 +97,40 @@ def worker_main(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
shared_dict[filepath] = graph


def worker_main_two(metrics_generator: metric.MetricAbstract,
shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]],
graph: ControlFlowGraph) -> None:
"""Handle the multiprocessing of convert."""
try:
with Timeout(10, "Took too long!"):
result = metrics_generator.evaluate(graph)
def multiprocess_metrics(metrics_generators: dict[str, metric.MetricAbstract],
                         shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]],
                         queue: Queue[Tuple[ControlFlowGraph, str]],
                         lock: Callable[[], None],
                         process_count: int) -> None:
    """
    Worker loop for the metrics pool: drain (graph, metric-name) tasks from the queue.

    Each result is written into `shared_dict` keyed by (graph name, metric name).
    The loop runs until the shared queue is empty. `process_count` is only used
    for log output identifying this worker.
    TODO(review): pass in the logger instead of using bare print().
    """
    print(f"Starting thread {process_count}")
    while True:
        # The empty()/get() pair must be atomic; without the lock two workers
        # could both see a non-empty queue and one of them would block forever.
        with lock:  # type: ignore
            if not queue.empty():
                graph, generator_name = queue.get()
            else:
                break
        metrics_generator = metrics_generators[generator_name]
        # Path Complexity is far slower than the other metrics, so it gets a
        # much larger timeout budget.
        timeout = 1200 if metrics_generator.name() == "Path Complexity" else 180
        try:
            # Lines of Code is currently only supported for Python sources.
            if metrics_generator.name() == "Lines of Code" and \
               graph.metadata.language is not KnownExtensions.Python:
                continue

            with Timeout(timeout, "Took too long!"):
                result = metrics_generator.evaluate(graph)

            if graph.name is None:
                raise ValueError("No Graph name.")

            shared_dict[(graph.name, metrics_generator.name())] = result
        except IndexError as err:
            print(graph)
            print(err)
        except TimeoutError as err:
            # NOTE(review): graph.name may be None here since the name check
            # happens inside the try — confirm whether a None key is acceptable.
            print(err, graph.name, metrics_generator.name())
            shared_dict[(graph.name, metrics_generator.name())] = ("NA", "Timeout")
    print(f"Thread {process_count} is done.")


class REPLOptions():
Expand Down Expand Up @@ -455,7 +475,8 @@ def do_convert(self, args: str) -> None: # pylint: disable=too-many-branches
if graph == {}:
self.logger.v_msg("Converted without errors, but no graphs created.")
else:
self.logger.v_msg(f"Created graph objects {' '.join(list(graph.keys()))}")
self.logger.v_msg(f"Created graph objects {Colors.MAGENTA.value}"
f"{' '.join(list(graph.keys()))}{Colors.ENDC.value}")
self.data.graphs.update(graph)
elif isinstance(graph, ControlFlowGraph):
self.logger.v_msg(f"Created graph {graph.name}")
Expand Down Expand Up @@ -494,17 +515,20 @@ def do_import(self, flags: Options, *args_list: str) -> None:
manager = Manager()
shared_dict: dict[str, ControlFlowGraph] = manager.dict()
pool = Pool(8)
pool.map(partial(worker_main, shared_dict), all_files)
pool.map(partial(multiprocess_import, shared_dict), all_files)
self.logger.v_msg(f"Created graph objects "
f"{Colors.MAGENTA.value}{' '.join(shared_dict.keys())}{Colors.ENDC.value}")
self.data.graphs.update(shared_dict)
else:
graphs = []
for file in all_files:
filepath, _ = os.path.splitext(file)
graph = ControlFlowGraph.from_file(file)
self.logger.v_msg(str(graph))
if isinstance(graph, dict):
self.data.graphs.update(graph)
else:
self.data.graphs[filepath] = graph
graphs.append(graph)
self.data.graphs[filepath] = graph
names = [graph.name for graph in graphs]
self.logger.v_msg(f"Created graph objects "
f"{Colors.MAGENTA.value}{' '.join(names)}{Colors.ENDC.value}")

def do_list(self, flags: Options, list_typename: str) -> None:
"""
Expand Down Expand Up @@ -538,14 +562,35 @@ def do_list(self, flags: Options, list_typename: str) -> None:
else:
self.logger.v_msg(f"Type {list_type} not recognized")

def do_metrics_multithreaded(self, graphs: list[ControlFlowGraph]) -> None:
def do_metrics_multithreaded(self, cfgs: list[ControlFlowGraph]) -> None:
    """
    Compute all of the metrics for some set of graphs using parallelization.

    Builds a shared work queue of (graph, metric-name) tasks, fans it out to a
    pool of worker processes running `multiprocess_metrics`, then collects the
    per-graph results into `self.data.metrics`.
    """
    # TODO(review): define pool_size in the constructor and reuse it in do_import.
    pool_size = 8
    manager = Manager()
    graph_queue = manager.Queue()
    lock = manager.Lock()  # pylint: disable=no-member
    # Largest graphs first, so the slowest work starts as early as possible.
    cfgs = sorted(cfgs, key=lambda cfg: len(cfg.graph.vertices()), reverse=True)
    results: defaultdict[str, list[tuple[str, MetricRes]]] = defaultdict(list)
    shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]] = manager.dict()
    # Queue up every (cfg, metric) pair for the workers to drain.
    for metrics_generator in self.controller.metrics_generators[::-1]:
        for cfg in cfgs:
            graph_queue.put((cfg, metrics_generator.name()))

    generator_dict = {generator.name(): generator for generator in self.controller.metrics_generators}

    func_to_execute = partial(
        multiprocess_metrics,
        generator_dict,
        shared_dict,
        graph_queue,
        lock)
    # Each worker receives only its index, which is used for log output.
    # The context manager terminates the pool when the work is done
    # (the original leaked the pool), and the unused map() return value
    # flagged in review is no longer bound.
    with Pool(pool_size) as pool:
        pool.map(func_to_execute, list(range(pool_size)), chunksize=1)
    for (name, metric_generator), res in shared_dict.items():
        results[name].append((metric_generator, res))
    self.data.metrics.update(results)

def get_metrics_list(self, name: str) -> list[str]:
"""Get the list of metric names from command argument."""
Expand Down Expand Up @@ -579,58 +624,64 @@ def do_metrics(self, flags: Options, name: str) -> None:
# pylint: disable=R1702
# pylint: disable=R0912
graphs = [self.data.graphs[name] for name in self.get_metrics_list(name)]
for graph in graphs:
self.logger.v_msg(f"Computing metrics for {graph.name}")
results = []
if self.rich:
table = Table(title=f"Metrics for {graph.name}")
table.add_column("Metric", style="cyan")
table.add_column("Result", style="magenta", no_wrap=False)
table.add_column("Time Elapsed", style="green")
for metric_generator in self.controller.metrics_generators:
# Lines of Code is currently only supported in Python.
if metric_generator.name() == "Lines of Code" and \
graph.metadata.language is not KnownExtensions.Python:
continue
start_time = time.time()

try:
with Timeout(6000, "Took too long!"):
result = metric_generator.evaluate(graph)
runtime = time.time() - start_time
if result is not None:
results.append((metric_generator.name(), result))
time_out = f"{runtime:.5f} seconds"
if metric_generator.name() == "Path Complexity":
result_ = cast(tuple[Union[float, str], Union[float, str]],
result)
path_out = f"(APC: {result_[0]}, Path Complexity: {result_[1]})"

if self.rich:
table.add_row(metric_generator.name(), path_out, time_out)
if self.multi_threaded:
start_time = time.time()
self.do_metrics_multithreaded(graphs)
elapsed = time.time() - start_time
print(f"TIME ELAPSED: {elapsed}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

log

else:
for graph in graphs:
self.logger.v_msg(f"Computing metrics for {graph.name}")
results = []
if self.rich:
table = Table(title=f"Metrics for {graph.name}")
table.add_column("Metric", style="cyan")
table.add_column("Result", style="magenta", no_wrap=False)
table.add_column("Time Elapsed", style="green")
for metric_generator in self.controller.metrics_generators:
# Lines of Code is currently only supported in Python.
if metric_generator.name() == "Lines of Code" and \
graph.metadata.language is not KnownExtensions.Python:
continue
start_time = time.time()

try:
with Timeout(6000, "Took too long!"):
result = metric_generator.evaluate(graph)
runtime = time.time() - start_time
if result is not None:
results.append((metric_generator.name(), result))
time_out = f"{runtime:.5f} seconds"
if metric_generator.name() == "Path Complexity":
result_ = cast(tuple[Union[float, str], Union[float, str]],
result)
path_out = f"(APC: {result_[0]}, Path Complexity: {result_[1]})"

if self.rich:
table.add_row(metric_generator.name(), path_out, time_out)
else:
self.logger.v_msg(f"Got {path_out}, {time_out}")
else:
self.logger.v_msg(f"Got {path_out}, {time_out}")
if self.rich:
table.add_row(metric_generator.name(), str(result), time_out)
else:
self.logger.v_msg(f" Got {result}, took {runtime:.3e} seconds")
else:
if self.rich:
table.add_row(metric_generator.name(), str(result), time_out)
else:
self.logger.v_msg(f" Got {result}, took {runtime:.3e} seconds")
else:
self.logger.v_msg("Got None")
except TimeoutError:
self.logger.e_msg("Timeout!")
except IndexError as err:
self.logger.e_msg("Index Error")
self.logger.e_msg(str(err))
except numpy.linalg.LinAlgError as err:
self.logger.e_msg("Lin Alg Error")
self.logger.e_msg(str(err))
if self.rich:
console = Console()
console.print(table)

if graph.name is not None:
self.data.metrics[graph.name] = results
self.logger.v_msg("Got None")
except TimeoutError:
self.logger.e_msg("Timeout!")
except IndexError as err:
self.logger.e_msg("Index Error")
self.logger.e_msg(str(err))
except numpy.linalg.LinAlgError as err:
self.logger.e_msg("Lin Alg Error")
self.logger.e_msg(str(err))
if self.rich:
console = Console()
console.print(table)

if graph.name is not None:
self.data.metrics[graph.name] = results

def log_name(self, name: str) -> bool:
"""Log all objects of a given name."""
Expand Down
1 change: 1 addition & 0 deletions src/core/command_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def export_metrics(self, name: str, new_name: str) -> None:
"cyclo": [], "npath": []})
for m_name in self.metrics:
metric_value = self.metrics[m_name]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

itemgetter(0)

metric_value = sorted(metric_value, key=lambda val: val[0])
new_row = {"graph_name": m_name, "apc": metric_value[2][1],
"cyclo": metric_value[0][1], "npath": metric_value[1][1]}
data = data.append(new_row, ignore_index=True)
Expand Down
41 changes: 21 additions & 20 deletions src/graph/control_flow_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def from_file(filename: str, graph_type: Type[Graph] = AdjListGraph,
return ControlFlowGraph(graph, Metadata(*options, Metadata.with_calls(calls)))

@staticmethod
def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Optional[Tuple[list[list[str]], list[str]]]:
def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Tuple[list[list[str]], list[str]]:
"""Create lists describing the hierarchy of a program's function calls."""
calls_list = []
simple_funcs = []
Expand All @@ -180,38 +180,39 @@ def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Optional[Tuple[l
for func2 in graphs:
if calls_function(graphs[func1].metadata.calls, func2):
calls_list.append([func1, func2])
if graphs[func2].metadata.calls is None:
if graphs[func2].metadata.calls is None or graphs[func2].metadata.calls == {}:
simple_funcs.append(func2)
return calls_list, simple_funcs

@staticmethod
def stitch(graphs: dict[str, ControlFlowGraph]) -> ControlFlowGraph:
    """
    Create a new CFG by substituting function calls with their graphs.

    Repeatedly resolves entries of the call hierarchy: self-recursive calls
    are folded in place via `recursify`, and calls to already-resolved
    ("simple") functions are spliced in via `compose`, until no unresolved
    calls remain. Returns the fully stitched graph.
    """
    calls_list, simple_funcs = ControlFlowGraph.get_calls_structure(graphs)

    while calls_list:
        # NOTE(review): func_pair entries are removed from calls_list while
        # iterating it; the enclosing while-loop re-scans, so no pair is
        # ultimately skipped — confirm this invariant if the loop is changed.
        for func_pair in calls_list:
            caller, callee = func_pair
            if caller == callee:
                # Self-recursive call: fold each call site back into the
                # graph itself rather than composing two graphs.
                for _ in range(len(calls_function(graphs[caller].metadata.calls, callee))):
                    node = calls_function(graphs[caller].metadata.calls, callee)[0]
                    graphs[caller] = ControlFlowGraph.recursify(graphs[caller], node)
                calls_list.remove(func_pair)
                if caller not in [pair[0] for pair in calls_list]:
                    # No unresolved calls left in this caller: it is now simple.
                    simple_funcs.append(caller)

            elif callee in simple_funcs:
                # Callee is fully resolved, so its graph can be spliced in at
                # every remaining call site within the caller.
                for _ in range(len(calls_function(graphs[caller].metadata.calls, callee))):
                    cfg1, cfg2 = graphs[caller], graphs[callee]
                    node = calls_function(graphs[caller].metadata.calls, callee)[0]
                    if cfg1.metadata.calls is not None:
                        cfg1.metadata.calls.pop(node)
                    graphs[caller] = ControlFlowGraph.compose(cfg1, cfg2, node)
                calls_list.remove(func_pair)
                if caller not in [pair[0] for pair in calls_list]:
                    simple_funcs.append(caller)

    return graphs[simple_funcs[-1]]

Expand Down
Loading