Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Npath with DFS and multiprocessing #39

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/config/pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ ignore-comments=yes
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no
ignore-imports=yes

# Minimum lines number of a similarity.
min-similarity-lines=4
Expand Down Expand Up @@ -275,7 +275,7 @@ indent-string=' '
max-line-length=120

# Maximum number of lines in a module.
max-module-lines=1000
max-module-lines=1048

# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
Expand Down Expand Up @@ -565,7 +565,7 @@ max-bool-expr=5
max-branches=13

# Maximum number of locals for function / method body.
max-locals=16
max-locals=18

# Maximum number of parents for a class (see R0901).
max-parents=8
Expand All @@ -577,7 +577,7 @@ max-public-methods=20
max-returns=7

# Maximum number of statements in function / method body.
max-statements=52
max-statements=60

# Minimum number of public methods for a class (see R0903).
min-public-methods=2
Expand Down
213 changes: 132 additions & 81 deletions src/core/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,26 @@

import copy
import os.path

import re
import readline
import subprocess
import tempfile
import time
from collections import defaultdict
from enum import Enum
from functools import partial
from multiprocessing import Manager, Pool
from multiprocessing import Manager, Pool, Lock # pylint: disable=unused-import
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove disable unused imports

from os import listdir
from pathlib import Path
from typing import Iterable, Optional, Union, cast
from queue import Queue
from typing import Iterable, Optional, Union, cast, Tuple, Callable

import numpy # type: ignore
from rich.console import Console
from rich.table import Table

from core.command_data import AnyDict, Data, ObjTypes, PathComplexityRes
from core.command_data import AnyDict, Data, MetricRes, ObjTypes, PathComplexityRes
from core.env import KnownExtensions
from core.error_messages import (EXTENSION, MISSING_FILENAME, MISSING_NAME, MISSING_TYPE_AND_NAME,
NO_FILE_EXT, ReplErrors)
Expand Down Expand Up @@ -84,7 +87,7 @@ def get_graph_generator(self, file_extension: str) -> converter.ConverterAbstrac
return self.graph_generators[file_extension]


def worker_main(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
def multiprocess_import(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
"""Handle the multiprocessing of import."""
graph = ControlFlowGraph.from_file(file)
if isinstance(graph, dict):
Expand All @@ -94,23 +97,40 @@ def worker_main(shared_dict: dict[str, ControlFlowGraph], file: str) -> None:
shared_dict[filepath] = graph


def worker_main_two(metrics_generator: metric.MetricAbstract,
shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]],
graph: ControlFlowGraph) -> None:
"""Handle the multiprocessing of convert."""
try:
with Timeout(10, "Took too long!"):
result = metrics_generator.evaluate(graph)
def multiprocess_metrics(metrics_generators: dict[str, metric.MetricAbstract],
                         shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]],
                         queue: Queue[Tuple[ControlFlowGraph, str]],
                         lock: Callable[[], None],
                         process_count: int) -> None:
    """
    Worker loop for the metrics pool: drain (graph, metric-name) tasks from the queue.

    Each result is written into `shared_dict` keyed by (graph name, metric name).
    The loop runs until the shared queue is empty. `process_count` is only used
    for log output identifying this worker.
    TODO(review): pass in the logger instead of using bare print().
    """
    print(f"Starting thread {process_count}")
    while True:
        # The empty()/get() pair must be atomic; without the lock two workers
        # could both see a non-empty queue and one of them would block forever.
        with lock:  # type: ignore
            if not queue.empty():
                graph, generator_name = queue.get()
            else:
                break
        metrics_generator = metrics_generators[generator_name]
        # Path Complexity is far slower than the other metrics, so it gets a
        # much larger timeout budget.
        timeout = 1200 if metrics_generator.name() == "Path Complexity" else 180
        try:
            # Lines of Code is currently only supported for Python sources.
            if metrics_generator.name() == "Lines of Code" and \
               graph.metadata.language is not KnownExtensions.Python:
                continue

            with Timeout(timeout, "Took too long!"):
                result = metrics_generator.evaluate(graph)

            if graph.name is None:
                raise ValueError("No Graph name.")

            shared_dict[(graph.name, metrics_generator.name())] = result
        except IndexError as err:
            print(graph)
            print(err)
        except TimeoutError as err:
            # NOTE(review): graph.name may be None here since the name check
            # happens inside the try — confirm whether a None key is acceptable.
            print(err, graph.name, metrics_generator.name())
            shared_dict[(graph.name, metrics_generator.name())] = ("NA", "Timeout")
    print(f"Thread {process_count} is done.")


class REPLOptions():
Expand Down Expand Up @@ -455,7 +475,8 @@ def do_convert(self, args: str) -> None: # pylint: disable=too-many-branches
if graph == {}:
self.logger.v_msg("Converted without errors, but no graphs created.")
else:
self.logger.v_msg(f"Created graph objects {' '.join(list(graph.keys()))}")
self.logger.v_msg(f"Created graph objects {Colors.MAGENTA.value}"
f"{' '.join(list(graph.keys()))}{Colors.ENDC.value}")
self.data.graphs.update(graph)
elif isinstance(graph, ControlFlowGraph):
self.logger.v_msg(f"Created graph {graph.name}")
Expand Down Expand Up @@ -494,17 +515,20 @@ def do_import(self, flags: Options, *args_list: str) -> None:
manager = Manager()
shared_dict: dict[str, ControlFlowGraph] = manager.dict()
pool = Pool(8)
pool.map(partial(worker_main, shared_dict), all_files)
pool.map(partial(multiprocess_import, shared_dict), all_files)
self.logger.v_msg(f"Created graph objects "
f"{Colors.MAGENTA.value}{' '.join(shared_dict.keys())}{Colors.ENDC.value}")
self.data.graphs.update(shared_dict)
else:
graphs = []
for file in all_files:
filepath, _ = os.path.splitext(file)
graph = ControlFlowGraph.from_file(file)
self.logger.v_msg(str(graph))
if isinstance(graph, dict):
self.data.graphs.update(graph)
else:
self.data.graphs[filepath] = graph
graphs.append(graph)
self.data.graphs[filepath] = graph
names = [graph.name for graph in graphs]
self.logger.v_msg(f"Created graph objects "
f"{Colors.MAGENTA.value}{' '.join(names)}{Colors.ENDC.value}")

def do_list(self, flags: Options, list_typename: str) -> None:
"""
Expand Down Expand Up @@ -538,14 +562,35 @@ def do_list(self, flags: Options, list_typename: str) -> None:
else:
self.logger.v_msg(f"Type {list_type} not recognized")

def do_metrics_multithreaded(self, graphs: list[ControlFlowGraph]) -> None:
def do_metrics_multithreaded(self, cfgs: list[ControlFlowGraph]) -> None:
    """
    Compute all of the metrics for some set of graphs using parallelization.

    Builds a shared work queue of (graph, metric-name) tasks, fans it out to a
    pool of worker processes running `multiprocess_metrics`, then collects the
    per-graph results into `self.data.metrics`.
    """
    # TODO(review): define pool_size in the constructor and reuse it in do_import.
    pool_size = 8
    manager = Manager()
    graph_queue = manager.Queue()
    lock = manager.Lock()  # pylint: disable=no-member
    # Largest graphs first, so the slowest work starts as early as possible.
    cfgs = sorted(cfgs, key=lambda cfg: len(cfg.graph.vertices()), reverse=True)
    results: defaultdict[str, list[tuple[str, MetricRes]]] = defaultdict(list)
    shared_dict: dict[tuple[str, str], Union[int, PathComplexityRes]] = manager.dict()
    # Queue up every (cfg, metric) pair for the workers to drain.
    for metrics_generator in self.controller.metrics_generators[::-1]:
        for cfg in cfgs:
            graph_queue.put((cfg, metrics_generator.name()))

    generator_dict = {generator.name(): generator for generator in self.controller.metrics_generators}

    func_to_execute = partial(
        multiprocess_metrics,
        generator_dict,
        shared_dict,
        graph_queue,
        lock)
    # Each worker receives only its index, which is used for log output.
    # The context manager terminates the pool when the work is done
    # (the original leaked the pool), and the unused map() return value
    # flagged in review is no longer bound.
    with Pool(pool_size) as pool:
        pool.map(func_to_execute, list(range(pool_size)), chunksize=1)
    for (name, metric_generator), res in shared_dict.items():
        results[name].append((metric_generator, res))
    self.data.metrics.update(results)

def get_metrics_list(self, name: str) -> list[str]:
"""Get the list of metric names from command argument."""
Expand Down Expand Up @@ -579,58 +624,64 @@ def do_metrics(self, flags: Options, name: str) -> None:
# pylint: disable=R1702
# pylint: disable=R0912
graphs = [self.data.graphs[name] for name in self.get_metrics_list(name)]
for graph in graphs:
self.logger.v_msg(f"Computing metrics for {graph.name}")
results = []
if self.rich:
table = Table(title=f"Metrics for {graph.name}")
table.add_column("Metric", style="cyan")
table.add_column("Result", style="magenta", no_wrap=False)
table.add_column("Time Elapsed", style="green")
for metric_generator in self.controller.metrics_generators:
# Lines of Code is currently only supported in Python.
if metric_generator.name() == "Lines of Code" and \
graph.metadata.language is not KnownExtensions.Python:
continue
start_time = time.time()

try:
with Timeout(6000, "Took too long!"):
result = metric_generator.evaluate(graph)
runtime = time.time() - start_time
if result is not None:
results.append((metric_generator.name(), result))
time_out = f"{runtime:.5f} seconds"
if metric_generator.name() == "Path Complexity":
result_ = cast(tuple[Union[float, str], Union[float, str]],
result)
path_out = f"(APC: {result_[0]}, Path Complexity: {result_[1]})"

if self.rich:
table.add_row(metric_generator.name(), path_out, time_out)
if self.multi_threaded:
start_time = time.time()
self.do_metrics_multithreaded(graphs)
elapsed = time.time() - start_time
print(f"TIME ELAPSED: {elapsed}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

log

else:
for graph in graphs:
self.logger.v_msg(f"Computing metrics for {graph.name}")
results = []
if self.rich:
table = Table(title=f"Metrics for {graph.name}")
table.add_column("Metric", style="cyan")
table.add_column("Result", style="magenta", no_wrap=False)
table.add_column("Time Elapsed", style="green")
for metric_generator in self.controller.metrics_generators:
# Lines of Code is currently only supported in Python.
if metric_generator.name() == "Lines of Code" and \
graph.metadata.language is not KnownExtensions.Python:
continue
start_time = time.time()

try:
with Timeout(6000, "Took too long!"):
result = metric_generator.evaluate(graph)
runtime = time.time() - start_time
if result is not None:
results.append((metric_generator.name(), result))
time_out = f"{runtime:.5f} seconds"
if metric_generator.name() == "Path Complexity":
result_ = cast(tuple[Union[float, str], Union[float, str]],
result)
path_out = f"(APC: {result_[0]}, Path Complexity: {result_[1]})"

if self.rich:
table.add_row(metric_generator.name(), path_out, time_out)
else:
self.logger.v_msg(f"Got {path_out}, {time_out}")
else:
self.logger.v_msg(f"Got {path_out}, {time_out}")
if self.rich:
table.add_row(metric_generator.name(), str(result), time_out)
else:
self.logger.v_msg(f" Got {result}, took {runtime:.3e} seconds")
else:
if self.rich:
table.add_row(metric_generator.name(), str(result), time_out)
else:
self.logger.v_msg(f" Got {result}, took {runtime:.3e} seconds")
else:
self.logger.v_msg("Got None")
except TimeoutError:
self.logger.e_msg("Timeout!")
except IndexError as err:
self.logger.e_msg("Index Error")
self.logger.e_msg(str(err))
except numpy.linalg.LinAlgError as err:
self.logger.e_msg("Lin Alg Error")
self.logger.e_msg(str(err))
if self.rich:
console = Console()
console.print(table)

if graph.name is not None:
self.data.metrics[graph.name] = results
self.logger.v_msg("Got None")
except TimeoutError:
self.logger.e_msg("Timeout!")
except IndexError as err:
self.logger.e_msg("Index Error")
self.logger.e_msg(str(err))
except numpy.linalg.LinAlgError as err:
self.logger.e_msg("Lin Alg Error")
self.logger.e_msg(str(err))
if self.rich:
console = Console()
console.print(table)

if graph.name is not None:
self.data.metrics[graph.name] = results

def log_name(self, name: str) -> bool:
"""Log all objects of a given name."""
Expand Down
1 change: 1 addition & 0 deletions src/core/command_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def export_metrics(self, name: str, new_name: str) -> None:
"cyclo": [], "npath": []})
for m_name in self.metrics:
metric_value = self.metrics[m_name]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

itemgetter(0)

metric_value = sorted(metric_value, key=lambda val: val[0])
new_row = {"graph_name": m_name, "apc": metric_value[2][1],
"cyclo": metric_value[0][1], "npath": metric_value[1][1]}
data = data.append(new_row, ignore_index=True)
Expand Down
41 changes: 21 additions & 20 deletions src/graph/control_flow_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def from_file(filename: str, graph_type: Type[Graph] = AdjListGraph,
return ControlFlowGraph(graph, Metadata(*options, Metadata.with_calls(calls)))

@staticmethod
def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Optional[Tuple[list[list[str]], list[str]]]:
def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Tuple[list[list[str]], list[str]]:
"""Create lists describing the hierarchy of a program's function calls."""
calls_list = []
simple_funcs = []
Expand All @@ -180,38 +180,39 @@ def get_calls_structure(graphs: dict[str, ControlFlowGraph]) -> Optional[Tuple[l
for func2 in graphs:
if calls_function(graphs[func1].metadata.calls, func2):
calls_list.append([func1, func2])
if graphs[func2].metadata.calls is None:
if graphs[func2].metadata.calls is None or graphs[func2].metadata.calls == {}:
simple_funcs.append(func2)
return calls_list, simple_funcs

@staticmethod
def stitch(graphs: dict[str, ControlFlowGraph]) -> ControlFlowGraph:
    """
    Create a new CFG by substituting function calls with their graphs.

    Repeatedly resolves entries of the call hierarchy: self-recursive calls
    are folded in place via `recursify`, and calls to already-resolved
    ("simple") functions are spliced in via `compose`, until no unresolved
    calls remain. Returns the fully stitched graph.
    """
    calls_list, simple_funcs = ControlFlowGraph.get_calls_structure(graphs)

    while calls_list:
        # NOTE(review): func_pair entries are removed from calls_list while
        # iterating it; the enclosing while-loop re-scans, so no pair is
        # ultimately skipped — confirm this invariant if the loop is changed.
        for func_pair in calls_list:
            caller, callee = func_pair
            if caller == callee:
                # Self-recursive call: fold each call site back into the
                # graph itself rather than composing two graphs.
                for _ in range(len(calls_function(graphs[caller].metadata.calls, callee))):
                    node = calls_function(graphs[caller].metadata.calls, callee)[0]
                    graphs[caller] = ControlFlowGraph.recursify(graphs[caller], node)
                calls_list.remove(func_pair)
                if caller not in [pair[0] for pair in calls_list]:
                    # No unresolved calls left in this caller: it is now simple.
                    simple_funcs.append(caller)

            elif callee in simple_funcs:
                # Callee is fully resolved, so its graph can be spliced in at
                # every remaining call site within the caller.
                for _ in range(len(calls_function(graphs[caller].metadata.calls, callee))):
                    cfg1, cfg2 = graphs[caller], graphs[callee]
                    node = calls_function(graphs[caller].metadata.calls, callee)[0]
                    if cfg1.metadata.calls is not None:
                        cfg1.metadata.calls.pop(node)
                    graphs[caller] = ControlFlowGraph.compose(cfg1, cfg2, node)
                calls_list.remove(func_pair)
                if caller not in [pair[0] for pair in calls_list]:
                    simple_funcs.append(caller)

    return graphs[simple_funcs[-1]]

Expand Down
Loading