From 4e52cf8b23c13ed1e6d74bd7f65e85b960164ab3 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Fri, 3 May 2024 16:41:31 +0200 Subject: [PATCH] lib/ir/project: update docs of CF-propagation pass No functional changes. --- .../intermediate_representation/project.rs | 1 - .../project/propagate_control_flow.rs | 75 ++++++++++++------- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project.rs b/src/cwe_checker_lib/src/intermediate_representation/project.rs index 31e3694e3..5f3cd165c 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs @@ -5,7 +5,6 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; /// Contains implementation of the block duplication normalization pass. mod block_duplication_normalization; use block_duplication_normalization::*; -/// Contains implementation of the propagate control flow normalization pass. mod propagate_control_flow; use propagate_control_flow::*; diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 32bbccbdf..91e82966f 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -1,3 +1,22 @@ +//! Control Flow Propagation Normalization Pass +//! +//! The `propagate_control_flow` normalization pass tries to simplify the +//! representation of sequences of if-else blocks that all have the same +//! condition. After this transformation the program should be of a form where +//! either all or none of the blocks are executed. Such sequences are often +//! generated by sequences of conditional assignment assembly instructions. +//! +//! 
In addition to the above, the pass also removes blocks that consist of a +//! single, unconditional jump. +//! +//! For each "re-targetable" intraprocedural control flow transfer, i.e., +//! call-returns and (conditional) jumps, the pass computes a new target that is +//! equivalent to the old target but skips zero or more intermediate blocks. +//! Knowledge about conditions that are always true when a particular branch is +//! executed is used to resolve the target of intermediate conditional jumps. +//! +//! Lastly, the newly bypassed blocks are considered dead code and are removed. + use crate::analysis::graph::{self, Edge, Graph, Node}; use crate::intermediate_representation::*; @@ -6,17 +25,10 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use petgraph::graph::NodeIndex; use petgraph::Direction::Incoming; -/// The `propagate_control_flow` normalization pass tries to simplify the representation of -/// sequences of if-else blocks that all have the same condition -/// so that they are either all executed or none of the blocks are executed. -/// Such sequences are often generated by sequences of conditional assignment assembly instructions. +/// Performs the Control Flow Propagation normalization pass. /// -/// To simplify the generated control flow graph -/// (and thus propagate the knowledge that either all or none of these blocks are executed to the control flow graph) -/// we look for sequences of (conditional) jumps where the final jump target is determined by the source of the first jump -/// (because we know that the conditionals for all jumps evaluate to the same value along the sequence). -/// For such a sequence we then retarget the destination of the first jump to the final jump destination of the sequence. -/// Lastly, the newly bypassed blocks are considered dead code and are removed. +/// See the module-level documentation for more information on what this pass +/// does. 
pub fn propagate_control_flow(project: &mut Project) { let cfg_before_normalization = graph::get_program_cfg(&project.program); let nodes_without_incoming_edges_at_beginning = @@ -30,6 +42,8 @@ pub fn propagate_control_flow(project: &mut Project) { // Conditions that we know to be true "on" a particular outgoing // edge. let mut true_conditions = Vec::new(); + // Check if some condition must be true at the beginning of the block, + // and still holds after all DEFs are executed. if let Some(block_precondition) = get_block_precondition_after_defs(&cfg_before_normalization, node) { @@ -123,7 +137,8 @@ pub fn propagate_control_flow(project: &mut Project) { ); } -/// Insert the new target TIDs into jump instructions for which a new target was computed. +/// Inserts the new target TIDs into jump instructions for which a new target +/// was computed. fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap) { for sub in project.program.term.subs.values_mut() { for blk in sub.term.blocks.iter_mut() { @@ -153,10 +168,14 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap } } -/// Under the assumption that the given `true_condition` expression evaluates to `true`, -/// check whether we can retarget jumps to the given target to another final jump target. -/// I.e. we follow sequences of jumps that are not interrupted by [`Def`] instructions to their final jump target -/// using the `true_condition` to resolve the targets of conditional jumps if possible. +/// Under the assumption that the given `true_conditions` expressions all +/// evaluate to `true`, check whether we can retarget jumps to the given target +/// to another final jump target. +/// +/// In other words, we follow sequences of jumps that are not interrupted by +/// [`Def`] instructions (or other things that may have side-effects) to their +/// final jump target using the `true_conditions` to resolve the targets of +/// conditional jumps if possible. 
fn find_target_for_retargetable_jump( target: &Tid, sub: &Sub, @@ -171,7 +190,8 @@ fn find_target_for_retargetable_jump( }; if !visited_tids.insert(retarget.clone()) { - // The target was already visited, so we abort the search to avoid infinite loops. + // The target was already visited, so we abort the search to avoid + // infinite loops. break; } @@ -186,9 +206,10 @@ fn find_target_for_retargetable_jump( } /// Check whether the given block does not contain any [`Def`] instructions. -/// If yes, check whether the target of the jump at the end of the block is predictable -/// under the assumption that the given `true_condition` expression evaluates to true. -/// If it can be predicted, return the target of the jump. +/// If yes, check whether the target of the jump at the end of the block is +/// predictable under the assumption that the given `true_conditions` +/// expressions all evaluate to true. If it can be predicted, return the target +/// of the jump. fn check_for_retargetable_block<'a>( block: &'a Term, true_conditions: &[Expression], @@ -286,12 +307,14 @@ fn get_block_precondition_after_defs(cfg: &Graph, node: NodeIndex) -> Option Option Expression { if let Expression::UnOp { op: UnOpType::BoolNegate, @@ -327,7 +351,7 @@ fn negate_condition(expr: Expression) -> Expression { } } -/// Iterates the CFG and returns all node's blocks, that do not have an incoming edge. +/// Iterates the CFG and returns all nodes that do not have an incoming edge. fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { cfg.node_indices() .filter_map(|node| { @@ -340,7 +364,8 @@ fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { .collect() } -/// Calculates the difference of the orphaned blocks and removes them from the +/// project. fn remove_new_orphaned_blocks( project: &mut Project, orphaned_blocks_before: HashSet,