diff --git a/CHANGES.md b/CHANGES.md index 6df991024..65ba2ab95 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,9 @@ 0.9-dev === +- Improve taint analysis abstraction to simplify interprocedural bottom-up + analysis of memory taint (PR #451) +- Added check for CWE-252: Unchecked Return Value (PR #451) - Implemented experimental support for Linux Kernel Modules (PR #441) - Support LZCOUNT P-Code operation (PR #444) @@ -11,7 +14,7 @@ - Enforce expression complexity limit to reduce RAM usage (PR #428) - Implement tracking of nested parameters to improve runtime and analysis quality (PR #432) - Reduce amount of false positives generated by CWE-416 check (PR #433) -- Added check for CWE-337: Predictable Seed in Pseudo-Random Number Generator (PR #439) +- Added check for CWE-337: Predictable Seed in Pseudo-Random Number Generator (PR #439) 0.7 (2023-06) ==== diff --git a/src/caller/src/main.rs b/src/caller/src/main.rs index 1e8cf373e..d6f34b98f 100644 --- a/src/caller/src/main.rs +++ b/src/caller/src/main.rs @@ -142,7 +142,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> { let modules_depending_on_string_abstraction = BTreeSet::from_iter(["CWE78"]); let modules_depending_on_pointer_inference = BTreeSet::from_iter([ - "CWE119", "CWE134", "CWE190", "CWE337", "CWE416", "CWE476", "CWE789", "Memory", + "CWE119", "CWE134", "CWE190", "CWE252", "CWE337", "CWE416", "CWE476", "CWE789", "Memory", ]); let string_abstraction_needed = modules diff --git a/src/config.json b/src/config.json index 1f0b2d7a8..afb2b3aaf 100644 --- a/src/config.json +++ b/src/config.json @@ -73,6 +73,149 @@ "CWE248": { "symbols": [] }, + "CWE252": { + "_comment": "External symbols whose return value must be used.", + "_comment": "Every exported function that is annotated with warn_unused_result in glibc.", + "_comment": "Strict mode activates the check for symbols that even very mature projects ignore.", + "strict_mode": false, + "strict_symbols": [ + "fileno", + "setpgid" + ], + "symbols": [ + "__asprintf", + "__getdelim", + "__getrandom", + "__isoc23_fscanf", + "__isoc23_scanf", + "__isoc23_sscanf", + "__isoc23_vfscanf", + "__isoc23_vscanf", + "__isoc99_fscanf", + "__isoc99_scanf", + "__isoc99_sscanf", + "__isoc99_vfscanf", + "__isoc99_vscanf", + "access", + "arc4random_uniform", + "asprintf", + "atof", + "atoi", + "atol", + "atoll", + "brk", + "chdir", + "chown", + "chroot", + "confstr", + "daemon", + "dup", + "faccessat", + "fchdir", + "fchmodat", + "fchown", + "fchownat", + "feof", + "feof_unlocked", + "ferror_unlocked", + "fgets", + "fgets_unlocked", + "fgetws", + "fgetws_unlocked", + "fileno_unlocked", + "fputc", + "fread", + "fread_unlocked", + "fscanf", + "ftell", + "ftello", + "ftello64", + "ftruncate", + "ftruncate64", + "ftrylockfile", + "getdelim", + "getdomainname", + "getentropy", + "getgroups", + "getline", + "getrandom", + "gets", + "getsubopt", + "lchown", + "link", + "linkat", + "lockf", + "lockf64", + "mkostemp", + "mkostemp64", + "mkostemps", + "mkostemps64", + "mkstemp", + "mkstemps", + "mkstemps64", + "mmap", + "nice", + "pipe", + "pipe2", + "posix_memalign", + "posix_openpt", + "prctl", + "pread", + "preadv", + "preadv2", + "preadv64", + "preadv64v2", + "pthread_cond_timedwait", + "pthread_cond_wait", + "pthread_mutex_lock", + "pthread_mutex_trylock", + "pthread_spin_lock", + "pthread_spin_trylock", + "pwritev", + "pwritev2", + "pwritev64v2", + "read", + "readlink", + "readlinkat", + "readv", + "revoke", + "rpmatch", + "setdomainname", + "setegid", + "seteuid", + "setgid", + "sethostid", + "sethostname", + "setregid", + "setresgid", + "setresuid", + "setreuid", + "setrlimit", + "setsid", + "setuid", + "sscanf", + "stat", + "strtod", + "strtof", + "strtol", + "strtold", + "strtoll", + "strtoul", + "strtoull", + "symlink", + "symlinkat", + "system", + "truncate", + "truncate64", + "ttyname_r", + "vasprintf", + "vfscanf", + "vscanf", + "wcrtomb", + "write", + "writev" + ] + }, "CWE332": { "pairs": [ [ diff --git a/src/cwe_checker_lib/src/abstract_domain/domain_map.rs b/src/cwe_checker_lib/src/abstract_domain/domain_map.rs index c39958673..878f86da8 100644 --- a/src/cwe_checker_lib/src/abstract_domain/domain_map.rs +++ b/src/cwe_checker_lib/src/abstract_domain/domain_map.rs @@ -98,11 +98,22 @@ where if self == other { self.clone() } else { - DomainMap { - inner: Arc::new(S::merge_map(&self.inner, &other.inner)), - phantom: PhantomData, - } + let mut new_map = self.clone(); + + new_map.merge_with(other); + + new_map + } + } + + fn merge_with(&mut self, other: &Self) -> &mut Self { + if self != other { + let mut_map = Arc::make_mut(&mut self.inner); + + S::merge_map_with(mut_map, &other.inner); } + + self } /// A `DomainMap` is considered to be a `Top` element if it is empty. @@ -111,6 +122,33 @@ where } } +impl Default for DomainMap +where + K: PartialOrd + Ord + Clone, + V: AbstractDomain, + S: MapMergeStrategy + Clone + Eq, +{ + fn default() -> Self { + Self::new() + } +} + +impl DomainMap +where + K: PartialOrd + Ord + Clone, + V: AbstractDomain, + S: MapMergeStrategy + Clone + Eq, +{ + /// Returns a new, empty map into the abstract domain `V`. + /// + /// The semantics of an empty map depend on the use case. Oftentimes + /// non-existent keys will be mapped to the Top, Bottom, or some default + /// element in the target domain. + pub fn new() -> Self { + BTreeMap::new().into() + } +} + /// A `MapMergeStrategy` determines how the merge-method for a [`DomainMap`] works. /// /// The possible strategies are: @@ -118,8 +156,23 @@ where /// * [`IntersectMergeStrategy`] /// * [`MergeTopStrategy`] pub trait MapMergeStrategy { - /// This function determines how two [`DomainMap`] instances are merged as abstract domains. - fn merge_map(map_left: &BTreeMap, map_right: &BTreeMap) -> BTreeMap; + /// This function determines how two [`DomainMap`] instances are merged as + /// abstract domains. + /// + /// # Default + /// + /// Clones the left side and uses [`MapMergeStrategy::merge_map_with`] to + /// combine it with the right side. + fn merge_map(map_left: &BTreeMap, map_right: &BTreeMap) -> BTreeMap { + let mut map = map_left.clone(); + + Self::merge_map_with(&mut map, map_right); + + map + } + + /// Merges `map` with `other` by modifying `map` in-place. + fn merge_map_with(map: &mut BTreeMap, other: &BTreeMap); } /// A [`MapMergeStrategy`] where key-value pairs whose key is only present in one input map @@ -135,45 +188,43 @@ pub struct UnionMergeStrategy { } impl MapMergeStrategy for UnionMergeStrategy { - fn merge_map(map_left: &BTreeMap, map_right: &BTreeMap) -> BTreeMap { - let mut merged_map = map_left.clone(); - for (key, value_right) in map_right.iter() { - merged_map - .entry(key.clone()) + fn merge_map_with(map: &mut BTreeMap, other: &BTreeMap) { + for (key, value_other) in other.iter() { + map.entry(key.clone()) .and_modify(|value| { - *value = value.merge(value_right); + value.merge_with(value_other); }) - .or_insert_with(|| value_right.clone()); + .or_insert_with(|| value_other.clone()); } - merged_map } } -/// A [`MapMergeStrategy`] where the merge function only keeps keys -/// that are present in both input maps. -/// Furthermore, keys whose values are merged to the `Top` value are also removed from the merged map. +/// A [`MapMergeStrategy`] where the merge function only keeps keys that are +/// present in both input maps. +/// +/// Furthermore, keys whose values are merged to the `Top` value are also +/// removed from the merged map. /// -/// The strategy is meant to be used for maps, -/// where keys not present in the map have an implicit `Top` value associated to them. -/// The strategy implicitly assumes -/// that the `Top` value of the value abstract domain is an actual maximal value of the domain. +/// The strategy is meant to be used for maps, where keys not present in the map +/// have an implicit `Top` value associated to them. The strategy implicitly +/// assumes that the `Top` value of the value abstract domain is an actual +/// maximal value of the domain. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] pub struct IntersectMergeStrategy { _private: (), // Marker to prevent instantiation } impl MapMergeStrategy for IntersectMergeStrategy { - fn merge_map(map_left: &BTreeMap, map_right: &BTreeMap) -> BTreeMap { - let mut merged_map = BTreeMap::new(); - for (key, value_left) in map_left.iter() { - if let Some(value_right) = map_right.get(key) { - let merged_value = value_left.merge(value_right); - if !merged_value.is_top() { - merged_map.insert(key.clone(), merged_value); - } - } - } - merged_map + fn merge_map_with(map: &mut BTreeMap, other: &BTreeMap) { + map.retain(|k, value| { + let Some(value_other) = other.get(k) else { + return false; + }; + + value.merge_with(value_other); + + !value.is_top() + }); } } @@ -190,27 +241,29 @@ pub struct MergeTopStrategy { } impl MapMergeStrategy for MergeTopStrategy { - fn merge_map(map_left: &BTreeMap, map_right: &BTreeMap) -> BTreeMap { - let mut merged_map = BTreeMap::new(); - for (var, value_left) in map_left.iter() { - let merged_value = if let Some(value_right) = map_right.get(var) { - value_left.merge(value_right) + fn merge_map_with(map: &mut BTreeMap, other: &BTreeMap) { + map.retain(|key, value| { + if let Some(value_other) = other.get(key) { + value.merge_with(value_other); } else { - value_left.top().merge(value_left) + let top = value.top(); + + value.merge_with(&top); }; - if !merged_value.is_top() { - merged_map.insert(var.clone(), merged_value); - } - } - for (var, value_right) in map_right.iter() { - if map_left.get(var).is_none() { - let merged_value = value_right.top().merge(value_right); + + !value.is_top() + }); + for (k, value_other) in other.iter() { + if map.get(k).is_none() { + let mut merged_value = value_other.top(); + + merged_value.merge_with(value_other); + if !merged_value.is_top() { - merged_map.insert(var.clone(), merged_value); + map.insert(k.clone(), merged_value); } } } - merged_map } } diff --git a/src/cwe_checker_lib/src/abstract_domain/mem_region.rs b/src/cwe_checker_lib/src/abstract_domain/mem_region.rs index 32691b118..942383943 100644 --- a/src/cwe_checker_lib/src/abstract_domain/mem_region.rs +++ b/src/cwe_checker_lib/src/abstract_domain/mem_region.rs @@ -1,6 +1,7 @@ use super::{AbstractDomain, HasTop, SizedDomain}; use crate::intermediate_representation::ByteSize; use crate::prelude::*; +use crate::utils::debug::ToJsonCompact; use apint::{Int, Width}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; @@ -66,6 +67,19 @@ impl HasTop for MemR } } +impl ToJsonCompact for MemRegion +where + T: ToJsonCompact + AbstractDomain + SizedDomain + HasTop + std::fmt::Debug, +{ + fn to_json_compact(&self) -> serde_json::Value { + serde_json::Value::Object( + self.iter() + .map(|(offset, val)| (offset.to_string(), val.to_json_compact())) + .collect(), + ) + } +} + impl MemRegion { /// Create a new, empty memory region. pub fn new(address_bytesize: ByteSize) -> MemRegion { diff --git a/src/cwe_checker_lib/src/abstract_domain/mod.rs b/src/cwe_checker_lib/src/abstract_domain/mod.rs index ee446693a..74122ee5e 100644 --- a/src/cwe_checker_lib/src/abstract_domain/mod.rs +++ b/src/cwe_checker_lib/src/abstract_domain/mod.rs @@ -36,10 +36,32 @@ pub use domain_map::*; /// Each abstract domain is partially ordered. /// Abstract domains of the same type can be merged. pub trait AbstractDomain: Sized + Eq + Clone { - /// Return an upper bound (with respect to the partial order on the domain) for the two inputs `self` and `other`. + /// Returns an upper bound (with respect to the partial order on the domain) + /// for the two inputs `self` and `other`. #[must_use] fn merge(&self, other: &Self) -> Self; + /// Returns an upper bound (with respect to the partial order on the domain) + /// for the two inputs `self` and `other`. + /// + /// Modifies `self` in-place to hold the result. This can be useful in + /// situations where it is not necessary to create a new object and more + /// efficient to modify an existing one in-place. + /// + /// # Default + /// + /// Calls [`AbstractDomain::merge`] on the inputs and overwrites `self` with + /// the result. Does nothing when `self` is equal to `other`. + fn merge_with(&mut self, other: &Self) -> &mut Self { + if self != other { + let new_value = self.merge(other); + + *self = new_value; + } + + self + } + /// Returns whether the element represents the top element (i.e. maximal with respect to the partial order) or not. /// If a domain has no maximal element, this function should always return false. fn is_top(&self) -> bool; diff --git a/src/cwe_checker_lib/src/analysis/interprocedural_fixpoint_generic.rs b/src/cwe_checker_lib/src/analysis/interprocedural_fixpoint_generic.rs index 99e1a5faf..4fb913fe1 100644 --- a/src/cwe_checker_lib/src/analysis/interprocedural_fixpoint_generic.rs +++ b/src/cwe_checker_lib/src/analysis/interprocedural_fixpoint_generic.rs @@ -14,7 +14,7 @@ use crate::prelude::*; /// The interprocedural_flow value will either be transferred from the end of the called subroutine /// to the return site in case of a forward analysis or from the beginning of the called subroutine /// to the callsite in a backward analysis. -#[derive(PartialEq, Eq, Serialize, Deserialize, Clone)] +#[derive(PartialEq, Eq, Serialize, Deserialize, Clone, Debug)] pub enum NodeValue { /// A single abstract value Value(T), diff --git a/src/cwe_checker_lib/src/analysis/pointer_inference/vsa_result_impl.rs b/src/cwe_checker_lib/src/analysis/pointer_inference/vsa_result_impl.rs index 7193c3601..6b20859a7 100644 --- a/src/cwe_checker_lib/src/analysis/pointer_inference/vsa_result_impl.rs +++ b/src/cwe_checker_lib/src/analysis/pointer_inference/vsa_result_impl.rs @@ -50,4 +50,11 @@ impl<'a> VsaResult for PointerInference<'a> { None } } + + fn get_call_renaming_map( + &self, + call: &Tid, + ) -> Option<&BTreeMap> { + self.id_renaming_maps_at_calls.get(call) + } } diff --git a/src/cwe_checker_lib/src/analysis/taint/mod.rs b/src/cwe_checker_lib/src/analysis/taint/mod.rs index 92a34dd01..bc2fb3c23 100644 --- a/src/cwe_checker_lib/src/analysis/taint/mod.rs +++ b/src/cwe_checker_lib/src/analysis/taint/mod.rs @@ -20,6 +20,7 @@ use crate::analysis::{ }; use crate::intermediate_representation::*; use crate::prelude::*; +use crate::utils::debug::ToJsonCompact; use std::convert::AsRef; use std::fmt::Display; @@ -43,6 +44,23 @@ use state::State; /// to many taint analyses. However, you almost certainly want to override some /// of them to implement the custom logic of your analysis. pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef { + /// Called when a transition function mapped the input state to the empty + /// state. + /// + /// This function will be called every time a default transition function + /// maps a (possibly empty) input state to the empty state. Its return value + /// will override the `Some(empty_state)` return value of the transition + /// function. + /// + /// # Default + /// + /// Just returns `None`. This is the desired behavior as long as it is + /// impossible for transition functions to generate taint from an empty + /// state. + fn handle_empty_state_out(&self, _tid: &Tid) -> Option { + None + } + /// Update taint state on a function call without further target information. /// /// # Default @@ -51,7 +69,7 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef fn update_call_generic( &self, state: &State, - _call_tid: &Tid, + call_tid: &Tid, calling_convention_hint: &Option, ) -> Option { let mut new_state = state.clone(); @@ -62,7 +80,11 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef new_state.remove_non_callee_saved_taint(calling_conv); } - Some(new_state) + if new_state.is_empty() { + self.handle_empty_state_out(call_tid) + } else { + Some(new_state) + } } /// Transition function for edges of type [`Call`]. @@ -86,29 +108,76 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef None } + /// Transition function for calls to external functions. + /// + /// # Default + /// + /// Removes taint from non-callee-saved registers. + fn update_extern_call( + &self, + state: &State, + _call: &Term, + project: &Project, + extern_symbol: &ExternSymbol, + ) -> Option { + let mut new_state = state.clone(); + + new_state.remove_non_callee_saved_taint(project.get_calling_convention(extern_symbol)); + + Some(new_state) + } + /// Transition function for edges of type [`ExternCallStub`]. /// /// Corresponds to inter-program calls, i.e., calls to shared libraries. + /// Currently, indirect calls also lead to edges of type [`ExternCallStub`]. + /// If you are only interested in handling calls to library functions + /// consider implementing [`update_extern_call`] instead. /// + /// [`update_extern_call`]: TaintAnalysis::update_extern_call + /// [indirect calls]: crate::intermediate_representation::Jmp::CallInd /// [`ExternCallStub`]: crate::analysis::graph::Edge::ExternCallStub - fn update_call_stub(&self, state: &State, call: &Term) -> Option; + /// + /// # Default + /// + /// Remove taint from non-callee-saved registers. + fn update_call_stub(&self, state: &State, call: &Term) -> Option { + match &call.term { + Jmp::Call { target, .. } => { + let project = >::as_ref(self); + let extern_symbol = project + .program + .term + .extern_symbols + .get(target) + .expect("TA: BUG: Unable to find extern symbol for call."); + + match self.update_extern_call(state, call, project, extern_symbol) { + Some(new_state) if new_state.is_empty() => { + self.handle_empty_state_out(&call.tid) + } + new_state_option => new_state_option, + } + } + Jmp::CallInd { .. } => self.update_call_generic(state, &call.tid, &None), + _ => panic!("TA: BUG: Malformed control flow graph encountered."), + } + } /// Returns the new taint state after a jump. /// /// # Default /// - /// Clones the state before the jump, or returns `None` if the state is - /// empty. + /// Clones the state before the jump. fn update_jump( &self, state: &State, - _jump: &Term, + jump: &Term, _untaken_conditional: Option<&Term>, _target: &Term, ) -> Option { if state.is_empty() { - // Without taint there is nothing to propagate. - None + self.handle_empty_state_out(&jump.tid) } else { Some(state.clone()) } @@ -121,8 +190,8 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef /// the table in [`update_return`]. The `state` parameter corresponds to /// the taint state at the return sites of the called subroutine. /// - /// By implementing this method you can perform a **limited** - /// interprocedural taint analysis: + /// By implementing this method you can perform an interprocedural taint + /// analysis: /// - If you return `Some(state)` you may influence the taint /// state in the caller (see the documentation of [`update_return`] for /// more information), by having it be [`merged`] into the state coming @@ -135,17 +204,6 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef /// [`update_return`]: TaintAnalysis::update_return /// [`merged`]: State::merge /// - /// # Limitations - /// - /// The interprocedural analysis capabilities are limited since it is - /// forbidden to propagate memory taint. Memory objects from the callee are - /// meaningless in the caller and thus any state returned by this method - /// **MUST** have an empty memory taint, i.e., `State::has_memory_taint` - /// must evaluate to `false`. Future work may lift this requirement. - // FIXME: Add translation of memory objects to default implementation. - /// - /// It is possible to propagate taint in registers. - /// /// # Default /// /// Returns an empty state, i.e., information is propagated through the call @@ -190,27 +248,25 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef return_term: &Term, calling_convention: &Option, ) -> Option { - match (state_before_call, state_before_return) { + let new_state = match (state_before_call, state_before_return) { (Some(state_before_call), Some(state_before_return)) => { let state_from_caller = self.update_call_generic(state_before_call, &call_term.tid, calling_convention); - let state_from_callee = self - .update_return_callee( - state_before_return, - call_term, - return_term, - calling_convention, - ) - .inspect(|s| { - assert!( - !s.has_memory_taint(), - "TA: BUG: `update_return_callee` returned memory taint." - ) - }); + let state_from_callee = self.update_return_callee( + state_before_return, + call_term, + return_term, + calling_convention, + ); match (state_from_caller, state_from_callee) { - (Some(state_caller), Some(state_callee)) => { - Some(state_caller.merge(&state_callee)) + (Some(mut state_caller), Some(state_callee)) => { + state_caller.merge_with_renaming( + &state_callee, + self.vsa_result().get_call_renaming_map(&call_term.tid), + ); + + Some(state_caller) } // If one implementation indicated that no information // should be propagated by returning `None` we ignore what @@ -228,13 +284,28 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef return_term, calling_convention, ) - .inspect(|s| { - assert!( - !s.has_memory_taint(), - "TA: BUG: `update_return_callee` returned memory taint." - ) + .map(|state_callee| { + let mut dummy_caller_state = State::new_empty(); + + dummy_caller_state.merge_with_renaming( + &state_callee, + self.vsa_result().get_call_renaming_map(&call_term.tid), + ); + + dummy_caller_state }), _ => None, + }; + + match new_state { + Some(state) => { + if state.is_empty() { + self.handle_empty_state_out(&return_term.tid) + } else { + Some(state) + } + } + None => None, } } @@ -331,9 +402,13 @@ pub trait TaintAnalysis<'a>: HasCfg<'a> + HasVsaResult + AsRef &self, _old_state: &State, new_state: State, - _def: &Term, + def: &Term, ) -> Option { - Some(new_state) + if new_state.is_empty() { + self.handle_empty_state_out(&def.tid) + } else { + Some(new_state) + } } } @@ -383,11 +458,6 @@ impl<'a, T: TaintAnalysis<'a>> forward_interprocedural_fixpoint::Context<'a> for } fn update_def(&self, state: &Self::Value, def: &Term) -> Option { - if state.is_empty() { - // Without taint there is nothing to propagate. - return None; - } - let new_state = match &def.term { Def::Assign { var, value } => self.update_def_assign(state, &def.tid, var, value), Def::Load { var, address } => self.update_def_load(state, &def.tid, var, address), @@ -452,6 +522,20 @@ impl AbstractDomain for Taint { } } + /// Replaces `self` with `other` iff `self` is untainted and `other` is + /// tainted. + /// + /// No change to `self` is required in the other cases. + fn merge_with(&mut self, other: &Self) -> &mut Self { + use Taint::*; + + if let (Top(_), Tainted(_)) = (&self, other) { + *self = *other; + }; + + self + } + /// Checks whether the value is an untainted `Top`-value. fn is_top(&self) -> bool { matches!(self, Taint::Top(_)) @@ -515,6 +599,12 @@ impl RegisterDomain for Taint { } } +impl ToJsonCompact for Taint { + fn to_json_compact(&self) -> serde_json::Value { + serde_json::json!(self.to_string()) + } +} + impl Taint { /// Checks whether the given value is in fact tainted. pub fn is_tainted(&self) -> bool { diff --git a/src/cwe_checker_lib/src/analysis/taint/state.rs b/src/cwe_checker_lib/src/analysis/taint/state.rs index 0afdf702c..2b2a9bc39 100644 --- a/src/cwe_checker_lib/src/analysis/taint/state.rs +++ b/src/cwe_checker_lib/src/analysis/taint/state.rs @@ -5,29 +5,61 @@ use crate::abstract_domain::AbstractLocation; use crate::abstract_domain::{ - AbstractDomain, AbstractIdentifier, MemRegion, RegisterDomain, SizedDomain, TryToBitvec, + AbstractDomain, AbstractIdentifier, IntervalDomain, MemRegion, RegisterDomain, SizedDomain, + TryToBitvec, }; use crate::analysis::graph::NodeIndex; use crate::analysis::pointer_inference::Data as PiData; use crate::analysis::vsa_results::VsaResult; use crate::intermediate_representation::*; use crate::prelude::*; +use crate::utils::debug::ToJsonCompact; -use std::collections::HashMap; +use std::collections::BTreeMap; use super::Taint; +mod memory_taint; +mod register_taint; #[cfg(test)] mod tests; +use memory_taint::MemoryTaint; +use register_taint::RegisterTaint; + /// The state object of the taint analysis representing all known tainted memory /// and register values at a certain location within the program. #[derive(Serialize, Deserialize, Debug, Eq, Clone)] pub struct State { /// The set of currently tainted registers. - register_taint: HashMap, + register_taint: RegisterTaint, /// The Taint contained in memory objects - memory_taint: HashMap>, + memory_taint: MemoryTaint, +} + +impl ToJsonCompact for State { + fn to_json_compact(&self) -> serde_json::Value { + let mut state_map = serde_json::Map::new(); + + let register_taint = self + .register_taint + .iter() + .map(|(reg, taint)| (reg.name.clone(), taint.to_json_compact())) + .collect(); + let register_taint = serde_json::Value::Object(register_taint); + + let memory_taint = self + .memory_taint + .iter() + .map(|(mem_id, mem_region)| (mem_id.to_string(), mem_region.to_json_compact())) + .collect(); + let memory_taint = serde_json::Value::Object(memory_taint); + + state_map.insert("registers".into(), register_taint); + state_map.insert("memory".into(), memory_taint); + + serde_json::Value::Object(state_map) + } } impl PartialEq for State { @@ -42,41 +74,19 @@ impl AbstractDomain for State { /// /// Any value tainted in at least one input state is also tainted in the /// merged state. - // FIXME: The used algorithm for merging the taints contained in memory - // regions is unsound when merging taints that intersect only partially. - // If, for example, in state A `RSP[0:3]` is tainted and in state B - // `RSP[2:3]` is tainted, A u B will only report `RSP[2:3]` as tainted. - // - // For the NULL pointer dereference check, however, this should not have an - // effect in practice, since these values are usually unsound or a sign of - // incorrect behavior of the analysed program. fn merge(&self, other: &Self) -> Self { - let mut register_taint = self.register_taint.clone(); - for (var, other_taint) in other.register_taint.iter() { - if let Some(taint) = self.register_taint.get(var) { - register_taint.insert(var.clone(), taint.merge(other_taint)); - } else { - register_taint.insert(var.clone(), *other_taint); - } - } + let mut new_state = self.clone(); - let mut memory_taint = self.memory_taint.clone(); - for (tid, other_mem_region) in other.memory_taint.iter() { - if let Some(mem_region) = memory_taint.get_mut(tid) { - for (index, taint) in other_mem_region.iter() { - mem_region.insert_at_byte_index(*taint, *index); - // FIXME: Unsound in theory for partially intersecting - // taints. Should not matter in practice. - } - } else { - memory_taint.insert(tid.clone(), other_mem_region.clone()); - } - } + new_state.merge_with(other); - State { - register_taint, - memory_taint, - } + new_state + } + + fn merge_with(&mut self, other: &Self) -> &mut Self { + self.register_taint.merge_with(&other.register_taint); + self.memory_taint.merge_with(&other.memory_taint); + + self } /// The state has no explicit Top element. @@ -88,11 +98,9 @@ impl AbstractDomain for State { impl State { /// Returns an empty state. pub fn new_empty() -> Self { - // Using capacity zero guarantees that the implementation will not - // allocate. Do that to keep the cost of creating a new state low. Self { - register_taint: HashMap::with_capacity(0), - memory_taint: HashMap::with_capacity(0), + register_taint: RegisterTaint::new(), + memory_taint: MemoryTaint::new(), } } @@ -103,8 +111,8 @@ impl State { return_node: NodeIndex, ) -> Self { let mut state = Self { - register_taint: HashMap::new(), - memory_taint: HashMap::new(), + register_taint: RegisterTaint::new(), + memory_taint: MemoryTaint::new(), }; for return_arg in taint_source.return_values.iter() { @@ -180,10 +188,15 @@ impl State { /// Mark the value at the given address with the given taint. /// /// If the address may point to more than one object, we merge the taint - /// object with the object at the targets, possibly tainting all possible - /// targets. + /// into all objects for which the corresponding offset is exact. Since we + /// merge, this will never remove any taint. + /// + /// If the pointee object and offset are exactly known, we write the + /// `taint` to the object at the given offset. This may remove taint. + /// + /// In all other cases we do nothing. pub fn save_taint_to_memory(&mut self, address: &PiData, taint: Taint) { - if let Some((mem_id, offset)) = address.get_if_unique_target() { + if let Some((mem_id, offset)) = get_if_unique_target(address) { if let Ok(position) = offset.try_to_bitvec() { if let Some(mem_region) = self.memory_taint.get_mut(mem_id) { mem_region.add(taint, position); @@ -211,7 +224,7 @@ impl State { /// Remove all knowledge about taints contained in memory objects. pub fn remove_all_memory_taints(&mut self) { - self.memory_taint = HashMap::new(); + self.memory_taint = MemoryTaint::new(); } /// Set the taint of a register. @@ -223,6 +236,14 @@ impl State { } } + /// Returns the taint state of the given register. + pub fn get_register_taint(&self, register: &Variable) -> Taint { + self.register_taint + .get(register) + .copied() + .unwrap_or(Taint::Top(register.size)) + } + /// Returns true if the memory object with the given ID contains a tainted /// value. pub fn check_mem_id_for_taint(&self, id: &AbstractIdentifier) -> bool { @@ -405,7 +426,14 @@ impl State { /// Check whether `self` contains any taint at all. pub fn is_empty(&self) -> bool { - !self.has_memory_taint() && self.register_taint.is_empty() + !self.has_memory_taint() && !self.has_register_taint() + } + + /// Check whether there are any tainted registers in the state. + pub fn has_register_taint(&self) -> bool { + self.register_taint + .iter() + .any(|(_, taint)| matches!(*taint, Taint::Tainted(_))) } /// Check whether there is any tainted memory in the state. @@ -418,6 +446,81 @@ impl State { .flat_map(|(_, mem_region)| mem_region.iter()) .any(|(_, taint)| taint.is_tainted()) } + + /// Merges the given `other` state into this state with renaming of abstract + /// identifiers. + /// + /// The set of valid abstract identfiers (aIDs) is local to a given + /// function. When merging states across function boundaries it is necessary + /// to map aIDs into the set of valid aIDs in the target context before + /// performing the merging. + /// + /// This function assumes that the target context is the one of `self` and + /// that `renaming_map` specifies how valid aIDs in the context of `other` + /// correspond to the aIDs of this context. + pub fn merge_with_renaming( + &mut self, + other: &Self, + renaming_map: Option<&BTreeMap>, + ) { + let Self { + register_taint: other_register_taint, + memory_taint: other_memory_taint, + } = other; + + // Naive merging works for register taint. + self.register_taint.merge_with(other_register_taint); + + let Some(renaming_map) = renaming_map else { + // Without a renaming rule we can not do anything meaningful with + // the memory objects of the other state, i.e., we are done here. + return; + }; + + for (other_aid, other_memory_object) in other_memory_taint.iter() { + let Some(value) = renaming_map.get(other_aid) else { + // The pointer inference decided that this object is not + // referenced in the context of `self`, so no need to merge it. + continue; + }; + // There is more information in `value` that we could base our + // decision on; however, + // - we decide to ignore whether the `value` may be absolute in the + // context of `self`. This is not important for taint analyses. + // - in cases where it may be some unknown base + offset it is still + // worth handling the bases that we know about. + for (aid, offset_interval) in value.get_relative_values() { + let Ok(offset) = offset_interval.try_to_offset() else { + // The offset of the old memory object into the new one is + // not known exactly. At this point we could merge the old + // object at every possible offset (sound) or not merge at + // all (unsound). + // + // Depending on the analysis it will lead to more FP + // (CWE252) or FN (CWE476); on the upside, we have to track + // less state and are faster. + continue; + }; + + // Starts tracking the object if it does not exist. + self.memory_taint + .merge_memory_object_with_offset(aid, other_memory_object, offset); + } + } + } + + /// Deconstructs a `State` into its register and memory taint maps. + pub fn into_mem_reg_taint(self) -> (RegisterTaint, MemoryTaint) { + (self.register_taint, self.memory_taint) + } + + /// Constructs a `State` from register and memory taint maps. + pub fn from_mem_reg_taint(register_taint: RegisterTaint, memory_taint: MemoryTaint) -> Self { + Self { + register_taint, + memory_taint, + } + } } impl State { @@ -450,3 +553,17 @@ impl State { Value::Object(Map::from_iter(state_map)) } } + +/// Returns target ID and offset iff there is a single relative value. +/// +/// In contrast to `DataDomain::get_if_unique_target` this function also +/// returns the pair when the `is_top` flag is set or the value may be absolute. +fn get_if_unique_target(address: &PiData) -> Option<(&AbstractIdentifier, &IntervalDomain)> { + let relative_values = address.get_relative_values(); + + if relative_values.len() == 1 { + Some(relative_values.iter().next().unwrap()) + } else { + None + } +} diff --git a/src/cwe_checker_lib/src/analysis/taint/state/memory_taint.rs b/src/cwe_checker_lib/src/analysis/taint/state/memory_taint.rs new file mode 100644 index 000000000..c780c08b9 --- /dev/null +++ b/src/cwe_checker_lib/src/analysis/taint/state/memory_taint.rs @@ -0,0 +1,218 @@ +//! Tracking of taint in memory. + +use crate::abstract_domain::{AbstractIdentifier, DomainMap, MapMergeStrategy, MemRegion}; + +use super::Taint; + +use std::collections::BTreeMap; + +/// Strategy for merging two memory taint states. +/// +/// Essentially a [`UnionMergeStrategy`], i.e., the set of keys is the union of +/// the individual key sets, but we do not use the merging provided by the +/// [`MemRegion`] type on the intersection. Instead, we implement our own +/// merging of `MemRegion` in [`merge_memory_object_with_offset`]. +/// +/// [`UnionMergeStrategy`]: crate::abstract_domain::UnionMergeStrategy +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct MemoryTaintMergeStrategy { + _private: (), // Marker to prevent instantiation +} + +impl MapMergeStrategy> for MemoryTaintMergeStrategy { + fn merge_map_with( + memory_taint: &mut BTreeMap>, + other_memory_taint: &BTreeMap>, + ) { + for (aid, other_memory_object) in other_memory_taint.iter() { + memory_taint + .entry(aid.clone()) + .and_modify(|memory_object| { + merge_memory_object_with_offset(memory_object, other_memory_object, 0); + }) + .or_insert_with(|| other_memory_object.clone()); + } + } +} + +/// Represents our knowledge about taint in memory at a particular point in the +/// program. +pub type MemoryTaint = DomainMap, MemoryTaintMergeStrategy>; + +impl MemoryTaint { + /// Merges the given pair of abstract identifier and memory object into the + /// state. + pub fn merge_memory_object_with_offset( + &mut self, + aid: &AbstractIdentifier, + other_memory_object: &MemRegion, + offset: i64, + ) { + use std::collections::btree_map::Entry::*; + + match self.entry(aid.clone()) { + Occupied(mut current_memory_object) => { + let current_memory_object = current_memory_object.get_mut(); + + merge_memory_object_with_offset(current_memory_object, other_memory_object, offset); + } + Vacant(entry) => { + let mut new_memory_object = other_memory_object.clone(); + + new_memory_object.add_offset_to_all_indices(offset); + entry.insert(new_memory_object); + } + } + } +} + +// FIXME: The used algorithm for merging the taints contained in memory +// regions is unsound when merging taints that intersect only partially. +// If, for example, in state A `RSP[0:3]` is tainted and in state B +// `RSP[2:3]` is tainted, A u B will only report `RSP[2:3]` as tainted. +// +// For the NULL pointer dereference check, however, this should not have an +// effect in practice, since these values are usually unsound or a sign of +// incorrect behavior of the analysed program. +// FIXME: This looks a lot like it should be a method on `MemRegion`. +/// Merges `other_memory_object` into `memory_object`. +/// +/// The `other_memory_object` is shifted by `offset` before the merging is +/// performed. For partially overlapping taints, the value of +/// `other_memory_object` always "wins", i.e., it ends up in the merged object +/// and the overlapped value in `memory_object` is discarded. +fn merge_memory_object_with_offset( + memory_object: &mut MemRegion, + other_memory_object: &MemRegion, + offset: i64, +) { + for (index, taint) in other_memory_object.iter() { + // WARNING: This is not using the `merge` function of `Taint`. It + // will become even more incorrect once we have more complicated taint. + memory_object.insert_at_byte_index(*taint, *index + offset); + // FIXME: Unsound in theory for partially intersecting + // taints. Should not matter in practice. + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::prelude::*; + + // FIXME: This illustrates the current, unsound merging of memory taints. Make + // sure to change this test when you work on a better memory model. + #[test] + fn merge_memory_object_overlapping() { + let taint_8 = Taint::Tainted(ByteSize::new(8)); + let taint_4 = Taint::Tainted(ByteSize::new(4)); + + let mut memory_object = MemRegion::::new(ByteSize::new(8)); + let mut other_memory_object = MemRegion::::new(ByteSize::new(8)); + + memory_object.insert_at_byte_index(taint_4, 2); + memory_object.insert_at_byte_index(taint_8, 8); + memory_object.insert_at_byte_index(taint_4, 16); + other_memory_object.insert_at_byte_index(taint_8, 0); + other_memory_object.insert_at_byte_index(taint_4, 8); + other_memory_object.insert_at_byte_index(taint_4, 14); + + merge_memory_object_with_offset(&mut memory_object, &other_memory_object, 0); + + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(0)), + Some(taint_8) + ); + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(2)), None); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(8)), + Some(taint_4) + ); + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(12)), None); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(14)), + Some(taint_4) + ); + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(16)), None); + } + + #[test] + fn merge_memory_object_nonoverlapping() { + let taint_8 = Taint::Tainted(ByteSize::new(8)); + let taint_4 = Taint::Tainted(ByteSize::new(4)); + let untaint_4 = Taint::Top(ByteSize::new(4)); + + let mut memory_object = MemRegion::::new(ByteSize::new(8)); + let mut other_memory_object = MemRegion::::new(ByteSize::new(8)); + + memory_object.insert_at_byte_index(taint_8, 8); + other_memory_object.insert_at_byte_index(untaint_4, 0); + other_memory_object.insert_at_byte_index(taint_4, 4); + other_memory_object.insert_at_byte_index(untaint_4, 8); + + merge_memory_object_with_offset(&mut memory_object, &other_memory_object, 0); + + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(0)), None); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(4)), + Some(taint_4) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(8)), + Some(taint_8) + ); + } + + #[test] + fn merge_memory_object_with_offset_nonoverlapping() { + let taint_8 = Taint::Tainted(ByteSize::new(8)); + let taint_4 = Taint::Tainted(ByteSize::new(4)); + + let mut memory_object = MemRegion::::new(ByteSize::new(8)); + let mut other_memory_object = MemRegion::::new(ByteSize::new(8)); + + memory_object.insert_at_byte_index(taint_8, 8); + other_memory_object.insert_at_byte_index(taint_4, 8); + other_memory_object.insert_at_byte_index(taint_4, 12); + + merge_memory_object_with_offset(&mut memory_object, &other_memory_object, 8); + + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(0)), None); + assert_eq!(memory_object.get_unsized(Bitvector::from_i64(4)), None); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(8)), + Some(taint_8) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(16)), + Some(taint_4) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(20)), + Some(taint_4) + ); + + merge_memory_object_with_offset(&mut memory_object, &other_memory_object, -8); + + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(0)), + Some(taint_4) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(4)), + Some(taint_4) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(8)), + Some(taint_8) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(16)), + Some(taint_4) + ); + assert_eq!( + memory_object.get_unsized(Bitvector::from_i64(20)), + Some(taint_4) + ); + } +} diff --git a/src/cwe_checker_lib/src/analysis/taint/state/register_taint.rs b/src/cwe_checker_lib/src/analysis/taint/state/register_taint.rs new file mode 100644 index 000000000..f37767bf9 --- /dev/null +++ b/src/cwe_checker_lib/src/analysis/taint/state/register_taint.rs @@ -0,0 +1,10 @@ +//! Tracking of taint in registers. + +use crate::abstract_domain::{DomainMap, UnionMergeStrategy}; +use crate::intermediate_representation::Variable; + +use super::Taint; + +/// Represents our knowledge about taint in registers at a particular point in +/// the program. +pub type RegisterTaint = DomainMap; diff --git a/src/cwe_checker_lib/src/analysis/taint/state/tests.rs b/src/cwe_checker_lib/src/analysis/taint/state/tests.rs index 7eb48903e..b8c284624 100644 --- a/src/cwe_checker_lib/src/analysis/taint/state/tests.rs +++ b/src/cwe_checker_lib/src/analysis/taint/state/tests.rs @@ -1,14 +1,16 @@ use super::*; + use crate::analysis::graph::NodeIndex; use crate::analysis::pointer_inference::{tests::MockVsaResult, State as PiState, ValueDomain}; use crate::{abstract_domain::*, expr, variable}; + use std::collections::BTreeSet; impl State { pub fn mock() -> State { State { - register_taint: HashMap::new(), - memory_taint: HashMap::new(), + register_taint: RegisterTaint::new(), + memory_taint: MemoryTaint::new(), } } diff --git a/src/cwe_checker_lib/src/analysis/vsa_results/mod.rs b/src/cwe_checker_lib/src/analysis/vsa_results/mod.rs index d8b8b81aa..18b029df9 100644 --- a/src/cwe_checker_lib/src/analysis/vsa_results/mod.rs +++ b/src/cwe_checker_lib/src/analysis/vsa_results/mod.rs @@ -1,11 +1,13 @@ //! This module provides the [`VsaResult`] trait //! which defines an interface for the results of analyses similar to a value set analysis. -use crate::abstract_domain::AbstractLocation; +use crate::abstract_domain::{AbstractIdentifier, AbstractLocation}; use crate::analysis::graph::NodeIndex; use crate::intermediate_representation::{Arg, Expression}; use crate::prelude::*; +use std::collections::BTreeMap; + /// Trait for types that provide access to the result of a value set analysis. /// /// The generic type parameter can be used to implement this trait multiple @@ -59,4 +61,13 @@ pub trait VsaResult { /// Evaluate the given expression at the given node of the graph that the /// value set analysis was computed on. fn eval_at_node(&self, node: NodeIndex, expression: &Expression) -> Option; + + /// Returns the mapping of abstract identfiers in the callee to values in + /// the caller for the given call. + fn get_call_renaming_map( + &self, + _call: &Tid, + ) -> Option<&BTreeMap> { + None + } } diff --git a/src/cwe_checker_lib/src/checkers.rs b/src/cwe_checker_lib/src/checkers.rs index b1ae64374..2bcb57f16 100644 --- a/src/cwe_checker_lib/src/checkers.rs +++ b/src/cwe_checker_lib/src/checkers.rs @@ -6,8 +6,9 @@ //! See there for detailed information about this check. /// Checkers that are supported for Linux kernel modules. -pub const MODULES_LKM: [&str; 9] = [ - "CWE134", "CWE190", "CWE215", "CWE416", "CWE457", "CWE467", "CWE476", "CWE676", "CWE789", +pub const MODULES_LKM: [&str; 10] = [ + "CWE134", "CWE190", "CWE215", "CWE252", "CWE416", "CWE457", "CWE467", "CWE476", "CWE676", + "CWE789", ]; pub mod cwe_119; @@ -15,6 +16,7 @@ pub mod cwe_134; pub mod cwe_190; pub mod cwe_215; pub mod cwe_243; +pub mod cwe_252; pub mod cwe_332; pub mod cwe_337; pub mod cwe_367; diff --git a/src/cwe_checker_lib/src/checkers/cwe_252.rs b/src/cwe_checker_lib/src/checkers/cwe_252.rs new file mode 100644 index 000000000..45d2cc824 --- /dev/null +++ b/src/cwe_checker_lib/src/checkers/cwe_252.rs @@ -0,0 +1,285 @@ +//! CWE-252: Unchecked Return Value. +//! +//! It is a common programming pattern that called procedures indicate their +//! success or failure to the caller via their return value. If a caller does +//! not check the return value of a called procedure they can not know if the +//! operation was successful or not. This may lead to bugs with security +//! implications when the program resumes execution under the assumption that +//! the failed call has worked. +//! +//! # Examples +//! +//! See the following CVEs for examples in Linux user-mode programs: +//! +//! - CVE-2007-5191 +//! - CVE-2017-6964 +//! - CVE-2018-16643 +//! - CVE-2019-15900 +//! - CVE-2023-40303 +//! +//! Also see [CWE252 at Mitre]. +//! +//! [CWE252 at Mitre]: https://cwe.mitre.org/data/definitions/252.html +//! +//! # Algorithm +//! +//! We perform a taint analysis where the sources are return values of calls to +//! external functions and the sinks are: +//! +//! - Places where all taint vanishes from the state. Here, the program losses +//! all information about success of failure of the API call; thus, it can not +//! possibly adapt its behavior in the subsequent execution. +//! - Taint reaches a return site of a function without any taint being returned +//! to the caller. Here, the caller of the function cannot know if the API +//! call was successful. +//! +//! Taint propagation is stopped along paths as soon as a conditional control +//! flow transfer depends on a tainted value. +//! +//! # Limitations +//! +//! ## False Positives +//! +//! - For many API functions the necessity to check the return value depends on +//! the context of the caller. +//! - Cases where the result is checked before it is assumed that the operation +//! has worked, but there are paths from the call site that are correct +//! irrespective of the success or failure of the call. +//! - Patterns where the return value is handled by giving it as an argument to +//! another function call. +//! - Taint loss due to Pointer Inference inexactness may cause the return value +//! check to not be recognized as such. +//! +//! ## False Negatives +//! +//! - Return value is checked but the program does not act accordingly. +//! - The API function is not in the list of checked functions. +//! +//! # Configuration +//! +//! The list of checked external functions can be configured via the +//! `config.json`. By selecting the `strict_mode` additional functions can be +//! included, however, those are more likely to produce false positives. + +use crate::analysis::graph::{Edge, NodeIndex}; +use crate::analysis::pointer_inference::PointerInference; +use crate::intermediate_representation::{ExternSymbol, Jmp, Project, Term}; +use crate::pipeline::AnalysisResults; +use crate::prelude::*; +use crate::utils::log::{CweWarning, LogMessage}; +use crate::utils::symbol_utils; +use crate::CweModule; + +use petgraph::visit::EdgeRef; + +use std::collections::{BTreeMap, HashSet, VecDeque}; +use std::sync::Arc; + +mod context; +mod isolated_returns; + +use context::*; +use isolated_returns::*; + +/// CWE-252: Unchecked Return Value. +pub static CWE_MODULE: CweModule = CweModule { + name: "CWE252", + version: "0.1", + run: check_cwe, +}; + +/// Configuration of the check; this is read from the `config.json` file. +#[derive(Deserialize)] +struct Config { + strict_mode: bool, + /// External symbols whose return values must be checked. + symbols: HashSet, + /// Additional symbols that are only checked when we run in strict mode. + strict_symbols: HashSet, +} + +impl Config { + fn into_symbols(mut self) -> HashSet { + if self.strict_mode { + self.symbols.extend(self.strict_symbols); + } + + self.symbols + } +} + +/// Call whose return value must be checked. +#[derive(Clone, Copy)] +struct MustUseCall<'a> { + /// Information about the function that was called. + symbol: &'a ExternSymbol, + /// CFG node where the call will return to. + return_node: NodeIndex, + /// IR instruction of the call. + jmp: &'a Term, +} + +impl MustUseCall<'_> { + /// Returns a copy of the name of the external function that was called. + pub fn get_symbol_name(&self) -> String { + self.symbol.name.clone() + } +} + +/// List of calls to analyze. +struct Worklist<'a> { + /// Remaining calls to analyze. + calls: VecDeque>, +} + +impl<'a> Worklist<'a> { + /// Creates a new worklist of external function calls to analyze. + /// + /// Searches the program for calls to the functions specified in the + /// `config` and gathers them into a new worklist. + fn new(analysis_results: &'a AnalysisResults, symbols: &HashSet) -> Self { + let symbol_map = symbol_utils::get_symbol_map_fast(analysis_results.project, symbols); + let cfg = analysis_results + .pointer_inference + .expect("CWE252: BUG: No pointer inference results.") + .get_graph(); + + Worklist { + calls: cfg + .edge_references() + .filter_map(|edge| { + let Edge::ExternCallStub(jmp) = edge.weight() else { + return None; + }; + let Jmp::Call { target, .. } = &jmp.term else { + return None; + }; + let return_node = edge.target(); + Some(MustUseCall { + symbol: symbol_map.get(target)?, + return_node, + jmp, + }) + }) + .collect(), + } + } +} + +/// Represents the full CWE252 analysis of a given project. +struct CweAnalysis<'a, 'b: 'a> { + /// Remaining calls to external functions that need to be analyzed. + worklist: Worklist<'a>, + isolated_returns: Arc>, + project: &'a Project, + pi_result: &'a PointerInference<'b>, + /// Used to collect CWE warnings sent by the analyses for inidividual calls. + cwe_collector: crossbeam_channel::Receiver, + /// Given to analyses for inidividual calls to send their CWE warnings. + cwe_sender_proto: crossbeam_channel::Sender, +} + +impl<'a, 'b: 'a> CweAnalysis<'a, 'b> { + /// Creates a new CWE252 analysis for the given project and configuration. + fn new(analysis_results: &'a AnalysisResults<'b>, config: Config) -> Self { + let channel = crossbeam_channel::unbounded(); + let cfg = analysis_results.control_flow_graph; + + Self { + worklist: Worklist::new(analysis_results, &config.into_symbols()), + isolated_returns: Arc::new(get_isolated_returns(cfg)), + project: analysis_results.project, + pi_result: analysis_results.pointer_inference.unwrap(), + cwe_collector: channel.1, + cwe_sender_proto: channel.0, + } + } + + /// Pops a call of the worklist and returns the taint analysis definition + /// for it. + fn next_call_ctx( + &mut self, + ) -> Option<(IsolatedReturnAnalysis<'_>, TaComputationContext<'_, 'b>)> { + self.worklist.calls.pop_front().map(|call| { + ( + IsolatedReturnAnalysis::new( + call, + Arc::clone(&self.isolated_returns), + self.project, + self.cwe_sender_proto.clone(), + ), + TaComputationContext::new( + call, + self.project, + self.pi_result, + &self.cwe_sender_proto, + ), + ) + }) + } + + /// Runs the CWE252 analysis and returns the generated warnings. + fn run(mut self) -> (Vec, Vec) { + while let Some((isolated_returns, ta_comp_ctx)) = self.next_call_ctx() { + let mut ta_comp = ta_comp_ctx.into_computation(); + + ta_comp.compute_with_max_steps(100); + + isolated_returns.analyze(&ta_comp); + } + + ( + Vec::new(), + self.cwe_collector + .try_iter() + // FIXME: It would be nice to preerve all reasons during + // deduplication. + .map(|msg| (msg.tids.clone(), msg)) + .collect::>() + .into_values() + .collect(), + ) + } +} + +fn generate_cwe_warning( + sender: &crossbeam_channel::Sender, + call: &MustUseCall<'_>, + warning_location: &Tid, + reason: &str, +) { + let taint_source = call.jmp; + let taint_source_name = call.get_symbol_name(); + let cwe_warning = CweWarning::new( + CWE_MODULE.name, + CWE_MODULE.version, + format!( + "(Unchecked Return Value) There is no check of the return value of {} ({}).", + taint_source.tid.address, taint_source_name + ), + ) + .addresses(vec![ + taint_source.tid.address.clone(), + warning_location.address.clone(), + ]) + .tids(vec![ + format!("{}", taint_source.tid), + format!("{}", warning_location), + ]) + .symbols(vec![taint_source_name]) + .other(vec![vec![format!("reason={}", reason.to_string())]]); + sender + .send(cwe_warning) + .expect("CWE252: failed to send CWE warning"); +} + +/// CWE-252: Unchecked Return Value. +pub fn check_cwe( + analysis_results: &AnalysisResults, + cwe_params: &serde_json::Value, +) -> (Vec, Vec) { + let config: Config = + serde_json::from_value(cwe_params.clone()).expect("CWE252: invalid configuration"); + + CweAnalysis::new(analysis_results, config).run() +} diff --git a/src/cwe_checker_lib/src/checkers/cwe_252/context.rs b/src/cwe_checker_lib/src/checkers/cwe_252/context.rs new file mode 100644 index 000000000..72586511e --- /dev/null +++ b/src/cwe_checker_lib/src/checkers/cwe_252/context.rs @@ -0,0 +1,331 @@ +//! Definition of the Taint Analysis for CWE252. +//! +//! Implementation of the [`TaintAnalysis`] trait for this CWE check. See the +//! module documentation for more details on the algorithm and its limitations. + +use super::MustUseCall; + +use crate::analysis::fixpoint; +use crate::analysis::forward_interprocedural_fixpoint::{ + self, create_computation as fwd_fp_create_computation, +}; +use crate::analysis::graph::{Graph as Cfg, HasCfg}; +use crate::analysis::interprocedural_fixpoint_generic::NodeValue; +use crate::analysis::pointer_inference::{Data as PiData, PointerInference}; +use crate::analysis::taint::state::State as TaState; +use crate::analysis::taint::TaintAnalysis; +use crate::analysis::vsa_results::{HasVsaResult, VsaResult}; +use crate::intermediate_representation::{Blk, ExternSymbol, Jmp, Project, Term, Tid}; +use crate::utils::debug::ToJsonCompact; +use crate::utils::log::CweWarning; + +use std::convert::AsRef; + +/// Type of the fixpoint computation of the taint analysis. +pub type FpComputation<'a, 'b> = fixpoint::Computation< + forward_interprocedural_fixpoint::GeneralizedContext<'a, TaComputationContext<'a, 'b>>, +>; + +impl ToJsonCompact for FpComputation<'_, '_> { + fn to_json_compact(&self) -> serde_json::Value { + let graph = self.get_graph(); + let mut json_nodes = serde_json::Map::new(); + + for (node_index, node_value) in self.node_values().iter() { + let node = graph.node_weight(*node_index).unwrap(); + + if let NodeValue::Value(value) = node_value { + json_nodes.insert(format!("{node}"), value.to_json_compact()); + } + } + + serde_json::Value::Object(json_nodes) + } +} + +/// Type that represents the definition of the taint analysis. +/// +/// Values of this type represent the taint analysis for a particular call to an +/// external function. +pub struct TaComputationContext<'a, 'b: 'a> { + /// Extern function call that is analyzed. + call: MustUseCall<'a>, + project: &'a Project, + pi_result: &'a PointerInference<'b>, + /// Used to send generated CWE warnings to the collector. + cwe_sender: crossbeam_channel::Sender, +} + +impl<'a, 'b: 'a> TaComputationContext<'a, 'b> { + /// Creates a new taint analysis context for the given call to an external + /// function. + pub(super) fn new( + call: MustUseCall<'a>, + project: &'a Project, + pi_result: &'a PointerInference<'b>, + cwe_sender: &'a crossbeam_channel::Sender, + ) -> Self { + Self { + call, + project, + pi_result, + cwe_sender: cwe_sender.clone(), + } + } + + /// Converts the taint analysis context into a fixpoint computation. + /// + /// The returned computation can be solved to analyze this particular + /// function call. + pub fn into_computation(self) -> FpComputation<'a, 'b> { + let symbol = self.call.symbol; + let vsa_result = self.vsa_result(); + let return_node = self.call.return_node; + let node_value = NodeValue::Value(TaState::new_return(symbol, vsa_result, return_node)); + + let mut computation = fwd_fp_create_computation(self, None); + + computation.set_node_value(return_node, node_value); + + computation + } + + fn generate_cwe_warning(&self, warning_location: &Tid, reason: &str) { + super::generate_cwe_warning(&self.cwe_sender, &self.call, warning_location, reason) + } +} + +impl<'a> HasCfg<'a> for TaComputationContext<'a, '_> { + fn get_cfg(&self) -> &Cfg<'a> { + self.pi_result.get_graph() + } +} + +impl HasVsaResult for TaComputationContext<'_, '_> { + fn vsa_result(&self) -> &impl VsaResult { + self.pi_result + } +} + +impl AsRef for TaComputationContext<'_, '_> { + fn as_ref(&self) -> &Project { + self.project + } +} + +impl<'a> TaintAnalysis<'a> for TaComputationContext<'a, '_> { + /// Generates a CWE warning when a transition function returns the empty + /// state. + /// + /// If a transition function returns the empty state this implies that there + /// is some time in the program where all information about the return value + /// of a fallible call has been eradicated without a previous check. + /// + /// From this time onwards, the program cannot possibly know if the fallible + /// operation has succeeded or not. This point is reported as a possible bug + /// by the check. + /// + /// For an example where this rule is needed to detect an error consider the + /// following program: + /// + /// ```c + /// x = fallible_call(); + /// + /// if (some_unrelated_condition) { + /// x = 42; + /// } + /// + /// if (x < 0) { + /// return x; + /// } + /// + /// // Do something that assumes that `fallible_call` has worked. + /// ``` + /// + /// A pure forward 'may' data flow analysis would only see a tainted `x` + /// being used in a comparison and consider the fallible call to be checked. + /// With this condition, the analysis would emit a warning for the term + /// `x = 42` since at this point all information about the return value is + /// lost. + fn handle_empty_state_out(&self, tid: &Tid) -> Option { + self.generate_cwe_warning(tid, "empty_state"); + + None + } + + /// Update taint state on call to extern function. + /// + /// We almost always just want to remove the taint from non-callee-saved + /// registers. However, for calls/jumps into nonreturning functions at the + /// end of a procedure we need some logic to suppress false positives. + fn update_extern_call( + &self, + state: &TaState, + _call: &Term, + project: &Project, + extern_symbol: &ExternSymbol, + ) -> Option { + // External symbols that are unconditional termination points. It does + // not make sense to propagate taint through those so we always return + // `None`. + if extern_symbol.no_return { + return None; + } + // External symbols that are effectively no-ops. Here we always return + // the unmodified input state. + // + // FIXME: This is an artifact of the way in which we generate the CFG. + // On x86 and amd64 kernel functions end in a 'jmp retpoline' but we + // generate a 'call extern; return;', where we introduced an artificial + // isolated return node. Another workaround would be to replace the jmp + // with a ret in a normalization pass. + const EXTERN_NOOP: [&str; 1] = ["__x86_return_thunk"]; + + let mut new_state = state.clone(); + + if !EXTERN_NOOP.iter().any(|s| s == &extern_symbol.name) { + new_state.remove_non_callee_saved_taint(project.get_calling_convention(extern_symbol)); + } + + Some(new_state) + } + + /// Stops taint propagation if jump depends on a tainted condition. + /// + /// We assume that any check that depends on tainted values is a check of + /// the return value of the fallible function, and that the program handles + /// all outcomes correctly. + /// + /// A jump can depend on a tainted condition in two ways, either it is + /// executed because the condition evaluated to `true`, or because it + /// evaluated to `false`, both cases must be handled here. + fn update_jump( + &self, + state: &TaState, + jump: &Term, + untaken_conditional: Option<&Term>, + _target: &Term, + ) -> Option { + // If this control flow transfer depends on a condition involving + // a tainted value then we do not propagate any taint information to + // the destination. + match (&jump.term, untaken_conditional) { + // Directly depends on a tainted value. + (Jmp::CBranch { condition, .. }, _) if state.eval(condition).is_tainted() => None, + // Branch is only taken because a condition based on a tainted value + // evaluated to false. + ( + _, + Some(Term { + tid: _, + term: Jmp::CBranch { condition, .. }, + }), + ) if state.eval(condition).is_tainted() => None, + // Does not depend on tainted values. + _ => { + if state.is_empty() { + self.handle_empty_state_out(&jump.tid) + } else { + Some(state.clone()) + } + } + } + } + + /// Propagates taint from callee to caller. + /// + /// The check performs a bottom-up interprocedural taint analysis. The main + /// idea is that: *If a function may propagate the return value of a + /// fallible function call to its caller without checking it, then the + /// function itself becomes "must check".* (In the sense that the caller is + /// now responsible for checking its return value.) + /// + /// The taint may be returned directly, indirectly by returning a pointer + /// that can be used to reach taint, or written to global variables + /// (possibly as a pointer). + /// + /// Limitations stem from two sources: + /// + /// - The calling convention may be unknown, which means we can not + /// determine precisely which taint will be reachable for the caller. + /// + /// See the source code comments for further information. + /// + /// Furthermore, this callback is responsible for detecting cases where + /// taint may reach the end of a function without being returned. + /// This implies that the called function may have ignored the return + /// value of a fallible function call and there is no way for the caller to + /// know the outcome. The check raises a warning in those cases. + fn update_return_callee( + &self, + state: &TaState, + call_term: &Term, + return_term: &Term, + calling_convention: &Option, + ) -> Option { + let (mut propagated_register_taint, mut propagated_memory_taint) = + state.clone().into_mem_reg_taint(); + + // Only keep memory taint that will be propagated to the caller. We + // compute this here since we want to notice when no taint is + // propagated. + let renaming_map = self.pi_result.get_call_renaming_map(&call_term.tid); + propagated_memory_taint.retain(|aid, _| { + // This is still an over-approximation to the taint that will be + // available to the caller since it might happen that all relative + // values have non-exactly-known offsets. + renaming_map.is_some_and(|renaming_map| { + renaming_map + .get(aid) + .is_some_and(|value| value.referenced_ids().next().is_some()) + }) + }); + + if let Some(calling_convention) = self + .project + .get_specific_calling_convention(calling_convention) + { + let return_registers = calling_convention.get_all_return_register(); + + // If there are tainted return registers we propagate the taint to + // the caller, which makes them responsible for checking it. + propagated_register_taint + .retain(|reg, taint| return_registers.contains(®) && taint.is_tainted()); + } else { + // We have tainted registers but we do not know the calling + // convention. Here we simply return the complete register taint + // of the callee to the caller. This heuristic should in practice + // hopefully be a good approximation to the real calling convention: + // - It is an over approximation so return registers will be + // propagated correctly. + // - There is a chance that callee-saved registers have been + // overwritten with their saved values by the function epilog and + // are thus not tainted. + // - There is a chance that caller saved registers will be restored + // by the caller such that the taint is immediatly eliminated and + // we catch cases where the called function has ignored tainted + // values. + }; + + let propagated_state = + TaState::from_mem_reg_taint(propagated_register_taint, propagated_memory_taint); + + if propagated_state.is_empty() { + // If we can not propagate any taint to the caller it is implied + // that + // + // - the return value of a fallible function call may have been + // ignored by the callee + // - AND the result is not returned to the caller + // + // Thus, callers of this function have no way to know if the + // operation it performs was successful and the function itself + // might not have checked it either. + self.generate_cwe_warning(&return_term.tid, "return_no_taint"); + + None + } else { + Some(propagated_state) + } + } +} diff --git a/src/cwe_checker_lib/src/checkers/cwe_252/isolated_returns.rs b/src/cwe_checker_lib/src/checkers/cwe_252/isolated_returns.rs new file mode 100644 index 000000000..5c7a4bba5 --- /dev/null +++ b/src/cwe_checker_lib/src/checkers/cwe_252/isolated_returns.rs @@ -0,0 +1,159 @@ +//! Analyse Isolated Return Sites. +//! +//! Taint that reaches a return site implies that there is some path from the +//! call to a fallible function to the return instruction where the return value +//! is not checked. If additionally, the taint is not returned to the caller +//! we have a bug, since with the return of the function all information about +//! success or failure of the function call is lost. +//! +//! For each function that is called at least once, we catch those cases during +//! the FP computation. However, for functions that are never called, e.g., +//! exported functions in shared libraries or functions that are only ever +//! called indirectly, we need an additional pass once we have the result of +//! the FP computation. + +use crate::abstract_domain::AbstractDomain; +use crate::analysis::graph::{Graph, Node, NodeIndex}; +use crate::intermediate_representation::{Jmp, Project, Tid}; +use crate::utils::log::CweWarning; + +use std::collections::HashSet; +use std::sync::Arc; + +use super::context; +use super::{generate_cwe_warning, MustUseCall}; + +/// Represents a return site of a function. +#[derive(Hash, Eq, PartialEq, Debug)] +pub struct ReturnSite<'a> { + /// CFG node of the end of the block containing the return instruction. + ret_node: NodeIndex, + /// Calling convention of the function that returns. + calling_convention: &'a Option, + /// Identifier of the return instruction. + ret_insn_tid: &'a Tid, +} + +impl<'a> ReturnSite<'a> { + /// Create a new `ReturnSite`. + fn new( + ret_node: NodeIndex, + calling_convention: &'a Option, + ret_insn_tid: &'a Tid, + ) -> Self { + Self { + ret_node, + calling_convention, + ret_insn_tid, + } + } +} + +/// The set of all isolated return sites of a binary. +pub type IsolatedReturns<'a> = HashSet>; + +/// Represents the post-fixpoint-computation pass that checks isolated return +/// sites for taint. +pub struct IsolatedReturnAnalysis<'a> { + call: MustUseCall<'a>, + isolated_returns: Arc>, + project: &'a Project, + cwe_sender: crossbeam_channel::Sender, +} + +impl<'a> IsolatedReturnAnalysis<'a> { + /// Create a new `IsolatedReturnAnalysis`. + pub fn new( + call: MustUseCall<'a>, + isolated_returns: Arc>, + project: &'a Project, + cwe_sender: crossbeam_channel::Sender, + ) -> Self { + Self { + call, + isolated_returns, + project, + cwe_sender, + } + } + + /// Checks isolated return sites for taint with the results of the given + /// `computation`. + /// + /// Generates CWE warnings when non-returned taint is found. We have no + /// caller context, i.e., no aID renaming map, so we can not tell if memory + /// taint is returned or not. Currently we always assume that memory taint + /// will *not* be reachable for the caller (generates FPs). + pub fn analyze(&self, computation: &context::FpComputation<'_, '_>) { + for (taint_state, calling_convention, ret_insn_tid) in + // Filter isolated returns with a taint state. + self.isolated_returns.iter().filter_map( + |ReturnSite { + ret_node, + calling_convention, + ret_insn_tid, + }| { + computation + .node_values() + .get(ret_node) + .map(|state| (state.unwrap_value(), calling_convention, ret_insn_tid)) + }, + ) + { + if !taint_state.has_register_taint() { + // Emit a warning since we do not consider cases where memory + // taint may be returned. + generate_cwe_warning( + &self.cwe_sender, + &self.call, + ret_insn_tid, + "isolated_returns_no_reg_taint", + ); + } else if let Some(calling_convention) = self + .project + .get_specific_calling_convention(calling_convention) + { + // If no taint is returned we emit a warning. + if calling_convention + .get_all_return_register() + .iter() + .all(|return_register| taint_state.get_register_taint(return_register).is_top()) + { + generate_cwe_warning( + &self.cwe_sender, + &self.call, + ret_insn_tid, + "isolated_returns_no_reg_taint_returned", + ); + } + } + } + } +} + +/// Get the set of all isolated return sites in the given interprocedural CFG. +pub fn get_isolated_returns<'a>(cfg: &Graph<'a>) -> IsolatedReturns<'a> { + cfg.node_indices() + .filter_map(|node| { + if cfg.edges(node).next().is_some() { + // By definition, a node with outgoing edges cannot be an + // isolated return. + None + } else if let Node::BlkEnd(blk, sub) = cfg[node] { + blk.term.jmps.iter().find_map(|jmp| { + if matches!(jmp.term, Jmp::Return(_)) { + Some(ReturnSite::new( + node, + &sub.term.calling_convention, + &jmp.tid, + )) + } else { + None + } + }) + } else { + None + } + }) + .collect() +} diff --git a/src/cwe_checker_lib/src/lib.rs b/src/cwe_checker_lib/src/lib.rs index c36745885..1957b5dbb 100644 --- a/src/cwe_checker_lib/src/lib.rs +++ b/src/cwe_checker_lib/src/lib.rs @@ -119,6 +119,7 @@ pub fn get_modules() -> Vec<&'static CweModule> { &crate::checkers::cwe_190::CWE_MODULE, &crate::checkers::cwe_215::CWE_MODULE, &crate::checkers::cwe_243::CWE_MODULE, + &crate::checkers::cwe_252::CWE_MODULE, &crate::checkers::cwe_332::CWE_MODULE, &crate::checkers::cwe_337::CWE_MODULE, &crate::checkers::cwe_367::CWE_MODULE, diff --git a/src/cwe_checker_lib/src/utils/debug.rs b/src/cwe_checker_lib/src/utils/debug.rs new file mode 100644 index 000000000..10b596588 --- /dev/null +++ b/src/cwe_checker_lib/src/utils/debug.rs @@ -0,0 +1,21 @@ +//! Little helpers for developers that try to understand what their code is +//! doing. + +/// Central utility for debug printing in the `cwe_checker`. +/// +/// The canonical way to do printf-debugging in `cwe_checker` development is to +/// implement this trait for the type you want to inspect and then print it +/// via `value.print_compact_json()`. +pub trait ToJsonCompact { + /// Returns a json representation of values of type `self` that is + /// suitable for debugging purposes. + /// + /// The idea is that printing of complex types is facilitated by + /// implementing `to_json_compact` for all of their constituent parts. + fn to_json_compact(&self) -> serde_json::Value; + + /// Print values of type `Self` for debugging purposes. + fn print_compact_json(&self) { + println!("{:#}", self.to_json_compact()) + } +} diff --git a/src/cwe_checker_lib/src/utils/mod.rs b/src/cwe_checker_lib/src/utils/mod.rs index 38fccc043..93beae261 100644 --- a/src/cwe_checker_lib/src/utils/mod.rs +++ b/src/cwe_checker_lib/src/utils/mod.rs @@ -2,6 +2,7 @@ pub mod arguments; pub mod binary; +pub mod debug; pub mod ghidra; pub mod graph_utils; pub mod log; diff --git a/src/cwe_checker_lib/src/utils/symbol_utils.rs b/src/cwe_checker_lib/src/utils/symbol_utils.rs index 5cee474b0..5e6d66ed0 100644 --- a/src/cwe_checker_lib/src/utils/symbol_utils.rs +++ b/src/cwe_checker_lib/src/utils/symbol_utils.rs @@ -1,7 +1,7 @@ //! Helper functions for common tasks utilizing extern symbols, //! e.g. searching for calls to a specific extern symbol. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use crate::intermediate_representation::*; @@ -42,7 +42,12 @@ pub fn get_calls_to_symbols<'a, 'b>( } /// Get a map from TIDs to the corresponding extern symbol struct. -/// Only symbols with names contained in `symbols_to_find` are contained in the map. +/// +/// Only symbols with names contained in `symbols_to_find` are contained in the +/// map. +/// +/// This is O(|symbols_to_find| x |extern_symbols|), prefer +/// [`get_symbol_map_fast`] if speed matters. pub fn get_symbol_map<'a>( project: &'a Project, symbols_to_find: &[String], @@ -69,6 +74,33 @@ pub fn get_symbol_map<'a>( tid_map } +/// Get a map from TIDs to the corresponding extern symbol struct. +/// +/// Only symbols with names contained in `symbols_to_find` are contained in the +/// map. +/// +/// More efficient than [`get_symbol_map`], prefer this if `symbols_to_find` is +/// huge since this is O(|extern_symbols|) and not +/// O(|symbols_to_find|x|extern_symbols|). +pub fn get_symbol_map_fast<'a>( + project: &'a Project, + symbols_to_find: &HashSet, +) -> HashMap { + project + .program + .term + .extern_symbols + .iter() + .filter_map(|(_tid, symbol)| { + if symbols_to_find.contains(&symbol.name) { + Some((symbol.tid.clone(), symbol)) + } else { + None + } + }) + .collect() +} + /// Find calls to TIDs contained as keys in the given symbol map. /// For each match return the block containing the call, /// the jump term representing the call itself and the symbol corresponding to the TID from the symbol map. diff --git a/src/lkm_config.json b/src/lkm_config.json index a223c864b..cf12519b1 100644 --- a/src/lkm_config.json +++ b/src/lkm_config.json @@ -11,6 +11,577 @@ "CWE215": { "symbols": [] }, + "CWE252": { + "_comment": "All exported functions annotated with warn_unused_result.", + "strict_mode": false, + "strict_symbols": [], + "symbols": [ + "__ab_c_size", + "action", + "add_bind_files", + "add_mtd_device", + "aesgcm_decrypt", + "anybuss_host_common_probe", + "aq_check_approve_fl2", + "aq_check_approve_fvlan", + "aq_check_filter", + "aq_check_rule", + "aq_match_filter", + "aq_rule_already_exists", + "aq_rule_is_approve", + "aq_rule_is_not_correct", + "aq_rule_is_not_support", + "__arch_clear_user", + "__arch_copy_from_user", + "__arch_copy_to_user", + "arch_get_random_longs", + "arch_get_random_seed_longs", + "arch_phys_wc_add", + "arch_phys_wc_index", + "arm_clear_user", + "arm_copy_from_user", + "arm_copy_to_user", + "asix_read_cmd", + "__asm_copy_from_user", + "__asm_copy_to_user", + "ata_pci_device_do_resume", + "__attribute__", + "attribute_container_unregister", + "ax25_create_cb", + "ax25_listen_register", + "ax25_rt_add", + "ax25_uid_ioctl", + "__bch2_btree_iter_traverse", + "bch2_btree_iter_traverse", + "bch2_btree_node_lock_write", + "bch2_btree_path_make_mut", + "__bch2_btree_path_set_pos", + "bch2_btree_path_set_pos", + "bch2_btree_path_traverse", + "bch2_btree_path_traverse_one", + "bch2_trans_update", + "bch2_trans_update_buffered", + "bch2_trans_update_by_path", + "bch2_write_inode", + "bch2_write_inode_size", + "bio_add_folio", + "bio_add_page", + "blk_get_queue", + "bsg_job_get", + "btree_init", + "btree_insert", + "btree_node_lock_nopath", + "bus_create_file", + "bus_register", + "bus_rescan_devices", + "bus_rescan_devices_helper", + "can_get_echo_skb", + "__cdx_driver_register", + "cfg80211_inform_bss", + "cfg80211_inform_bss_frame", + "chacha20poly1305_decrypt", + "check_copy_size", + "check_zeroed_user", + "class_create", + "class_create_file", + "class_create_file_ns", + "class_interface_register", + "class_register", + "__clear_user", + "clear_user", + "__clear_user_std", + "clk_bulk_enable", + "clk_bulk_get", + "clk_bulk_get_all", + "clk_bulk_get_optional", + "clk_bulk_prepare", + "clk_bulk_prepare_enable", + "clk_hw_register", + "copy_from_iter", + "copy_from_iter_full", + "copy_from_iter_full_nocache", + "copy_from_iter_nocache", + "__copy_from_user", + "_copy_from_user", + "copy_from_user", + "__copy_from_user_inatomic", + "__copy_from_user_inatomic_nocache", + "_copy_from_user_key", + "copy_from_user_key", + "copy_mc_fragile", + "copy_mc_generic", + "copy_mc_to_kernel", + "copy_mc_to_user", + "copy_nofault", + "__copy_tofrom_user", + "copy_to_iter", + "__copy_to_user", + "_copy_to_user", + "copy_to_user", + "_copy_to_user_key", + "copy_to_user_key", + "__copy_to_user_std", + "__copy_user_flushcache", + "copy_user_generic", + "csr_seed_long", + "current_clr_polling_and_test", + "current_set_polling_and_test", + "curve25519", + "__deliver_ckc", + "__deliver_cpu_timer", + "__deliver_emergency_signal", + "__deliver_external_call", + "__deliver_io", + "__deliver_machine_check", + "__deliver_pfault_done", + "__deliver_pfault_init", + "__deliver_prog", + "__deliver_restart", + "__deliver_service", + "__deliver_service_ev", + "__deliver_set_prefix", + "__deliver_virtio", + "destroy_hw_fd_uobject", + "destroy_hw_idr_uobject", + "device_add", + "device_add_disk", + "device_add_group", + "device_add_groups", + "device_attach", + "device_bind_driver", + "device_create_bin_file", + "device_driver_attach", + "device_register", + "device_reprobe", + "device_reset", + "devm_anybuss_host_common_probe", + "devm_clk_bulk_get", + "devm_clk_bulk_get_all", + "devm_clk_bulk_get_optional", + "devm_clk_hw_register", + "devm_device_add_group", + "devm_device_add_groups", + "devm_pinctrl_get", + "devm_pinctrl_get_select", + "devm_pinctrl_get_select_default", + "devm_regulator_bulk_get", + "devm_regulator_bulk_get_const", + "devm_regulator_bulk_get_exclusive", + "devm_request_any_context_irq", + "devm_request_irq", + "devm_request_threaded_irq", + "__devm_uio_register_device", + "devres_open_group", + "__die", + "dm_set_target_max_io_len", + "do_mlock", + "do_notify_parent", + "down_interruptible", + "down_killable", + "down_read_interruptible", + "down_read_killable", + "down_read_killable_nested", + "down_timeout", + "down_trylock", + "down_write_killable", + "driver_attach", + "driver_create_file", + "driver_for_each_device", + "driver_register", + "drm_atomic_add_affected_connectors", + "drm_atomic_add_affected_planes", + "drm_atomic_add_encoder_bridges", + "drm_atomic_check_only", + "drm_atomic_commit", + "drm_atomic_get_crtc_state", + "drm_atomic_get_private_obj_state", + "drm_atomic_helper_swap_state", + "drm_atomic_nonblocking_commit", + "drm_atomic_set_crtc_for_connector", + "drm_atomic_set_crtc_for_plane", + "drm_atomic_set_mode_for_crtc", + "drm_atomic_set_mode_prop_for_crtc", + "drm_atomic_state_alloc", + "drm_atomic_state_init", + "drm_dp_atomic_find_time_slots", + "drm_dp_atomic_release_time_slots", + "drm_dp_mst_add_affected_dsc_crtcs", + "drm_dp_mst_atomic_check", + "drm_dp_mst_atomic_check_mgr", + "drm_dp_mst_atomic_setup_commit", + "drm_dp_mst_root_conn_atomic_check", + "drm_dp_mst_topology_mgr_resume", + "__drmm_add_action", + "__drmm_add_action_or_reset", + "drmm_mode_config_init", + "drm_modeset_lock_single_interruptible", + "dvb_usb_generic_rw", + "dvb_usb_generic_write", + "ERR_PTR", + "ffs_do_descs", + "ffs_do_single_desc", + "ffs_epfiles_create", + "fh_fill_both_attrs", + "fh_fill_pre_attrs", + "fieldbus_dev_register", + "file_check_and_advance_wb_err", + "file_fdatawait_range", + "file_write_and_wait_range", + "fsl_create_mc_io", + "fsl_mc_allocate_irqs", + "fsl_mc_device_add", + "__fsl_mc_driver_register", + "fsl_mc_is_allocatable", + "fsl_mc_object_allocate", + "fsl_mc_portal_allocate", + "fsl_mc_resource_allocate", + "__gameport_register_driver", + "guc_mcr_reg_add", + "guc_mmio_reg_add", + "hex2bin", + "hid_hw_open", + "hid_hw_start", + "__hid_register_driver", + "HYPERVISOR_hvm_op", + "i915_active_fence_set", + "i915_gem_evict_for_node", + "i915_gem_evict_something", + "i915_gem_gtt_prepare_pages", + "i915_gem_init", + "i915_gem_object_ggtt_pin", + "i915_gem_object_ggtt_pin_ww", + "i915_gem_object_pin_pages", + "i915_gem_object_set_to_cpu_domain", + "i915_gem_object_set_to_gtt_domain", + "i915_gem_object_set_to_wc_domain", + "i915_gem_ww_ctx_backoff", + "__i915_request_create", + "_i915_vma_move_to_active", + "i915_vma_pin", + "i915_vma_pin_fence", + "i915_vma_pin_ww", + "i915_vma_unbind", + "i915_vma_unbind_async", + "i915_vma_unbind_unlocked", + "i915_vm_lock_objects", + "idr_alloc_u32", + "__idxd_driver_register", + "ieee80211_link_change_bandwidth", + "ieee80211_link_reserve_chanctx", + "ieee80211_link_use_channel", + "ieee80211_link_use_reserved_context", + "igt_vma_move_to_active_unlocked", + "il4965_request_firmware", + "il_send_cmd_pdu", + "il_send_cmd_sync", + "input_allocate_device", + "input_get_new_minor", + "input_register_device", + "input_register_handler", + "intel_gt_init_hw", + "intel_gt_reset_lock_interruptible", + "intel_gt_reset_trylock", + "io_schedule_prepare", + "IS_ERR", + "IS_ERR_OR_NULL", + "ivpu_bo_pin", + "ivpu_rpm_get", + "ivpu_rpm_get_if_active", + "iwl_mvm_send_cmd", + "iwl_mvm_send_cmd_pdu", + "iwl_mvm_send_cmd_pdu_status", + "iwl_mvm_send_cmd_status", + "iwl_op_mode_hw_rf_kill", + "iwl_pci_register_driver", + "iwl_scan_initiate", + "__kasan_init_slab_obj", + "__kasan_kmalloc", + "__kasan_kmalloc_large", + "__kasan_krealloc", + "__kasan_slab_alloc", + "kfence_handle_page_fault", + "__kfifo_int_must_check_helper", + "__kfifo_uint_must_check_helper", + "kmsan_ioremap_page_range", + "kmsan_memblock_free_pages", + "kmsan_vmap_pages_range_noflush", + "kobject_add", + "kobject_create_and_add", + "kobject_get_unless_zero", + "kobject_init_and_add", + "kobject_move", + "kobject_rename", + "krealloc", + "kset_register", + "kstrtobool", + "kstrtobool_from_user", + "kstrtoint", + "kstrtoint_from_user", + "_kstrtol", + "kstrtol_from_user", + "kstrtoll", + "kstrtoll_from_user", + "kstrtos16", + "kstrtos16_from_user", + "kstrtos32", + "kstrtos32_from_user", + "kstrtos64", + "kstrtos64_from_user", + "kstrtos8", + "kstrtos8_from_user", + "kstrtou16", + "kstrtou16_from_user", + "kstrtou32", + "kstrtou32_from_user", + "kstrtou64", + "kstrtou64_from_user", + "kstrtou8", + "kstrtou8_from_user", + "kstrtouint", + "kstrtouint_from_user", + "_kstrtoul", + "kstrtoul_from_user", + "kstrtoull", + "kstrtoull_from_user", + "kvm_s390_deliver_pending_interrupts", + "kvm_s390_inject_vcpu", + "kvm_s390_inject_vm", + "lclear_user", + "may_use_simd", + "__mcb_register_driver", + "mddev_lock", + "md_flush_request", + "__media_device_register", + "media_device_register_entity", + "media_devnode_register", + "media_request_lock_for_access", + "media_request_lock_for_update", + "mhi_ep_check_mhi_state", + "mhi_poll_reg_field", + "mhi_read_reg", + "mhi_read_reg_field", + "mhi_tryset_pm_state", + "mhi_write_reg_field", + "mix_dh", + "mix_precomputed_dh", + "__mmio_reg_add", + "__must_check_fn", + "mutex_lock_interruptible", + "mutex_lock_interruptible_nested", + "mutex_lock_killable", + "mutex_lock_killable_nested", + "mux_control_select", + "mux_control_select_delay", + "mux_control_try_select", + "mux_control_try_select_delay", + "mux_state_select", + "mux_state_select_delay", + "mux_state_try_select", + "mux_state_try_select_delay", + "__nd_driver_register", + "nd_jump_link", + "npe_debug_instr", + "npe_logical_reg_write16", + "npe_logical_reg_write32", + "npe_logical_reg_write8", + "nr_set_mac_address", + "nvhe_check_data_corruption", + "nvif_chan_wait", + "object_type_to_pool_type", + "of_clk_bulk_get", + "of_clk_bulk_get_all", + "of_clk_hw_register", + "of_icc_bulk_get", + "of_regulator_bulk_get_all", + "omap_get_global_state", + "__parport_register_driver", + "pci_assign_resource", + "pcibios_enable_device", + "pci_bus_alloc_resource", + "pci_create_sysfs_dev_files", + "pci_enable_device", + "pci_enable_device_io", + "pci_enable_device_mem", + "pci_map_rom", + "pcim_enable_device", + "pcim_set_mwi", + "pci_reassign_resource", + "pci_reenable_device", + "__pci_register_driver", + "pci_request_config_region_exclusive", + "pci_request_region", + "pci_request_regions", + "pci_request_regions_exclusive", + "pci_resize_resource", + "pci_set_mwi", + "pcmcia_request_irq", + "percpu_ref_init", + "pinctrl_get", + "pinctrl_get_select", + "pinctrl_get_select_default", + "pinctrl_lookup_state", + "prom_getproperty", + "PTR_ERR", + "PTR_ERR_OR_ZERO", + "PUSH_WAIT", + "raw_copy_from_user", + "raw_copy_in_user", + "raw_copy_to_user", + "rcuref_get_slowpath", + "rcuref_put_slowpath", + "rdma_restrack_get", + "rdrand_long", + "rdseed_long", + "__realloc_size", + "__refcount_add_not_zero", + "refcount_dec_and_lock", + "refcount_dec_and_lock_irqsave", + "refcount_dec_and_mutex_lock", + "__refcount_dec_and_test", + "refcount_dec_if_one", + "refcount_dec_not_one", + "__refcount_inc_not_zero", + "__refcount_sub_and_test", + "regulator_bulk_enable", + "regulator_bulk_get", + "regulator_enable", + "reiserfs_write_unlock_nested", + "__releases", + "remove_arg_zero", + "request_any_context_irq", + "request_nmi", + "__request_percpu_irq", + "request_percpu_irq", + "request_percpu_nmi", + "request_threaded_irq", + "rfkill_alloc", + "rfkill_register", + "RING_SPACE", + "rmi_register_function", + "__rmi_register_function_handler", + "rose_add_loopback_node", + "scmi_devres_protocol_instance_get", + "scmi_voltage_info_get", + "scsi_add_host", + "scsi_add_host_with_dma", + "scsi_device_get", + "scsi_device_reprobe", + "__serio_register_driver", + "set_fd_set", + "shpchp_create_ctrl_files", + "__skb_header_pointer", + "skb_header_pointer", + "skb_pointer_if_linear", + "skb_set_owner_sk_safe", + "skb_to_sgvec", + "skb_to_sgvec_nomark", + "skip_spaces", + "__smsc75xx_read_reg", + "smsc75xx_read_reg", + "smsc75xx_read_reg_nopm", + "__smsc75xx_write_reg", + "smsc75xx_write_reg", + "smsc75xx_write_reg_nopm", + "smsc95xx_eeprom_confirm_not_busy", + "smsc95xx_read_reg", + "smsc95xx_wait_eeprom", + "smsc95xx_write_reg", + "smsc95xx_write_reg_async", + "__snd_seq_driver_register", + "spu_acquire_saved", + "ssc_request", + "stack_depot_set_extra_bits", + "__sta_info_destroy", + "__sta_info_destroy_part1", + "strncpy_from_user", + "strnlen_user", + "strstrip", + "sysfs_chmod_file", + "sysfs_create_bin_file", + "sysfs_create_dir_ns", + "sysfs_create_file", + "sysfs_create_file_ns", + "sysfs_create_files", + "sysfs_create_group", + "sysfs_create_groups", + "sysfs_create_link", + "sysfs_create_link_nowarn", + "sysfs_create_mount_point", + "sysfs_init", + "sysfs_move_dir_ns", + "sysfs_rename_dir_ns", + "sysfs_update_groups", + "tcp_queue_rcv", + "tpm_try_get_ops", + "try_get_page", + "try_grab_page", + "tty_ldisc_init", + "typeof", + "__uio_register_device", + "unclaimed_reg_debug_header", + "usb_find_bulk_in_endpoint", + "usb_find_bulk_out_endpoint", + "usb_find_common_endpoints", + "usb_find_common_endpoints_reverse", + "usb_find_int_in_endpoint", + "usb_find_int_out_endpoint", + "usb_find_last_bulk_in_endpoint", + "usb_find_last_bulk_out_endpoint", + "usb_find_last_int_in_endpoint", + "usb_find_last_int_out_endpoint", + "user_access_begin", + "user_event_last_ref", + "user_read_access_begin", + "user_write_access_begin", + "uuid_is_valid", + "v4l2_async_register_subdev_sensor", + "v4l2_device_register", + "v4l2_device_register_subdev", + "__v4l2_device_register_subdev_nodes", + "vb2_queue_init", + "vb2_queue_init_name", + "__video_register_device", + "__vmap_pages_range_noflush", + "vmap_pages_range_noflush", + "vm_brk_flags", + "__vmbus_driver_register", + "vm_mmap", + "vm_mmap_pgoff", + "wl12xx_sdio_raw_read", + "wl12xx_sdio_raw_write", + "wl12xx_spi_raw_read", + "wl12xx_spi_raw_write", + "wl12xx_top_reg_read", + "wl12xx_top_reg_write", + "wl18xx_top_reg_read", + "wl18xx_top_reg_write", + "wlcore_raw_read", + "wlcore_raw_read32", + "wlcore_raw_read_data", + "wlcore_raw_write32", + "wlcore_raw_write_data", + "wlcore_read", + "wlcore_read32", + "wlcore_read_data", + "wlcore_read_hwaddr", + "wlcore_read_reg", + "wlcore_write", + "wlcore_write32", + "wlcore_write_data", + "wlcore_write_reg", + "__wmi_driver_register", + "ww_mutex_lock", + "ww_mutex_lock_interruptible", + "ww_mutex_trylock", + "__xa_alloc", + "__xa_alloc_cyclic", + "__xa_insert", + "xchacha20poly1305_decrypt", + "__xenbus_register_backend", + "__xenbus_register_frontend", + "zswap_pool_get" + ] + }, "CWE416": { "_comment": "Functions that invalidate the pointer passed as the first argument.", "deallocation_symbols": [], diff --git a/test/artificial_samples/cwe_252.c b/test/artificial_samples/cwe_252.c new file mode 100644 index 000000000..7cfa7a7aa --- /dev/null +++ b/test/artificial_samples/cwe_252.c @@ -0,0 +1,375 @@ +#include +#include +#include +#include + +/* +Explanation of the general naming scheme used for functions in this test and how +to parse the comments. + +Everything between two horizontal / *---- ... ---* / lines belongs to a single +test case. If there are no `CWE_WARNING...` comments, then the test should not +cause a warning. If there are such comments, then the test case should cause +exactly one warning, where the first reported address (source) belongs to the +line with the comment that has a `1` and the second reported address (sink) +belongs to a line with a `2`. The `reason=` field in the comment should match +the one in the cwe warning. + +Now to the names: + +cwe252_ INTER_OR_INTRA STORAGE+ RETURN? USAGE + +INTER_OR_INTRA -> intra | inter_caller | inter_callee +Tells you if the test is exercising our interprocedural or intraprocedural +analysis, and if the function is the caller or callee for interprocedural tests. + +STORAGE -> r | s | h | g +Tells you where the relevant tainted values will be stored (register, stack, +heap, global). So the test checks that we properly track those locations. + +RETURN -> return | return ptr | return param | gptr +For interprocedural test cases, this tells you how the information is propagated +from the callee to the caller. Taint may be returned directly, a pointer to +taint may be returned, taint may be written to a pointer parameter, taint may be +written to a pointer that is stored in a global variable. + +USAGE -> lost | unchecked | checked +This tells you what the test does with the tainted value. Is there a point where +all taint vanishes from the state (lost)? Is the taint reaching the end of a +function without being returned (unchecked)? Or is there a check on every path +from the call site to the end of the function (checked)? +*/ + +char *g_ret; +char **g_ret_store; + +int main() +{ + return 1; +} + +int some_func(int x) +{ + int seed = x + (int)time(NULL); + srand(seed); + return seed; +} + +/*----------------------------------------------------------------------------*/ + +static void cwe252_inter_callee_g_checked(char *buf, size_t n) +{ + g_ret = fgets(buf, n, stdin); +} + +int cwe252_inter_caller_g_checked(void) +{ + int number = rand(); + char buf[10]; + size_t n = sizeof(buf); + + cwe252_inter_callee_g_checked(buf, n); + + if (g_ret) + printf("%s", buf); + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static void cwe252_inter_callee_g_unchecked(void) +{ + char buf[10]; + + g_ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + printf("%p: %s\n", buf, buf); +} + +int cwe252_inter_caller_g_unchecked(void) +{ + int number = rand(); + + cwe252_inter_callee_g_unchecked(); + + printf("My number was %d\n", number); + + return number * + number; // CWE_WARNING, 2, reason=isolated_returns_no_reg_taint +} + +/*----------------------------------------------------------------------------*/ + +static void cwe252_inter_callee_h_gptr_checked(char *buf, size_t n) +{ + g_ret_store = malloc(8); + + if (!g_ret_store) + exit(1); + + *g_ret_store = fgets(buf, n, stdin); +} + +int cwe252_inter_caller_h_gptr_checked(void) +{ + int number = rand(); + char buf[10]; + size_t n = sizeof(buf); + + cwe252_inter_callee_h_gptr_checked(buf, n); + + if (*g_ret_store) + printf("%s", buf); + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static char **cwe252_inter_callee_h_return_ptr_checked(char *buf, size_t n) +{ + char **ret_store = (char**)malloc(16) + 1; + + if (!ret_store) + return NULL; + + *ret_store = fgets(buf, n, stdin); + + return ret_store; +} + +int cwe252_inter_caller_h_return_ptr_checked(void) +{ + int number = rand(); + char buf[10]; + size_t n = sizeof(buf); + char **ret_store = cwe252_inter_callee_h_return_ptr_checked(buf, n); + + if (!ret_store) + exit(1); + + if (*ret_store) + printf("%s", buf); + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static char *cwe252_inter_callee_r_return_lost(void) +{ + char buf[10]; + + return fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 +} + +int cwe252_inter_caller_r_return_lost(void) +{ + int number = rand(); + + cwe252_inter_callee_r_return_lost(); + + printf("My number was %d\n", number); // CWE_WARNING, 2, reason=empty_state + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static char *cwe252_inter_callee_rh_return_checked(void) +{ + char buf[10]; + char *volatile *ret_storage = malloc(8); + + if (!ret_storage) { + return NULL; + } + + *ret_storage = fgets(buf, sizeof(buf), stdin); + + return *ret_storage; +} + +int cwe252_inter_caller_rh_return_checked(void) +{ + int number = rand(); + char *ret; + + ret = cwe252_inter_callee_rh_return_checked(); + + if (!ret) { + abort(); + } + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static void cwe252_inter_callee_s_return_param_checked(char **ret_store, + char *buf, size_t n) +{ + *ret_store = fgets(buf, n, stdin); +} + +int cwe252_inter_caller_s_return_param_checked(void) +{ + int number = rand(); + char buf[10]; + size_t n = sizeof(buf); + char *ret; + + cwe252_inter_callee_s_return_param_checked(&ret, buf, n); + + if (ret) + printf("%s", buf); + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +static void cwe252_inter_callee_s_unchecked(void) +{ + char buf[10]; + char *volatile ret; + + ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + printf("%p: %s\n", buf, buf); // CWE_WARNING, 2, reason=return_no_taint +} + +int cwe252_inter_caller_s_unchecked(void) +{ + int number = rand(); + + cwe252_inter_callee_s_unchecked(); + + printf("My number was %d\n", number); + + return number * number; +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_g_lost(void) +{ + char buf[10]; + static char *volatile ret; + + ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + if (rand() == 0) { + ret = buf; // CWE_WARNING, 2, reason=empty_state + } + + if (ret) { + puts(buf); + } +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_g_unchecked(void) +{ + char buf[10]; + static char *volatile ret; + + ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + printf("%p: %s\n", buf, + buf); // CWE_WARNING, 2, reason=isolated_returns_no_reg_taint +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_h_lost(void) +{ + char buf[10]; + char *volatile *ret = malloc(8); + + if (!ret) { + return; + } + + *ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + if (rand() == 0) { + *ret = buf; // CWE_WARNING, 2, reason=empty_state + } + + if (*ret) { + puts(buf); + } +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_r_checked(void) +{ + char buf[10]; + char *ret; + + ret = fgets(buf, sizeof(buf), stdin); + + if (ret) { + puts(buf); + } +} + +/*----------------------------------------------------------------------------*/ + +extern int some_func(int); + +void cwe252_intra_r_lost(void) +{ + char buf[10]; + + fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + some_func(42); // CWE_WARNING, 2, reason=empty_state + + puts(buf); +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_s_lost(void) +{ + char buf[10]; + char *volatile ret; + + ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + + if (rand() == 0) { + ret = buf; // CWE_WARNING, 2, reason=empty_state + } + + if (ret) { + puts(buf); + } +} + +/*----------------------------------------------------------------------------*/ + +void cwe252_intra_s_unchecked(void) +{ + char buf[10]; + char *volatile ret; + + ret = fgets(buf, sizeof(buf), stdin); // CWE_WARNING, 1 + some_func(42); // CWE_MODULE, 2, reason=isolated_returns_no_reg_taint +} + +/*----------------------------------------------------------------------------*/ diff --git a/test/lkm_samples/Makefile b/test/lkm_samples/Makefile index 85a6ab58a..a20b31ba0 100644 --- a/test/lkm_samples/Makefile +++ b/test/lkm_samples/Makefile @@ -1,3 +1,4 @@ +obj-m += cwe_252.o obj-m += cwe_467.o obj-m += cwe_476.o obj-m += cwe_676.o diff --git a/test/lkm_samples/cwe_252.c b/test/lkm_samples/cwe_252.c new file mode 100644 index 000000000..94816104e --- /dev/null +++ b/test/lkm_samples/cwe_252.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +void cwe252_intra_h_lost(void* uptr) +{ + char buf[10]; + long *ret_store = kmalloc(8, GFP_KERNEL); + + WRITE_ONCE(*ret_store, strncpy_from_user(buf, uptr, sizeof(buf))); // CWE_WARNING, 1 + + pr_info("Call some func\n"); + + WRITE_ONCE(*ret_store, 0); // CWE_WARNING, 2, reason=empty_state + + pr_info("buf: %s\n", buf); +} + +static int __init test_init(void) +{ + pr_info("Hello, World\n"); + + return 0; +} + +static void __exit test_exit(void) +{ + pr_info("Goodbye, World\n"); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Valentin Obst"); + +module_init(test_init); +module_exit(test_exit); diff --git a/test/src/lib.rs b/test/src/lib.rs index 7c19ad62d..7d408fd0f 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -19,7 +19,7 @@ pub const LKM_ARCHITECTURES: &[&str] = &["aarch64"]; /// Compilers used for the Linux kernel module test samples. pub const LKM_COMPILERS: &[&str] = &["clang"]; /// CWEs that are supported for Linux kernel modules. -pub const LKM_CWE: &[&str] = &["cwe_467", "cwe_476", "cwe_676"]; +pub const LKM_CWE: &[&str] = &["cwe_252", "cwe_467", "cwe_476", "cwe_676"]; /// A test case containing the necessary information to run an acceptance test. #[derive(Debug, PartialEq, Eq, Hash, Clone)] @@ -41,15 +41,21 @@ pub struct CweTestCase { impl CweTestCase { /// Get the file path of the test binary fn get_filepath(&self) -> String { + let cwd = std::env::current_dir() + .unwrap() + .into_os_string() + .into_string() + .unwrap(); + if self.is_lkm { format!( - "lkm_samples/build/{}_{}_{}.ko", - self.cwe, self.architecture, self.compiler + "{}/lkm_samples/build/{}_{}_{}.ko", + cwd, self.cwe, self.architecture, self.compiler ) } else { format!( - "artificial_samples/build/{}_{}_{}.out", - self.cwe, self.architecture, self.compiler + "{}/artificial_samples/build/{}_{}_{}.out", + cwd, self.cwe, self.architecture, self.compiler ) } } @@ -418,6 +424,41 @@ mod tests { } } + #[test] + #[ignore] + fn cwe_252() { + let mut error_log = Vec::new(); + let mut tests = all_test_cases("cwe_252", "CWE252"); + let num_expected_occurences = 9; + let num_expected_occurences_lkm = 1; + + mark_architecture_skipped(&mut tests, "ppc64"); + mark_architecture_skipped(&mut tests, "ppc64le"); + mark_skipped(&mut tests, "mips", "gcc"); + mark_skipped(&mut tests, "mips64", "clang"); + mark_skipped(&mut tests, "mips64el", "clang"); + mark_skipped(&mut tests, "mipsel", "gcc"); + mark_skipped(&mut tests, "x86", "gcc"); + mark_skipped(&mut tests, "x86", "mingw32-gcc"); + + for test_case in tests { + let num_expected_occurences = if test_case.is_lkm { + num_expected_occurences_lkm + } else { + num_expected_occurences + }; + + if let Err(error) = test_case.run_test("[CWE252]", num_expected_occurences) { + error_log.push((test_case.get_filepath(), error)); + } + } + + if !error_log.is_empty() { + print_errors(error_log); + panic!(); + } + } + #[test] #[ignore] fn cwe_332() {