Skip to content

Commit

Permalink
fuzz: improve fuzz testing
Browse files Browse the repository at this point in the history
It's still not as good as it could be, but we add fuzz targets for
regex-lite and DFA deserialization in regex-automata.
  • Loading branch information
BurntSushi committed Apr 30, 2023
1 parent dbd65d6 commit 7c01136
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 20 deletions.
6 changes: 6 additions & 0 deletions .vim/coc-settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"rust-analyzer.linkedProjects": [
"fuzz/Cargo.toml",
"Cargo.toml"
]
}
20 changes: 16 additions & 4 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ name = "regex-fuzz"
version = "0.0.0"
authors = ["David Korczynski <[email protected]>"]
publish = false
edition = "2018"
edition = "2021"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4.1"

[dependencies.regex]
path = ".."
regex = { path = ".." }
regex-automata = { path = "../regex-automata" }
regex-lite = { path = "../regex-lite" }

# Prevent this from interfering with workspaces
[workspace]
Expand All @@ -22,6 +22,18 @@ members = ["."]
name = "fuzz_regex_match"
path = "fuzz_targets/fuzz_regex_match.rs"

[[bin]]
name = "fuzz_regex_lite_match"
path = "fuzz_targets/fuzz_regex_lite_match.rs"

[[bin]]
name = "fuzz_regex_automata_deserialize_dense_dfa"
path = "fuzz_targets/fuzz_regex_automata_deserialize_dense_dfa.rs"

[[bin]]
name = "fuzz_regex_automata_deserialize_sparse_dfa"
path = "fuzz_targets/fuzz_regex_automata_deserialize_sparse_dfa.rs"

[profile.release]
opt-level = 3
debug = true
Expand Down
37 changes: 37 additions & 0 deletions fuzz/fuzz_targets/fuzz_regex_automata_deserialize_dense_dfa.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// libFuzzer entry point for the dense DFA deserialization target. Each
// generated input is handed to `run`; the `Option<()>` it returns only signals
// whether the input was usable, so it is deliberately discarded.
fuzz_target!(|raw_input: &[u8]| {
    let _ = run(raw_input);
});

/// Fuzz body for dense DFA deserialization.
///
/// Input layout: byte 0 is the haystack length `n`, the following `n` bytes
/// are the haystack, and every remaining byte is used as the body of a
/// serialized dense DFA.
///
/// Returns `None` when the input is too short to split or when
/// deserialization rejects the bytes, and `Some(())` once a forward search
/// has been attempted on the deserialized DFA.
fn run(given_data: &[u8]) -> Option<()> {
    use regex_automata::dfa::Automaton;

    // Require the length byte plus at least one further byte.
    let (len_byte, rest) = match given_data {
        [first, rest @ ..] if !rest.is_empty() => (*first, rest),
        _ => return None,
    };
    let haystack_len = usize::from(len_byte);
    if haystack_len > rest.len() {
        return None;
    }
    let (haystack, given_dfa_bytes) = rest.split_at(haystack_len);

    // The deserializer insists on a very specific leading byte sequence.
    // Instead of making the fuzzer rediscover it, always prepend a valid one:
    // the label (its length is asserted to be a multiple of 4), then the
    // endianness check and the version check in native byte order.
    let label = "rust-regex-automata-dfa-dense\x00\x00\x00";
    assert_eq!(0, label.len() % 4);
    let endianness_check = 0xFEFFu32.to_ne_bytes();
    let version_check = 2u32.to_ne_bytes();
    let pieces: [&[u8]; 4] = [
        label.as_bytes(),
        &endianness_check,
        &version_check,
        given_dfa_bytes,
    ];
    let dfa_bytes: Vec<u8> = pieces.concat();

    // The property under test: no input handed to `DFA::from_bytes` may cause
    // a panic. An `Err` is an acceptable outcome and simply ends this run.
    //
    // NOTE(review): `from_bytes` is understood to reject misaligned slices,
    // and a `Vec<u8>` only guarantees byte alignment (allocators usually
    // over-align) -- confirm this does not silently limit coverage.
    let parsed = regex_automata::dfa::dense::DFA::from_bytes(&dfa_bytes);
    let (dfa, _) = parsed.ok()?;
    let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack));
    Some(())
}
37 changes: 37 additions & 0 deletions fuzz/fuzz_targets/fuzz_regex_automata_deserialize_sparse_dfa.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// libFuzzer entry point for the sparse DFA deserialization target. Each
// generated input is handed to `run`; the `Option<()>` it returns only signals
// whether the input was usable, so it is deliberately discarded.
fuzz_target!(|raw_input: &[u8]| {
    let _ = run(raw_input);
});

/// Fuzz body for sparse DFA deserialization.
///
/// Input layout: byte 0 is the haystack length `n`, the following `n` bytes
/// are the haystack, and every remaining byte is used as the body of a
/// serialized sparse DFA.
///
/// Returns `None` when the input is too short to split or when
/// deserialization rejects the bytes, and `Some(())` once a forward search
/// has been attempted on the deserialized DFA.
fn run(given_data: &[u8]) -> Option<()> {
    use regex_automata::dfa::Automaton;

    // Require the length byte plus at least one further byte.
    let (len_byte, rest) = match given_data {
        [first, rest @ ..] if !rest.is_empty() => (*first, rest),
        _ => return None,
    };
    let haystack_len = usize::from(len_byte);
    if haystack_len > rest.len() {
        return None;
    }
    let (haystack, given_dfa_bytes) = rest.split_at(haystack_len);

    // The deserializer insists on a very specific leading byte sequence.
    // Instead of making the fuzzer rediscover it, always prepend a valid one:
    // the label (its length is asserted to be a multiple of 4), then the
    // endianness check and the version check in native byte order.
    let label = "rust-regex-automata-dfa-sparse\x00\x00";
    assert_eq!(0, label.len() % 4);
    let endianness_check = 0xFEFFu32.to_ne_bytes();
    let version_check = 2u32.to_ne_bytes();
    let pieces: [&[u8]; 4] = [
        label.as_bytes(),
        &endianness_check,
        &version_check,
        given_dfa_bytes,
    ];
    let dfa_bytes: Vec<u8> = pieces.concat();

    // The property under test: no input handed to `DFA::from_bytes` may cause
    // a panic. An `Err` is an acceptable outcome and simply ends this run.
    let parsed = regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes);
    let (dfa, _) = parsed.ok()?;
    let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack));
    Some(())
}
23 changes: 23 additions & 0 deletions fuzz/fuzz_targets/fuzz_regex_lite_match.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// libFuzzer entry point for the regex-lite matching target. Each generated
// input is handed to `run`; the `Option<()>` it returns only signals whether
// the input was usable, so it is deliberately discarded.
fuzz_target!(|raw_input: &[u8]| {
    let _ = run(raw_input);
});

/// Fuzz body for `regex_lite`: compiles part of the input as a pattern and
/// matches it against the rest.
///
/// Input layout: byte 0 selects where to split; the remaining bytes must be
/// valid UTF-8 and are divided, on a `char` boundary, into a pattern (left)
/// and a haystack (right).
///
/// Returns `None` when the input is shorter than two bytes, is not valid
/// UTF-8, or the pattern fails to compile; returns `Some(())` after a match
/// has been attempted.
fn run(data: &[u8]) -> Option<()> {
    if data.len() < 2 {
        return None;
    }
    let selector = usize::from(data[0]);
    let text = std::str::from_utf8(&data[1..]).ok()?;
    // `text` holds at least one byte of valid UTF-8, so it has at least one
    // `char` and the modulo below cannot divide by zero. `count()` already
    // yields a `usize`, so no conversion is needed.
    let char_count = text.chars().count();
    // Clamp the selector to at least 1, then wrap it into `0..char_count` so
    // that it always names an existing `char`.
    let split_at = std::cmp::max(selector, 1) % char_count;
    let (byte_index, _) = text.char_indices().nth(split_at)?;
    let (pattern, haystack) = text.split_at(byte_index);
    let re = regex_lite::Regex::new(pattern).ok()?;
    // Only the absence of a panic matters; the match result itself is unused.
    re.is_match(haystack);
    Some(())
}
33 changes: 17 additions & 16 deletions fuzz/fuzz_targets/fuzz_regex_match.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
let _ = run(data);
});

// Fuzz body for `regex::Regex`: byte 0 of the input selects a split point,
// and the remaining bytes (which must be valid UTF-8) are divided on a `char`
// boundary into a pattern and a haystack that is then matched.
//
// NOTE(review): this listing appears to come from a rendered diff with its
// +/- markers stripped, so the previous closure-based implementation (the
// bare `return;` and the nested `if let` chain ending in `});`) is interleaved
// with the new `Option`-returning body. Confirm against the real file before
// editing.
fn run(data: &[u8]) -> Option<()> {
if data.len() < 2 {
return;
return None;
}
let split_point = data[0] as usize;
if let Ok(data) = std::str::from_utf8(&data[1..]) {
use std::cmp::max;
// split data into regular expression and actual input to search through
let len = data.chars().count();
let split_off_point = max(split_point, 1) % len as usize;
let char_index = data.char_indices().nth(split_off_point);
if let Some((char_index, _)) = char_index {
let (pattern, input) = data.split_at(char_index);
if let Ok(re) = regex::Regex::new(pattern) {
re.is_match(input);
}
}
}
});
let mut split_at = usize::from(data[0]);
let data = std::str::from_utf8(&data[1..]).ok()?;
// Split data into a regex and haystack to search.
// NOTE(review): `chars().count()` already yields a `usize`, so the
// `usize::try_from(..).ok()?` below looks like a no-op conversion.
let len = usize::try_from(data.chars().count()).ok()?;
// Clamp the selector to at least 1, then wrap it into `0..len`.
split_at = std::cmp::max(split_at, 1) % len;
let char_index = data.char_indices().nth(split_at)?.0;
let (pattern, input) = data.split_at(char_index);
let re = regex::Regex::new(pattern).ok()?;
// The match result is unused; the call is made only to exercise the engine.
re.is_match(input);
Some(())
}

0 comments on commit 7c01136

Please sign in to comment.