Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: --exec with placeholder corresponding to regex groups/occurrences #1118

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
13 changes: 10 additions & 3 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,15 +401,22 @@ pub fn build_app() -> Command<'static> {
'{/}': basename\n \
'{//}': parent directory\n \
'{.}': path without file extension\n \
'{/.}': basename without file extension\n\n\
If no placeholder is present, an implicit \"{}\" at the end is assumed.\n\n\
'{/.}': basename without file extension\n \
'{N}': text matched by the N-th group in the first pattern occurrence. Text outside groups is discarded.\n \
'{M.N}': text matched by the N-th group in the M-th pattern occurrence within the path or filename.\n \
'{N:-D}': text matched by the optional occurrence/group, or 'D' when it did not match.\n\n\
Notes:\n \
- Using 0 for M selects every occurrence; using 0 for N selects the whole matched text.\n \
- If no placeholder is present, an implicit \"{}\" at the end is assumed.\n\n\
Examples:\n\n \
- find all *.zip files and unzip them:\n\n \
fd -e zip -x unzip\n\n \
- find *.h and *.cpp files and run \"clang-format -i ..\" for each of them:\n\n \
fd -e h -e cpp -x clang-format -i\n\n \
- Convert all *.jpg files to *.png files:\n\n \
fd -e jpg -x convert {} {.}.png\n\n \
- Rename all *.jpeg files to use the extension *.jpg:\n\n \
fd '(.+)\\.jpeg$' -x mv {} {//}/{1}.jpg\
",
),
)
Expand All @@ -431,7 +438,7 @@ pub fn build_app() -> Command<'static> {
'{/}': basename\n \
'{//}': parent directory\n \
'{.}': path without file extension\n \
'{/.}': basename without file extension\n\n\
'{/.}': basename without file extension\n \
If no placeholder is present, an implicit \"{}\" at the end is assumed.\n\n\
Examples:\n\n \
- Find all test_*.py files and open them in your favorite editor:\n\n \
Expand Down
53 changes: 53 additions & 0 deletions src/dir_entry.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use std::ffi::OsStr;
use std::{
fs::{FileType, Metadata},
path::{Path, PathBuf},
collections::HashMap,
};
use std::borrow::Cow;

use once_cell::unsync::OnceCell;
use regex::bytes::Regex;

use crate::filesystem;

enum DirEntryInner {
Normal(ignore::DirEntry),
Expand All @@ -13,6 +19,7 @@ enum DirEntryInner {
/// A file system entry plus the regex capture text recorded for it by `is_match`.
pub struct DirEntry {
/// The wrapped entry: a regular `ignore::DirEntry` or the path of a broken symlink.
inner: DirEntryInner,
/// Write-once cell holding the entry's metadata; both constructors start it out empty.
metadata: OnceCell<Option<Metadata>>,
/// Capture text from the most recent `is_match` call, keyed first by zero-based
/// occurrence index and then by capture-group index. Empty until `is_match` has run.
match_list: HashMap<usize, HashMap<usize, String>>,
}

impl DirEntry {
Expand All @@ -21,13 +28,15 @@ impl DirEntry {
Self {
inner: DirEntryInner::Normal(e),
metadata: OnceCell::new(),
match_list: HashMap::new(),
}
}

/// Creates an entry for a symlink whose target could not be resolved.
///
/// The metadata cell starts out unset and no regex captures are recorded yet.
pub fn broken_symlink(path: PathBuf) -> Self {
    let inner = DirEntryInner::BrokenSymlink(path);
    Self {
        inner,
        metadata: OnceCell::new(),
        match_list: HashMap::new(),
    }
}

Expand All @@ -38,6 +47,10 @@ impl DirEntry {
}
}

pub fn matches(&self) -> &HashMap<usize, HashMap<usize, String>> {
&self.match_list
}

pub fn into_path(self) -> PathBuf {
match self.inner {
DirEntryInner::Normal(e) => e.into_path(),
Expand Down Expand Up @@ -67,6 +80,46 @@ impl DirEntry {
DirEntryInner::BrokenSymlink(_) => None,
}
}

/// Matches `pattern` against this entry and records the captured text.
///
/// The pattern is applied to the entry's file name, or to its absolute path when
/// `search_full_path` is set. Every occurrence of the pattern is stored under its zero-based
/// index; within an occurrence, each participating capture group is stored under its group
/// index (group 0 is the whole occurrence). The stored result replaces whatever a previous
/// call recorded and is available through `matches`.
///
/// File names are arbitrary bytes on some platforms, so captured text is converted lossily:
/// invalid UTF-8 sequences become U+FFFD instead of aborting the search with a panic.
///
/// Returns `true` if the pattern matched at least once.
pub fn is_match(&mut self, pattern: &Regex, search_full_path: bool) -> bool {
    let search_str = self.get_search_str(search_full_path);
    let search_bytes = filesystem::osstr_to_bytes(search_str.as_ref());

    let found: HashMap<usize, HashMap<usize, String>> = pattern
        .captures_iter(&search_bytes)
        .enumerate()
        .map(|(occurrence, captures)| {
            // Groups that did not participate in this occurrence are simply left out.
            let groups: HashMap<usize, String> = captures
                .iter()
                .enumerate()
                .filter_map(|(group, group_match)| {
                    group_match
                        .map(|m| (group, String::from_utf8_lossy(m.as_bytes()).into_owned()))
                })
                .collect();
            (occurrence, groups)
        })
        .collect();

    self.match_list = found;
    !self.match_list.is_empty()
}

/// Selects the text the search pattern is matched against.
///
/// With `search_full_path` set this is the entry's absolute path (an owned value);
/// otherwise it is the final component of the entry's path, borrowed from the entry.
///
/// # Panics
///
/// Panics if the absolute path cannot be determined, or if the entry's path has no
/// file name component.
fn get_search_str(&self, search_full_path: bool) -> Cow<OsStr> {
    let entry_path = self.path();

    if search_full_path {
        let absolute = filesystem::path_absolute_form(entry_path)
            .expect("Retrieving absolute path succeeds");
        return Cow::Owned(absolute.as_os_str().to_os_string());
    }

    match entry_path.file_name() {
        Some(filename) => Cow::Borrowed(filename),
        None => unreachable!(
            "Encountered file system entry without a file name. This should only \
             happen for paths like 'foo/bar/..' or '/' which are not supposed to \
             appear in a file system traversal."
        ),
    }
}
}

impl PartialEq for DirEntry {
Expand Down
2 changes: 1 addition & 1 deletion src/exec/job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub fn job(
// Drop the lock so that other threads can read from the receiver.
drop(lock);
// Generate a command, execute it and store its exit code.
results.push(cmd.execute(dir_entry.path(), Arc::clone(&out_perm), buffer_output))
results.push(cmd.execute(dir_entry.path(), dir_entry.matches(), Arc::clone(&out_perm), buffer_output))
}
// Returns error in case of any error.
merge_exitcodes(results)
Expand Down
67 changes: 51 additions & 16 deletions src/exec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use std::io;
use std::iter;
use std::path::{Component, Path, PathBuf, Prefix};
use std::process::Stdio;
use std::str;
use std::sync::{Arc, Mutex};
use std::collections::HashMap;

use anyhow::{bail, Result};
use argmax::Command;
Expand Down Expand Up @@ -83,12 +85,12 @@ impl CommandSet {
self.mode == ExecutionMode::Batch
}

pub fn execute(&self, input: &Path, out_perm: Arc<Mutex<()>>, buffer_output: bool) -> ExitCode {
pub fn execute(&self, input: &Path, matches: &HashMap<usize, HashMap<usize, String>>, out_perm: Arc<Mutex<()>>, buffer_output: bool) -> ExitCode {
let path_separator = self.path_separator.as_deref();
let commands = self
.commands
.iter()
.map(|c| c.generate(input, path_separator));
.map(|c| c.generate(input, path_separator, matches));
execute_commands(commands, &out_perm, buffer_output)
}

Expand All @@ -108,7 +110,7 @@ impl CommandSet {
Ok(mut builders) => {
for path in paths {
for builder in &mut builders {
if let Err(e) = builder.push(&path, path_separator) {
if let Err(e) = builder.push(&path, path_separator, &HashMap::new()) {
return handle_cmd_error(Some(&builder.cmd), e);
}
}
Expand Down Expand Up @@ -148,9 +150,9 @@ impl CommandBuilder {
if arg.has_tokens() {
path_arg = Some(arg.clone());
} else if path_arg == None {
pre_args.push(arg.generate("", None));
pre_args.push(arg.generate("", None, &HashMap::new()));
} else {
post_args.push(arg.generate("", None));
post_args.push(arg.generate("", None, &HashMap::new()));
}
}

Expand All @@ -175,12 +177,12 @@ impl CommandBuilder {
Ok(cmd)
}

fn push(&mut self, path: &Path, separator: Option<&str>) -> io::Result<()> {
fn push(&mut self, path: &Path, separator: Option<&str>, matches: &HashMap<usize, HashMap<usize, String>>) -> io::Result<()> {
if self.limit > 0 && self.count >= self.limit {
self.finish()?;
}

let arg = self.path_arg.generate(path, separator);
let arg = self.path_arg.generate(path, separator, matches);
if !self
.cmd
.args_would_fit(iter::once(&arg).chain(&self.post_args))
Expand Down Expand Up @@ -221,8 +223,11 @@ impl CommandTemplate {
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
// GNU Parallel: /?\.?|//
// Positional : (?:(\d+)(?:\.(\d+))?(?::-([^\}]+)?)?)
static PLACEHOLDER_REGEX: &str = r"\{(?:/?\.?|//|(?:(\d+)(?:\.(\d+))?(?::-([^\}]+)?)?))\}";
static PLACEHOLDER_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(/?\.?|//)\}").unwrap());
Lazy::new(|| Regex::new(PLACEHOLDER_REGEX).unwrap());

let mut args = Vec::new();
let mut has_placeholder = false;
Expand All @@ -233,8 +238,9 @@ impl CommandTemplate {
let mut tokens = Vec::new();
let mut start = 0;

for placeholder in PLACEHOLDER_PATTERN.find_iter(arg) {
for matched_text in PLACEHOLDER_PATTERN.captures_iter(arg) {
// Leading text before the placeholder.
let placeholder = matched_text.get(0).unwrap();
if placeholder.start() > start {
tokens.push(Token::Text(arg[start..placeholder.start()].to_owned()));
}
Expand All @@ -247,7 +253,17 @@ impl CommandTemplate {
"{/}" => tokens.push(Token::Basename),
"{//}" => tokens.push(Token::Parent),
"{/.}" => tokens.push(Token::BasenameNoExt),
_ => unreachable!("Unhandled placeholder"),
_ => {
// Only the positional alternative of the placeholder regex can reach this arm, so
// capture 1 is always present and consists solely of digits.
// NOTE(review): `\d+` is unbounded, so a number that does not fit in `usize` makes
// `parse()` fail and the `unwrap()` panic on user input — consider returning an error.
let num1: usize = matched_text.get(1).unwrap().as_str().parse().unwrap();
// Optional default text following `:-`; empty when none was given.
let default = matched_text.get(3).map_or("", |m| m.as_str()).to_string();
// `{M.N}` names occurrence M and group N; a bare `{N}` means group N of the
// first occurrence.
let token_regex = if let Some(num2) = matched_text.get(2) {
Token::Positional(num1, num2.as_str().parse().unwrap(), default)
} else {
Token::Positional(1, num1, default)
};
tokens.push(token_regex)
}
}

has_placeholder = true;
Expand Down Expand Up @@ -292,10 +308,10 @@ impl CommandTemplate {
///
/// Using the internal `args` field, and a supplied `input` variable, a `Command` will be
/// build.
fn generate(&self, input: &Path, path_separator: Option<&str>) -> io::Result<Command> {
let mut cmd = Command::new(self.args[0].generate(&input, path_separator));
fn generate(&self, input: &Path, path_separator: Option<&str>, matches: &HashMap<usize, HashMap<usize, String>>) -> io::Result<Command> {
let mut cmd = Command::new(self.args[0].generate(&input, path_separator, matches));
for arg in &self.args[1..] {
cmd.try_arg(arg.generate(&input, path_separator))?;
cmd.try_arg(arg.generate(&input, path_separator, matches))?;
}
Ok(cmd)
}
Expand All @@ -319,7 +335,7 @@ impl ArgumentTemplate {
/// Generate an argument from this template. If path_separator is Some, then it will replace
/// the path separator in all placeholder tokens. Text arguments and tokens are not affected by
/// path separator substitution.
pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>) -> OsString {
pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>, matches: &HashMap<usize, HashMap<usize, String>>) -> OsString {
use self::Token::*;
let path = path.as_ref();

Expand All @@ -342,6 +358,25 @@ impl ArgumentTemplate {
s.push(Self::replace_separator(path.as_ref(), path_separator))
}
Text(ref string) => s.push(string),
Positional(occurrence, group, ref default) => {
    // Occurrences are numbered from 1 in the placeholder but stored under
    // zero-based keys. An occurrence of 0 (`{0.N}`) selects every recorded
    // occurrence.
    //
    // `matches` may be empty (batch mode always passes an empty map), so the
    // range is built without subtracting from `matches.len()`. For the
    // "all occurrences" form at least index 0 is visited, so that the default
    // text is still emitted when nothing was captured.
    let applied_matches = if occurrence == 0 {
        0..matches.len().max(1)
    } else {
        occurrence - 1..occurrence
    };
    for match_num in applied_matches {
        // Fall back to the default text when either the occurrence or the
        // group inside it did not match.
        match matches
            .get(&match_num)
            .and_then(|groups| groups.get(&group))
        {
            Some(text) => s.push(text),
            None => s.push(default),
        }
    }
}
}
}
s
Expand Down Expand Up @@ -554,7 +589,7 @@ mod tests {
let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));
assert_eq!(arg.generate($input, Some("#"), &HashMap::new()), OsString::from($expected));
};
}

Expand All @@ -569,7 +604,7 @@ mod tests {
let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));
assert_eq!(arg.generate($input, Some("#"), &HashMap::new()), OsString::from($expected));
};
}

Expand Down
2 changes: 2 additions & 0 deletions src/exec/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub enum Token {
NoExt,
BasenameNoExt,
Text(String),
Positional(usize, usize, String),
}

impl Display for Token {
Expand All @@ -23,6 +24,7 @@ impl Display for Token {
Token::NoExt => f.write_str("{.}")?,
Token::BasenameNoExt => f.write_str("{/.}")?,
Token::Text(ref string) => f.write_str(string)?,
Token::Positional(ocurrence, group, ref default) => f.write_str(format!("{{{}.{}:-{}}}", ocurrence, group, default).as_str())?,
}
Ok(())
}
Expand Down
28 changes: 5 additions & 23 deletions src/walk.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::ffi::OsStr;
use std::io;
use std::mem;
use std::path::PathBuf;
Expand All @@ -7,7 +6,7 @@ use std::sync::mpsc::{channel, Receiver, RecvTimeoutError, Sender};
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::{Duration, Instant};
use std::{borrow::Cow, io::Write};
use std::io::Write;

use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder;
Expand Down Expand Up @@ -413,7 +412,7 @@ fn spawn_senders(
return ignore::WalkState::Quit;
}

let entry = match entry_o {
let mut entry = match entry_o {
Ok(ref e) if e.depth() == 0 => {
// Skip the root directory entry.
return ignore::WalkState::Continue;
Expand Down Expand Up @@ -456,29 +455,12 @@ fn spawn_senders(
}
}

// Check the name first, since it doesn't require metadata
let entry_path = entry.path();

let search_str: Cow<OsStr> = if config.search_full_path {
let path_abs_buf = filesystem::path_absolute_form(entry_path)
.expect("Retrieving absolute path succeeds");
Cow::Owned(path_abs_buf.as_os_str().to_os_string())
} else {
match entry_path.file_name() {
Some(filename) => Cow::Borrowed(filename),
None => unreachable!(
"Encountered file system entry without a file name. This should only \
happen for paths like 'foo/bar/..' or '/' which are not supposed to \
appear in a file system traversal."
),
}
};

if !pattern.is_match(&filesystem::osstr_to_bytes(search_str.as_ref())) {
if !entry.is_match(pattern.as_ref(), config.search_full_path) {
return ignore::WalkState::Continue;
}
}

// Filter out unwanted extensions.
let entry_path = entry.path();
if let Some(ref exts_regex) = config.extensions {
if let Some(path_str) = entry_path.file_name() {
if !exts_regex.is_match(&filesystem::osstr_to_bytes(path_str)) {
Expand Down