Skip to content

Commit

Permalink
feat: add std regex builtins
Browse files Browse the repository at this point in the history
Upstream issue: google/jsonnet#1039
  • Loading branch information
CertainLach committed Dec 10, 2023
1 parent 11193ce commit 103c2a5
Show file tree
Hide file tree
Showing 8 changed files with 416 additions and 88 deletions.
299 changes: 212 additions & 87 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ clap_complete = "4.4"
lsp-server = "0.7.4"
lsp-types = "0.94.1"

regex = "1.8.4"
lru = "0.10.0"

#[profile.test]
#opt-level = 1

Expand Down
4 changes: 4 additions & 0 deletions cmds/jrsonnet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ exp-destruct = ["jrsonnet-evaluator/exp-destruct"]
exp-object-iteration = ["jrsonnet-evaluator/exp-object-iteration"]
# Bigint type
exp-bigint = ["jrsonnet-evaluator/exp-bigint", "jrsonnet-cli/exp-bigint"]
# std.regex* builtins (regexFullMatch, regexPartialMatch, regexReplace, ...)
exp-regex = [
"jrsonnet-stdlib/exp-regex",
]
# obj?.field, obj?.['field']
exp-null-coaelse = [
"jrsonnet-evaluator/exp-null-coaelse",
Expand Down
3 changes: 3 additions & 0 deletions crates/jrsonnet-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ exp-null-coaelse = [
"jrsonnet-evaluator/exp-null-coaelse",
"jrsonnet-stdlib/exp-null-coaelse",
]
exp-regex = [
"jrsonnet-stdlib/exp-regex",
]
legacy-this-file = ["jrsonnet-stdlib/legacy-this-file"]

[dependencies]
Expand Down
16 changes: 16 additions & 0 deletions crates/jrsonnet-evaluator/src/typed/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,22 @@ impl Typed for String {
}
}

/// Lets builtins accept and return [`StrValue`] directly, without converting
/// the string into another representation first.
impl Typed for StrValue {
    const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);

    /// Wraps the string into [`Val::Str`]; this conversion never fails.
    fn into_untyped(value: Self) -> Result<Val> {
        Ok(Val::Str(value))
    }

    /// Extracts the string out of `value`.
    ///
    /// The value is first checked against [`Self::TYPE`], and a failed check
    /// is returned to the caller as an error.
    fn from_untyped(value: Val) -> Result<Self> {
        <Self as Typed>::TYPE.check(&value)?;
        // Once the check for the string type has passed, any other variant
        // is treated as impossible here.
        let Val::Str(inner) = value else {
            unreachable!()
        };
        Ok(inner)
    }
}

impl Typed for char {
const TYPE: &'static ComplexValType = &ComplexValType::Char;

Expand Down
7 changes: 7 additions & 0 deletions crates/jrsonnet-stdlib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ exp-preserve-order = ["jrsonnet-evaluator/exp-preserve-order"]
exp-bigint = ["num-bigint", "jrsonnet-evaluator/exp-bigint"]

exp-null-coaelse = ["jrsonnet-parser/exp-null-coaelse", "jrsonnet-evaluator/exp-null-coaelse"]
# std.regexFullMatch, std.regexPartialMatch and other regex helpers
exp-regex = ["regex", "lru", "rustc-hash"]

[dependencies]
jrsonnet-evaluator.workspace = true
Expand Down Expand Up @@ -49,6 +51,11 @@ serde_yaml_with_quirks.workspace = true

num-bigint = { workspace = true, optional = true }

# regex
regex = { workspace = true, optional = true }
lru = { workspace = true, optional = true }
rustc-hash = { workspace = true, optional = true }

[build-dependencies]
jrsonnet-parser.workspace = true
structdump = { workspace = true, features = ["derive"] }
38 changes: 37 additions & 1 deletion crates/jrsonnet-stdlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ mod sets;
pub use sets::*;
mod compat;
pub use compat::*;
#[cfg(feature = "exp-regex")]
mod regex;
#[cfg(feature = "exp-regex")]
pub use crate::regex::*;

pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
let mut builder = ObjValueBuilder::new();
Expand Down Expand Up @@ -185,6 +189,9 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
("setInter", builtin_set_inter::INST),
("setDiff", builtin_set_diff::INST),
("setUnion", builtin_set_union::INST),
// Regex
#[cfg(feature = "exp-regex")]
("regexQuoteMeta", builtin_regex_quote_meta::INST),
// Compat
("__compare", builtin___compare::INST),
]
Expand All @@ -207,9 +214,38 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
},
);
builder.method("trace", builtin_trace { settings });

builder.method("id", FuncVal::Id);

#[cfg(feature = "exp-regex")]
{
// Regex
let regex_cache = RegexCache::default();
builder.method(
"regexFullMatch",
builtin_regex_full_match {
cache: regex_cache.clone(),
},
);
builder.method(
"regexPartialMatch",
builtin_regex_partial_match {
cache: regex_cache.clone(),
},
);
builder.method(
"regexReplace",
builtin_regex_replace {
cache: regex_cache.clone(),
},
);
builder.method(
"regexGlobalReplace",
builtin_regex_global_replace {
cache: regex_cache.clone(),
},
);
};

builder.build()
}

Expand Down
134 changes: 134 additions & 0 deletions crates/jrsonnet-stdlib/src/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};

use ::regex::Regex;
use jrsonnet_evaluator::{
error::{ErrorKind::*, Result},
val::StrValue,
IStr, ObjValueBuilder, Val,
};
use jrsonnet_macros::builtin;
use lru::LruCache;
use rustc_hash::FxHasher;

/// Cache of compiled regular expressions, keyed by the pattern source.
///
/// The cache is bounded: the least recently used entry is dropped once the
/// capacity is exceeded.
pub struct RegexCacheInner {
    /// Interior mutability is needed because lookups update the LRU order
    /// while the cache itself is only reachable through a shared reference.
    cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
}

impl Default for RegexCacheInner {
    /// Creates an empty cache holding at most 20 compiled patterns.
    fn default() -> Self {
        let capacity = NonZeroUsize::new(20).unwrap();
        let lru = LruCache::with_hasher(capacity, BuildHasherDefault::default());
        Self {
            cache: RefCell::new(lru),
        }
    }
}

/// Shared handle to a [`RegexCacheInner`].
pub type RegexCache = Rc<RegexCacheInner>;
impl RegexCacheInner {
    /// Returns the compiled form of `pattern`, compiling and caching it on a
    /// cache miss.
    ///
    /// # Errors
    ///
    /// Returns a runtime error when `pattern` is not a valid regular
    /// expression; nothing is cached in that case.
    fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
        let mut lru = self.cache.borrow_mut();

        // `get` also marks the entry as recently used.
        if let Some(hit) = lru.get(&pattern).cloned() {
            return Ok(hit);
        }

        let compiled = Rc::new(
            Regex::new(&pattern)
                .map_err(|e| RuntimeError(format!("regex parse failed: {e}").into()))?,
        );
        lru.push(pattern, Rc::clone(&compiled));
        Ok(compiled)
    }
}

pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
let mut out = ObjValueBuilder::with_capacity(3);

let mut captures = Vec::with_capacity(regex.captures_len());
let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());

let Some(captured) = regex.captures(&str) else {
return Ok(Val::Null)
};

for ele in captured.iter().skip(1) {
if let Some(ele) = ele {
captures.push(Val::Str(StrValue::Flat(ele.as_str().into())))
} else {
captures.push(Val::Str(StrValue::Flat(IStr::empty())))
}
}
for (i, name) in regex
.capture_names()
.skip(1)
.enumerate()
.flat_map(|(i, v)| Some((i, v?)))
{
let capture = captures[i].clone();
named_captures.member(name.into()).value(capture)?;
}

out.member("string".into())
.value_unchecked(Val::Str(captured.get(0).unwrap().as_str().into()));
out.member("captures".into())
.value_unchecked(Val::Arr(captures.into()));
out.member("namedCaptures".into())
.value_unchecked(Val::Obj(named_captures.build()));

Ok(Val::Obj(out.build()))
}

/// `std.regexPartialMatch(pattern, str)`: searches `str` for the first match
/// of `pattern`.
///
/// Returns `null` when nothing matches, otherwise the match object built by
/// [`regex_match_inner`]. Fails when `pattern` can not be compiled.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_partial_match(
    this: &builtin_regex_partial_match,
    pattern: IStr,
    str: String,
) -> Result<Val> {
    this.cache
        .parse(pattern)
        .and_then(|compiled| regex_match_inner(&compiled, str))
}

#[builtin(fields(
cache: RegexCache,
))]
pub fn builtin_regex_full_match(
this: &builtin_regex_full_match,
pattern: StrValue,
str: String,
) -> Result<Val> {
let pattern = format!("^{pattern}$").into();
let regex = this.cache.parse(pattern)?;
regex_match_inner(&regex, str)
}

/// `std.regexQuoteMeta(pattern)`: escapes the regex meta characters in
/// `pattern`, producing a pattern that matches the input text literally.
#[builtin]
pub fn builtin_regex_quote_meta(pattern: String) -> String {
    let literal = pattern.as_str();
    regex::escape(literal)
}

/// `std.regexReplace(str, pattern, to)`: replaces the first match of
/// `pattern` in `str` with `to`.
///
/// `to` is handed to the regex engine as a replacement template, so capture
/// group references inside it are expanded. Fails when `pattern` can not be
/// compiled.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_replace(
    this: &builtin_regex_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let compiled = this.cache.parse(pattern)?;
    let result = compiled.replace(str.as_str(), to.as_str());
    Ok(result.into_owned())
}

/// `std.regexGlobalReplace(str, pattern, to)`: replaces every
/// non-overlapping match of `pattern` in `str` with `to`.
///
/// `to` is handed to the regex engine as a replacement template, so capture
/// group references inside it are expanded. Fails when `pattern` can not be
/// compiled.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_global_replace(
    this: &builtin_regex_global_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let compiled = this.cache.parse(pattern)?;
    let result = compiled.replace_all(str.as_str(), to.as_str());
    Ok(result.into_owned())
}

0 comments on commit 103c2a5

Please sign in to comment.