From 4c7762002dd40a8331a736ff3ef7bab00152cae6 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Mon, 10 Apr 2023 14:40:05 +0100 Subject: [PATCH 1/3] [refactor] Simplify fn:replace; no need for it to extend fn:matches since it was changed to use Saxon --- .../exist/xquery/functions/fn/FnModule.java | 4 +- .../exist/xquery/functions/fn/FunReplace.java | 141 +++++------------- 2 files changed, 39 insertions(+), 106 deletions(-) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java index cf0a6f04a68..413c58b5f3d 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java @@ -181,8 +181,8 @@ public class FnModule extends AbstractInternalModule { new FunctionDef(FunPosition.signature, FunPosition.class), new FunctionDef(FunQName.signature, FunQName.class), new FunctionDef(FunRemove.signature, FunRemove.class), - new FunctionDef(FunReplace.signatures[0], FunReplace.class), - new FunctionDef(FunReplace.signatures[1], FunReplace.class), + new FunctionDef(FunReplace.FS_REPLACE[0], FunReplace.class), + new FunctionDef(FunReplace.FS_REPLACE[1], FunReplace.class), new FunctionDef(FunReverse.signature, FunReverse.class), new FunctionDef(FunResolveURI.signatures[0], FunResolveURI.class), new FunctionDef(FunResolveURI.signatures[1], FunResolveURI.class), diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java index 42227175b5a..4dd74bb6654 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java @@ -28,44 +28,29 @@ import net.sf.saxon.functions.Replace; import net.sf.saxon.regex.RegularExpression; import org.exist.dom.QName; -import org.exist.xquery.Atomize; -import org.exist.xquery.Cardinality; -import org.exist.xquery.Dependency; -import org.exist.xquery.DynamicCardinalityCheck; -import org.exist.xquery.ErrorCodes; -import org.exist.xquery.Expression; -import org.exist.xquery.Function; -import org.exist.xquery.FunctionSignature; -import org.exist.xquery.Profiler; -import org.exist.xquery.XPathException; -import org.exist.xquery.XQueryContext; -import org.exist.xquery.util.Error; +import org.exist.xquery.*; import org.exist.xquery.value.FunctionParameterSequenceType; -import org.exist.xquery.value.FunctionReturnSequenceType; -import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; -import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; +import static org.exist.xquery.FunctionDSL.*; import static org.exist.xquery.regex.RegexUtil.*; /** * @author Adam Retter * @author Wolfgang Meier */ -public class FunReplace extends FunMatches { - - private static final String FUNCTION_DESCRIPTION_3_PARAM = - "The function returns the xs:string that is obtained by replacing each non-overlapping substring " + - "of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n"; - private static final String FUNCTION_DESCRIPTION_4_PARAM = +public class FunReplace extends BasicFunction { + + private static final QName FS_REPLACE_NAME = new QName("replace", Function.BUILTIN_FUNCTION_NS); + + private static final String FS_REPLACE_DESCRIPTION = "The function returns the xs:string that is obtained by replacing each non-overlapping substring " + "of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n" + "The $flags argument is interpreted in the same manner as for the fn:matches() function.\n\n" + "Calling the four argument version with the $flags argument set to a " + - "zero-length string gives the same effect as using the three argument version.\n\n"; - private static final String FUNCTION_DESCRIPTION_COMMON = + "zero-length string gives the same effect as using the three argument version.\n\n" + "If $input is the empty sequence, it is interpreted as the zero-length string.\n\nIf two overlapping " + "substrings of $input both match the $pattern, then only the first one (that is, the one whose first " + "character comes first in the $input string) is replaced.\n\nWithin the $replacement string, a variable " + @@ -85,96 +70,49 @@ public class FunReplace extends FunMatches { "included \"as is\" in the replacement string, and the rules are reapplied using the number N " + "formed by stripping off this last digit."; - protected static final FunctionParameterSequenceType INPUT_ARG = new FunctionParameterSequenceType("input", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"); - protected static final FunctionParameterSequenceType PATTERN_ARG = new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The pattern to match"); - protected static final FunctionParameterSequenceType REPLACEMENT_ARG = new FunctionParameterSequenceType("replacement", Type.STRING, Cardinality.EXACTLY_ONE, "The string to replace the pattern with"); - protected static final FunctionParameterSequenceType FLAGS_ARG = new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags"); - protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the altered string"); - - public final static FunctionSignature[] signatures = { - new FunctionSignature( - new QName("replace", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_3_PARAM + FUNCTION_DESCRIPTION_COMMON, - new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG }, - RETURN_TYPE - ), - new FunctionSignature( - new QName("replace", Function.BUILTIN_FUNCTION_NS), - FUNCTION_DESCRIPTION_4_PARAM + FUNCTION_DESCRIPTION_COMMON, - new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG, FLAGS_ARG }, - RETURN_TYPE - ) - }; + private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("input", Type.STRING, "The input string"); + private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern to match"); + private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_REPLACEMENT = param("replacement", Type.STRING, "The string to replace the pattern with"); + + static final FunctionSignature [] FS_REPLACE = functionSignatures( + FS_REPLACE_NAME, + FS_REPLACE_DESCRIPTION, + returns(Type.STRING, "the altered string"), + arities( + arity( + FS_TOKENIZE_PARAM_INPUT, + FS_TOKENIZE_PARAM_PATTERN, + FS_TOKENIZE_PARAM_REPLACEMENT + ), + arity( + FS_TOKENIZE_PARAM_INPUT, + FS_TOKENIZE_PARAM_PATTERN, + FS_TOKENIZE_PARAM_REPLACEMENT, + param("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags") + ) + ) + ); public FunReplace(final XQueryContext context, final FunctionSignature signature) { super(context, signature); } - - @Override - public void setArguments(List arguments) { - steps.clear(); - Expression arg = arguments.get(0); - arg = new DynamicCardinalityCheck(context, Cardinality.ZERO_OR_ONE, arg, - new Error(Error.FUNC_PARAM_CARDINALITY, "1", getSignature())); - if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC)) - {arg = new Atomize(context, arg);} - steps.add(arg); - - arg = arguments.get(1); - arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, - new Error(Error.FUNC_PARAM_CARDINALITY, "2", getSignature())); - if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC)) - {arg = new Atomize(context, arg);} - steps.add(arg); - - arg = arguments.get(2); - arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, - new Error(Error.FUNC_PARAM_CARDINALITY, "3", getSignature())); - if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC)) - {arg = new Atomize(context, arg);} - steps.add(arg); - - if (arguments.size() == 4) { - arg = arguments.get(3); - arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, - new Error(Error.FUNC_PARAM_CARDINALITY, "4", getSignature())); - if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC)) - {arg = new Atomize(context, arg);} - steps.add(arg); - } - } @Override - public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { - if (context.getProfiler().isEnabled()) { - context.getProfiler().start(this); - context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); - if (contextSequence != null) { - context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence); - } - if (contextItem != null) { - context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence()); - } - } - + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { final Sequence result; - final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem); + final Sequence stringArg = args[0]; if (stringArg.isEmpty()) { result = StringValue.EMPTY_STRING; } else { final String flags; - if (getSignature().getArgumentCount() == 4) { - flags = getArgument(3).eval(contextSequence, contextItem).getStringValue(); + if (args.length == 4) { + flags = args[3].itemAt(0).getStringValue(); } else { flags = ""; } - final String string = stringArg.getStringValue(); - final Sequence patternSeq = getArgument(1).eval(contextSequence, contextItem); - final String pattern = patternSeq.getStringValue(); - - final Sequence replaceSeq = getArgument(2).eval(contextSequence, contextItem); - final String replace = replaceSeq.getStringValue(); + final String pattern = args[1].itemAt(0).getStringValue(); + final String replace = args[2].itemAt(0).getStringValue(); final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration(); @@ -210,11 +148,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } } - if (context.getProfiler().isEnabled()) { - context.getProfiler().end(this, "", result); - } - - return result; - + return result; } } From fe3ff5cd423ccf7794b337a06aac10cc98705a6a Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Mon, 10 Apr 2023 14:48:17 +0100 Subject: [PATCH 2/3] [bugfix] Fix cardinality enforcement of fn:tokenize; no need for it to extend fn:matches Closes https://github.com/eXist-db/exist/issues/4863 --- .../exist/xquery/functions/fn/FunMatches.java | 2 +- .../xquery/functions/fn/FunTokenize.java | 49 +++++-------------- exist-core/src/test/xquery/regex.xml | 12 +++++ 3 files changed, 25 insertions(+), 38 deletions(-) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java index 7f177c382ae..e35cfb49cc7 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java @@ -59,7 +59,7 @@ * * @author Wolfgang Meier */ -public class FunMatches extends Function implements Optimizable, IndexUseReporter { +public final class FunMatches extends Function implements Optimizable, IndexUseReporter { private static final FunctionParameterSequenceType FS_PARAM_INPUT = optParam("input", Type.STRING, "The input string"); private static final FunctionParameterSequenceType FS_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern"); diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java index f6f541c4887..f31b8b645f0 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java @@ -21,19 +21,13 @@ */ package org.exist.xquery.functions.fn; +import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.exist.dom.QName; import org.exist.util.PatternFactory; -import org.exist.xquery.Dependency; -import org.exist.xquery.ErrorCodes; -import org.exist.xquery.Function; -import org.exist.xquery.FunctionSignature; -import org.exist.xquery.Profiler; -import org.exist.xquery.XPathException; -import org.exist.xquery.XQueryContext; +import org.exist.xquery.*; import org.exist.xquery.value.FunctionParameterSequenceType; -import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; @@ -46,7 +40,7 @@ * @author Wolfgang Meier * @see https://www.w3.org/TR/xpath-functions-31/#func-tokenize */ -public class FunTokenize extends FunMatches { +public class FunTokenize extends BasicFunction { private static final QName FS_TOKENIZE_NAME = new QName("tokenize", Function.BUILTIN_FUNCTION_NS); @@ -78,20 +72,9 @@ public FunTokenize(final XQueryContext context, final FunctionSignature signatur } @Override - public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { - if (context.getProfiler().isEnabled()) { - context.getProfiler().start(this); - context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies())); - if (contextSequence != null) { - context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence); - } - if (contextItem != null) { - context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence()); - } - } - + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { final Sequence result; - final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem); + final Sequence stringArg = args[0]; if (stringArg.isEmpty()) { result = Sequence.EMPTY_SEQUENCE; } else { @@ -100,34 +83,30 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr result = Sequence.EMPTY_SEQUENCE; } else { final int flags; - if (getSignature().getArgumentCount() == 3) { - flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem) - .getStringValue()); + if (args.length == 3) { + flags = parseFlags(this, args[2].itemAt(0).getStringValue()); } else { flags = 0; } final String pattern; - if(getArgumentCount() == 1) { + if (args.length == 1) { pattern = " "; string = FunNormalizeSpace.normalize(string); } else { if(hasLiteral(flags)) { // no need to change anything - pattern = getArgument(1).eval(contextSequence, contextItem).getStringValue(); + pattern = args[1].itemAt(0).getStringValue(); } else { final boolean ignoreWhitespace = hasIgnoreWhitespace(flags); final boolean caseBlind = hasCaseInsensitive(flags); - pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind); + pattern = translateRegexp(this, args[1].itemAt(0).getStringValue(), ignoreWhitespace, caseBlind); } } try { - if (pat == null || (!pattern.equals(pat.pattern())) || flags != pat.flags()) { - pat = PatternFactory.getInstance().getPattern(pattern, flags); - } - - if(pat.matcher("").matches()) { + final Pattern pat = PatternFactory.getInstance().getPattern(pattern, flags); + if (pat.matcher("").matches()) { throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); } @@ -144,10 +123,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } } - if (context.getProfiler().isEnabled()) { - context.getProfiler().end(this, "", result); - } - return result; } diff --git a/exist-core/src/test/xquery/regex.xml b/exist-core/src/test/xquery/regex.xml index d7badc2f059..e78aaa62c90 100644 --- a/exist-core/src/test/xquery/regex.xml +++ b/exist-core/src/test/xquery/regex.xml @@ -143,4 +143,16 @@ 12 3 5 6 + + fn:tokenize-single-input-1 + fn:tokenize("x,y", ",") + x y + + + + fn:tokenize-single-input-2 + fn:tokenize(("a", "b", "x,y"), ",") + XPTY0004 + + \ No newline at end of file From 92d2b6ec433dd8632bcc4e48c479773f672c4246 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Mon, 10 Apr 2023 15:12:25 +0100 Subject: [PATCH 3/3] [bugfix] fn:replace, fn:tokenize, and fn:analyze-string must throw FORX0003 when pattern matches an empty string Closes https://github.com/eXist-db/exist/issues/3803 --- .../xquery/functions/fn/FunAnalyzeString.java | 3 +++ .../exist/xquery/functions/fn/FunReplace.java | 3 +++ exist-core/src/test/xquery/regex.xml | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java index 1fd6bc13bc7..870fa3e392a 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java @@ -130,6 +130,9 @@ private void analyzeString(final MemTreeBuilder builder, final String input, Str try { final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings); + if (regularExpression.matches("")) { + throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); + } //TODO(AR) cache the regular expression... might be possible through Saxon config diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java index 4dd74bb6654..9ac681bd33b 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java @@ -120,6 +120,9 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro try { final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings); + if (regularExpression.matches("")) { + throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); + } //TODO(AR) cache the regular expression... might be possible through Saxon config diff --git a/exist-core/src/test/xquery/regex.xml b/exist-core/src/test/xquery/regex.xml index e78aaa62c90..fda766ff7a7 100644 --- a/exist-core/src/test/xquery/regex.xml +++ b/exist-core/src/test/xquery/regex.xml @@ -137,6 +137,12 @@ lo + + fn:replace-regex-match-empty-1 + fn:replace("12.34" , "^\D*", "") + FORX0003 + + fn:tokenize-qflag-1 fn:tokenize("12.3.5.6", ".", "q") @@ -155,4 +161,16 @@ XPTY0004 + + fn:tokenize-regex-match-empty-1 + fn:tokenize("12.34" , "^\D*") + FORX0003 + + + + fn:analyze-string-regex-match-empty-1 + fn:analyze-string("12.34" , "^\D*") + FORX0003 + + \ No newline at end of file