Skip to content

Commit

Permalink
Merge pull request #4864 from evolvedbinary/hotfix/replace-tokenize-a…
Browse files Browse the repository at this point in the history
…nalyze-string

Fixes to fn:replace, fn:tokenize, and fn:analyze-string
  • Loading branch information
line-o committed Apr 24, 2023
2 parents 650609f + 92d2b6e commit fc9c4fb
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 144 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ public class FnModule extends AbstractInternalModule {
new FunctionDef(FunPosition.signature, FunPosition.class),
new FunctionDef(FunQName.signature, FunQName.class),
new FunctionDef(FunRemove.signature, FunRemove.class),
new FunctionDef(FunReplace.signatures[0], FunReplace.class),
new FunctionDef(FunReplace.signatures[1], FunReplace.class),
new FunctionDef(FunReplace.FS_REPLACE[0], FunReplace.class),
new FunctionDef(FunReplace.FS_REPLACE[1], FunReplace.class),
new FunctionDef(FunReverse.signature, FunReverse.class),
new FunctionDef(FunResolveURI.signatures[0], FunResolveURI.class),
new FunctionDef(FunResolveURI.signatures[1], FunResolveURI.class),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ private void analyzeString(final MemTreeBuilder builder, final String input, Str

try {
final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings);
if (regularExpression.matches("")) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

//TODO(AR) cache the regular expression... might be possible through Saxon config

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
*
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
*/
public class FunMatches extends Function implements Optimizable, IndexUseReporter {
public final class FunMatches extends Function implements Optimizable, IndexUseReporter {

private static final FunctionParameterSequenceType FS_PARAM_INPUT = optParam("input", Type.STRING, "The input string");
private static final FunctionParameterSequenceType FS_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern");
Expand Down
144 changes: 40 additions & 104 deletions exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,29 @@
import net.sf.saxon.functions.Replace;
import net.sf.saxon.regex.RegularExpression;
import org.exist.dom.QName;
import org.exist.xquery.Atomize;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Dependency;
import org.exist.xquery.DynamicCardinalityCheck;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Expression;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.util.Error;
import org.exist.xquery.*;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;

import static org.exist.xquery.FunctionDSL.*;
import static org.exist.xquery.regex.RegexUtil.*;

/**
* @author <a href="mailto:[email protected]">Adam Retter</a>
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
*/
public class FunReplace extends FunMatches {

private static final String FUNCTION_DESCRIPTION_3_PARAM =
"The function returns the xs:string that is obtained by replacing each non-overlapping substring " +
"of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n";
private static final String FUNCTION_DESCRIPTION_4_PARAM =
public class FunReplace extends BasicFunction {

private static final QName FS_REPLACE_NAME = new QName("replace", Function.BUILTIN_FUNCTION_NS);

private static final String FS_REPLACE_DESCRIPTION =
"The function returns the xs:string that is obtained by replacing each non-overlapping substring " +
"of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n" +
"The $flags argument is interpreted in the same manner as for the fn:matches() function.\n\n" +
"Calling the four argument version with the $flags argument set to a " +
"zero-length string gives the same effect as using the three argument version.\n\n";
private static final String FUNCTION_DESCRIPTION_COMMON =
"zero-length string gives the same effect as using the three argument version.\n\n" +
"If $input is the empty sequence, it is interpreted as the zero-length string.\n\nIf two overlapping " +
"substrings of $input both match the $pattern, then only the first one (that is, the one whose first " +
"character comes first in the $input string) is replaced.\n\nWithin the $replacement string, a variable " +
Expand All @@ -85,103 +70,59 @@ public class FunReplace extends FunMatches {
"included \"as is\" in the replacement string, and the rules are reapplied using the number N " +
"formed by stripping off this last digit.";

protected static final FunctionParameterSequenceType INPUT_ARG = new FunctionParameterSequenceType("input", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string");
protected static final FunctionParameterSequenceType PATTERN_ARG = new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The pattern to match");
protected static final FunctionParameterSequenceType REPLACEMENT_ARG = new FunctionParameterSequenceType("replacement", Type.STRING, Cardinality.EXACTLY_ONE, "The string to replace the pattern with");
protected static final FunctionParameterSequenceType FLAGS_ARG = new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags");
protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the altered string");

public final static FunctionSignature[] signatures = {
new FunctionSignature(
new QName("replace", Function.BUILTIN_FUNCTION_NS),
FUNCTION_DESCRIPTION_3_PARAM + FUNCTION_DESCRIPTION_COMMON,
new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG },
RETURN_TYPE
),
new FunctionSignature(
new QName("replace", Function.BUILTIN_FUNCTION_NS),
FUNCTION_DESCRIPTION_4_PARAM + FUNCTION_DESCRIPTION_COMMON,
new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG, FLAGS_ARG },
RETURN_TYPE
)
};
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("input", Type.STRING, "The input string");
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern to match");
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_REPLACEMENT = param("replacement", Type.STRING, "The string to replace the pattern with");

static final FunctionSignature [] FS_REPLACE = functionSignatures(
FS_REPLACE_NAME,
FS_REPLACE_DESCRIPTION,
returns(Type.STRING, "the altered string"),
arities(
arity(
FS_TOKENIZE_PARAM_INPUT,
FS_TOKENIZE_PARAM_PATTERN,
FS_TOKENIZE_PARAM_REPLACEMENT
),
arity(
FS_TOKENIZE_PARAM_INPUT,
FS_TOKENIZE_PARAM_PATTERN,
FS_TOKENIZE_PARAM_REPLACEMENT,
param("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags")
)
)
);

public FunReplace(final XQueryContext context, final FunctionSignature signature) {
super(context, signature);
}

@Override
public void setArguments(List<Expression> arguments) {
steps.clear();
Expression arg = arguments.get(0);
arg = new DynamicCardinalityCheck(context, Cardinality.ZERO_OR_ONE, arg,
new Error(Error.FUNC_PARAM_CARDINALITY, "1", getSignature()));
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
{arg = new Atomize(context, arg);}
steps.add(arg);

arg = arguments.get(1);
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
new Error(Error.FUNC_PARAM_CARDINALITY, "2", getSignature()));
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
{arg = new Atomize(context, arg);}
steps.add(arg);

arg = arguments.get(2);
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
new Error(Error.FUNC_PARAM_CARDINALITY, "3", getSignature()));
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
{arg = new Atomize(context, arg);}
steps.add(arg);

if (arguments.size() == 4) {
arg = arguments.get(3);
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
new Error(Error.FUNC_PARAM_CARDINALITY, "4", getSignature()));
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
{arg = new Atomize(context, arg);}
steps.add(arg);
}
}

@Override
public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
if (context.getProfiler().isEnabled()) {
context.getProfiler().start(this);
context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
if (contextSequence != null) {
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
}
if (contextItem != null) {
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
}
}

public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
final Sequence result;
final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem);
final Sequence stringArg = args[0];
if (stringArg.isEmpty()) {
result = StringValue.EMPTY_STRING;
} else {
final String flags;
if (getSignature().getArgumentCount() == 4) {
flags = getArgument(3).eval(contextSequence, contextItem).getStringValue();
if (args.length == 4) {
flags = args[3].itemAt(0).getStringValue();
} else {
flags = "";
}

final String string = stringArg.getStringValue();
final Sequence patternSeq = getArgument(1).eval(contextSequence, contextItem);
final String pattern = patternSeq.getStringValue();

final Sequence replaceSeq = getArgument(2).eval(contextSequence, contextItem);
final String replace = replaceSeq.getStringValue();
final String pattern = args[1].itemAt(0).getStringValue();
final String replace = args[2].itemAt(0).getStringValue();

final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration();

final List<String> warnings = new ArrayList<>(1);

try {
final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings);
if (regularExpression.matches("")) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

//TODO(AR) cache the regular expression... might be possible through Saxon config

Expand Down Expand Up @@ -210,11 +151,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
}
}

if (context.getProfiler().isEnabled()) {
context.getProfiler().end(this, "", result);
}

return result;

return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,13 @@
*/
package org.exist.xquery.functions.fn;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.exist.dom.QName;
import org.exist.util.PatternFactory;
import org.exist.xquery.Dependency;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.*;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
Expand All @@ -46,7 +40,7 @@
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
* @see <a href="https://www.w3.org/TR/xpath-functions-31/#func-tokenize">https://www.w3.org/TR/xpath-functions-31/#func-tokenize</a>
*/
public class FunTokenize extends FunMatches {
public class FunTokenize extends BasicFunction {

private static final QName FS_TOKENIZE_NAME = new QName("tokenize", Function.BUILTIN_FUNCTION_NS);

Expand Down Expand Up @@ -78,20 +72,9 @@ public FunTokenize(final XQueryContext context, final FunctionSignature signatur
}

@Override
public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
if (context.getProfiler().isEnabled()) {
context.getProfiler().start(this);
context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
if (contextSequence != null) {
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
}
if (contextItem != null) {
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
}
}

public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
final Sequence result;
final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem);
final Sequence stringArg = args[0];
if (stringArg.isEmpty()) {
result = Sequence.EMPTY_SEQUENCE;
} else {
Expand All @@ -100,34 +83,30 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
result = Sequence.EMPTY_SEQUENCE;
} else {
final int flags;
if (getSignature().getArgumentCount() == 3) {
flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem)
.getStringValue());
if (args.length == 3) {
flags = parseFlags(this, args[2].itemAt(0).getStringValue());
} else {
flags = 0;
}

final String pattern;
if(getArgumentCount() == 1) {
if (args.length == 1) {
pattern = " ";
string = FunNormalizeSpace.normalize(string);
} else {
if(hasLiteral(flags)) {
// no need to change anything
pattern = getArgument(1).eval(contextSequence, contextItem).getStringValue();
pattern = args[1].itemAt(0).getStringValue();
} else {
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
final boolean caseBlind = hasCaseInsensitive(flags);
pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
pattern = translateRegexp(this, args[1].itemAt(0).getStringValue(), ignoreWhitespace, caseBlind);
}
}

try {
if (pat == null || (!pattern.equals(pat.pattern())) || flags != pat.flags()) {
pat = PatternFactory.getInstance().getPattern(pattern, flags);
}

if(pat.matcher("").matches()) {
final Pattern pat = PatternFactory.getInstance().getPattern(pattern, flags);
if (pat.matcher("").matches()) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

Expand All @@ -144,10 +123,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
}
}

if (context.getProfiler().isEnabled()) {
context.getProfiler().end(this, "", result);
}

return result;
}

Expand Down
30 changes: 30 additions & 0 deletions exist-core/src/test/xquery/regex.xml
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,40 @@
<expected>lo</expected>
</test>

<test output="text">
<task>fn:replace-regex-match-empty-1</task>
<code>fn:replace("12.34" , "^\D*", "")</code>
<error>FORX0003</error>
</test>

<test output="text">
<task>fn:tokenize-qflag-1</task>
<code>fn:tokenize("12.3.5.6", ".", "q")</code>
<expected>12 3 5 6</expected>
</test>

<test output="text">
<task>fn:tokenize-single-input-1</task>
<code>fn:tokenize("x,y", ",")</code>
<expected>x y</expected>
</test>

<test output="text">
<task>fn:tokenize-single-input-2</task>
<code>fn:tokenize(("a", "b", "x,y"), ",")</code>
<error>XPTY0004</error>
</test>

<test output="text">
<task>fn:tokenize-regex-match-empty-1</task>
<code>fn:tokenize("12.34" , "^\D*")</code>
<error>FORX0003</error>
</test>

<test output="text">
<task>fn:analyze-string-regex-match-empty-1</task>
<code>fn:analyze-string("12.34" , "^\D*")</code>
<error>FORX0003</error>
</test>

</TestSet>

0 comments on commit fc9c4fb

Please sign in to comment.