Skip to content

Commit

Permalink
Add 'passageSortComparator' option in FieldHighlighter
Browse files Browse the repository at this point in the history
  • Loading branch information
Seunghan-Jung committed May 23, 2024
1 parent d078fb7 commit bf6eb2d
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 10 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ Improvements
* GITHUB#13385: Add Intervals.noIntervals() method to produce an empty IntervalsSource.
(Aniketh Jain, Uwe Schindler, Alan Woodward)

* GITHUB#13276: UnifiedHighlighter: new 'passageSortComparator' option to allow sorting other than offset order. (Seunghan Jung)

Optimizations
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class FieldHighlighter {
protected final int maxPassages;
protected final int maxNoHighlightPassages;
protected final PassageFormatter passageFormatter;
protected final Comparator<Passage> passageSortComparator;

public FieldHighlighter(
String field,
Expand All @@ -48,14 +49,16 @@ public FieldHighlighter(
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
this.field = field;
this.fieldOffsetStrategy = fieldOffsetStrategy;
this.breakIterator = breakIterator;
this.passageScorer = passageScorer;
this.maxPassages = maxPassages;
this.maxNoHighlightPassages = maxNoHighlightPassages;
this.passageFormatter = passageFormatter;
this.passageSortComparator = passageSortComparator;
}

public String getField() {
Expand Down Expand Up @@ -191,8 +194,7 @@ protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
maybeAddPassage(passageQueue, passageScorer, passage, contentLength);

Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
// sort in ascending order
Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
Arrays.sort(passages, passageSortComparator);
return passages;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -86,6 +87,7 @@
* <li>{@link #getBreakIterator(String)}: Customize how the text is divided into passages.
* <li>{@link #getScorer(String)}: Customize how passages are ranked.
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
* <li>{@link #getPassageSortComparator(String)}: Customize how the selected passages are sorted.
* </ul>
*
* <p>This is thread-safe, notwithstanding the setters.
Expand Down Expand Up @@ -113,6 +115,8 @@ public class UnifiedHighlighter {
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
/** Default passage order: ascending by {@link Passage#getStartOffset()}. */
private static final Comparator<Passage> DEFAULT_PASSAGE_SORT_COMPARATOR =
Comparator.comparingInt(Passage::getStartOffset);

protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher

Expand Down Expand Up @@ -151,6 +155,8 @@ public class UnifiedHighlighter {

private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;

private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;

/**
* Constructs the highlighter with the given index searcher and analyzer.
*
Expand Down Expand Up @@ -276,6 +282,7 @@ public static class Builder {
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
private Comparator<Passage> passageSortComparator = DEFAULT_PASSAGE_SORT_COMPARATOR;

/**
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
Expand Down Expand Up @@ -402,6 +409,11 @@ public Builder withCacheFieldValCharsThreshold(int value) {
return this;
}

/**
 * Sets the comparator used to order the passages returned for each highlighted field.
 *
 * @param value the ordering to apply; {@code null} selects the default ordering (ascending
 *     passage start offset)
 * @return this builder, for chaining
 */
public Builder withPassageSortComparator(Comparator<Passage> value) {
  // A null comparator would otherwise be handed to Arrays.sort in FieldHighlighter, where null
  // means "use the elements' natural ordering".
  // NOTE(review): Passage does not appear to be Comparable, so that path would fail - confirm.
  this.passageSortComparator = value == null ? DEFAULT_PASSAGE_SORT_COMPARATOR : value;
  return this;
}

/** Creates a {@link UnifiedHighlighter} configured from this builder's current settings. */
public UnifiedHighlighter build() {
return new UnifiedHighlighter(this);
}
Expand Down Expand Up @@ -463,6 +475,7 @@ public UnifiedHighlighter(Builder builder) {
this.formatter = builder.formatter;
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
this.passageSortComparator = builder.passageSortComparator;
}

/** Extracts matching terms */
Expand Down Expand Up @@ -614,6 +627,11 @@ protected PassageFormatter getFormatter(String field) {
return formatter;
}

/**
* Returns the {@link Comparator} used for the final sort of the passages selected for a field.
* This implementation ignores {@code field} and returns the comparator configured on this
* highlighter; subclasses may override it to vary the order per field.
*
* @param field the name of the field being highlighted (unused by this implementation)
* @return the comparator handed to the {@link FieldHighlighter} created for that field
*/
protected Comparator<Passage> getPassageSortComparator(String field) {
return passageSortComparator;
}

/**
* Returns the number of leading passages (as delineated by the {@link BreakIterator}) when no
* highlights could be found. If it's less than 0 (the default) then this defaults to the {@code
Expand Down Expand Up @@ -1119,7 +1137,8 @@ protected FieldHighlighter getFieldHighlighter(
getScorer(field),
maxPassages,
getMaxNoHighlightPassages(field),
getFormatter(field));
getFormatter(field),
getPassageSortComparator(field));
}

protected FieldHighlighter newFieldHighlighter(
Expand All @@ -1129,15 +1148,17 @@ protected FieldHighlighter newFieldHighlighter(
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
return new FieldHighlighter(
field,
fieldOffsetStrategy,
breakIterator,
passageScorer,
maxPassages,
maxNoHighlightPassages,
passageFormatter);
passageFormatter,
passageSortComparator);
}

protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -129,6 +130,11 @@ protected PassageFormatter getFormatter(String field) {
return super.getFormatter(field);
}

// Pass-through override: only delegates to the superclass implementation.
// NOTE(review): presumably kept so this class stops compiling if the hook is no longer
// overridable from outside the highlighter's package - confirm.
@Override
protected Comparator<Passage> getPassageSortComparator(String field) {
return super.getPassageSortComparator(field);
}

@Override
public Analyzer getIndexAnalyzer() {
return super.getIndexAnalyzer();
Expand Down Expand Up @@ -186,7 +192,8 @@ protected FieldHighlighter getFieldHighlighter(
getScorer(field),
maxPassages,
getMaxNoHighlightPassages(field),
getFormatter(field));
getFormatter(field),
getPassageSortComparator(field));
}

@Override
Expand Down Expand Up @@ -240,7 +247,7 @@ public Object format(Passage[] passages, String content) {
public void testFieldHiglighterExtensibility() {
final String fieldName = "fieldName";
FieldHighlighter fieldHighlighter =
new FieldHighlighter(fieldName, null, null, null, 1, 1, null) {
new FieldHighlighter(fieldName, null, null, null, 1, 1, null, null) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum offsetsEnums) throws IOException {
return super.highlightOffsetsEnums(offsetsEnums);
Expand All @@ -262,15 +269,17 @@ protected static class CustomFieldHighlighter extends FieldHighlighter {
PassageScorer passageScorer,
int maxPassages,
int maxNoHighlightPassages,
PassageFormatter passageFormatter) {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator) {
super(
field,
fieldOffsetStrategy,
breakIterator,
passageScorer,
maxPassages,
maxNoHighlightPassages,
passageFormatter);
passageFormatter,
passageSortComparator);
}

@Override
Expand Down

0 comments on commit bf6eb2d

Please sign in to comment.