-
Notifications
You must be signed in to change notification settings - Fork 971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce memory usage of field maps in FieldInfos and BlockTree TermsReader. #13327
Changes from 1 commit
d6161e7
3aec6d5
8b7f89f
e64521d
3022306
cb7185f
9bab4bd
e4ba2e1
4b8db9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ | |
import static org.apache.lucene.index.FieldInfo.verifySameStoreTermVectors; | ||
import static org.apache.lucene.index.FieldInfo.verifySameVectorOptions; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
|
@@ -35,6 +35,7 @@ | |
import java.util.stream.Collectors; | ||
import java.util.stream.StreamSupport; | ||
import org.apache.lucene.util.ArrayUtil; | ||
import org.apache.lucene.util.hppc.IntObjectHashMap; | ||
|
||
/** | ||
* Collection of {@link FieldInfo}s (accessible by number or by name). | ||
|
@@ -61,9 +62,8 @@ public class FieldInfos implements Iterable<FieldInfo> { | |
private final String parentField; | ||
|
||
// used only by fieldInfo(int) | ||
private final FieldInfo[] byNumber; | ||
|
||
private final HashMap<String, FieldInfo> byName = new HashMap<>(); | ||
private final FieldInfoByNumber byNumber; | ||
private final HashMap<String, FieldInfo> byName; | ||
private final Collection<FieldInfo> values; // for an unmodifiable iterator | ||
|
||
/** Constructs a new FieldInfos from an array of FieldInfo objects */ | ||
|
@@ -81,30 +81,13 @@ public FieldInfos(FieldInfo[] infos) { | |
String softDeletesField = null; | ||
String parentField = null; | ||
|
||
int size = 0; // number of elements in byNumberTemp, number of used array slots | ||
FieldInfo[] byNumberTemp = new FieldInfo[10]; // initial array capacity of 10 | ||
byName = new HashMap<>((int) (infos.length / 0.75f) + 1); | ||
dsmiley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (FieldInfo info : infos) { | ||
if (info.number < 0) { | ||
throw new IllegalArgumentException( | ||
"illegal field number: " + info.number + " for field " + info.name); | ||
} | ||
size = info.number >= size ? info.number + 1 : size; | ||
dsmiley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (info.number >= byNumberTemp.length) { // grow array | ||
byNumberTemp = ArrayUtil.grow(byNumberTemp, info.number + 1); | ||
} | ||
FieldInfo previous = byNumberTemp[info.number]; | ||
if (previous != null) { | ||
throw new IllegalArgumentException( | ||
"duplicate field numbers: " | ||
+ previous.name | ||
+ " and " | ||
+ info.name | ||
+ " have: " | ||
+ info.number); | ||
} | ||
byNumberTemp[info.number] = info; | ||
|
||
previous = byName.put(info.name, info); | ||
FieldInfo previous = byName.put(info.name, info); | ||
if (previous != null) { | ||
throw new IllegalArgumentException( | ||
"duplicate field names: " | ||
|
@@ -156,15 +139,17 @@ public FieldInfos(FieldInfo[] infos) { | |
this.softDeletesField = softDeletesField; | ||
this.parentField = parentField; | ||
|
||
List<FieldInfo> valuesTemp = new ArrayList<>(infos.length); | ||
byNumber = new FieldInfo[size]; | ||
for (int i = 0; i < size; i++) { | ||
byNumber[i] = byNumberTemp[i]; | ||
if (byNumberTemp[i] != null) { | ||
valuesTemp.add(byNumberTemp[i]); | ||
} | ||
} | ||
values = Collections.unmodifiableCollection(valuesTemp); | ||
FieldInfo[] sortedFieldInfos = ArrayUtil.copyOfSubArray(infos, 0, infos.length); | ||
Arrays.sort(sortedFieldInfos, (fi1, fi2) -> Integer.compare(fi1.number, fi2.number)); | ||
int maxFieldNumber = infos.length == 0 ? -1 : sortedFieldInfos[infos.length - 1].number; | ||
// If there are many fields and the max field number is greater than twice the number | ||
// of fields, then a map structure is more compact to store the by-number mapping. | ||
byNumber = | ||
maxFieldNumber >= 2 * infos.length && maxFieldNumber >= 32 | ||
? new MapFieldInfoByNumber(infos) | ||
: new ArrayFieldInfoByNumber(infos, maxFieldNumber); | ||
// The iteration of FieldInfo is ordered by ascending field number. | ||
values = Collections.unmodifiableCollection(Arrays.asList(sortedFieldInfos)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nowadays, do [rest of the question lost in extraction; inline code was stripped] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. List.of() makes a copy because it treats the input as an "untrusted array". Here we don't copy, we just wrap. Actually, we could keep just Arrays.asList(sortedFieldInfos): we own the array privately, so we know it is never modified (only iterator() is exposed, and the Arrays.asList iterator does not support removal). |
||
} | ||
|
||
/** | ||
|
@@ -323,10 +308,7 @@ public FieldInfo fieldInfo(int fieldNumber) { | |
if (fieldNumber < 0) { | ||
throw new IllegalArgumentException("Illegal field number: " + fieldNumber); | ||
} | ||
if (fieldNumber >= byNumber.length) { | ||
return null; | ||
} | ||
return byNumber[fieldNumber]; | ||
return byNumber.get(fieldNumber); | ||
} | ||
|
||
static final class FieldDimensions { | ||
|
@@ -821,4 +803,64 @@ FieldInfos finish() { | |
return new FieldInfos(byName.values().toArray(new FieldInfo[byName.size()])); | ||
} | ||
} | ||
|
||
/**
 * Lookup of a {@link FieldInfo} by its field number. The visible implementations back this with
 * either a plain array or a primitive-keyed hash map.
 */
private interface FieldInfoByNumber { | ||
dsmiley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/**
 * Returns the {@link FieldInfo} registered under {@code fieldNumber}.
 *
 * <p>NOTE(review): implementations appear to return {@code null} for an unknown number and to
 * rely on the caller rejecting negative numbers first; confirm against callers.
 */
FieldInfo get(int fieldNumber); | ||
} | ||
|
||
private static class MapFieldInfoByNumber implements FieldInfoByNumber { | ||
|
||
private final IntObjectHashMap<FieldInfo> map; | ||
|
||
MapFieldInfoByNumber(FieldInfo[] fieldInfos) { | ||
map = new IntObjectHashMap<>(fieldInfos.length); | ||
for (FieldInfo fieldInfo : fieldInfos) { | ||
FieldInfo previous = map.put(fieldInfo.number, fieldInfo); | ||
if (previous != null) { | ||
throw new IllegalArgumentException( | ||
"duplicate field numbers: " | ||
+ previous.name | ||
+ " and " | ||
+ fieldInfo.name | ||
+ " have: " | ||
+ fieldInfo.number); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public FieldInfo get(int fieldNumber) { | ||
return map.get(fieldNumber); | ||
} | ||
} | ||
|
||
private static class ArrayFieldInfoByNumber implements FieldInfoByNumber { | ||
|
||
private static final FieldInfo[] EMPTY = new FieldInfo[0]; | ||
|
||
private final FieldInfo[] array; | ||
|
||
ArrayFieldInfoByNumber(FieldInfo[] fieldInfos, int maxFieldNumber) { | ||
array = fieldInfos.length == 0 ? EMPTY : new FieldInfo[maxFieldNumber + 1]; | ||
for (FieldInfo fieldInfo : fieldInfos) { | ||
FieldInfo previous = array[fieldInfo.number]; | ||
if (previous != null) { | ||
throw new IllegalArgumentException( | ||
"duplicate field numbers: " | ||
+ previous.name | ||
+ " and " | ||
+ fieldInfo.name | ||
+ " have: " | ||
+ fieldInfo.number); | ||
} | ||
array[fieldInfo.number] = fieldInfo; | ||
} | ||
} | ||
|
||
@Override | ||
public FieldInfo get(int fieldNumber) { | ||
return fieldNumber >= array.length ? null : array[fieldNumber]; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.lucene.util.hppc; | ||
|
||
import java.util.Iterator; | ||
import java.util.NoSuchElementException; | ||
|
||
/**
 * Simplifies the implementation of iterators a bit. Modeled loosely after Google Guava's API.
 *
 * <p>Subclasses implement {@link #fetch()} only; this class provides {@link #hasNext()} and {@link
 * #next()} on top of it by caching at most one look-ahead element. {@link #remove()} is
 * unsupported.
 *
 * <p>Forked from com.carrotsearch.hppc.AbstractIterator
 */
public abstract class AbstractIterator<E> implements Iterator<E> {
  /** No look-ahead element is held; the next {@link #hasNext()} must call {@link #fetch()}. */
  private static final int NOT_CACHED = 0;

  /** A look-ahead element is held in {@link #nextElement}. */
  private static final int CACHED = 1;

  /** {@link #done()} has been called; iteration is over. */
  private static final int AT_END = 2;

  /** Current iterator state. */
  private int state = NOT_CACHED;

  /** The next element to be returned from {@link #next()} if fetched. */
  private E nextElement;

  /**
   * Returns whether another element is available, fetching it first if none is cached.
   *
   * <p>If {@link #fetch()} throws, the exception propagates and the iterator stays in the
   * not-cached state, so a later call fetches again instead of reporting a stale element.
   */
  @Override
  public boolean hasNext() {
    if (state == NOT_CACHED) {
      // Fetch first and only then mark the element as cached: flipping the state before the
      // call would leave the iterator claiming a cached element if fetch() threw.
      final E fetched = fetch();
      // fetch() signals exhaustion by calling done(), which moves the state to AT_END.
      if (state == NOT_CACHED) {
        nextElement = fetched;
        state = CACHED;
      }
    }
    return state == CACHED;
  }

  /**
   * Returns the next element.
   *
   * @throws NoSuchElementException if the iteration has no more elements
   */
  @Override
  public E next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }

    final E result = nextElement;
    // Release the cached reference so the iterator does not keep the element reachable.
    nextElement = null;
    state = NOT_CACHED;
    return result;
  }

  /** Default implementation throws {@link UnsupportedOperationException}. */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * Fetch next element. The implementation must return {@link #done()} when all elements have been
   * fetched.
   *
   * @return Returns the next value for the iterator or chain-calls {@link #done()}.
   */
  protected abstract E fetch();

  /**
   * Call when done.
   *
   * @return Returns {@code null} as a placeholder value. End of iteration is recorded in the
   *     iterator state rather than detected from the returned value, so {@link #fetch()} may
   *     still return {@code null} as a regular element.
   */
  protected final E done() {
    state = AT_END;
    return null;
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR proposes to leverage the existing field-name -> FieldInfo map in FieldInfos to not repeat the ref to the field name strings here. Instead use the field number (specific to the FieldInfos) as key, so that we can use a compact primitive map.
Then below, in terms(String fieldName), we can use FieldInfos.fieldInfo(String fieldName) as a first mapping to the field number, and then use this compact map to get the Terms.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW this is what Lucene90PointsReader does today as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice. It could benefit from the IntObjectHashMap.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually there are many usages of Map<Integer, Object>. I could open some PRs when memory (and perf) matters after IntObjectHashMap is in.