Skip to content

Commit

Permalink
Merge pull request #1066 from weaviate/fix-dynamic-index-creation
Browse files: browse the repository at this point in the history
Pass dynamic quantizer inside hnsw/flat rather than at same level
  • Loading branch information
tsmith023 committed May 20, 2024
2 parents f0279ea + db451d8 commit 550100b
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 12 deletions.
28 changes: 24 additions & 4 deletions integration/test_collection_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ def test_hnsw_with_bq(collection_factory: CollectionFactory) -> None:
config = collection.config.get()
assert config.vector_index_type == VectorIndexType.HNSW
assert config.vector_index_config is not None
assert isinstance(config.vector_index_config, _VectorIndexConfigHNSW)
assert isinstance(config.vector_index_config.quantizer, _BQConfig)


Expand Down Expand Up @@ -760,9 +761,15 @@ def test_dynamic_collection(collection_factory: CollectionFactory) -> None:
distance_metric=VectorDistances.COSINE,
threshold=1000,
hnsw=Configure.VectorIndex.hnsw(
cleanup_interval_seconds=123, flat_search_cutoff=1234, vector_cache_max_objects=789
cleanup_interval_seconds=123,
flat_search_cutoff=1234,
vector_cache_max_objects=789,
quantizer=Configure.VectorIndex.Quantizer.pq(centroids=128),
),
flat=Configure.VectorIndex.flat(
vector_cache_max_objects=7643,
quantizer=Configure.VectorIndex.Quantizer.bq(rescore_limit=10),
),
flat=Configure.VectorIndex.flat(vector_cache_max_objects=7643),
),
ports=(8090, 50061),
)
Expand All @@ -775,16 +782,25 @@ def test_dynamic_collection(collection_factory: CollectionFactory) -> None:
assert config.vector_index_config.hnsw.cleanup_interval_seconds == 123
assert config.vector_index_config.hnsw.flat_search_cutoff == 1234
assert config.vector_index_config.hnsw.vector_cache_max_objects == 789
assert isinstance(config.vector_index_config.hnsw.quantizer, _PQConfig)
assert config.vector_index_config.hnsw.quantizer.centroids == 128
assert isinstance(config.vector_index_config.flat, _VectorIndexConfigFlat)
assert config.vector_index_config.flat.vector_cache_max_objects == 7643
assert isinstance(config.vector_index_config.flat.quantizer, _BQConfig)
assert config.vector_index_config.flat.quantizer.rescore_limit == 10

collection.config.update(
vectorizer_config=Reconfigure.VectorIndex.dynamic(
threshold=2000,
hnsw=Reconfigure.VectorIndex.hnsw(
flat_search_cutoff=4567, vector_cache_max_objects=678
flat_search_cutoff=4567,
vector_cache_max_objects=678,
quantizer=Reconfigure.VectorIndex.Quantizer.pq(centroids=128),
),
flat=Reconfigure.VectorIndex.flat(
vector_cache_max_objects=9876,
quantizer=Reconfigure.VectorIndex.Quantizer.bq(rescore_limit=11),
),
flat=Reconfigure.VectorIndex.flat(vector_cache_max_objects=9876),
),
)
config = collection.config.get()
Expand All @@ -795,5 +811,9 @@ def test_dynamic_collection(collection_factory: CollectionFactory) -> None:
assert config.vector_index_config.hnsw.cleanup_interval_seconds == 123
assert config.vector_index_config.hnsw.flat_search_cutoff == 4567
assert config.vector_index_config.hnsw.vector_cache_max_objects == 678
assert isinstance(config.vector_index_config.hnsw.quantizer, _PQConfig)
assert config.vector_index_config.hnsw.quantizer.centroids == 128
assert isinstance(config.vector_index_config.flat, _VectorIndexConfigFlat)
assert config.vector_index_config.flat.vector_cache_max_objects == 9876
assert isinstance(config.vector_index_config.flat.quantizer, _BQConfig)
assert config.vector_index_config.flat.quantizer.rescore_limit == 11
3 changes: 3 additions & 0 deletions integration/test_named_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from weaviate.collections.classes.config import (
PQConfig,
_VectorIndexConfigHNSW,
_VectorIndexConfigFlat,
Vectorizers,
)
Expand Down Expand Up @@ -474,6 +475,7 @@ def test_update_to_enable_quantizer_on_specific_named_vector(
assert config.vector_config is not None
assert config.vector_config["first"].vector_index_config is not None
assert config.vector_config["second"].vector_index_config is not None
assert isinstance(config.vector_config["second"].vector_index_config, _VectorIndexConfigHNSW)
assert config.vector_config["second"].vector_index_config.quantizer is None

collection.config.update(
Expand All @@ -490,6 +492,7 @@ def test_update_to_enable_quantizer_on_specific_named_vector(
assert config.vector_config is not None
assert config.vector_config["first"].vector_index_config is not None
assert config.vector_config["second"].vector_index_config is not None
assert isinstance(config.vector_config["second"].vector_index_config, _VectorIndexConfigHNSW)
assert isinstance(config.vector_config["second"].vector_index_config.quantizer, PQConfig)
assert config.vector_config["second"].vector_index_config.quantizer.centroids == 256

Expand Down
9 changes: 2 additions & 7 deletions weaviate/collections/classes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,7 @@ def vector_index_type() -> str:


@dataclass
class _VectorIndexConfigDynamic(_VectorIndexConfig):
class _VectorIndexConfigDynamic(_ConfigBase):
distance_metric: VectorDistances
hnsw: Optional[VectorIndexConfigHNSW]
flat: Optional[VectorIndexConfigFlat]
Expand Down Expand Up @@ -1680,7 +1680,6 @@ def dynamic(
threshold: Optional[int] = None,
hnsw: Optional[_VectorIndexConfigHNSWCreate] = None,
flat: Optional[_VectorIndexConfigFlatCreate] = None,
quantizer: Optional[_BQConfigCreate] = None,
) -> _VectorIndexConfigDynamicCreate:
"""Create a `_VectorIndexConfigDynamicCreate` object to be used when defining the DYNAMIC vector index configuration of Weaviate.
Expand All @@ -1690,11 +1689,7 @@ def dynamic(
See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view!
""" # noqa: D417 (missing argument descriptions in the docstring)
return _VectorIndexConfigDynamicCreate(
distance=distance_metric,
threshold=threshold,
hnsw=hnsw,
flat=flat,
quantizer=quantizer,
distance=distance_metric, threshold=threshold, hnsw=hnsw, flat=flat, quantizer=None
)


Expand Down
1 change: 0 additions & 1 deletion weaviate/collections/classes/config_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ def __get_vector_index_config(
return _VectorIndexConfigDynamic(
distance_metric=VectorDistances(schema["vectorIndexConfig"]["distance"]),
threshold=schema["vectorIndexConfig"].get("threshold"),
quantizer=None,
hnsw=__get_hnsw_config(schema["vectorIndexConfig"]["hnsw"]),
flat=__get_flat_config(schema["vectorIndexConfig"]["flat"]),
)
Expand Down
11 changes: 11 additions & 0 deletions weaviate/collections/classes/config_vector_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,17 @@ class _VectorIndexConfigDynamicCreate(_VectorIndexConfigCreate):
def vector_index_type() -> VectorIndexType:
return VectorIndexType.DYNAMIC

def _to_dict(self) -> dict:
    """Build the dictionary form of this dynamic vector index configuration.

    Starts from the dictionary produced by the parent class and then, for
    each of the optional `hnsw` and `flat` sub-configurations that is set,
    stores the result of that sub-configuration's own `_to_dict()` under
    the matching key. `threshold` is written as-is when it is not `None`.

    Returns:
        The resulting dictionary.
    """
    serialized = super()._to_dict()

    # Nested index configs are serialized through their own `_to_dict`,
    # in the same order as before: "hnsw" first, then "flat".
    for key in ("hnsw", "flat"):
        sub_config = getattr(self, key)
        if sub_config is not None:
            serialized[key] = sub_config._to_dict()

    if self.threshold is not None:
        serialized["threshold"] = self.threshold

    return serialized


class _VectorIndexConfigDynamicUpdate(_VectorIndexConfigUpdate):
threshold: Optional[int]
Expand Down

0 comments on commit 550100b

Please sign in to comment.