Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unsearchable properties #4896

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7c0e77b
Added an environment variable for disabling the go profiler setup
bennycortese Feb 22, 2024
da79f82
Changed profiling to be more in line with how environment variables a…
bennycortese Feb 23, 2024
c0ecc89
Changed from enabled profiling to disabled profiling, should help for…
bennycortese Feb 29, 2024
9fa7cd1
Removed == true check because it was redundant
bennycortese Feb 29, 2024
656787f
Merge branch 'main' into optional_profiling
bennycortese Apr 1, 2024
faa4452
Updates to deal with merge conflict and general main branch changes
bennycortese Apr 1, 2024
4bf0b4d
Fixes for the current linter
bennycortese Apr 1, 2024
d1d03bc
Merge branch 'main' into optional_profiling
bennycortese Apr 16, 2024
b67a483
Update environment.go for comment I got
bennycortese Apr 21, 2024
7037851
Merge pull request #4828 from weaviate/prepare-v1.25.0-rc.0-release
antas-marcin May 3, 2024
0dd87e0
Merge pull request #4834 from weaviate/stable/v1.25
antas-marcin May 3, 2024
ea9576a
fix: don't return shutdown error incase the shard was already shutdow…
moogacs May 3, 2024
7ee0487
handle in the migrator case
moogacs May 3, 2024
5bddba5
Merge pull request #4837 from weaviate/fix-shard-shutdown
moogacs May 3, 2024
59e1a11
Revert "fix: don't return shutdown error incase the shard was already…
moogacs May 3, 2024
212f77f
Merge pull request #4838 from weaviate/revert-4837-fix-shard-shutdown
moogacs May 3, 2024
2f8a7cd
Merge branch 'main' into optional_profiling
bennycortese May 5, 2024
4e4a2d2
Merge pull request #4849 from weaviate/stable/v1.25
antas-marcin May 6, 2024
ae65f81
add retry on failure in CI for tests instability
reyreaud-l May 6, 2024
663dbb0
Merge pull request #4848 from weaviate/lre/retry-backup-test-gha
reyreaud-l May 6, 2024
2a02d51
fix replication acceptance test logic and always use 3 nodes (#4835)
moogacs May 6, 2024
5ff2487
Merge branch 'main' into optional_profiling
dirkkul May 6, 2024
03846c6
Merge pull request #4290 from bennycortese/optional_profiling
dirkkul May 6, 2024
fecf237
Merge pull request #4865 from weaviate/stable/v1.25
antas-marcin May 7, 2024
c8049f5
Remove unsearchable properties
donomii May 10, 2024
e77c197
Bump golangci/golangci-lint-action from 5 to 6
dependabot[bot] May 13, 2024
257b2d3
Merge pull request #4900 from weaviate/dependabot/github_actions/gola…
moogacs May 13, 2024
dfb5d7a
Update test for bm25 unsearchable property
donomii May 13, 2024
0d145b9
Remove invalid properties in aggregation kw search
donomii May 13, 2024
bf46c3d
gh-4764 Remove duplicate CONTEXTIONARY_URL variables in dev setup (#4…
aminst May 14, 2024
71ce38c
Remove unsearchable properties from kw.properties
donomii May 14, 2024
1e1db2a
Merge branch 'main' into no-search-unsearchable-properties
donomii May 14, 2024
d593134
Regenerate reformat
donomii May 14, 2024
0ea5e71
Filter properties correctly
donomii May 14, 2024
f0d505c
Regenerate reformat
donomii May 14, 2024
51df827
Regenerate reformat
donomii May 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
go-version: '1.21'
- uses: actions/checkout@v4
- name: golangci-lint
uses: golangci/golangci-lint-action@v5
uses: golangci/golangci-lint-action@v6
with:
# Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
version: v1.54
Expand Down
28 changes: 23 additions & 5 deletions .github/workflows/pull_requests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,13 @@ jobs:
go-version: '1.21'
cache: true
- name: Acceptance tests (modules)
run: ./test/run.sh ${{ matrix.test }}
uses: nick-fields/retry@v2
with:
# 15 Minute is a large enough timeout for most of our tests
timeout_minutes: 15
max_attempts: 3
command: ./test/run.sh ${{ matrix.test }}
on_retry_command: ./test/run.sh --cleanup
Modules-Acceptance-Tests-large:
name: modules-acceptance-tests-large
runs-on: ubuntu-latest-4-cores
Expand All @@ -141,7 +147,13 @@ jobs:
go-version: '1.21'
cache: true
- name: Acceptance tests Large (modules)
run: ./test/run.sh ${{ matrix.test }}
uses: nick-fields/retry@v2
with:
# 15 Minute is a large enough timeout for most of our tests
timeout_minutes: 15
max_attempts: 3
command: ./test/run.sh ${{ matrix.test }}
on_retry_command: ./test/run.sh --cleanup
Modules-Acceptance-Tests-gcp:
name: modules-acceptance-tests-gcp
runs-on: ubuntu-latest
Expand Down Expand Up @@ -183,7 +195,7 @@ jobs:
if : ${{ env.run_pipeline == 'true' || startsWith(github.ref, 'refs/tags') }}
run: PALM_APIKEY=$(gcloud auth print-access-token) ./test/run.sh ${{ matrix.test }}
Acceptance-Tests:
name: acceptance-tests
name: acceptance-tests
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand All @@ -197,10 +209,16 @@ jobs:
go-version: '1.21'
cache: true
- name: Acceptance tests
uses: nick-fields/retry@v2
env:
WCS_DUMMY_CI_PW: ${{ secrets.WCS_DUMMY_CI_PW }}
WCS_DUMMY_CI_PW_2: ${{ secrets.WCS_DUMMY_CI_PW_2 }}
run: ./test/run.sh ${{ matrix.test }}
WCS_DUMMY_CI_PW_2: ${{ secrets.WCS_DUMMY_CI_PW_2 }}
with:
# 15 Minute is a large enough timeout for most of our tests
timeout_minutes: 15
max_attempts: 3
command: ./test/run.sh ${{ matrix.test }}
on_retry_command: ./test/run.sh --cleanup
Codecov:
needs: [Unit-Tests, Integration-Tests]
name: codecov
Expand Down
4 changes: 4 additions & 0 deletions adapters/handlers/rest/configure_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,10 @@ func reasonableHttpClient(authConfig cluster.AuthConfig) *http.Client {
}

func setupGoProfiling(config config.Config, logger logrus.FieldLogger) {
if config.Profiling.Disabled {
return
}

enterrors.GoWrapper(func() {
portNumber := config.Profiling.Port
if portNumber == 0 {
Expand Down
19 changes: 19 additions & 0 deletions adapters/repos/db/aggregations_fixtures_for_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,25 @@ var productClass = &models.Class{
},
}

func boolRef(b bool) *bool {
return &b
}

var notIndexedClass = &models.Class{
Class: "NotIndexedClass",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
IndexFilterable: boolRef(false),
IndexInverted: boolRef(false),
},
},
}

var companyClass = &models.Class{
Class: "AggregationsTestCompany",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
Expand Down
17 changes: 17 additions & 0 deletions adapters/repos/db/aggregations_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func prepareCompanyTestSchemaAndData(repo *DB,
Objects: &models.Schema{
Classes: []*models.Class{
productClass,
notIndexedClass,
companyClass,
arrayTypesClass,
customerClass,
Expand All @@ -147,6 +148,8 @@ func prepareCompanyTestSchemaAndData(repo *DB,
migrator.AddClass(context.Background(), arrayTypesClass, schemaGetter.shardState))
require.Nil(t,
migrator.AddClass(context.Background(), customerClass, schemaGetter.shardState))
require.Nil(t,
migrator.AddClass(context.Background(), notIndexedClass, schemaGetter.shardState))
})

schemaGetter.schema = schema
Expand All @@ -165,6 +168,20 @@ func prepareCompanyTestSchemaAndData(repo *DB,
}
})

t.Run("import products into notIndexed class", func(t *testing.T) {
for i, schema := range products {
t.Run(fmt.Sprintf("importing product %d", i), func(t *testing.T) {
fixture := models.Object{
Class: notIndexedClass.Class,
ID: productsIds[i],
Properties: schema,
}
require.Nil(t,
repo.PutObject(context.Background(), &fixture, []float32{0.1, 0.2, 0.01, 0.2}, nil, nil, 0))
})
}
})

t.Run("import companies", func(t *testing.T) {
for j := 0; j < importFactor; j++ {
for i, schema := range companies {
Expand Down
3 changes: 3 additions & 0 deletions adapters/repos/db/aggregator/filtered.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ func (fa *filteredAggregator) bm25Objects(ctx context.Context, kw *searchparams.
return nil, nil, fmt.Errorf("bm25 objects: could not find class %s in schema", fa.params.ClassName)
}
cfg := inverted.ConfigFromModel(class.InvertedIndexConfig)

// kw.ChooseSearchableProperties(class)

objs, scores, err := inverted.NewBM25Searcher(cfg.BM25, fa.store, fa.getSchema.ReadOnlyClass,
propertyspecific.Indices{}, fa.classSearcher,
fa.GetPropertyLengthTracker(), fa.logger, fa.shardVersion,
Expand Down
2 changes: 2 additions & 0 deletions adapters/repos/db/aggregator/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ func (a *Aggregator) bm25Objects(ctx context.Context, kw *searchparams.KeywordRa
}
cfg := inverted.ConfigFromModel(class.InvertedIndexConfig)

// kw.ChooseSearchableProperties(class)

objs, dists, err := inverted.NewBM25Searcher(cfg.BM25, a.store, a.getSchema.ReadOnlyClass,
propertyspecific.Indices{}, a.classSearcher,
a.GetPropertyLengthTracker(), a.logger, a.shardVersion,
Expand Down
8 changes: 8 additions & 0 deletions adapters/repos/db/bm25f_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ func SetupClass(t require.TestingT, repo *DB, schemaGetter *fakeSchemaGetter, lo
IndexFilterable: &vFalse,
IndexSearchable: &vTrue,
},
// Test that bm25f handles this property being unsearchable
{
Name: "notSearchable",
DataType: schema.DataTypeTextArray.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
IndexFilterable: &vFalse,
IndexSearchable: &vFalse,
},
},
}

Expand Down
1 change: 1 addition & 0 deletions adapters/repos/db/inverted/bm25_searcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func (b *BM25Searcher) BM25F(ctx context.Context, filterDocIds helpers.AllowList
return nil, nil, inverted.NewMissingSearchableIndexError(property)
}
}

class := b.getClass(className.String())
if class == nil {
return nil, nil, fmt.Errorf("could not find class %s in schema", className)
Expand Down
63 changes: 63 additions & 0 deletions entities/searchparams/retrieval.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@

package searchparams

import (
"fmt"
"strings"

"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
)

type NearVector struct {
Vector []float32 `json:"vector"`
Certainty float64 `json:"certainty"`
Expand All @@ -26,6 +34,61 @@ type KeywordRanking struct {
AdditionalExplanations bool `json:"additionalExplanations"`
}

// Indicates whether property should be indexed
// Index holds document ids with property of/containing particular value
// and number of its occurrences in that property
// (index created using bucket of StrategyMapCollection)
func HasSearchableIndex(prop *models.Property) bool {
switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
case schema.DataTypeText, schema.DataTypeTextArray:
// by default property has searchable index only for text/text[] props
if prop.IndexSearchable == nil {
return true
}
return *prop.IndexSearchable
default:
return false
}
}

func PropertyHasSearchableIndex(class *models.Class, tentativePropertyName string) bool {
if class == nil {
return false
}

propertyName := strings.Split(tentativePropertyName, "^")[0]
p, err := schema.GetPropertyByName(class, propertyName)
if err != nil {
return false
}
return HasSearchableIndex(p)
}

// GetPropertyByName returns the class by its name
func GetPropertyByName(c *models.Class, propName string) (*models.Property, error) {
for _, prop := range c.Properties {
// Check if the name of the property is the given name, that's the property we need
if prop.Name == strings.Split(propName, ".")[0] {
return prop, nil
}
}
return nil, fmt.Errorf("Property %v not found %v", propName, c.Class)
}

func (k *KeywordRanking) ChooseSearchableProperties(class *models.Class) {
var validProperties []string
for _, prop := range k.Properties {
property, err := GetPropertyByName(class, prop)
if err != nil {
continue
}
if HasSearchableIndex(property) {
validProperties = append(validProperties, prop)
}
}
k.Properties = validProperties
}

type WeightedSearchResult struct {
SearchParams interface{} `json:"searchParams"`
Weight float64 `json:"weight"`
Expand Down
47 changes: 21 additions & 26 deletions test/acceptance/replication/crud_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func immediateReplicaCRUD(t *testing.T) {
})

t.Run("OnNode-1", func(t *testing.T) {
_, err := getObjectFromNode(t, compose.ContainerURI(1), "Article", articleIDs[0], "node2")
_, err := getObjectFromNode(t, compose.ContainerURI(1), "Article", articleIDs[0], "node1")
assert.Equal(t, &objects.ObjectsClassGetNotFound{}, err)
})
t.Run("OnNode-2", func(t *testing.T) {
Expand Down Expand Up @@ -297,7 +297,7 @@ func eventualReplicaCRUD(t *testing.T) {
defer cancel()

compose, err := docker.New().
WithWeaviateCluster().
With3NodeCluster().
WithText2VecContextionary().
Start(ctx)
require.Nil(t, err)
Expand Down Expand Up @@ -340,31 +340,28 @@ func eventualReplicaCRUD(t *testing.T) {
createObjects(t, compose.GetWeaviate().URI(), batch)
})

t.Run("configure classes to replicate to node 2", func(t *testing.T) {
t.Run("configure classes to replicate to node 3", func(t *testing.T) {
ac := helper.GetClass(t, "Article")
ac.ReplicationConfig = &models.ReplicationConfig{
Factor: 2,
Factor: 3,
}
helper.UpdateClass(t, ac)

pc := helper.GetClass(t, "Paragraph")
pc.ReplicationConfig = &models.ReplicationConfig{
Factor: 2,
Factor: 3,
}
helper.UpdateClass(t, pc)
})

t.Run("StopNode-1", func(t *testing.T) {
stopNodeAt(ctx, t, compose, 1)
t.Run("StopNode-2", func(t *testing.T) {
stopNodeAt(ctx, t, compose, 2)
})

t.Run("assert all previous data replicated to node 2", func(t *testing.T) {
// TODO-RAFT : we need to avoid any sleeps, come back and remove it
// sleep 2 sec to make sure data not affected by EC issue
time.Sleep(2 * time.Second)
resp := gqlGet(t, compose.GetWeaviateNode2().URI(), "Article", replica.One)
t.Run("assert all previous data replicated to node 3", func(t *testing.T) {
resp := gqlGet(t, compose.GetWeaviateNode3().URI(), "Article", replica.One)
assert.Len(t, resp, len(articleIDs))
resp = gqlGet(t, compose.GetWeaviateNode2().URI(), "Paragraph", replica.One)
resp = gqlGet(t, compose.GetWeaviateNode3().URI(), "Paragraph", replica.One)
assert.Len(t, resp, len(paragraphIDs))
})

Expand Down Expand Up @@ -401,27 +398,26 @@ func eventualReplicaCRUD(t *testing.T) {
})

t.Run("RestartNode-2", func(t *testing.T) {
err = compose.Start(ctx, compose.GetWeaviateNode2().Name())
require.Nil(t, err)
startNodeAt(ctx, t, compose, 2)
})
})

t.Run("DeleteObject", func(t *testing.T) {
t.Run("OnNode-1", func(t *testing.T) {
deleteObject(t, compose.GetWeaviate().URI(), "Article", articleIDs[0])
t.Run("OnNode-2", func(t *testing.T) {
deleteObject(t, compose.GetWeaviateNode2().URI(), "Article", articleIDs[0])
})

t.Run("StopNode-1", func(t *testing.T) {
stopNodeAt(ctx, t, compose, 1)
t.Run("StopNode-2", func(t *testing.T) {
stopNodeAt(ctx, t, compose, 2)
})

t.Run("OnNode-2", func(t *testing.T) {
_, err := getObjectFromNode(t, compose.GetWeaviateNode2().URI(), "Article", articleIDs[0], "node2")
t.Run("OnNode-1", func(t *testing.T) {
_, err := getObjectFromNode(t, compose.GetWeaviate().URI(), "Article", articleIDs[0], "node1")
assert.Equal(t, &objects.ObjectsClassGetNotFound{}, err)
})

t.Run("RestartNode-1", func(t *testing.T) {
restartNode1(ctx, t, compose)
t.Run("RestartNode-2", func(t *testing.T) {
startNodeAt(ctx, t, compose, 2)
})
})

Expand All @@ -441,15 +437,14 @@ func eventualReplicaCRUD(t *testing.T) {
})

t.Run("RestartNode-2", func(t *testing.T) {
err = compose.Start(ctx, compose.GetWeaviateNode2().Name())
require.Nil(t, err)
startNodeAt(ctx, t, compose, 2)
})
})
})
}

func restartNode1(ctx context.Context, t *testing.T, compose *docker.DockerCompose) {
// since node1 is the gossip "leader", node 2 must be stopped and restarted
// since node1 is the gossip "leader", node 2 and 3 must be stopped and restarted
// after node1 to re-facilitate internode communication
eg := errgroup.Group{}
eg.Go(func() error {
Expand Down
5 changes: 1 addition & 4 deletions test/acceptance/replication/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func multiShardScaleOut(t *testing.T) {
defer cancel()

compose, err := docker.New().
WithWeaviateCluster().
With3NodeCluster().
WithText2VecContextionary().
Start(ctx)
require.Nil(t, err)
Expand Down Expand Up @@ -134,9 +134,6 @@ func multiShardScaleOut(t *testing.T) {

t.Run("kill a node and check contents of remaining node", func(t *testing.T) {
stopNodeAt(ctx, t, compose, 2)
// TODO-RAFT : we need to avoid any sleeps, come back and remove it
// sleep 2 sec to make sure data not affected by EC issue
time.Sleep(2 * time.Second)
p := gqlGet(t, compose.GetWeaviate().URI(), paragraphClass.Class, replica.One)
assert.Len(t, p, 10)
a := gqlGet(t, compose.GetWeaviate().URI(), articleClass.Class, replica.One)
Expand Down
Loading
Loading