From 722b49bcbb8a92be92176260602e21397c36aff7 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Mon, 15 Apr 2024 12:55:04 +0200 Subject: [PATCH 01/14] add new "Search" section --- config/sidebar-learn.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index 4279151ac3..70b43b88ca 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -61,6 +61,12 @@ } ] }, + { + "title": "search", + "slug": "search", + "routes": [ + ] + }, { "title": "Cloud features", "slug": "cloud", From 22756999e9aeb5d1580e9ad295eb7e9ea3ffb499 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 8 May 2024 16:51:57 +0200 Subject: [PATCH 02/14] new section will focus on ai-powered search --- config/sidebar-learn.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index 70b43b88ca..94e132cc68 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -62,8 +62,8 @@ ] }, { - "title": "search", - "slug": "search", + "title": "AI-powered search", + "slug": "ai-powered-search", "routes": [ ] }, From 0d7b4f125b9c53f4f590819be7d2fc6bb45091cc Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Tue, 28 May 2024 16:37:17 +0200 Subject: [PATCH 03/14] Vector search reference (#2824) --- .code-samples.meilisearch.yaml | 33 ++++ reference/api/search.mdx | 44 +++++ reference/api/settings.mdx | 333 +++++++++++++++++++++++++++++++++ 3 files changed, 410 insertions(+) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index d80ff1f4ef..d3a3167e42 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -1187,3 +1187,36 @@ index_settings_tutorial_api_put_setting_1: |- index_settings_tutorial_api_task_1: |- curl \ -X GET 'http://localhost:7700/tasks/TASK_UID' +get_embedders_1: |- + curl \ + -X GET 'http://localhost:7700/indexes/INDEX_NAME/settings/embedders' +update_embedders_1: |- + 
curl \ + -X PATCH 'http://localhost:7700/indexes/INDEX_NAME/settings' \ + -H 'Content-Type: application/json' \ + --data-binary '{ + "embedders": { + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}" + } + } + }' +reset_embedders_1: |- + curl \ + -X DELETE 'http://localhost:7700/indexes/INDEX_NAME/settings/embedders' +search_parameter_guide_hybrid_1: |- + curl -X POST 'localhost:7700/indexes/INDEX_NAME/search' \ + -H 'content-type: application/json' \ + --data-binary '{ + "q": "kitchen utensils", + "hybrid": { + "semanticRatio": 0.9, + "embedder": "default" + } + }' +search_parameter_guide_vector_1: |- + curl -X POST 'localhost:7700/indexes/INDEX_NAME/search' \ + -H 'content-type: application/json' \ + --data-binary '{ "vector": [0, 1, 2] }' diff --git a/reference/api/search.mdx b/reference/api/search.mdx index 107a5b8227..5708a7907c 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -54,6 +54,8 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`matchingStrategy`](#matching-strategy)** | String | `last` | Strategy used to match query terms within documents | | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | +| **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). 
@@ -162,6 +164,8 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`matchingStrategy`](#matching-strategy)** | String | `last` | Strategy used to match query terms within documents | | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | +| **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). @@ -248,6 +252,8 @@ This is not necessary when using the `POST` route or one of our [SDKs](/learn/wh | **[`matchingStrategy`](#matching-strategy)** | String | `last` | Strategy used to match query terms within documents | | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | +| **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | ### Query (q) @@ -1060,3 +1066,41 @@ The following query returns documents whose `overview` includes `"adventure"`: Results would not include documents containing `"adventure"` in other fields such as `title` or `genre`, even if these fields were present in the `searchableAttributes` list. + +### Hybrid search (experimental) + +**Parameter**: `hybrid`
+**Expected value**: An object with two fields: `embedder` and `semanticRatio`
+**Default value**: `null` + +Configures Meilisearch to return search results based on a query's meaning and context. + +`hybrid` must be an object. It accepts two fields: `embedder` and `semanticRatio`. + +`embedder` must be a string indicating an embedder configured with the `/settings` endpoint. If you don't specify an embedder and your index contains a single embedder, Meilisearch uses it by default. If an index contains multiple embedders, Meilisearch will use the embedder named `default`. + +`semanticRatio` must be a number between `0.0` and `1.0` indicating the proportion between keyword and semantic search results. `0.0` causes Meilisearch to only return keyword results. `1.0` causes Meilisearch to only return meaning-based results. Defaults to `0.5`. + + +Meilisearch will return an error if you use `hybrid` before activating your instance's `vectorStore` and [configuring an embedder](/reference/api/settings#embedders-experimental). + + +#### Example + + + +### Vector (experimental) + +Use a custom vector to perform a search query. Must be an array of numbers corresponding to the dimensions of the custom vector. + +`vector` is mandatory when performing searches with `userProvided` embedders. You may also use `vector` to override an embedder's automatic vector generation. + +`vector` dimensions must match the dimensions of the embedder. + +#### Example + + + + +Meilisearch will return an error if you use `vector` before activating your instance's `vectorStore` and [configuring a custom embedder](/reference/api/settings#embedders-experimental). + diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 80795d65c8..9dde2cf9ed 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -165,6 +165,7 @@ If the provided index does not exist, it will be created. 
| **[`stopWords`](#stop-words)** | Array of strings | Empty | List of words ignored by Meilisearch when present in search queries | | **[`synonyms`](#synonyms)** | Object | Empty | List of associated words treated similarly | | **[`typoTolerance`](#typo-tolerance)** | Object | [Default object](#typo-tolerance-object) | Typo tolerance settings | +| **[`embedders`](#embedders-experimental)** | Object of objects | [Default object](#embedder-object) | Embedder required for performing meaning-based search queries | #### Example @@ -1910,3 +1911,335 @@ Reset an index's typo tolerance settings to their [default value](#typo-toleranc ``` You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). + +## Embedders (experimental) + +Embedders translate documents and queries into vector embeddings. You must configure at least one embedder to use AI-powered search. + +### Embedders object + +The embedders object may contain up to 256 embedder objects. 
Each embedder object must be assigned a unique name: + +```json +{ + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}" + }, + "openai": { + "source": "openAi", + "apiKey": "OPENAI_API_KEY", + "model": "text-embedding-3-small", + "documentTemplate": "A movie titled {{doc.title}} whose description starts with {{doc.overview|truncatewords: 20}}" + } +} +``` + +These embedder objects may contain the following fields: + +| Name | Type | Default Value | Description | +| :--------------------- | :--------------- | :----------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **`source`** | String | Empty | The third-party tool that will generate embeddings from documents. Must be `openAi`, `huggingFace`, `ollama`, `rest`, or `userProvided` | +| **`url`** | String | `http://localhost:11434/api/embeddings` | URL Meilisearch contacts when querying the embedder | +| **`apiKey`** | String | Empty | Authentication token Meilisearch should send with each request to the embedder. If not present, Meilisearch will attempt to read it from environment variables | +| **`model`** | String | Empty | The model your embedder uses when generating vectors | +| **`documentTemplate`** | String | `{% for field in fields %}{{field.name}}: {{field.value}}\n{% endfor %}` | Template defining the data Meilisearch sends the embedder | +| **`dimensions`** | Integer | Empty | Number of dimensions in the chosen model. 
If not supplied, Meilisearch tries to infer this value | +| **`revision`** | String | Empty | Model revision hash | +| **`inputField`** | Array of strings | [] | Path to location of document data in query Meilisearch sends to embedder | +| **`inputType`** | String | `text` | Type of data Meilisearch sends to embedder. Must be `text` or `textArray` | +| **`query`** | String | Object | Extra fields Meilisearch adds to queries Meilisearch sends to embedder | +| **`pathToEmbeddings`** | Array | [] | Path to vector embedding data in embedder response | +| **`embeddingObject`** | Array | [] | Name of the embedding object in embedder response | +| **`distribution`** | Object | Empty | Describes the natural distribution of search results. Must contain two fields, `mean` and `sigma`, each containing a numeric value between `0` and `1` | + +### Get embedder settings + + + +Get the embedders configured for an index. + +#### Path parameters + +| Name | Type | Description | +| :---------------- | :----- | :------------------------------------------------------------------------ | +| **`index_uid`** * | String | [`uid`](/learn/core_concepts/indexes#index-uid) of the requested index | + +#### Example + + + +##### Response: `200 OK` + +```json +{ + "default": { + "source": "openAi", + "apiKey": "OPENAI_API_KEY", + "model": "text-embedding-3-small", + "documentTemplate": "A movie titled {{doc.title}} whose description starts with {{doc.overview|truncatewords: 20}}", + "dimensions": 1536 + } +} +``` + +### Update embedder settings + + + +Partially update the embedder settings for an index. When this setting is updated Meilisearch may reindex all documents and regenerate their embeddings. 
+ +#### Path parameters + +| Name | Type | Description | +| :---------------- | :----- | :------------------------------------------------------------------------ | +| **`index_uid`** * | String | [`uid`](/learn/core_concepts/indexes#index-uid) of the requested index | + +#### Body + +```json +{ + "default": { + "source": <String>, + "url": <String>, + "apiKey": <String>, + "model": <String>, + "documentTemplate": <String>, + "dimensions": <Integer>, + "revision": <String>, + "inputField": [<String>, <String>, …], + "inputType": "text"|"textArray", + "query": <Object>, + "pathToEmbeddings": [<String>, <String>, …], + "embeddingObject": [<String>, <String>, …], + "distribution": { + "mean": <Float>, + "sigma": <Float> + } + } +} +``` + +Set an embedder to `null` to remove it from the embedders list. + +##### `source` + +Use `source` to configure an embedder's source. The following embedders can auto-generate vectors for documents and queries: + +- `openAi` +- `huggingFace` +- `ollama` + +Additionally, use `rest` to auto-generate embeddings with any embedder offering a REST API. + +You may also configure a `userProvided` embedder. In this case, you must manually include vector data in your documents' `_vectors` field. You must also manually generate vectors for search queries. + +This field is mandatory. + +##### `url` + +Meilisearch queries `url` to generate vector embeddings for queries and documents. `url` must point to a REST-compatible embedder. + +This field is mandatory when using `rest` embedders. + +This field is optional when using `ollama` embedders. + +This field is incompatible with `openAi`, `huggingFace`, and `userProvided` embedders. + +##### `apiKey` + +Authentication token Meilisearch should send with each request to the embedder. + +This field is mandatory if using a protected `rest` embedder. + +This field is optional for `openAi` and `ollama` embedders. If you don't specify `apiKey`, Meilisearch will attempt to read it from environment variables `OPENAI_API_KEY` and `MEILI_OLLAMA_URL`, respectively. 
+ +This field is incompatible with `huggingFace` and `userProvided` embedders. + +##### `model` + +The model your embedder uses when generating vectors. These are the models Meilisearch officially supports: + +- `openAi`: `text-embedding-ada-002`, `text-embedding-3-small`, and `text-embedding-3-large` +- `huggingFace`: `BAAI/bge-base-en-v1.5` + +Other models, such as [HuggingFace's BERT models](https://huggingface.co/models?other=bert) or those provided by Ollama and REST embedders, may also be compatible with Meilisearch. + +This field is mandatory for `ollama` embedders. + +This field is optional for `openAi` and `huggingFace`. By default, Meilisearch uses `text-embedding-3-small` and `BAAI/bge-base-en-v1.5` respectively. + +This field is incompatible with `rest` and `userProvided` embedders. + +##### `documentTemplate` + +`documentTemplate` is a string containing a [Liquid template](https://shopify.github.io/liquid/basics/introduction). Meilisearch interpolates the template for each document and sends the resulting text to the embedder. The embedder then generates document vectors based on this text. + +You may use the following context values: + +- `{{doc.FIELD}}`: `doc` stands for the document itself. `FIELD` must correspond to an attribute present on all documents. Meilisearch replaces it with the value of that field in the input document +- `{{fields}}`: a list of all the `field`s appearing in any document in the index. Each `field` object in this list has two properties: `name` and `value`. If a `field` does not exist in a document, `value` is `nil` + +For best results, build short templates that only contain highly relevant data. If working with a long field, consider [truncating it](https://shopify.github.io/liquid/filters/truncatewords/). If you do not manually set it, `documentTemplate` will include all document fields. This may lead to suboptimal performance and relevancy. 
+ +This field is optional but strongly encouraged for all embedders. + +##### `dimensions` + +Number of dimensions in the chosen model. If not supplied, Meilisearch tries to infer this value. + +In most cases, `dimensions` should be the exact same value of your chosen model. Setting `dimensions` to a value lower than the model may lead to performance improvements and is only supported in the following OpenAI models: + +- `openAi`: `text-embedding-3-small`, `text-embedding-3-large` + +This field is mandatory for `userProvided` embedders. + +This field is optional for `openAi`, `huggingFace`, `ollama`, and `rest` embedders. + +##### `revision` + +Use this field to use a specific revision of a model. + +This field is optional for the `huggingFace` embedder. + +This field is incompatible with all other embedders. + +##### `inputField` + +Indicates the path to the field containing document data in the query Meilisearch sends to the embedder. + +This field must be an array of strings. Each string should indicate a field within the request object. For example, if `inputField` is `["A", "B", "C"]`, the document data should be in `request.A.B.C`. + +This field is optional for the `rest` embedder. + +This field is incompatible with all other embedders. + +##### `inputType` + +Defines how Meilisearch sends the data to the embedder. + +`text`: Meilisearch submits a single request per document, and receives a single embedding as a response. When using `text`, you must specify the path to the embedding response in `embeddingObject`. + +`textArray`: Meilisearch submits multiple documents in a single request, and receives the same number of embeddings as a response. When using `textArray`, you must specify the path to the array of results in `pathToEmbeddings`, then the path from each item of the array to the embedding in `embeddingObject`. + +`textArray` results in fewer requests to the embedder and is the preferred `inputType`. 
However, not all embedders support sending multiple documents in a single request. + +This field is optional for the `rest` embedder. + +This field is incompatible with all other embedders. + +##### `pathToEmbeddings` + +Array indicating the path to the field containing all vector embeddings in the embedder response. + +This field must be an array of strings. Each string should indicate a field within the response object. If `inputType` is `text`, the last string in the array should indicate an array of numbers with the same dimensions as the embedding model. If `inputType` is `textArray`, the last string in the array should indicate an array of objects, each of which corresponds to a single document in the original request. + +For example, if `pathToEmbeddings` is `["A", "B", "C"]`, the embedding data should be in `response.A.B.C`. + +This field is optional for the `rest` embedder. + +This field is incompatible with all other embedders. + +##### `embeddingObject` + +Array indicating the path to the field containing a single vector embedding in a `textArray` embedder response. + +This field must be an array of strings. Each string should indicate the field containing vector data for a single element when the embedder returns multiple embeddings. + +For example, if `pathToEmbeddings` is `["A", "B", "C"]`, the embedding data should be in `response.A.B.C`, where `C` is an array of objects. For each element in `C`, Meilisearch will look for the path configured in `embeddingObject`. If `embeddingObject` is `["D", "E", "F"]`, vector data should be in `element.D.E.F`. + +This field is optional for the `rest` embedder. + +This field is incompatible with all other embedders. + +##### `query` + +Extra fields Meilisearch adds to queries Meilisearch sends to embedder. These may include the chosen embeddings model and its dimensions. + +This field is optional for the `rest` embedder. + +This field is incompatible with all other embedders. 
+ +##### `distribution` + +For mathematical reasons, the `_rankingScore` of semantic search results tends to be closely grouped around an average value that depends on the embedder and model used. This may result in relevant semantic hits being underrepresented and irrelevant semantic hits being overrepresented compared with keyword search hits. + +Use `distribution` when configuring an embedder to correct the returned `_rankingScore`s of the semantic hits with an affine transformation: + +```sh +curl \ + -X PATCH 'http://localhost:7700/indexes/INDEX_NAME/settings' \ + -H 'Content-Type: application/json' \ + --data-binary '{ + "embedders": { + "default": { + "source": "huggingFace", + "model": "MODEL_NAME", + "distribution": { + "mean": 0.7, + "sigma": 0.3 + } + } + } + }' +``` + +Configuring `distribution` requires a certain amount of trial and error, in which you must perform semantic searches and monitor the results. Based on their `_rankingScore`s and relevancy, add the observed `mean` and `sigma` values for that index. + +`distribution` is an optional field compatible with all embedder sources. It must be an object with two fields: + +- `mean`: a number between `0` and `1` indicating the semantic score of "somewhat relevant" hits before using the `distribution` setting +- `sigma`: a number between `0` and `1` indicating the average absolute difference in `_rankingScore`s between "very relevant" hits and "somewhat relevant" hits, and "somewhat relevant" hits and "irrelevant" hits. + +Changing `distribution` does not trigger a reindexing operation. + +#### Example + + + +##### Response: `202 Accepted` + +```json +{ + "taskUid": 1, + "indexUid": "kitchenware", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2024-05-11T09:33:12.691402Z" +} +``` + +You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). + +### Reset embedder settings + + + +Removes all embedders from your index. 
+ +To remove a single embedder, use the [update embedder settings endpoint](#update-embedder-settings) and set the target embedder to `null`. + +#### Path parameters + +| Name | Type | Description | +| :---------------- | :----- | :------------------------------------------------------------------------ | +| **`index_uid`** * | String | [`uid`](/learn/core_concepts/indexes#index-uid) of the requested index | + +#### Example + + + +##### Response: `202 Accepted` + +```json +{ + "taskUid": 1, + "indexUid": "books", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "2022-04-14T20:53:32.863107Z" +} +``` + +You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). From 99f5c8ec6d5f3c445348b2cfb8bbb3a4aee59b15 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Tue, 28 May 2024 16:38:14 +0200 Subject: [PATCH 04/14] Add keyword/hybrid search explanation (#2789) --- config/sidebar-learn.json | 5 ++++ learn/search/difference_keyword_ai_search.mdx | 28 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 learn/search/difference_keyword_ai_search.mdx diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index 94e132cc68..2a38719dfd 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -65,6 +65,11 @@ "title": "AI-powered search", "slug": "ai-powered-search", "routes": [ + { + "source": "learn/search/difference_keyword_ai_search.mdx", + "label": "Differences between keyword and AI-powered search", + "slug": "difference_keyword_ai_search" + } ] }, { diff --git a/learn/search/difference_keyword_ai_search.mdx b/learn/search/difference_keyword_ai_search.mdx new file mode 100644 index 0000000000..37896f820b --- /dev/null +++ b/learn/search/difference_keyword_ai_search.mdx @@ -0,0 +1,28 @@ +--- +title: Differences between keyword and AI-powered search — Meilisearch documentation +description: "Meilisearch offers two types of search: keyword search and 
AI-powered search. This article explains their differences and intended use cases." +--- + +# Differences between keyword and AI-powered search + +Meilisearch offers two types of search: keyword search and AI-powered search. This article explains their differences and intended use cases. + +## Keyword search + +Keyword search is another name for traditional full-text search. This is Meilisearch's default search type. When performing keyword searches, Meilisearch checks the indexed documents for acceptable matches to a set of search terms. + +For example, when searching for `"pink sandals"`, keyword search will only return clothing items explicitly mentioning these two terms. Searching for `"pink summer shoes for girls"` is likely to return less relevant results. + +## AI-powered search + +AI-powered search uses LLM providers such as OpenAI and Hugging Face to generate document embeddings. When users search in your application, Meilisearch return results based both on keywords and the semantic context of the query. + +By default, Meilisearch returns both keyword and semantic results. This is also called hybrid search. You may configure Meilisearch so it only returns semantic results. This is also called semantic search. + +With AI-powered hybrid search, searching for `"pink sandals"` will be more efficient, but queries for `"cute pink summer shoes for girls"` will still return relevant results including light-coloured open shoes. + +## Use cases + +Keyword search is a reliable choice that will work well in most scenarios. It is fast, less resource-intensive, and requires no extra configuration. It is best suited for situations where you need precise matches to a query and your users are familiar with relevant keywords. + +AI-powered search combines the flexibility of semantic search with the performance of keyword search. Most searches, whether short and precise or long and vague, will return very relevant search results. 
In most cases, AI-powered search will offer your users the best search experience, but will require extra configuration. AI-powered search may also entail extra costs if you use a third-party service such as OpenAI to generate document embeddings. From 508cbab606c3e803dab7e575a4959f2cf51de07c Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 29 May 2024 16:09:35 +0200 Subject: [PATCH 05/14] test label component --- reference/api/settings.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 9dde2cf9ed..f01877400f 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -1912,7 +1912,7 @@ Reset an index's typo tolerance settings to their [default value](#typo-toleranc You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). -## Embedders (experimental) +## Embedders Embedders translate documents and queries into vector embeddings. You must configure at least one embedder to use AI-powered search. 
From f029d283b3cc371e337bb32fa56288bb49f8f1f3 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Mon, 3 Jun 2024 18:08:18 +0200 Subject: [PATCH 06/14] AI-powered search tutorial (#2820) --------- Co-authored-by: Maria Craig --- assets/datasets/kitchenware.json | 27 +++++ config/sidebar-learn.json | 5 + .../search/getting_started_with_ai_search.mdx | 106 ++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 assets/datasets/kitchenware.json create mode 100644 learn/search/getting_started_with_ai_search.mdx diff --git a/assets/datasets/kitchenware.json b/assets/datasets/kitchenware.json new file mode 100644 index 0000000000..4d8c04eb2a --- /dev/null +++ b/assets/datasets/kitchenware.json @@ -0,0 +1,27 @@ +[ + { + "id": 0, + "name": "Wooden spoon", + "price": 1.50 + }, + { + "id": 1, + "name": "Microwave lid", + "price": 1.00 + }, + { + "id": 2, + "name": "Wooden chopping board", + "price": 9.50 + }, + { + "id": 3, + "name": "Plastic chopping board", + "price": 1.50 + }, + { + "id": 4, + "name": "Rolling pin", + "price": 2.50 + } +] diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index 2a38719dfd..abbd5f2513 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -65,6 +65,11 @@ "title": "AI-powered search", "slug": "ai-powered-search", "routes": [ + { + "source": "learn/search/getting_started_with_ai_search.mdx", + "label": "Getting started with AI-powered search", + "slug": "getting_started_with_ai_search" + }, { "source": "learn/search/difference_keyword_ai_search.mdx", "label": "Differences between keyword and AI-powered search", diff --git a/learn/search/getting_started_with_ai_search.mdx b/learn/search/getting_started_with_ai_search.mdx new file mode 100644 index 0000000000..a398590081 --- /dev/null +++ b/learn/search/getting_started_with_ai_search.mdx @@ -0,0 +1,106 @@ +--- +title: Getting started with AI-powered search — Meilisearch documentation +description: AI-powered search is an experimental 
technology that uses LLMs to retrieve search results. This tutorial shows you how to configure an OpenAI embedder and perform your first search. +--- + +# Getting started with AI-powered search + +[AI-powered search](https://meilisearch.com/solutions/vector-search?utm_campaign=vector-search&utm_source=aipowered-tutorial), sometimes also called vector search, is an experimental technology that uses [large language models](https://en.wikipedia.org/wiki/Large_language_model) to retrieve search results based on the meaning and context of a query. + +This tutorial will walk you through configuring AI-powered search in your Meilisearch project. You will activate the vector store setting, generate document embeddings with OpenAI, and perform your first search. + +## Requirements + +- a running Meilisearch project +- an [OpenAI API key](https://platform.openai.com/api-keys) +- a command-line console + +## Create a new index + +Create a `kitchenware` index and add [this kitchenware products dataset](/assets/datasets/kitchenware.json) to it. If necessary, consult the quick start for instructions on how to configure a basic Meilisearch install. + +## Activate AI-powered search + +First, activate the AI-powered search experimental feature. Exactly how to do that depends on whether you are using [Meilisearch Cloud](#meilisearch-cloud-projects) or [self-hosting Meilisearch](#self-hosted-instances). + +### Meilisearch Cloud projects + +If using Meilisearch Cloud, navigate to your project overview and find "Experimental features". Then check the "AI-powered search" box. + +![A section of the project overview interface titled "Experimental features". The image shows a few options, including "Vector store".](https://raw.githubusercontent.com/meilisearch/documentation/main/assets/images/vector-search/01-cloud-vector-store.png) + + +To ensure proper scaling of Meilisearch Cloud's latest AI-powered search offering, you must enter the waitlist before activating vector search. 
You will not be able to activate vector search in the Cloud interface or via the `/experimental-features` route until your sign up has been approved. + + +### Self-hosted instances + +Use [the `/experimental-features` route](/reference/api/experimental_features) to activate vector search during runtime: + +```sh +curl \ + -X PATCH 'http://localhost:7700/experimental-features/' \ + -H 'Content-Type: application/json' \ + --data-binary '{ + "vectorStore": true + }' +``` + +## Generate vector embeddings with OpenAI + +Next, you must generate vector embeddings for all documents in your dataset. Embeddings are mathematical representations of the meanings of words and sentences in your documents. Meilisearch relies on external providers to generate these embeddings. Use OpenAI for this tutorial. + +Use the `embedders` index setting of the [update `/settings` endpoint](/reference/api/settings) to configure a default [OpenAI](https://platform.openai.com/) embedder: + +```sh +curl \ + -X PATCH 'http://localhost:7700/indexes/kitchenware/settings' \ + -H 'Content-Type: application/json' \ + --data-binary '{ + "embedders": { + "default": { + "source": "openAi", + "apiKey": "OPEN_AI_API_KEY", + "model": "text-embedding-3-small", + "documentTemplate": "An object used in a kitchen named '{{doc.name}}'" + } + } + }' +``` + +Replace `OPEN_AI_API_KEY` with your [OpenAI API key](https://platform.openai.com/api-keys). You may use any key tier for this tutorial, but prefer [Tier 2 keys](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-two) for optimal performance in production environments. + +### `documentTemplate` + +`documentTemplate` describes a short [Liquid template](https://shopify.github.io/liquid/). The text inside curly brackets (`{{`) indicates a document field in dot notation, where `doc` indicates the document itself and the string that comes after the dot indicates a document attribute. 
Meilisearch replaces these brackets and their contents with the corresponding field value. + +The resulting text is the prompt OpenAI uses to generate document embeddings. + +For example, kitchenware documents have three fields: `id`, `name`, and `price`. If your `documentTemplate` is `"An object used in a kitchen named '{{doc.name}}'"`, the text Meilisearch will send to the embedder when indexing the first document is `"An object used in a kitchen named 'Wooden spoon'"`. + +For the best results, always provide a `documentTemplate`. Keep your templates short and only include highly relevant information. This ensures the best indexing performance and search result relevancy. + +## Perform an AI-powered search + +Perform AI-powered searches with `q` and `hybrid` to retrieve search results using the default embedder you configured in the previous step: + +```sh +curl \ + -X POST 'http://localhost:7700/indexes/kitchenware/search' \ + -H 'content-type: application/json' \ + --data-binary '{ + "q": "kitchen utensils made of wood", + "hybrid": { + "embedder": "default", + "semanticRatio": 0.7 + } + }' +``` + +Meilisearch will return a mix of semantic and keyword matches, prioritizing results that match the query's meaning and context. If you want Meilisearch to return more results based on the meaning and context of a search, set `semanticRatio` to a value greater than `0.5`. Setting `semanticRatio` to a value lower than `0.5`, instead, will return more keyword matches. + +## Conclusion + +You have seen how to setup and perform AI-powered searches with Meilisearch and OpenAI. For more in-depth information, consult the reference for embedders and the `hybrid` search parameter. + +AI-powered is an experimental Meilisearch feature and is undergoing active development—[join the discussion on GitHub](https://github.com/orgs/meilisearch/discussions/677). 
From f785e5cc9d9b5db52aca22711b975cfa59b13a07 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:35:44 +0200 Subject: [PATCH 07/14] rectify terminology --- config/sidebar-learn.json | 4 +-- .../difference_full_text_ai_search.mdx | 30 +++++++++++++++++++ .../getting_started_with_ai_search.mdx | 22 +++++++------- learn/search/difference_keyword_ai_search.mdx | 28 ----------------- reference/api/search.mdx | 8 ++--- 5 files changed, 47 insertions(+), 45 deletions(-) create mode 100644 learn/ai_powered_search/difference_full_text_ai_search.mdx rename learn/{search => ai_powered_search}/getting_started_with_ai_search.mdx (77%) delete mode 100644 learn/search/difference_keyword_ai_search.mdx diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index abbd5f2513..7b64cec5c6 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -71,9 +71,9 @@ "slug": "getting_started_with_ai_search" }, { - "source": "learn/search/difference_keyword_ai_search.mdx", + "source": "learn/search/difference_full_text_ai_search.mdx", "label": "Differences between keyword and AI-powered search", - "slug": "difference_keyword_ai_search" + "slug": "difference_full_text_ai_search" } ] }, diff --git a/learn/ai_powered_search/difference_full_text_ai_search.mdx b/learn/ai_powered_search/difference_full_text_ai_search.mdx new file mode 100644 index 0000000000..dbc0120d27 --- /dev/null +++ b/learn/ai_powered_search/difference_full_text_ai_search.mdx @@ -0,0 +1,30 @@ +--- +title: Differences between full-text and AI-powered search — Meilisearch documentation +description: "Meilisearch offers two types of search: full-text search and AI-powered search. This article explains their differences and intended use cases." +--- + +# Differences between full-text and AI-powered search + +Meilisearch offers two types of search: full-text search and AI-powered search. This article explains their differences and intended use cases. 
+ +## Full-text search + +This is Meilisearch's default search type. When performing a full-text search, Meilisearch checks the indexed documents for acceptable matches to a set of search terms. It is a fast and reliable search method. + +For example, when searching for `"pink sandals"`, full-text search will only return clothing items explicitly mentioning these two terms. Searching for `"pink summer shoes for girls"` is likely to return fewer and less relevant results. + +## AI-powered search + +AI-powered search is Meilisearch's newest search method. It returns results based on a query's meaning and context. + +AI-powered search uses LLM providers such as OpenAI and Hugging Face to generate vector embeddings representing the meaning and context of both query terms and documents. It then compares these vectors to find semantically similar search results. + +When using AI-powered search, Meilisearch returns both full-text and semantic results by default. This is also called hybrid search. + +With AI-powered search, searching for `"pink sandals"` will be more efficient, but queries for `"cute pink summer shoes for girls"` will still return relevant results including light-colored open shoes. + +## Use cases + +Full-text search is a reliable choice that works well in most scenarios. It is fast, less resource-intensive, and requires no extra configuration. It is best suited for situations where you need precise matches to a query and your users are familiar with the relevant keywords. + +AI-powered search combines the flexibility of semantic search with the performance of full-text search. Most searches, whether short and precise or long and vague, will return very relevant search results. In most cases, AI-powered search will offer your users the best search experience, but will require extra configuration. AI-powered search may also entail extra costs if you use a third-party service such as OpenAI to generate vector embeddings. 
diff --git a/learn/search/getting_started_with_ai_search.mdx b/learn/ai_powered_search/getting_started_with_ai_search.mdx similarity index 77% rename from learn/search/getting_started_with_ai_search.mdx rename to learn/ai_powered_search/getting_started_with_ai_search.mdx index a398590081..a7b06b9a2f 100644 --- a/learn/search/getting_started_with_ai_search.mdx +++ b/learn/ai_powered_search/getting_started_with_ai_search.mdx @@ -3,21 +3,21 @@ title: Getting started with AI-powered search — Meilisearch documentation description: AI-powered search is an experimental technology that uses LLMs to retrieve search results. This tutorial shows you how to configure an OpenAI embedder and perform your first search. --- -# Getting started with AI-powered search +# Getting started with AI-powered search -[AI-powered search](https://meilisearch.com/solutions/vector-search?utm_campaign=vector-search&utm_source=aipowered-tutorial), sometimes also called vector search, is an experimental technology that uses [large language models](https://en.wikipedia.org/wiki/Large_language_model) to retrieve search results based on the meaning and context of a query. +[AI-powered search](https://meilisearch.com/solutions/vector-search?utm_campaign=vector-search&utm_source=aipowered-tutorial), sometimes also called vector search or hybrid search, is an experimental technology that uses [large language models](https://en.wikipedia.org/wiki/Large_language_model) to retrieve search results based on the meaning and context of a query. This tutorial will walk you through configuring AI-powered search in your Meilisearch project. You will activate the vector store setting, generate document embeddings with OpenAI, and perform your first search.
## Requirements -- a running Meilisearch project -- an [OpenAI API key](https://platform.openai.com/api-keys) -- a command-line console +- A running Meilisearch project +- An [OpenAI API key](https://platform.openai.com/api-keys) +- A command-line console ## Create a new index -Create a `kitchenware` index and add [this kitchenware products dataset](/assets/datasets/kitchenware.json) to it. If necessary, consult the quick start for instructions on how to configure a basic Meilisearch install. +Create a `kitchenware` index and add [this kitchenware products dataset](/assets/datasets/kitchenware.json) to it. If necessary, consult the quick start for instructions on how to configure a basic Meilisearch installation. ## Activate AI-powered search @@ -30,7 +30,7 @@ If using Meilisearch Cloud, navigate to your project overview and find "Experime ![A section of the project overview interface titled "Experimental features". The image shows a few options, including "Vector store".](https://raw.githubusercontent.com/meilisearch/documentation/main/assets/images/vector-search/01-cloud-vector-store.png) -To ensure proper scaling of Meilisearch Cloud's latest AI-powered search offering, you must enter the waitlist before activating vector search. You will not be able to activate vector search in the Cloud interface or via the `/experimental-features` route until your sign up has been approved. +To ensure proper scaling of Meilisearch Cloud's latest AI-powered search offering, you must enter the waitlist before activating vector search. You will not be able to activate vector search in the Cloud interface or via the `/experimental-features` route until your sign-up has been approved. ### Self-hosted instances @@ -78,7 +78,7 @@ The resulting text is the prompt OpenAI uses to generate document embeddings. For example, kitchenware documents have three fields: `id`, `name`, and `price`. 
If your `documentTemplate` is `"An object used in a kitchen named '{{doc.name}}'"`, the text Meilisearch will send to the embedder when indexing the first document is `"An object used in a kitchen named 'Wooden spoon'"`. -For the best results, always provide a `documentTemplate`. Keep your templates short and only include highly relevant information. This ensures the best indexing performance and search result relevancy. +For the best results, always provide a `documentTemplate`. Keep your templates short and only include highly relevant information. This ensures optimal indexing performance and search result relevancy. ## Perform an AI-powered search @@ -97,10 +97,10 @@ curl \ }' ``` -Meilisearch will return a mix of semantic and keyword matches, prioritizing results that match the query's meaning and context. If you want Meilisearch to return more results based on the meaning and context of a search, set `semanticRatio` to a value greater than `0.5`. Setting `semanticRatio` to a value lower than `0.5`, instead, will return more keyword matches. +Meilisearch will return a mix of semantic and full-text matches, prioritizing results that match the query's meaning and context. If you want Meilisearch to return more results based on the meaning and context of a search, set `semanticRatio` to a value greater than `0.5`. Setting `semanticRatio` to a value lower than `0.5`, instead, will return more full-text matches. ## Conclusion -You have seen how to setup and perform AI-powered searches with Meilisearch and OpenAI. For more in-depth information, consult the reference for embedders and the `hybrid` search parameter. +You have seen how to set up and perform AI-powered searches with Meilisearch and OpenAI. For more in-depth information, consult the reference for embedders and the `hybrid` search parameter. 
-AI-powered is an experimental Meilisearch feature and is undergoing active development—[join the discussion on GitHub](https://github.com/orgs/meilisearch/discussions/677). +AI-powered search is an experimental Meilisearch feature and is undergoing active development—[join the discussion on GitHub](https://github.com/orgs/meilisearch/discussions/677). diff --git a/learn/search/difference_keyword_ai_search.mdx b/learn/search/difference_keyword_ai_search.mdx deleted file mode 100644 index 37896f820b..0000000000 --- a/learn/search/difference_keyword_ai_search.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Differences between keyword and AI-powered search — Meilisearch documentation -description: "Meilisearch offers two types of search: keyword search and AI-powered search. This article explains their differences and intended use cases." ---- - -# Differences between keyword and AI-powered search - -Meilisearch offers two types of search: keyword search and AI-powered search. This article explains their differences and intended use cases. - -## Keyword search - -Keyword search is another name for traditional full-text search. This is Meilisearch's default search type. When performing keyword searches, Meilisearch checks the indexed documents for acceptable matches to a set of search terms. - -For example, when searching for `"pink sandals"`, keyword search will only return clothing items explicitly mentioning these two terms. Searching for `"pink summer shoes for girls"` is likely to return less relevant results. - -## AI-powered search - -AI-powered search uses LLM providers such as OpenAI and Hugging Face to generate document embeddings. When users search in your application, Meilisearch return results based both on keywords and the semantic context of the query. - -By default, Meilisearch returns both keyword and semantic results. This is also called hybrid search. You may configure Meilisearch so it only returns semantic results. 
This is also called semantic search. - -With AI-powered hybrid search, searching for `"pink sandals"` will be more efficient, but queries for `"cute pink summer shoes for girls"` will still return relevant results including light-coloured open shoes. - -## Use cases - -Keyword search is a reliable choice that will work well in most scenarios. It is fast, less resource-intensive, and requires no extra configuration. It is best suited for situations where you need precise matches to a query and your users are familiar with relevant keywords. - -AI-powered search combines the flexibility of semantic search with the performance of keyword search. Most searches, whether short and precise or long and vague, will return very relevant search results. In most cases, AI-powered search will offer your users the best search experience, but will require extra configuration. AI-powered search may also entail extra costs if you use a third-party service such as OpenAI to generate document embeddings. diff --git a/reference/api/search.mdx b/reference/api/search.mdx index 5708a7907c..f2626c8d28 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -55,7 +55,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). 
@@ -165,7 +165,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). @@ -253,7 +253,7 @@ This is not necessary when using the `POST` route or one of our [SDKs](/learn/wh | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | ### Query (q) @@ -1089,7 +1089,7 @@ Meilisearch will return an error if you use `hybrid` before activating your inst -### Vector (experimental) +### Vector Use a custom vector to perform a search query. Must be an array of numbers corresponding to the dimensions of the custom vector. 
From 3c4e9cf46f74dcaca180dd829600386d3b92d6ad Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:44:01 +0200 Subject: [PATCH 08/14] fix broken anchor links --- reference/api/search.mdx | 2 +- reference/api/settings.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/reference/api/search.mdx b/reference/api/search.mdx index 9ed55b3a6f..cd62d22d98 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -1103,7 +1103,7 @@ Meilisearch will return an error if you use `hybrid` before activating your inst -### Vector +### Vector Use a custom vector to perform a search query. Must be an array of numbers corresponding to the dimensions of the custom vector. diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 46ea1dfe55..97cbe28081 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -168,7 +168,7 @@ If the provided index does not exist, it will be created. | **[`stopWords`](#stop-words)** | Array of strings | Empty | List of words ignored by Meilisearch when present in search queries | | **[`synonyms`](#synonyms)** | Object | Empty | List of associated words treated similarly | | **[`typoTolerance`](#typo-tolerance)** | Object | [Default object](#typo-tolerance-object) | Typo tolerance settings | -| **[`embedders`](#embedders-experimental)** | Object of objects | [Default object](#embedder-object) | Embedder required for performing meaning-based search queries | +| **[`embedders`](#embedders-experimental)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | #### Example @@ -2009,7 +2009,7 @@ Reset an index's typo tolerance settings to their [default value](#typo-toleranc You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). -## Embedders +## Embedders Embedders translate documents and queries into vector embeddings. 
You must configure at least one embedder to use AI-powered search. From eb9d1d0debe760c4c13c87c6f42ea31352e88fe4 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:47:14 +0200 Subject: [PATCH 09/14] fix more broken anchors --- reference/api/search.mdx | 8 ++++---- reference/api/settings.mdx | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reference/api/search.mdx b/reference/api/search.mdx index cd62d22d98..027864a450 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -55,7 +55,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). 
@@ -165,7 +165,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). @@ -253,7 +253,7 @@ This is not necessary when using the `POST` route or one of our [SDKs](/learn/wh | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | ### Query (q) @@ -1103,7 +1103,7 @@ Meilisearch will return an error if you use `hybrid` before activating your inst -### Vector +### Vector Use a custom vector to perform a search query. Must be an array of numbers corresponding to the dimensions of the custom vector. diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 97cbe28081..b922768b8f 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -168,7 +168,7 @@ If the provided index does not exist, it will be created. 
| **[`stopWords`](#stop-words)** | Array of strings | Empty | List of words ignored by Meilisearch when present in search queries | | **[`synonyms`](#synonyms)** | Object | Empty | List of associated words treated similarly | | **[`typoTolerance`](#typo-tolerance)** | Object | [Default object](#typo-tolerance-object) | Typo tolerance settings | -| **[`embedders`](#embedders-experimental)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | +| **[`embedders`](#embedders)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | #### Example @@ -2009,7 +2009,7 @@ Reset an index's typo tolerance settings to their [default value](#typo-toleranc You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). -## Embedders +## Embedders Embedders translate documents and queries into vector embeddings. You must configure at least one embedder to use AI-powered search. From e60bb1164256de2cf92af7e0f08dbde78c47bd6c Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:50:45 +0200 Subject: [PATCH 10/14] broken anchor --- reference/api/settings.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index b922768b8f..64c3156551 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -168,7 +168,7 @@ If the provided index does not exist, it will be created. 
| **[`stopWords`](#stop-words)** | Array of strings | Empty | List of words ignored by Meilisearch when present in search queries | | **[`synonyms`](#synonyms)** | Object | Empty | List of associated words treated similarly | | **[`typoTolerance`](#typo-tolerance)** | Object | [Default object](#typo-tolerance-object) | Typo tolerance settings | -| **[`embedders`](#embedders)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | +| **[`embedders`](#embedders-)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | #### Example From 2984d330d3f1e7c5f8d21862879eaccccb8d3a03 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:52:43 +0200 Subject: [PATCH 11/14] fix more links --- config/sidebar-learn.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index b8220381f6..19ccf6c36a 100644 --- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -66,12 +66,12 @@ "slug": "ai-powered-search", "routes": [ { - "source": "learn/search/getting_started_with_ai_search.mdx", + "source": "learn/ai_powered/search/getting_started_with_ai_search.mdx", "label": "Getting started with AI-powered search", "slug": "getting_started_with_ai_search" }, { - "source": "learn/search/difference_full_text_ai_search.mdx", + "source": "learn/ai_powered_search/difference_full_text_ai_search.mdx", "label": "Differences between keyword and AI-powered search", "slug": "difference_full_text_ai_search" } From c1e4798e8867066b660c60eef8c54b6cbad9f7db Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Wed, 12 Jun 2024 18:54:07 +0200 Subject: [PATCH 12/14] make it brokin stop --- config/sidebar-learn.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/sidebar-learn.json b/config/sidebar-learn.json index 19ccf6c36a..f6243e0f54 100644 
--- a/config/sidebar-learn.json +++ b/config/sidebar-learn.json @@ -66,7 +66,7 @@ "slug": "ai-powered-search", "routes": [ { - "source": "learn/ai_powered/search/getting_started_with_ai_search.mdx", + "source": "learn/ai_powered_search/getting_started_with_ai_search.mdx", "label": "Getting started with AI-powered search", "slug": "getting_started_with_ai_search" }, From a05a65e3f9baedcbd52d21cb40cfdf234bfa6f02 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Thu, 13 Jun 2024 17:31:27 +0200 Subject: [PATCH 13/14] address reviewer feedback --- .code-samples.meilisearch.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 57f8be954c..23c018e33c 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -1198,9 +1198,10 @@ update_embedders_1: |- --data-binary '{ "embedders": { "default": { - "source": "huggingFace", - "model": "BAAI/bge-base-en-v1.5", - "documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}" + "source": "openAi", + "apiKey": "anOpenAiApiKey", + "model": "text-embedding-3-small", + "documentTemplate": "A document titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}" } } }' From 53810470e9dc22ba1eb147a71b96fb2ccba40673 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Thu, 13 Jun 2024 17:33:51 +0200 Subject: [PATCH 14/14] rollback usage of noticeTag component to prevent link checker error --- reference/api/search.mdx | 8 ++++---- reference/api/settings.mdx | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reference/api/search.mdx b/reference/api/search.mdx index 027864a450..1841f53971 100644 --- a/reference/api/search.mdx +++ b/reference/api/search.mdx @@ -55,7 +55,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | 
Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). @@ -165,7 +165,7 @@ By default, [this endpoint returns a maximum of 1000 results](/learn/advanced/kn | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | [Learn more about how to use each search parameter](#search-parameters). 
@@ -253,7 +253,7 @@ This is not necessary when using the `POST` route or one of our [SDKs](/learn/wh | **[`showRankingScore`](#ranking-score)** | Boolean | `false` | Display the global ranking score of a document | | **[`attributesToSearchOn`](#customize-attributes-to-search-on-at-search-time)** | Array of strings | `["*"]` | Restrict search to the specified attributes | | **[`hybrid`](#hybrid-search-experimental)** | Object | `null` | Return results based on query keywords and meaning | -| **[`vector`](#vector-)** | Array of numbers | `null` | Search using a custom query vector | +| **[`vector`](#vector-experimental)** | Array of numbers | `null` | Search using a custom query vector | ### Query (q) @@ -1103,7 +1103,7 @@ Meilisearch will return an error if you use `hybrid` before activating your inst -### Vector +### Vector (experimental) Use a custom vector to perform a search query. Must be an array of numbers corresponding to the dimensions of the custom vector. diff --git a/reference/api/settings.mdx b/reference/api/settings.mdx index 64c3156551..2faec8f8c5 100644 --- a/reference/api/settings.mdx +++ b/reference/api/settings.mdx @@ -168,7 +168,7 @@ If the provided index does not exist, it will be created. 
| **[`stopWords`](#stop-words)** | Array of strings | Empty | List of words ignored by Meilisearch when present in search queries | | **[`synonyms`](#synonyms)** | Object | Empty | List of associated words treated similarly | | **[`typoTolerance`](#typo-tolerance)** | Object | [Default object](#typo-tolerance-object) | Typo tolerance settings | -| **[`embedders`](#embedders-)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | +| **[`embedders`](#embedders-experimental)** | Object of objects | [Default object](#embedders-object) | Embedder required for performing meaning-based search queries | #### Example @@ -2009,7 +2009,7 @@ Reset an index's typo tolerance settings to their [default value](#typo-toleranc You can use the returned `taskUid` to get more details on [the status of the task](/reference/api/tasks#get-one-task). -## Embedders +## Embedders (experimental) Embedders translate documents and queries into vector embeddings. You must configure at least one embedder to use AI-powered search.