From e7f009b920254249654ad0d020c64b8881606a86 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Tue, 21 Oct 2025 18:42:15 +0200
Subject: [PATCH 01/16] Update docs

---
 .../quickstart.create_populate_collection.py  |  59 ++
 ...eate_populate_collection_custom_vectors.py |  69 +++
 .../code/quickstart/clients.install.new.mdx   |  37 ++
 .../quickstart.create_populate_collection.mdx |  88 +++
 ...ate_populate_collection_custom_vectors.mdx |  88 +++
 .../code/quickstart/quickstart.query.rag.mdx  | 107 ++--
 docs/weaviate/quickstart/index.md             | 551 +++---------------
 src/css/custom.scss                           |   4 +
 8 files changed, 474 insertions(+), 529 deletions(-)
 create mode 100644 _includes/code/python/quickstart.create_populate_collection.py
 create mode 100644 _includes/code/python/quickstart.create_populate_collection_custom_vectors.py
 create mode 100644 _includes/code/quickstart/clients.install.new.mdx
 create mode 100644 _includes/code/quickstart/quickstart.create_populate_collection.mdx
 create mode 100644 _includes/code/quickstart/quickstart.create_populate_collection_custom_vectors.mdx

diff --git a/_includes/code/python/quickstart.create_populate_collection.py b/_includes/code/python/quickstart.create_populate_collection.py
new file mode 100644
index 00000000..b9fea1c3
--- /dev/null
+++ b/_includes/code/python/quickstart.create_populate_collection.py
@@ -0,0 +1,59 @@
+# START CreateCollection
+import weaviate
+from weaviate.classes.init import Auth
+from weaviate.classes.config import Configure, Property, DataType
+import os
+
+# Best practice: store your credentials in environment variables
+weaviate_url = os.environ["WEAVIATE_URL"]
+weaviate_api_key = os.environ["WEAVIATE_API_KEY"]
+
+# Step 1.1: Connect to your Weaviate Cloud instance
+client = weaviate.connect_to_weaviate_cloud(
+    cluster_url=weaviate_url,
+    auth_credentials=Auth.api_key(weaviate_api_key),
+)
+
+# END CreateCollection
+
+# NOT SHOWN TO THE USER - DELETE EXISTING COLLECTION
+client.collections.delete("Question")
+
+# START CreateCollection
+# Step 1.2: Create a collection
+# highlight-start
+questions = client.collections.create(
+    name="Question",
+    vector_config=Configure.Vectors.text2vec_weaviate(),  # Configure the Weaviate Embeddings vectorizer
+    # You can also use auto-schema, here we define the schema manually
+    properties=[
+        Property(name="question", data_type=DataType.TEXT),
+        Property(name="answer", data_type=DataType.TEXT),
+        Property(name="category", data_type=DataType.TEXT),
+    ],
+)
+# highlight-end
+# START CreateCollection
+
+# END CreateCollection
+# fmt: off
+# START CreateCollection
+# Step 1.3: Import three objects
+data_objects = [
+    {"properties": {"question": "What is Python?", "answer": "Python is a high-level, interpreted programming language known for its simplicity and readability.", "category": "Programming"}},
+    {"properties": {"question": "What is machine learning?", "answer": "Machine learning is a subset of AI that enables systems to learn and improve from experience without being explicitly programmed.", "category": "AI"}},
+    {"properties": {"question": "What is a vector database?", "answer": "A vector database is a specialized database designed to store and query high-dimensional vectors efficiently.", "category": "Database"}},
+]
+# END CreateCollection
+# fmt: on
+# START CreateCollection
+
+questions = client.collections.use("Question")
+with questions.batch.dynamic() as batch:
+    for obj in data_objects:
+        batch.add_object(properties=obj["properties"])  # pass the inner property dict, not the wrapper
+
+print(f"Imported {len(questions)} objects into the Question collection")
+
+client.close()  # Free up resources
+# END CreateCollection
diff --git a/_includes/code/python/quickstart.create_populate_collection_custom_vectors.py b/_includes/code/python/quickstart.create_populate_collection_custom_vectors.py
new file mode 100644
index 00000000..a74059ec
--- /dev/null
+++ b/_includes/code/python/quickstart.create_populate_collection_custom_vectors.py
@@ -0,0 +1,69 @@
+# START CreateCollection
+import weaviate
+from weaviate.classes.init import Auth
+from weaviate.classes.config import Configure, Property, DataType
+import os
+
+# Best practice: store your credentials in environment variables
+weaviate_url = os.environ["WEAVIATE_URL"]
+weaviate_api_key = os.environ["WEAVIATE_API_KEY"]
+
+# Step 1.1: Connect to your Weaviate Cloud instance
+client = weaviate.connect_to_weaviate_cloud(
+    cluster_url=weaviate_url,
+    auth_credentials=Auth.api_key(weaviate_api_key),
+)
+
+# END CreateCollection
+
+# NOT SHOWN TO THE USER - DELETE EXISTING COLLECTION
+client.collections.delete("Question")
+
+# START CreateCollection
+# Step 1.2: Create a collection
+# highlight-start
+questions = client.collections.create(
+    name="Question",
+    vector_config=Configure.Vectors.self_provided(),  # No automatic vectorization since we're providing vectors
+    properties=[
+        Property(name="question", data_type=DataType.TEXT),
+        Property(name="answer", data_type=DataType.TEXT),
+        Property(name="category", data_type=DataType.TEXT),
+    ],
+)
+# highlight-end
+# START CreateCollection
+
+# Import three hardcoded objects with their vectors
+# END CreateCollection
+# fmt: off
+# START CreateCollection
+# Step 1.3: Import three objects
+data_objects = [
+    {
+        "properties": {"question": "What is Python?", "answer": "Python is a high-level, interpreted programming language known for its simplicity and readability.", "category": "Programming"},
+        "vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+    },
+    {
+        "properties": {"question": "What is machine learning?", "answer": "Machine learning is a subset of AI that enables systems to learn and improve from experience without being explicitly programmed.", "category": "AI"},
+        "vector": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+    },
+    {
+        "properties": {"question": "What is a vector database?", "answer": "A vector database is a specialized database designed to store and query high-dimensional vectors efficiently.", "category": "Database"},
+        "vector": [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+    }
+]
+# END CreateCollection
+# fmt: on
+# START CreateCollection
+
+# Insert the objects with vectors
+questions = client.collections.use("Question")
+with questions.batch.dynamic() as batch:
+    for obj in data_objects:
+        batch.add_object(properties=obj["properties"], vector=obj["vector"])
+
+print(f"Imported {len(data_objects)} objects with vectors into the Question collection")
+
+client.close()  # Free up resources
+# END CreateCollection
diff --git a/_includes/code/quickstart/clients.install.new.mdx b/_includes/code/quickstart/clients.install.new.mdx
new file mode 100644
index 00000000..9ad1e129
--- /dev/null
+++ b/_includes/code/quickstart/clients.install.new.mdx
@@ -0,0 +1,37 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+
+
+```bash
+pip install -U weaviate-client
+```
+
+
+
+```bash
+npm install weaviate-client
+```
+
+
+
+```bash
+go get github.com/weaviate/weaviate-go-client/v5
+```
+
+
+
+```xml
+
+ io.weaviate
+ client
+ 4.8.3
+
+```
+
+
+
diff --git a/_includes/code/quickstart/quickstart.create_populate_collection.mdx b/_includes/code/quickstart/quickstart.create_populate_collection.mdx
new file mode 100644
index 00000000..67c072e9
--- /dev/null
+++ b/_includes/code/quickstart/quickstart.create_populate_collection.mdx
@@ -0,0 +1,88 @@
+import Tabs from "@theme/Tabs";
+import TabItem from "@theme/TabItem";
+import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock";
+import PyCode from "!!raw-loader!/_includes/code/python/quickstart.create_populate_collection.py";
+import TSCode from "!!raw-loader!/_includes/code/typescript/quickstart.create_collection.ts";
+import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/quickstart/2_1_create_collection/main.go";
+import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/quickstart/CreateCollection.java";
+import
VectorConfigSyntax from "/_includes/vector-config-syntax.mdx"; +import VectorsAutoSchemaError from "/_includes/error-note-vectors-autoschema.mdx"; + + + + + + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). + + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). + +```bash +# Best practice: store your credentials in environment variables +# export WEAVIATE_URL="YOUR_INSTANCE_URL" # Your Weaviate instance URL +# export WEAVIATE_API_KEY="YOUR_API_KEY" # Your Weaviate instance API key + +curl -X POST \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $WEAVIATE_API_KEY" \ +-d '{ + "class": "Question", + "vectorizer": "text2vec-weaviate", + "moduleConfig": { + "text2vec-weaviate": {}, + "generative-cohere": {} + } +}' \ +"$WEAVIATE_URL/v1/schema" +``` + + + diff --git a/_includes/code/quickstart/quickstart.create_populate_collection_custom_vectors.mdx b/_includes/code/quickstart/quickstart.create_populate_collection_custom_vectors.mdx new file mode 100644 index 00000000..6dedd6e8 --- /dev/null +++ b/_includes/code/quickstart/quickstart.create_populate_collection_custom_vectors.mdx @@ -0,0 +1,88 @@ +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/_includes/code/python/quickstart.create_populate_collection_custom_vectors.py"; +import TSCode from 
"!!raw-loader!/_includes/code/typescript/quickstart.create_collection.ts"; +import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/quickstart/2_1_create_collection/main.go"; +import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/quickstart/CreateCollection.java"; +import VectorConfigSyntax from "/_includes/vector-config-syntax.mdx"; +import VectorsAutoSchemaError from "/_includes/error-note-vectors-autoschema.mdx"; + + + + + + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). + + + + + + + +The collection also contains a configuration for the generative (RAG) integration: + +- OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). 
+ +```bash +# Best practice: store your credentials in environment variables +# export WEAVIATE_URL="YOUR_INSTANCE_URL" # Your Weaviate instance URL +# export WEAVIATE_API_KEY="YOUR_API_KEY" # Your Weaviate instance API key + +curl -X POST \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $WEAVIATE_API_KEY" \ +-d '{ + "class": "Question", + "vectorizer": "text2vec-weaviate", + "moduleConfig": { + "text2vec-weaviate": {}, + "generative-cohere": {} + } +}' \ +"$WEAVIATE_URL/v1/schema" +``` + + + diff --git a/_includes/code/quickstart/quickstart.query.rag.mdx b/_includes/code/quickstart/quickstart.query.rag.mdx index d5885216..74560601 100644 --- a/_includes/code/quickstart/quickstart.query.rag.mdx +++ b/_includes/code/quickstart/quickstart.query.rag.mdx @@ -1,63 +1,50 @@ -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import PyCode from '!!raw-loader!/_includes/code/python/quickstart.query.rag.py'; -import TSCode from '!!raw-loader!/_includes/code/typescript/quickstart.query.rag.ts'; -import GoCode from '!!raw-loader!/_includes/code/howto/go/docs/quickstart/3_2_rag/main.go'; -import JavaCode from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/quickstart/RAG.java'; - +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/_includes/code/python/quickstart.query.rag.py"; +import TSCode from "!!raw-loader!/_includes/code/typescript/quickstart.query.rag.ts"; +import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/quickstart/3_2_rag/main.go"; +import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/quickstart/RAG.java"; - - -We are using the OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval 
augmented generation (RAG). - - - - - - - -We are using the OpenAI [generative AI integrations](/weaviate/model-providers/openai/generative) for retrieval augmented generation (RAG). - - - - - - - - - - - - - - - + + + + + + + + + + + + + + ```bash # Best practice: store your credentials in environment variables # export WEAVIATE_URL="YOUR_INSTANCE_URL" # Your Weaviate instance URL @@ -99,6 +86,6 @@ echo '{ -d @- \ $WEAVIATE_URL/v1/graphql ``` - - + + diff --git a/docs/weaviate/quickstart/index.md b/docs/weaviate/quickstart/index.md index 17ff5664..d2388e22 100644 --- a/docs/weaviate/quickstart/index.md +++ b/docs/weaviate/quickstart/index.md @@ -1,91 +1,66 @@ --- title: Quickstart (with cloud resources) -sidebar_position: 0 image: og/docs/quickstart-tutorial.jpg # tags: ['getting started'] -hide_table_of_contents: true --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import SkipLink from '/src/components/SkipValidationLink' +import CardsSection from "/src/components/CardsSection"; -Expected time: 30 minutes Prerequisites: None -

- -:::info What you will learn - -This quickstart shows you how to combine Weaviate Cloud and Cohere to: - -1. Set up a Weaviate instance. (10 minutes) -1. Add and vectorize your data. (10 minutes) -1. Perform a semantic search and retrieval augmented generation (RAG). (10 minutes) - -```mermaid -flowchart LR - %% Define nodes with white backgrounds and darker borders - A1["Create Weaviate
Sandbox"] --> A2["Install client
library"] - A2 --> A3["Connect to
Weaviate"] - A3 --> B1["Define collection
(with an inference API)"] - B1 --> B2["Batch import
objects"] - B2 --> C1["Semantic search
(nearText)"] - C1 --> C2["RAG
(Generate)"] - - %% Group nodes in subgraphs with brand colors - subgraph sg1 ["1\. Setup"] - A1 - A2 - A3 - end - - subgraph sg2 ["2\. Populate"] - B1 - B2 - end - - subgraph sg3 ["3\. Query"] - C1 - C2 - end - - %% Style nodes with white background and darker borders - style A1 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style A2 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style A3 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style B1 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style B2 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style C1 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - style C2 fill:#ffffff,stroke:#B9C8DF,color:#130C49 - - %% Style subgraphs with brand colors - style sg1 fill:#ffffff,stroke:#61BD73,stroke-width:2px,color:#130C49 - style sg2 fill:#ffffff,stroke:#130C49,stroke-width:2px,color:#130C49 - style sg3 fill:#ffffff,stroke:#7AD6EB,stroke-width:2px,color:#130C49 -``` - -Notes: - -- The code examples here are self-contained. You can copy and paste them into your own environment to try them out. - -- If you prefer to use locally hosted resources, see [Quickstart: locally hosted](./local.md). +export const quickstartOptions = [ +{ +title: "Vectorize objects on import", +description: +"Import objects and vectorize them with the Weaviate Embeddings service.", +link: "?import=vectorization#create-a-collection", +icon: "fas fa-compress-alt", +}, +{ +title: "Import custom vectors", +description: +"Import pre-computed vector embeddings.", +link: "?import=custom-embeddings#create-a-collection", +icon: "fas fa-puzzle-piece", +}, +]; + +
+ +
-::: +--- - + + -### Requirements +Weaviate is an open-source vector database built to power AI applications, from prototypes to production-scale systems. This quickstart guide will show you how to: -In order to perform Retrieval Augmented Generation (RAG) in the last step, you will need a [Cohere](https://dashboard.cohere.com/) account. You can use a free Cohere trial API key. +1. **Import data** - Create a collection and import data into it. The data will be vectorized with the Weaviate Embeddings service. +2. **Search** - Perform a similarity (vector) search on your data. +3. **RAG** - Perform retrieval augmented generation (RAG) with a generative model. -If you have another preferred [model provider](/weaviate/model-providers), you can use that instead of Cohere. + + -
+Weaviate is an open-source vector database built to power AI applications, from prototypes to production-scale systems. This quickstart guide will show you how to: -## Step 1: Set up Weaviate +1. **Import data** - Create a collection and import data into it. +2. **Search** - Perform a similarity (vector) search on your data. +3. **RAG** - Perform retrieval augmented generation (RAG) with a generative model. -### 1.1 Create a Weaviate database +
+
+ +## Prerequisites + +- A [Weaviate Cloud](https://console.weaviate.cloud/) Sandbox instance. + +
+How to set up a Weaviate Cloud Sandbox instance -Go to the [Weaviate Cloud console](https://console.weaviate.cloud) and create a free Sandbox instance. +Go to the [Weaviate Cloud console](https://console.weaviate.cloud) and create a free Sandbox instance as shown in the interactive example below.