diff --git a/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx new file mode 100644 index 0000000000..67040805bf --- /dev/null +++ b/src/content/docs/en/pages/guides/ai-inference/quick-start.mdx @@ -0,0 +1,40 @@ +--- +title: How to build a simple agent with AI Inference +description: Learn how to build a simple agent with AI Inference using Azion Functions. +meta_tags: >- + ai inference, ai agent, functions, Azion Web Platform, build with + AI +namespace: docs_guides_ai_inference_build_agent +permalink: /documentation/products/guides/ai-inference-agent/ +menu_namespace: AIInferenceMenu + +--- + + + +## Usage + +AI Inference can be used in a [Function](/en/documentation/products/build/applications/functions/). + +This function receives a POST request to the desired AI model and returns the response. + + +```javascript +const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the European capitals" + } + ] +}) +return modelResponse +``` + +This example uses the Qwen3 model. You can change the model and the request parameters according to your preferences. Check the [AI models reference](/en/documentation/products/ai/ai-inference/models/) for more information about the available models and how to use them in your application. 
+ diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index 99ba24511f..a47998ca5b 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -4,6 +4,7 @@ description: >- Qwen3-30B-A3B-Instruct-2507-FP8 is an instruction-tuned 30B-parameter FP8 causal language model for long-context (256K) text generation and reasoning, supporting chat/QA, summarization, multilingual tasks, math/science problem solving, coding, and tool-augmented workflows. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_30ba3b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen3-30ba3b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 89db438047..4ccb27195e 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -4,6 +4,7 @@ description: >- BAAI/bge-reranker-v2-m3 is a lightweight reranker model with strong multilingual capabilities. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3 +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx index 6a7d24d3c6..5d4de8f99a 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/internvl3.mdx @@ -4,6 +4,7 @@ description: >- InternVL3 is an advanced multimodal large language model with capabilities to encompass tool usage, GUI agents, industrial image analysis, 3D vision perception, and more. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_internvl3 +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/internvl3/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx index 4d75e3bfa0..d4389ad7e2 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/mistral-3-small.mdx @@ -4,6 +4,7 @@ description: >- Mistral 3 Small provides a range of capabilities, including text generation, image analysis, embeddings, and more. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, mistral' namespace: docs_edge_ai_models_mistral_3_small +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/mistral-3-small/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx index 554f22d9b3..e4df693992 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -4,6 +4,7 @@ description: >- Nanonets-OCR-s is an OCR model that converts document images to structured Markdown, preserving layout (headings, lists, tables) and basic tags. The output is easy to parse and feed into LLM pipelines. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_nanonets_ocr_s +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/nanonets-ocr-s/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index d90ac3ffef..471c370b34 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -4,6 +4,7 @@ description: >- Qwen2.5 VL AWQ 3B is a vision-language model that supports 3 bilion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_3b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 28cec39803..e72c648e7f 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -4,6 +4,7 @@ description: >- Qwen2.5 VL AWQ 7B is a vision-language model that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_7b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx index 706ecde2b2..904f43400c 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -4,6 +4,7 @@ description: >- Qwen3 Embedding 4B is a 4B-parameter multilingual embedding model (36 layers, 32K context) that outputs 2560‑dim vectors for text/code retrieval, classification, clustering, and bitext mining. It supports instruction-conditioned embeddings and is optimized for efficient, cross-lingual representation learning. 
meta_tags: 'ai inference, ai models, artificial intelligence, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_embedding_4b +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/qwen3-embedding-4b/ --- diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx index d15d1e9c8b..119d075993 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/edge-ai-reference.mdx @@ -1,59 +1,76 @@ --- title: AI Inference description: >- - Azion AI Inference empowers you to build and deploy intelligent applications that process data close to where it is generated. -meta_tags: 'ai inference, artificial intelligence, edge computing' + AI Inference enables you to run AI models directly on Azion’s highly distributed infrastructure. +meta_tags: 'ai inference, artificial intelligence, edge computing, ai assistant, ai agent' namespace: docs_edge_ai_reference permalink: /documentation/products/ai/ai-inference/ +menu_namespace: AIInferenceMenu + --- import LinkButton from 'azion-webkit/linkbutton'; -**AI Inference** empowers you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management, enabling real-time decision-making and enhanced performance. +**AI Inference** enables you to run AI models directly on Azion’s highly distributed infrastructure. -With Azion AI Inference, you can seamlessly integrate AI capabilities into your applications, leveraging tools like Edge Functions, Edge Application, and the Azion API to create scalable, secure, and efficient solutions. 
+With Azion AI Inference, you can integrate AI capabilities into your applications, leveraging tools like **Functions**, **Applications**, **Vector Search**, and the Azion API to create scalable, secure, and efficient solutions. -AI Inference gives you access to: +Get started by deploying the AI Inference Starter Kit Template: -- **Run AI models on Edge Runtime**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. -- **Deploy autonomous AI agents** that analyze data and make decisions at the edge. -- **Real-time processing** with reduced latency and enhanced efficiency. -- All as part of a **complete platform**, including Edge Applications, Edge Functions, Edge SQL vector search, and more. + --- ## Features -### Available Models +### OpenAI-Compatible API + +Connect applications using Azion’s OpenAI-compatible endpoint format. + +### Run Edge optimized models -Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for edge deployment with minimal resource requirements. +- Run AI models on Azion’s globally distributed edge to minimize latency and enable real-time inference. +- Access a curated catalog of open-source models, ready to run on Azion Runtime and optimized for distributed deployment with low resource footprints. +- Native inference support for large language models (LLMs) and vision-language models (VLMs). -### Model customization +### Fine-Tune Models with LoRA -AI Inference allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. +AI Inference allows you to fine-tune, train, and specialize models your own data and parameters. This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs. 
-### AI Agents +--- -AI Inference supports deploying AI agents like ReAct (Reasoning + Acting) at the edge, enabling advanced tasks such as context-aware responses, semantic search, and intelligent data processing. +### Examples of what you can build with AI Inference -### Integration with Edge SQL +- **AI Assistants**: Build and deploy AI assistants that serve thousands of users simultaneously with low latency, delivering real-time support, dynamic FAQs, and customer assistance without cloud overload. -Integrate with **Edge SQL** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. +- **AI Agents**: Build AI agents that automate multi‑step workflows, collapse days of manual effort into minutes, and free teams for higher‑value work—boosting productivity across operations. ---- +- **Automate Threat Detection and Takedown with AI**: Combine LLMs and vision-language models (VLMs) to monitor digital assets, spot phishing/abuse patterns in text and imagery, and automate threat classification and takedown across distributed environments. -## Related products +## Integration with SQL Database -- [Edge Application](/en/documentation/products/build/edge-application/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options. -- [Edge Functions](/en/documentation/products/build/edge-application/edge-functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. -- [Edge SQL](/en/documentation/products/store/edge-sql/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. 
-- [Vector Search](/en/documentation/products/store/edge-sql/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge. +Integrate your application with **SQL Database** to enable [vector search](/en/documentation/products/store/sql-database/vector-search/) capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations. + +## Limits + +These are the **default limits**: + +| Scope | Limit | +| ----- | ----- | +| Requests per minute | 300 | --- -Explore practical examples of how to implement AI solutions with Azion: +## Related products + +- [Applications](/en/documentation/products/build/applications/): build applications that run directly on Azion's distributed infrastructure, delivering exceptional performance and customization options. +- [Functions](/en/documentation/products/build/applications/functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses. +- [SQL Database](/en/documentation/products/store/sql-database/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge. Also enables [Vector Search](/en/documentation/products/store/sql-database/vector-search/) for performing semantic search and AI-powered recommendations through vector embedding. 
- - \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx index 939d33c850..e63ffb4833 100644 --- a/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx +++ b/src/content/docs/en/pages/main-menu/reference/ai-inference/models.mdx @@ -4,14 +4,15 @@ description: >- Edge AI offers a diverse range of edge-optimized models for various AI domains, ensuring efficient deployment and performance. meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models +menu_namespace: AIInferenceMenu permalink: /documentation/products/ai/ai-inference/models/ --- import LinkButton from 'azion-webkit/linkbutton'; -Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for edge deployment. +Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for distributed deployment. -This page provides a list of models available for use with **Edge AI**. To learn more about it, visit the [Edge AI Reference](/en/documentation/products/ai/ai-inference/). +This page provides a list of models available for use with **AI Inference**. To learn more about it, visit the [AI Inference Reference](/en/documentation/products/ai/ai-inference/). 
## Available Models diff --git a/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx new file mode 100644 index 0000000000..773c1107e3 --- /dev/null +++ b/src/content/docs/pt-br/pages/guias/ai-inference/quick-start.mdx @@ -0,0 +1,40 @@ +--- +title: Como criar um agente simples com AI Inference +description: Aprenda a criar um agente simples com AI Inference usando Functions da Azion. +meta_tags: >- + ai inference, agente de ia, functions, Azion Web Platform, inferência + de ia +namespace: docs_guides_ai_inference_build_agent +permalink: /documentacao/produtos/guias/ai-inference-agent/ +menu_namespace: AIInferenceMenu + +--- + + + +## Uso + +A AI Inference pode ser usada em uma [Function](/pt-br/documentacao/produtos/build/applications/functions/). + +Esta function recebe uma requisição POST para o modelo de AI desejado e retorna a resposta. + + +```javascript +const modelResponse = await Azion.AI.run("Qwen/Qwen3-30B-A3B-Instruct-2507-FP8", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the European capitals" + } + ] +}) +return modelResponse +``` + +Este exemplo usa o modelo Qwen3. Você pode alterar o modelo e os parâmetros da requisição de acordo com suas preferências. Consulte a [referência de modelos de AI](/pt-br/documentacao/produtos/ai/ai-inference/modelos/) para mais informações sobre os modelos disponíveis e como usá-los em sua aplicação. 
+ diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx index 6254adfed0..61ecb3ccfb 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/Qwen3-30b-a3b-Instruct-2507-fp8.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_30ba3b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/ +menu_namespace: AIInferenceMenu --- **Qwen3-30B-A3B-Instruct-2507-FP8** é um modelo de linguagem causal FP8 ajustado por instruções com 30 bilhões de parâmetros para geração de texto de longo contexto (256K) e raciocínio, suportando chat/QA, sumarização, tarefas multilíngues, resolução de problemas de matemática/ciência, codificação e fluxos de trabalho aumentados por ferramentas. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx index 5c3bebb63b..191e346f64 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/baai-bge-reranker-v2-m3.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, artificial intelligence, edge computing' namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3 permalink: /documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/ +menu_namespace: AIInferenceMenu --- **BAAI/bge-reranker-v2-m3** é um modelo de reranking leve com fortes capacidades multilíngues. Ele é fácil de implementar e oferece inferência rápida. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx index f73ce0e88e..1975b951ff 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/internvl3.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos de ia, inteligência artificial, edge computing' namespace: docs_edge_ai_models_internvl3 permalink: /documentacao/produtos/ai/ai-inference/modelos/internvl3/ +menu_namespace: AIInferenceMenu --- **InternVL3** é um Multimodal Large Language Model avançado (MLLM) com capacidades para abranger tool calling, agentes GUI, análise de imagem industrial, percepção de visão 3D e mais. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx index ee108738a1..1b416aacae 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/mistral-3-small.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, mistral' namespace: docs_edge_ai_models_mistral_3_small permalink: /documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/ +menu_namespace: AIInferenceMenu --- **Mistral 3 Small** é um modelo de linguagem que, embora sendo compacto, oferece capacidades comparáveis às de modelos maiores. Ele é ideal para agentes conversacionais, chamada de função, ajuste fino e inferência local com dados sensíveis. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx index f10aec2d95..1c63f04b65 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/nanonets-ocr-r.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai models, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_nanonets_ocr_s permalink: /documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/ +menu_namespace: AIInferenceMenu --- **Nanonets-OCR-s** é um modelo OCR que converte imagens de documentos em Markdown estruturado, preservando o layout (títulos, listas, tabelas) e tags básicas. A saída é fácil de analisar e alimentar em pipelines de LLM. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx index 5ba0cc2434..725715e80e 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-3b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_3b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/ +menu_namespace: AIInferenceMenu --- O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. Ele suporta 3 bilhões de parâmetros. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx index 37086b812c..e0a1fa07d4 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen-2-5-vl-7b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, computação edge, qwen' namespace: docs_edge_ai_models_qwen_2_5_vl_7b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/ +menu_namespace: AIInferenceMenu --- O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx index b986fe2e2c..376c2a25b4 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/ai-models/qwen3-embedding-4b.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, ai modelos, inteligência artificial, edge computing, qwen' namespace: docs_edge_ai_models_qwen_3_embedding_4b permalink: /documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/ +menu_namespace: AIInferenceMenu --- **Qwen3 Embedding 4B** é um modelo de embedding multilíngue com 4 bilhões de parâmetros (36 camadas, 32K de contexto) que gera vetores de 2560 dimensões para recuperação de texto/código, classificação, agrupamento e mineração de bitexto. Ele suporta embeddings condicionados por instrução e é otimizado para aprendizado de representação eficiente e multilíngue. diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx index 8be09bd7ed..3123cf3502 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/edge-ai-reference.mdx @@ -1,59 +1,75 @@ --- title: AI Inference description: >- - O AI Inference da Azion capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados. -meta_tags: 'ai inference, inteligência artificial, edge computing' + A AI Inference permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. 
+meta_tags: 'inferência de ia, inteligência artificial, computação de borda, assistente de ia, agente de ia' namespace: docs_edge_ai_reference permalink: /documentacao/produtos/ai/ai-inference/ +menu_namespace: AIInferenceMenu + --- import LinkButton from 'azion-webkit/linkbutton'; -O **AI Inference** capacita você a construir e implementar aplicações inteligentes que processam dados perto de onde são gerados. Ao combinar inteligência artificial com edge computing, o AI Inference elimina as complexidades de escalabilidade e gerenciamento de infraestrutura, permitindo tomadas de decisão em tempo real e desempenho aprimorado. +**AI Inference** permite que você execute modelos de AI diretamente na infraestrutura altamente distribuída da Azion. -Com o AI Inference da Azion, você pode integrar perfeitamente capacidades de AI em suas aplicações, aproveitando ferramentas como Edge Functions, Edge Application e a API da Azion para criar soluções escaláveis, seguras e eficientes. +Com o AI Inference da Azion, você pode integrar capacidades de AI em suas aplicações, aproveitando ferramentas como **Functions**, **Applications**, **Vector Search** e a API da Azion para criar soluções escaláveis, seguras e eficientes. -O AI Inference possibilita: +Comece implantando o Template do Starter Kit do AI Inference: -- **Executar modelos de AI no Edge Runtime**, permitindo que arquiteturas avançadas de AI sejam executadas diretamente no edge para latência mínima e desempenho máximo. -- **Implementar agentes de AI autônomos** que analisam dados e tomam decisões no edge. -- **Processamento em tempo real** com latência reduzida e eficiência aprimorada. -- Tudo como parte de uma **plataforma completa**, incluindo Edge Application, Edge Functions, busca vetorial do Edge SQL e muito mais. 
+ --- -## Recursos +## Funcionalidades -### Modelos disponíveis +### API Compatível com OpenAI -Acesse nosso catálogo de modelos de AI de código aberto que você pode executar diretamente no Runtime da Azion. Esses modelos são otimizados para implementação no edge com requisitos mínimos de recursos. +Conecte aplicações usando o formato de endpoint compatível com OpenAI da Azion. - +### Execute modelos otimizados para o edge -### Personalização de modelos +- Execute modelos de AI no edge, utilizando a infraestrutura globalmente distribuída da Azion para minimizar a latência e permitir inferência em tempo real. +- Acesse um catálogo selecionado de modelos de código aberto, prontos para rodar no Azion Runtime e otimizados para implantação distribuída com baixo consumo de recursos. +- Suporte nativo para inferência de modelos de linguagem de grande porte (LLMs) e modelos de visão-linguagem (VLMs). -O AI Inference permite que você ajuste, treine e especialize modelos usando **Low-Rank Adaptation (LoRA)**. Esse recurso permite que você otimize modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. + -### Agentes de AI +### Ajuste Fino de Modelos com LoRA -O AI Inference suporta a implementação de agentes de AI como ReAct (Raciocínio + Ação) no edge, permitindo tarefas avançadas como respostas contextuais, pesquisa semântica e processamento inteligente de dados. +O AI Inference permite que você ajuste, treine e especialize modelos com seus próprios dados e parâmetros. Essa capacidade permite otimizar modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio. -### Integração com Edge SQL +--- -Integre o AI Inference com o **Edge SQL** para habilitar capacidades de busca vetorial, permitindo consultas semânticas e busca híbrida. 
Essa integração aprimora aplicativos alimentados por AI, fornecendo resultados precisos e contextualmente relevantes e suportando implementações eficientes de Retrieval Augmented Generation (RAG). +### Exemplos do que você pode construir com a AI Inference ---- +- **Assistentes de AI**: Construa e implante assistentes de AI que atendem milhares de usuários simultaneamente com baixa latência, oferecendo suporte em tempo real, FAQs dinâmicas e assistência ao cliente sem sobrecarga na nuvem. -## Produtos relacionados +- **Agentes de AI**: Construa agentes de AI que automatizam fluxos de trabalho de múltiplas etapas, reduzindo dias de esforço manual para minutos, e liberando equipes para trabalhos de maior valor—impulsionando a produtividade em todas as operações. + +- **Automatize a Detecção e Remoção de Ameaças com AI**: Combine LLMs e modelos de visão-linguagem (VLMs) para monitorar ativos digitais, identificar padrões de phishing/abuso em texto e imagens, e automatizar a classificação e remoção de ameaças em ambientes distribuídos. -- [Edge Application](/pt-br/documentacao/produtos/build/edge-application/): construa aplicações que executam diretamente na rede distribuída da Azion, oferecendo desempenho e opções de personalização excepcionais. -- [Edge Functions](/pt-br/documentacao/produtos/build/edge-application/edge-functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com requisições e respostas. -- [Edge SQL](/pt-br/documentacao/produtos/store/edge-sql/): uma solução SQL edge-native projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados no edge. -- [Vector Search](/pt-br/documentacao/produtos/store/edge-sql/vector-search/): ative motores de busca semântica e recomendações impulsionadas por AI através de embeddings vetoriais no edge. 
+## Integração com Banco de Dados SQL + +Integre sua aplicação com o **Banco de Dados SQL** para habilitar capacidades de [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/), permitindo consultas semânticas e busca híbrida. Essa integração melhora as aplicações impulsionadas por AI ao fornecer resultados precisos e contextualmente relevantes e ao suportar implementações eficientes de Geração Aumentada por Recuperação (RAG). + +## Limites + +Estes são os **limites padrão**: + +| Escopo | Limite | +| ----- | ----- | +| Requisições por minuto | 300 | --- -Explore exemplos práticos de como implementar soluções de AI com a Azion: +## Produtos relacionados - - \ No newline at end of file +- [Applications](/pt-br/documentacao/produtos/build/applications/): construa aplicações que rodam diretamente na infraestrutura distribuída da Azion, oferecendo desempenho excepcional e opções de personalização. +- [Functions](/pt-br/documentacao/produtos/build/applications/functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com solicitações e respostas. +- [SQL Database](/pt-br/documentacao/produtos/store/sql-database/): uma solução SQL nativa de borda projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados na borda. Também habilita [Vector Search](/pt-br/documentacao/produtos/store/sql-database/vector-search/) para realizar busca semântica e recomendações impulsionadas por AI através de incorporação vetorial. 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx index 2c71fd3350..3d6a457ac9 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/ai-inference/modelos.mdx @@ -5,6 +5,7 @@ description: >- meta_tags: 'ai inference, modelos ai, inteligência artificial, edge computing' namespace: docs_edge_ai_models permalink: /documentacao/produtos/ai/ai-inference/modelos/ +menu_namespace: AIInferenceMenu --- import LinkButton from 'azion-webkit/linkbutton'; diff --git a/src/data/availableMenu.ts b/src/data/availableMenu.ts index 2d8569e1ec..5972012090 100644 --- a/src/data/availableMenu.ts +++ b/src/data/availableMenu.ts @@ -10,5 +10,6 @@ export const availableMenus = [ { name: 'observeMenu', langs: ['en', 'pt-br'] }, { name: 'deployMenu', langs: ['en', 'pt-br'] }, { name: 'storeMenu', langs: ['en', 'pt-br'] }, - { name: 'libMenu', langs: ['en', 'pt-br'] } + { name: 'libMenu', langs: ['en', 'pt-br'] }, + { name: 'AIInferenceMenu', langs: ['en', 'pt-br'] } ] diff --git a/src/i18n/en/AIInferenceMenu.ts b/src/i18n/en/AIInferenceMenu.ts new file mode 100644 index 0000000000..a48cb1da42 --- /dev/null +++ b/src/i18n/en/AIInferenceMenu.ts @@ -0,0 +1,38 @@ +/** + * This configures the navigation sidebar. + * All other languages follow this ordering/structure and will fall back to + * English for any entries they haven’t translated. 
+ * + * - All entries MUST include `text` and `key` + * - Heading entries MUST include `header: true` and `type` + * - Link entries MUST include `slug` (which excludes the language code) + */ +export default [ + { text: 'Documentation', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/', key: 'documentation' }, + { text: 'Guides', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/products/guides/', key: 'guides' }, + { text: 'Dev Tools', header: true, onlyMobile: true, anchor: true, type: 'learn', slug: '/documentation/devtools/', key: 'devTools' }, + + /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// + + { text: 'Overview', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentation/products/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + //{ text: 'Get Started', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentation/products/guides/ai-inference-agent/' }, + + { text: ' Available Models', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentation/products/ai/ai-inference/models/', items: [ + { text: 'BAAI/bge reranker v2 m3', slug: '/documentation/products/ai/ai-inference/models/baai-bge-reranker-v2-m3', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, + { text: 'InternVL3', slug: '/documentation/products/ai/ai-inference/models/internvl3', key: 'aiinference/InternVL3' }, + { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentation/products/ai/ai-inference/models/mistral-3-small', key: 'aiinference/mistral-3-small' }, + { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-3b', key: 'aiinference/qwen-2-5-vl-awq-3b' }, + { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentation/products/ai/ai-inference/models/qwen-2-5-vl-7b', key: 'aiinference/qwen-2-5-vl-awq-7b' }, + { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: 
'/documentation/products/ai/ai-inference/models/qwen3-30ba3b', key: 'aiinference/qwen-3-instruct' }, + { text: 'Qwen3 Embedding 4B', slug: '/documentation/products/ai/ai-inference/models/qwen3-embedding-4b', key: 'aiinference/qwen3-embedding' }, + { text: 'Nanonets-OCR-s', slug: '/documentation/products/ai/ai-inference/models/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' }, + ]}, + + { text: 'Guides', header: true, type: 'learn', key: 'aiinference/guides', items: [ + { text: 'Deploy AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentation/products/guides/ai-inference-starter-kit' }, + { text: 'Deploy LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentation/products/guides/langgraph-ai-agent-boilerplate' }, + ]}, + + + +] as const; diff --git a/src/i18n/en/ui.ts b/src/i18n/en/ui.ts index 934e9420ee..4c7edfb1cc 100644 --- a/src/i18n/en/ui.ts +++ b/src/i18n/en/ui.ts @@ -167,7 +167,8 @@ export default { 'menu.runtime': 'Azion Runtime', 'menu.store': 'Store', 'menu.storage': 'Edge Storage', - 'menu.edgeSQL': 'Edge SQL' + 'menu.edgeSQL': 'Edge SQL', + 'menu.aiinference':'AI Inference' }; diff --git a/src/i18n/pt-br/AIInferenceMenu.ts b/src/i18n/pt-br/AIInferenceMenu.ts new file mode 100644 index 0000000000..114e8c2114 --- /dev/null +++ b/src/i18n/pt-br/AIInferenceMenu.ts @@ -0,0 +1,38 @@ +/** + * This configures the navigation sidebar. + * All other languages follow this ordering/structure and will fall back to + * English for any entries they haven’t translated. 
+ * + * - All entries MUST include `text` and `key` + * - Heading entries MUST include `header: true` and `type` + * - Link entries MUST include `slug` (which excludes the language code) + */ +export default [ + { text: 'Documentação', header: true, onlyMobile: true, anchor: true, slug: '/documentacao/', key: 'documentation' }, + { text: 'Guias',header: true, onlyMobile: true, anchor: true, slug: '/documentacao/produtos/guias/', key: 'guides' }, + { text: 'Dev Tools',header: true, onlyMobile: true, anchor: true, slug: '/documentacao/produtos/dev-tools/', key: 'devTools' }, + + /// START HERE :::: DO NOT REMOVE the strings above, it's a work around for header on mobile /// + + { text: 'Visão Geral', header: true, anchor: true, type: 'learn', key: 'overview-aiinference', slug: '/documentacao/produtos/ai/ai-inference/', hasLabel: 'menu.aiinference' }, + //{ text: 'Comece Agora', header: true, anchor: true, type: 'learn', key: 'get-started-aiinference', slug: '/documentacao/produtos/guias/ai-inference-agent/' }, + + { text: ' Modelos disponíveis', header: true, type: 'learn', key: 'aiinference.models',slug: '/documentacao/produtos/ai/ai-inference/modelos/', items: [ + { text: 'BAAI/bge reranker v2 m3', slug: '/documentacao/produtos/ai/ai-inference/modelos/baai-bge-reranker-v2-m3/', key: 'aiinference/BAAI/bge-reranker-v2-m3' }, + { text: 'InternVL3', slug: '/documentacao/produtos/ai/ai-inference/modelos/internvl3/', key: 'aiinference/InternVL3' }, + { text: 'Mistral 3 Small (24B AWQ)', slug: '/documentacao/produtos/ai/ai-inference/modelos/mistral-3-small/', key: 'aiinference/mistral-3-small' }, + { text: 'Qwen2.5 VL AWQ 3B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-3b/', key: 'aiinference/qwen-2-5-vl-awq-3b' }, + { text: 'Qwen2.5 VL AWQ 7B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen-2-5-vl-7b/', key: 'aiinference/qwen-2-5-vl-awq-7b' }, + { text: 'Qwen3 30B A3B Instruct 2507 FP8', slug: 
'/documentacao/produtos/ai/ai-inference/modelos/qwen3-30ba3b/', key: 'aiinference/qwen-3-instruct' }, + { text: 'Qwen3 Embedding 4B', slug: '/documentacao/produtos/ai/ai-inference/modelos/qwen3-embedding-4b/', key: 'aiinference/qwen3-embedding' }, + { text: 'Nanonets-OCR-s', slug: '/documentacao/produtos/ai/ai-inference/modelos/nanonets-ocr-s/', key: 'aiinference/nanonets-OCR-s' }, + ]}, + + { text: 'Guias', header: true, type: 'learn', key: 'aiinference/guides', items: [ + { text: 'Implemente o AI Inference Starter kit', header: true, anchor: true, type: 'learn', key: 'aiinference/starter-kit', slug: '/documentacao/produtos/guias/ai-inference-starter-kit/' }, + { text: 'Implemente LangGraph AI Agent Boilerplate', header: true, anchor: true, type: 'learn', key: 'aiinference/langgraph-boilerplate', slug: '/documentacao/produtos/guias/langgraph-ai-agent-boilerplate/' }, + ]}, + + + +] as const;