Table of Contents
- 1. Building the Q&A Engine
- 2. Building the FastAPI Backend
- 3. Building the Streaming Frontend
1. Building the Q&A Engine
Create a new engine.py file:
import os
from llama_index.core.node_parser import SentenceSplitter

# ---------------------
# step 1. Set the base URL, the inference model name, and the embedding model name
# ---------------------
BASE_IP = "http://localhost:11434"
BASE_URL = BASE_IP + "/api/chat"
MODEL_NAME = "deepseek-r1"

# ---------------------
# step 2. Configure the local embedding model and the local LLM
# ---------------------
from llama_index.embeddings.ollama import OllamaEmbedding
embedding = OllamaEmbedding(base_url=BASE_IP, model_name=MODEL_NAME)

from llama_index.llms.ollama import Ollama
llm = Ollama(
    base_url=BASE_IP,
    model=MODEL_NAME,
    temperature=0.3,  # lower randomness
    system_prompt="你是一个严谨的中文问答助手,严格根据上下文信息回答问题",
)

# Configure global settings
from llama_index.core import Settings, PromptTemplate
Settings.embed_model = embedding
Settings.llm = llm
Settings.node_parser = SentenceSplitter(chunk_size=512)  # add text chunking

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import SimpleDirectoryReader
import qdrant_client

# Load the documents
documents = SimpleDirectoryReader(
    input_files=['./docs/雪山飞狐.txt'],
    file_metadata=lambda x: {"source": os.path.basename(x)},
).load_data()
print('1. SimpleDirectoryReader finished loading documents!')

# Connect to Qdrant and persist the data in a local folder
QDRANT_PATH = "./qdrant_storage"  # dedicated storage path to avoid permission problems
qclient = qdrant_client.QdrantClient(path=QDRANT_PATH)
vector_store = QdrantVectorStore(
    client=qclient,
    collection_name="wenda",
    # enable_hybrid=True,  # enable hybrid search
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create the index (automatic chunking + embedding)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    transformations=[Settings.node_parser],  # apply chunking
    show_progress=True,
)
print('2. Connected to Qdrant and persisted the index locally, finished!')

# Build the retriever
from llama_index.core.retrievers import VectorIndexRetriever
# To customize parameters, build a keyword-argument dict
emb = embedding.get_text_embedding("你好呀呀")
dimensions = len(emb)
kwargs = {
    'similarity_top_k': 8,
    'index': index,
    'dimensions': dimensions,
    # 'vector_store_query_mode': "hybrid",  # hybrid search mode
    'alpha': 0.5,  # balance keyword and semantic search
}  # required parameters
retriever = VectorIndexRetriever(**kwargs)
print('3. Retriever built!')

# Build the response synthesizer
from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
# Note: the summary template may only use {context_str} and {query_str}; an extra
# variable such as {language_name} would raise a KeyError when the template is
# formatted at query time, so the answer language is written into the template.
qa_prompt_tmpl = (
    "根据以下上下文信息:\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "使用中文回答以下问题\n"
    "问题: {query_str}\n"
    "答案: "
)
qa_prompt = PromptTemplate(qa_prompt_tmpl)
response_synthesizer = get_response_synthesizer(
    llm=llm,
    streaming=True,
    response_mode=ResponseMode.TREE_SUMMARIZE,
    summary_template=qa_prompt,
)
print('4. Response synthesizer built!')

# Build the query engine
from llama_index.core.query_engine import RetrieverQueryEngine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)
print('5. Query engine built!')
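Before wrapping the engine in a web service, it is worth smoke-testing it on its own. Below is a minimal sketch, assuming the code above ran through and the index was built; the test question is illustrative. Guarding it with if __name__ == "__main__" keeps the test from firing when main.py imports the engine later.

# Optional smoke test: append to the end of engine.py.
if __name__ == "__main__":
    streaming_response = query_engine.query("胡斐的爸爸是谁?")  # illustrative question
    # streaming=True was set on the synthesizer, so tokens print as they arrive
    for chunk in streaming_response.response_gen:
        print(chunk, end="", flush=True)
    print()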
2. Building the FastAPI Backend
Create a new main.py file:
import uvicorn  # ASGI server used to run the FastAPI application
from fastapi import FastAPI  # core FastAPI class used to create the web application instance
from fastapi.middleware.cors import CORSMiddleware  # CORS middleware for cross-origin requests
from fastapi.responses import StreamingResponse  # streaming response class for real-time data transfer
from engine import query_engine  # pre-built query engine from our module (streaming already enabled)

# Create the FastAPI application instance, the core object of the web service
app = FastAPI()

# Configure the CORS middleware to allow cross-origin requests from any origin (*).
# This is fine for development; restrict the allowed domains in production.
app.add_middleware(CORSMiddleware, allow_origins=["*"])

# Define a GET route at the /stream_chat endpoint
@app.get('/stream_chat')
async def stream_chat(param: str = "你好"):  # query parameter named param, default "你好"
    # Run the parameter through the query engine
    response_stream = query_engine.query(param)

    def generate():
        """Generator that produces the response content chunk by chunk."""
        for text in response_stream.response_gen:  # iterate over the streaming generator
            yield text  # emit text fragments incrementally

    # Build a streaming response with media type text/event-stream (SSE)
    return StreamingResponse(generate(), media_type='text/event-stream')

if __name__ == '__main__':
    # Run the Uvicorn server directly, bound to all interfaces (0.0.0.0) on port 5000
    uvicorn.run(app, host='0.0.0.0', port=5000)
Then run python main.py to start the service.
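Once the service is up, you can sanity-check the streaming endpoint before wiring up the frontend. One caveat: although the route declares media_type='text/event-stream', the generator yields raw text chunks without the data: framing of the SSE protocol, so an EventSource client would not parse it; that is why the frontend below reads the body directly with fetch and getReader(). A minimal client sketch, assuming the server is running locally on port 5000 (the question is illustrative):

# stream_test.py: minimal streaming client sketch
import requests

resp = requests.get(
    "http://127.0.0.1:5000/stream_chat",
    params={"param": "胡斐的爸爸是谁?"},  # illustrative question
    stream=True,  # do not buffer the whole response body
)
resp.encoding = resp.encoding or "utf-8"  # make sure chunks decode as UTF-8
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="", flush=True)
print()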
3. Building the Streaming Frontend
Create a new chat.html file:
<!DOCTYPE html>
<html lang="zh">
<head>
  <meta charset="UTF-8"/>
  <!-- 1. viewport ensures responsive layout on mobile -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <!-- 2. External resources -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" rel="stylesheet"/>
  <link href="https://ai-public.mastergo.com/gen_page/tailwind-custom.css" rel="stylesheet"/> <!-- customized Tailwind CSS styles -->
  <!-- 3. Animation styles -->
  <style id="animation-styles">
    /* gradient keyframes: flowing effect for the main background */
    @keyframes gradient {
      0% { background-position: 0% 50%; }
      50% { background-position: 100% 50%; }
      100% { background-position: 0% 50%; }
    }
  </style>
  <link rel="preconnect" href="https://fonts.googleapis.com"/>
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin=""/>
  <link href="https://fonts.googleapis.com/css2?family=Pacifico&display=swap" rel="stylesheet"/>
  <script src="https://cdn.tailwindcss.com/3.4.5?plugins=forms@0.5.7,typography@0.5.13,aspect-ratio@0.4.2,container-queries@0.1.1"></script>
  <script src="https://ai-public.mastergo.com/gen_page/tailwind-config.min.js" data-color="#AC1BF5" data-border-radius="medium"></script>
  <script src="https://cdn.staticfile.org/vue/3.4.5/vue.global.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
  <!-- 1. Splash screen: fixed full-screen overlay with a black background; the title uses
       the Pacifico handwriting font; opacity-0 plus the JS below creates the fade-in. -->
  <div id="splash" class="fixed inset-0 bg-black flex items-center justify-center z-50">
    <h1 class="text-7xl font-['Pacifico'] text-white opacity-0 transition-opacity duration-1000 animate-bounce">哈哈哈哈,我的RAG from wow-rag</h1>
  </div>
  <!-- 2. Main chat container -->
  <div id="app" class="hidden h-screen bg-gradient-to-br from-purple-100 via-blue-100 to-purple-100 bg-[size:400%_400%] relative overflow-hidden">
    <div class="absolute inset-0 animate-[gradient_10s_linear_infinite] z-0">
      <div class="animate-pulse absolute w-64 h-64 bg-custom/10 rounded-full -top-32 -left-32"></div>
      <div class="animate-pulse absolute w-96 h-96 bg-blue-200/20 rounded-full -bottom-48 -right-48"></div>
    </div>
    <div class="container mx-auto h-full max-w-4xl px-4 py-6 flex flex-col">
      <div id="chatContainer" class="flex-1 overflow-y-auto mb-4 space-y-4 h-[calc(100%-120px)] z-20">
        <div v-for="item in messageList.filter((v) => v.role !== 'system')" class="flex" :class="{ 'justify-start': item.role === 'me', 'justify-end': item.role !== 'me' }">
          <div class="max-w-[70%] space-y-1">
            <div class="text-xs text-gray-500">{[ roleAlias[item.role] ]}</div>
            <div class="rounded-lg p-3 shadow-sm" :class="{'bg-custom text-white': item.role !== 'me', 'bg-white': item.role === 'me'}">
              <p v-html="markdownToHtml(item.content)"></p>
            </div>
          </div>
        </div>
      </div>
      <div class="bg-white/80 backdrop-blur-sm rounded-lg p-4 shadow-lg">
        <div class="flex items-center space-x-2">
          <button class="text-gray-500 hover:text-custom"><i class="far fa-smile text-xl"></i></button>
          <textarea
            id="messageInput"
            class="flex-1 bg-transparent border-none focus:ring-0 text-gray-800 placeholder-gray-400"
            placeholder="请输入您的问题..."
            v-model="messageContent"
            rows="3"></textarea>
          <button id="sendButton" class="bg-custom text-white px-4 py-2 rounded-lg hover:bg-custom/80 transition-colors !rounded-button" @click="send()">
            <i class="fas fa-paper-plane"></i>
          </button>
        </div>
      </div>
    </div>
  </div>
  <!-- 3. Core Vue application logic -->
  <script>
    // Render the streamed data
    async function fetchStream(param) {
      const url = `http://127.0.0.1:5000/stream_chat?param=${encodeURIComponent(param)}`;
      const response = await fetch(url);
      const reader = response.body.getReader();
      while (true) {
        const { value, done } = await reader.read();
        if (done) {
          statusvue.isTalking = false;
          break;
        }
        const text = new TextDecoder("utf-8").decode(value);
        let len = statusvue.messageList.length;
        statusvue.messageList[len - 1].content += text;  // append to the last (assistant) message
      }
    }

    const app = Vue.createApp({
      // Data model
      data() {
        return {
          cdata: [1, 0, 0],
          roleAlias: { "me": "我", "GPT": "小助手", "system": "机器人" },
          messageList: [
            { "role": "system", "content": "你是一个人工智能答疑机器人,擅长借助RAG文档帮助用户进行问题回答" },
            { "role": "GPT", "content": "您好!我是WOW-RAG智能助手,很高兴为您服务。" },
          ],
          isTalking: false,
          messageContent: "",
        }
      },
      delimiters: ['{[', ']}'],
      methods: {
        // Message-sending logic
        send() {
          const userMessage = this.messageContent.trim();
          if (!userMessage) return;
          // Append the user's message
          this.messageList.push({ "role": "me", "content": userMessage });
          const message = document.getElementById('chatContainer');
          message.scrollTop = message.scrollHeight;
          // Append a placeholder for the assistant's reply
          this.messageList.push({ "role": "GPT", "content": "" });
          this.isTalking = true;
          // Send the request
          fetchStream(userMessage).then(() => {
            this.isTalking = false;
          });
          this.messageContent = '';
        },
        // Convert Markdown to HTML
        markdownToHtml(markdown) {
          return marked.parse(markdown);
        }
      }
    })
    const statusvue = app.mount('#app');

    // DOM-loaded event
    document.addEventListener('DOMContentLoaded', () => {
      const splash = document.getElementById('splash');
      const app = document.getElementById('app');
      const splashText = splash.querySelector('h1');
      const messageInput = document.getElementById('messageInput');
      const sendButton = document.getElementById('sendButton');
      // Fade the splash title in, then fade the splash out and reveal the app
      setTimeout(() => { splashText.style.opacity = '1'; }, 100);
      setTimeout(() => {
        splash.style.opacity = '0';
        splash.style.transition = 'opacity 1s';
        setTimeout(() => {
          splash.style.display = 'none';
          app.classList.remove('hidden');
        }, 1000);
      }, 2000);
      sendButton.addEventListener('click', () => {
        const message = messageInput.value.trim();
        if (message) {
          statusvue.send();
        }
      });
      messageInput.addEventListener('keypress', (e) => {
        if (e.key === 'Enter') {
          sendButton.click();
        }
      });
    });
  </script>
</body>
</html>
There are two ways to use it: open chat.html directly in a browser, or serve it from any static file server. For example, open a cmd window in the directory containing the file and run: python -m http.server 8080 --bind 0.0.0.0
Incredible: it says 胡斐's father is 苗若兰. All I can say is that my document loading, text splitting, retrieval, and augmentation stages still leave a lot of room for improvement, which is why the generation came out like this.
Finally, I want to say that RAG is a big engineering effort, and getting every stage right is hard: data cleaning, data loading, text splitting, persistence, retrieval strategy, augmentation. None of it is easy, to say nothing of the stages tied to the LLM itself.
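To make that concrete, here is a small, hedged sketch of the kind of tuning the splitting and retrieval stages invite: overlapping chunks at split time and a similarity floor at retrieval time. It continues from the names defined in engine.py, and the values 50 and 0.7 are illustrative starting points, not recommendations.

# Sketch only: two incremental refinements to the pipeline in engine.py.
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine

# 1. Overlapping chunks lower the chance that an answer span is cut in half
#    at a chunk boundary (chunk_overlap=50 is an illustrative value).
Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=50)

# 2. Drop weakly matching nodes before they reach the synthesizer
#    (similarity_cutoff=0.7 is an illustrative value).
query_engine = RetrieverQueryEngine(
    retriever=retriever,  # retriever and response_synthesizer from engine.py
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)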