Calling LLM models with LangChain
Calling an LLM API
To protect the API credentials, the API information is usually kept in a separate environment-variable file, .env.
# .env file
OPENAI_API_KEY= ***
OPENAI_MODEL_NAME=gpt-3.5-turbo
BASE_URL= ***
When wrapping the LLM, call dotenv.load_dotenv; it automatically reads the .env file in the project root (make sure one exists) and loads its contents as environment variables.
from dotenv import load_dotenv
import os

load_dotenv()  # automatically reads .env

model = os.getenv("OPENAI_MODEL_NAME")
api_key = os.getenv("OPENAI_API_KEY")
base_url = os.getenv("BASE_URL")
# ... any other environment variables you need
Wrapping the LLM API is straightforward; just don't forget to test the connection during initialization.
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
import os
from typing import Optional, Dict, Any
import asyncio  # async IO library


class LLMService:
    def __init__(self):
        # Better not to create the LLM inside __init__
        self.llm = None

    async def initialize(self):
        load_dotenv()
        self.llm = ChatOpenAI(
            model=os.getenv("OPENAI_MODEL_NAME"),
            temperature=0.7,
            streaming=True,
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("BASE_URL"),
        )
        try:
            print("Testing API connection...")
            response = await self.llm.ainvoke("测试消息")
            print("API connection succeeded!")
        except Exception as e:
            print(f"API connection error: {str(e)}")
            raise

    def get_llm(self):
        """Return the LLM instance"""
        return self.llm
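The ChatPromptTemplate, StrOutputParser and RunnablePassthrough imports above are not used by the class itself, but they are what you would reach for to wire the service into an LCEL chain. A minimal sketch, assuming an already-initialized LLMService instance named llm_service and an illustrative prompt:

prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一个乐于助人的AI助手。"),
    ("human", "{question}"),
])

chain = (
    {"question": RunnablePassthrough()}  # pass the raw input through as "question"
    | prompt
    | llm_service.get_llm()
    | StrOutputParser()  # pull the plain text out of the AIMessage
)

print(chain.invoke("你好"))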
When calling the API:
from llm import LLMService
import asyncio


async def main():
    # Note: async calls must be awaited inside an async function
    llm_service = LLMService()
    # Initialize if it has not been done yet
    if not llm_service.get_llm():
        await llm_service.initialize()
    print(llm_service.get_llm().invoke("你好").content)


if __name__ == "__main__":
    asyncio.run(main())
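Since the ChatOpenAI instance above is created with streaming=True, you can also consume the reply piece by piece. A rough sketch using the standard astream interface (the prompt string is just an example):

async def stream_demo():
    llm_service = LLMService()
    if not llm_service.get_llm():
        await llm_service.initialize()
    # astream yields message chunks; print each piece as it arrives
    async for chunk in llm_service.get_llm().astream("你好"):
        print(chunk.content, end="", flush=True)


asyncio.run(stream_demo())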
Calling a local model
Let's take a Transformers-based GLM9B as an example.
from langchain.llms.base import LLM
from typing import Any, List, Optional, Iterator
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from pydantic import Field
from threading import Thread
import os


class GLM9B(LLM):
    model_path: str = Field()
    device_map: str = Field()
    system_prompt: Optional[str] = Field(default=None)
    tokenizer: Optional[Any] = Field(default=None)
    model: Optional[Any] = Field(default=None)

    def __init__(
        self,
        model_path: str = "<YOUR_PATH>",
        device_map: str = "auto",
        system_prompt: str = None,
    ):
        super().__init__(model_path=model_path, device_map=device_map)
        self.model_path = model_path
        self.device_map = device_map
        self.system_prompt = system_prompt
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_path, trust_remote_code=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            device_map=self.device_map,
        ).eval()

    def _stream(self, prompt: str, stop: Optional[List[str]] = None) -> Iterator[str]:
        messages = []
        if self.system_prompt:
            messages.append({"role": "system", "content": self.system_prompt})
        messages.append({"role": "user", "content": prompt})
        inputs = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        streamer = TextIteratorStreamer(self.tokenizer, skip_special_tokens=True)
        gen_kwargs = {
            "max_length": 25000,
            "do_sample": True,
            "top_k": 1,
            "streamer": streamer,
        }
        # Run model generation in a background thread
        thread = Thread(target=self.model.generate, kwargs={**inputs, **gen_kwargs})
        thread.start()
        # Read the generated tokens from the streamer
        for text in streamer:
            yield text

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        return "".join(self._stream(prompt, stop))

    @property
    def _llm_type(self) -> str:
        return "GLM9B"
When calling it:
llm = GLM9B(
    model_path="<YOUR_PATH>",
    device_map="auto",
    system_prompt="你是一个乐于助人的AI助手。",
)

# Streaming output test
for chunk in llm._stream("你好"):
    print(chunk, end="", flush=True)
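Because GLM9B subclasses LangChain's LLM base class, it can also be dropped into a chain just like an API-backed model. A minimal sketch, using a hypothetical prompt template for illustration:

from langchain_core.prompts import PromptTemplate

# Hypothetical template; adjust it to your own task
prompt = PromptTemplate.from_template("请用一句话回答：{question}")
chain = prompt | llm  # the LLM base class already returns a plain string

print(chain.invoke({"question": "什么是LangChain？"}))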