
随着大模型本地化部署的普及,基于 FastAPI 封装大模型接口并实现鉴权、可视化交互,成为实现落地大模型应用的核心场景。前一篇博文我们讲解了大模型本地化部署以及api鉴权调用的基础示例,今天我们在初级理论的基础上强化实际应用,以“本地大模型文本生成 API+Streamlit 可视化前端”为核心案例,从代码分解、执行流程、技术栈解析、价值细节、实际应用意义五个维度,由浅入深讲解完整的开发与扩展逻辑。
今天我们有针对性地实现一个本地大模型轻量级应用解决方案:核心目标是将本地化部署的大模型(这里选择体量相对较小的 Qwen1.5-1.8B-Chat)封装为带鉴权的 HTTP 接口,并配套可视化交互前端,实现“大模型能力→API 服务→可视化操作”的全链路落地。案例既保留基础的 API Key/JWT 鉴权核心,又通过扩展实现参数定制、限流、历史记录等结合实际应用的功能,适配实际业务落地的需求场景。

简单来说,这个示例解决了两个核心问题:
前端(app.py):技术栈 Streamlit
核心功能:
后端(main.py):技术栈:FastAPI + Transformers
核心功能:

流程说明:

流程说明:
新建requirements.txt,补充可视化所需依赖:
# 原有依赖
fastapi>=0.104.1
uvicorn>=0.24.0
pydantic>=2.4.2
transformers>=4.35.2
torch>=2.1.0
python-dotenv>=1.0.0
python-jose>=3.3.0
cryptography>=41.0.7
# 新增可视化依赖
streamlit>=1.28.2
requests>=2.31.0
安装依赖:pip install -r requirements.txt
核心库简介:
# 配置加载模块
load_dotenv() # 加载环境变量,解耦敏感配置
VALID_API_KEY = os.getenv("VALID_API_KEY", "default_key_123") # 缺省值保证容错
JWT_SECRET_KEY = os.getenv("JWT_SECRET_KEY", "my_jwt_secret_123")
# 单例模型加载类(LocalLLM)
class LocalLLM:
_instance = None # 单例模式:避免重复加载模型占用内存
_initialized = False
def __new__(cls): # 控制实例唯一
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
def __init__(self):
if not LocalLLM._initialized:
# 模型加载核心逻辑:路径校验、内存优化、异常捕获
self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
self.model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype=torch.float32,
device_map="cpu", # 适配无GPU环境
low_cpu_mem_usage=True # 减少内存占用
)
self.model.eval() # 推理模式,禁用梯度计算
LocalLLM._initialized = True
核心价值:单例模式避免多次加载模型导致的内存溢出,环境变量解耦敏感配置,异常捕获提升程序鲁棒性。
# API Key鉴权:请求头校验
def generate_by_apikey(request: LLMRequest, x_api_key: str = Header(None)):
if not x_api_key or x_api_key != VALID_API_KEY:
raise HTTPException(status_code=401, detail="API Key错误")
# JWT鉴权:令牌生成与验证
def create_jwt_token():
token_data = {"sub": "local_llm_user", "exp": datetime.utcnow() + timedelta(minutes=30)}
return jwt.encode(token_data, JWT_SECRET_KEY, algorithm="HS256")
def verify_jwt_token(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)):
try:
payload = jwt.decode(credentials.credentials, JWT_SECRET_KEY, algorithms=["HS256"])
if payload["exp"] < datetime.utcnow().timestamp():
raise HTTPException(status_code=401, detail="令牌过期")
return payload["sub"]
except JWTError:
raise HTTPException(status_code=401, detail="令牌无效")
核心价值:双鉴权模式适配不同场景(API Key 适合服务器间调用,JWT 适合多端用户调用),过期校验避免令牌滥用。
# 1. 生成参数扩展:支持temperature/top_p/返回格式
def generate_text(self, prompt, max_length, temperature=0.7, top_p=0.9, return_format="text"):
outputs = self.model.generate(
**inputs,
max_length=max_length,
temperature=temperature, # 控制随机性
top_p=top_p, # 控制采样范围
pad_token_id=self.tokenizer.eos_token_id
)
if return_format == "json":
return {"prompt": prompt, "result": result} # 多格式返回
# 2. 限流功能:内存级IP限流
rate_limit_store = {}  # per-IP counters kept in process memory only (no database)


def check_rate_limit(client_ip: str):
    """Enforce a fixed-window limit of 10 calls per 60 seconds for one IP.

    Args:
        client_ip: Key under which the caller's counter is stored.

    Raises:
        HTTPException: 429 when the IP already made 10 calls in the
            current window.
    """
    now = time.time()
    if client_ip not in rate_limit_store:
        rate_limit_store[client_ip] = {"count": 0, "start_time": now}
    record = rate_limit_store[client_ip]
    if now - record["start_time"] > 60:  # 1-minute window has elapsed
        record["count"] = 0
        # Start a new window as well. Without this the window stays
        # "expired" forever, the counter is zeroed on every later call,
        # and the limit is never enforced again.
        record["start_time"] = now
    if record["count"] >= 10:  # 10 calls per window
        raise HTTPException(status_code=429, detail="调用频率超限")
    record["count"] += 1
# 3. 历史记录:本地JSON文件存储
def save_generate_history(prompt: str, result: str, auth_type: str, client_ip: str):
history = []
if os.path.exists("generate_history.json"):
with open("generate_history.json", "r", encoding="utf-8") as f:
history = json.load(f)
history.append({
"id": len(history)+1,
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"prompt": prompt,
"result": result,
"auth_type": auth_type,
"client_ip": client_ip
})
with open("generate_history.json", "w", encoding="utf-8") as f:
json.dump(history, f, ensure_ascii=False, indent=2)
核心价值:无插件扩展实现生产级功能,内存限流/文件存储降低部署成本,参数扩展提升接口灵活性。
# 基础接口:获取JWT令牌
@app.post("/get-token")
def get_jwt_token(): ...
# 扩展接口:刷新JWT令牌
@app.post("/refresh-token")
def refresh_jwt_token(refresh_token: str = Header(None)): ...
# 核心接口:文本生成(API Key/JWT双版本)
@app.post("/generate-text-apikey")
def generate_by_apikey(...): ...
@app.post("/generate-text-jwt")
def generate_by_jwt(...): ...
# 扩展接口:查询历史记录
@app.get("/get-history")
def get_history(limit: int = Query(10, ge=1, le=100)): ...
核心价值:接口职责单一,扩展接口兼容原有逻辑,Query 参数校验避免非法查询。
# 页面配置:适配大屏/小屏
st.set_page_config(
page_title="本地大模型可视化平台",
page_icon="🤖",
layout="wide",
initial_sidebar_state="expanded"
)
# 会话状态:保存鉴权信息/历史记录(跨刷新不丢失)
if "api_key" not in st.session_state:
st.session_state.api_key = "default_key_123"
if "jwt_token" not in st.session_state:
st.session_state.jwt_token = ""
if "history" not in st.session_state:
st.session_state.history = []
核心价值:Streamlit 会话状态替代前端存储,无需 Cookie/本地存储,简化开发。
# 侧边栏鉴权选择:API Key/JWT切换
auth_type = st.sidebar.radio("选择鉴权方式", ("API Key鉴权", "JWT令牌鉴权"))
# JWT令牌获取/刷新
def get_jwt_token_full():
response = requests.post(f"{BASE_URL}/get-token")
if response.status_code == 200:
st.session_state.jwt_token = response.json()["access_token"]
st.session_state.refresh_token = response.json()["refresh_token"]
def refresh_jwt_token():
headers = {"Refresh-Token": st.session_state.refresh_token}
response = requests.post(f"{BASE_URL}/refresh-token", headers=headers)
st.session_state.jwt_token = response.json()["access_token"]
核心价值:可视化操作替代手动调用接口,令牌刷新自动化,降低用户操作成本。
# 生成参数可视化:滑块+单选框
max_length = st.slider("生成长度", 10, 1000, 150, 10)
temperature = st.slider("随机性(0.1-1.0)", 0.1, 1.0, 0.7, 0.1)
top_p = st.slider("采样策略(0.1-1.0)", 0.1, 1.0, 0.9, 0.1)
return_format = st.radio("返回格式", ("text", "json"), horizontal=True)
# 生成请求:适配后端参数
def generate_text(prompt, max_length, temperature, top_p, return_format):
headers = {"Content-Type": "application/json"}
if auth_type == "API Key鉴权":
headers["X-API-Key"] = st.session_state.api_key
url = f"{BASE_URL}/generate-text-apikey"
else:
headers["Authorization"] = f"Bearer {st.session_state.jwt_token}"
url = f"{BASE_URL}/generate-text-jwt"
data = {
"prompt": prompt,
"max_length": max_length,
"temperature": temperature,
"top_p": top_p,
"return_format": return_format
}
response = requests.post(url, headers=headers, json=data)
# 异常适配:限流/令牌过期
if response.status_code == 429:
return False, "调用频率超限,请稍后重试"
if response.status_code == 401 and "过期" in response.json()["detail"]:
return False, "令牌过期,请点击刷新令牌重试"
核心价值:可视化参数适配后端扩展,异常提示本地化,提升用户体验。
# 结果展示:文本/JSON切换
if success:
st.subheader("✅ 生成结果")
if return_format == "json":
st.json(json.loads(result)) # JSON格式化展示
else:
st.markdown(f"> {result}") # 文本友好展示
# 历史记录:本地+后端同步
if st.sidebar.button("🔄 同步后端历史"):
response = requests.get(f"{BASE_URL}/get-history", params={"limit": history_limit})
st.session_state.history = response.json()["data"]
# 历史记录展示:倒序+展开式
for idx, item in enumerate(reversed(st.session_state.history)):
with st.expander(f"📝 {item['timestamp']} | 鉴权:{item['auth_type']}"):
st.markdown(f"**提示词**:{item['prompt']}")
st.markdown(f"**生成结果**:{item['result']}")
核心价值:多格式展示适配不同需求,历史记录同步实现多端数据统一。
步骤 1:环境准备
步骤 2:启动服务


步骤 3:核心操作



步骤 1:新建请求
步骤 2:添加 API Key 请求头
步骤 3:填写请求体(提示词 + 生成长度)
{
"prompt": "写一段秋天的文案,温馨风格",
"max_length": 150
}
步骤 4:发送请求并查看结果
{
"code": 200,
"message": "生成成功",
"data": {
"prompt": "写一段秋天的文案,温馨风格",
"result": "在这个金黄的季节里,我们迎来了秋风的旋律,带来了丰收的喜悦和宁静的夜晚。秋天,是大自然赋予大地最深沉的色彩,是万物生长的季节,更是情感交融、心灵洗礼的时刻。\n\n天空中的云彩被染成了淡淡的橙色,阳光透过稀疏的树叶,洒在大地上,像是一幅精致的画卷。田野里的稻谷金黄一片,像是海洋中金色的波浪,随风摇曳,散发着诱人的香气。果园里的苹果熟透了,红彤彤的果实挂满了枝头,仿佛是一个个羞涩的小姑娘,在向人们展示着它们的甜蜜和美丽。"
}
}
接口调用成功的结果展示:

key值错误时的提示:

验证Value错误时的提示:

# 后端扩展示例:多模型加载
class LocalLLM:
def __init__(self):
self.models = {
"qwen": AutoModelForCausalLM.from_pretrained("path/to/qwen"),
"baichuan": AutoModelForCausalLM.from_pretrained("path/to/baichuan")
}
self.tokenizers = {
"qwen": AutoTokenizer.from_pretrained("path/to/qwen"),
"baichuan": AutoTokenizer.from_pretrained("path/to/baichuan")
}
# 后端扩展示例:IP白名单
WHITE_LIST_IPS = ["192.168.1.100", "127.0.0.1"]
def check_rate_limit(client_ip: str):
if client_ip in WHITE_LIST_IPS:
return # 白名单IP不限流
# 原有限流逻辑...
# 前端扩展示例:导出历史记录
import pandas as pd
if st.button("导出历史记录为Excel"):
df = pd.DataFrame(st.session_state.history)
df.to_excel("generate_history.xlsx", index=False)
st.success("导出成功!")
今天我们以“本地大模型 API 封装 + 可视化交互”为核心,从基础的鉴权接口实现,到业务扩展,完整覆盖了大模型本地化应用的核心环节。我们可通过基础模块理解核心逻辑,实际应用可基于扩展模块实现业务定制;先跑通再折腾!第一步先按说明装依赖、启动前后端,用默认配置生成一段文本,先感受全流程。
初次运行一般会遇到一些问题,比如模型加载失败就检查路径,端口被占用就把 8080 改为 8090 等,按照提示来修正即可。初期不用纠结复杂功能,先把基础的鉴权和生成用熟,比如试试两种鉴权方式的区别。想扩展就从简单的来,比如加个导出历史记录的按钮,或者调调 temperature 参数看效果。最后记住:这个示例只是脚手架,不用死磕原有代码,按自己的需求小步改,每跑通一个小功能都会很有收获!
# 260104-Streamlit_jwt_前端2
import streamlit as st
import requests
import json
from datetime import datetime
# ====================== 基础配置(无核心变化) ======================
st.set_page_config(
page_title="本地大模型可视化平台",
page_icon="🤖",
layout="wide",
initial_sidebar_state="expanded"
)
BASE_URL = "http://127.0.0.1:8080"
# 扩展:初始化更多会话状态
# Seed the Streamlit session with defaults. Keys that already exist are left
# untouched, so values entered earlier in the session are not overwritten.
_SESSION_DEFAULTS = (
    ("api_key", "default_key_123"),
    ("jwt_token", ""),
    ("refresh_token", ""),  # refresh token returned by /get-token
    ("history", []),
    ("return_format", "text"),  # last selected response format
)
for _state_key, _state_default in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# ====================== 侧边栏:扩展鉴权配置 ======================
st.sidebar.title("🔐 鉴权配置")
auth_type = st.sidebar.radio(
"选择鉴权方式",
("API Key鉴权", "JWT令牌鉴权"),
index=0
)
# API Key鉴权配置(无变化)
if auth_type == "API Key鉴权":
    # API-key mode: a masked input whose value is written back to the session.
    api_key = st.sidebar.text_input(
        "输入API Key",
        value=st.session_state.api_key,
        type="password"
    )
    st.session_state.api_key = api_key
else:
    # JWT mode: two button callbacks (fetch / refresh) plus a masked field
    # showing the current access token.

    def get_jwt_token_full():
        """Request an access/refresh token pair and store both in the session.

        Outcomes are reported in the sidebar; nothing is returned or raised.
        """
        try:
            # The token endpoint only signs a JWT, so it should answer at
            # once. The timeout keeps a stalled backend from blocking this
            # script run indefinitely.
            response = requests.post(f"{BASE_URL}/get-token", timeout=10)
            if response.status_code == 200:
                data = response.json()
                st.session_state.jwt_token = data["access_token"]
                st.session_state.refresh_token = data["refresh_token"]
                st.sidebar.success("✅ 令牌获取成功\n访问令牌30分钟\n刷新令牌7天")
            else:
                st.sidebar.error(f"❌ 获取失败:{response.json()['detail']}")
        except requests.exceptions.ConnectionError:
            st.sidebar.error("❌ 连接失败:请先启动FastAPI后端服务")
        except Exception as e:
            # Also reached on a read timeout or a non-JSON error body.
            st.sidebar.error(f"❌ 未知错误:{str(e)}")

    def refresh_jwt_token():
        """Exchange the stored refresh token for a new access token.

        Does nothing except show an error when no refresh token is stored.
        """
        if not st.session_state.refresh_token:
            st.sidebar.error("❌ 无刷新令牌,请先获取访问令牌")
            return
        try:
            headers = {"Refresh-Token": st.session_state.refresh_token}
            # Same short timeout as the token fetch above.
            response = requests.post(
                f"{BASE_URL}/refresh-token", headers=headers, timeout=10
            )
            if response.status_code == 200:
                data = response.json()
                st.session_state.jwt_token = data["access_token"]
                st.sidebar.success("✅ 令牌刷新成功\n新令牌有效期30分钟")
            else:
                st.sidebar.error(f"❌ 刷新失败:{response.json()['detail']}")
        except requests.exceptions.ConnectionError:
            st.sidebar.error("❌ 连接失败:请先启动FastAPI后端服务")
        except Exception as e:
            st.sidebar.error(f"❌ 刷新错误:{str(e)}")

    # Two buttons side by side: fetch a token pair / refresh the access token.
    col_btn1, col_btn2 = st.sidebar.columns(2)
    with col_btn1:
        st.button(
            "获取令牌",
            on_click=get_jwt_token_full,
            type="primary",
            use_container_width=True
        )
    with col_btn2:
        st.button(
            "刷新令牌",
            on_click=refresh_jwt_token,
            use_container_width=True
        )
    # Masked, editable view of the current access token; edits are stored back.
    jwt_token = st.sidebar.text_input(
        "当前访问令牌",
        value=st.session_state.jwt_token,
        type="password"
    )
    st.session_state.jwt_token = jwt_token
# 清空历史记录(无变化)
# Sidebar button that empties the history list held in this session.
clear_clicked = st.sidebar.button("🗑️ 清空本地历史", use_container_width=True)
if clear_clicked:
    st.session_state.history = []
    st.sidebar.success("本地历史记录已清空!")
# ====================== 核心函数(扩展) ======================
# 扩展:同步后端历史记录
def sync_history(limit: int = 10):
    """Replace the session history with records fetched from the backend.

    Args:
        limit: Maximum number of records to request from /get-history.

    Outcomes are reported in the sidebar; nothing is returned or raised.
    """
    try:
        # Bound the wait so an unresponsive backend cannot freeze the page.
        response = requests.get(
            f"{BASE_URL}/get-history", params={"limit": limit}, timeout=10
        )
        if response.status_code == 200:
            data = response.json()["data"]
            st.session_state.history = data
            st.sidebar.success(f"✅ 同步成功,获取{len(data)}条记录")
        else:
            st.sidebar.error(f"❌ 同步失败:{response.json()['detail']}")
    except requests.exceptions.ConnectionError:
        st.sidebar.error("❌ 连接失败:请先启动FastAPI后端服务")
    except Exception as e:
        # Also reached on a read timeout or an unexpected response body.
        st.sidebar.error(f"❌ 同步错误:{str(e)}")
# 扩展:历史记录配置(移到函数定义之后)
# Sidebar section: choose how many records to pull, then trigger the sync.
# Placed after sync_history so the function exists when the button fires.
st.sidebar.divider()
st.sidebar.subheader("📜 历史记录配置")
history_limit = st.sidebar.number_input(
    "查询历史记录条数", min_value=1, max_value=100, value=10
)
sync_requested = st.sidebar.button("🔄 同步后端历史", use_container_width=True)
if sync_requested:
    sync_history(history_limit)
# 扩展:生成文本(适配新增参数)
def generate_text(prompt, max_length, temperature, top_p, return_format):
    """Call the generation endpoint that matches the selected auth mode.

    On success the result is also appended to the session history.

    Returns:
        A ``(success, payload)`` tuple: ``(True, result)`` on HTTP 200,
        otherwise ``(False, message)`` with a user-facing error string.
    """
    use_api_key = auth_type == "API Key鉴权"
    url = f"{BASE_URL}/generate-text-apikey" if use_api_key else f"{BASE_URL}/generate-text-jwt"
    headers = {"Content-Type": "application/json"}
    if use_api_key:
        headers["X-API-Key"] = st.session_state.api_key
    else:
        headers["Authorization"] = f"Bearer {st.session_state.jwt_token}"

    payload = dict(
        prompt=prompt,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        return_format=return_format,
    )

    try:
        with st.spinner("🤖 模型正在生成内容..."):
            response = requests.post(url, headers=headers, json=payload)

        status = response.status_code
        if status == 200:
            result = response.json()["data"]["result"]
            # Mirror the record layout the backend uses for its history file.
            record = {
                "id": len(st.session_state.history) + 1,
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "prompt": prompt,
                "result": result,
                "auth_type": auth_type,
                "client_ip": "127.0.0.1",
            }
            st.session_state.history.append(record)
            return True, result

        # Every failure branch reports the backend's "detail" field.
        detail = response.json()["detail"]
        if status == 429:
            return False, f"❌ {detail}"
        if status == 401 and "过期" in detail and auth_type == "JWT令牌鉴权":
            # Expired token: point the user at the refresh button.
            return False, f"❌ {detail}\n👉 可点击侧边栏「刷新令牌」按钮重试"
        return False, f"❌ 生成失败:{detail}"
    except requests.exceptions.ConnectionError:
        return False, "❌ 连接失败:请先启动FastAPI后端服务"
    except Exception as e:
        return False, f"❌ 未知错误:{str(e)}"
# ====================== 主页面:扩展交互区域 ======================
# Main page: prompt box on the left (6/10 width), generation parameters on
# the right (4/10 width).
st.title("🤖 本地大模型文本生成平台")
st.divider()
col1, col2 = st.columns([6, 4])

prompt = col1.text_area(
    "输入提示词",
    placeholder="例如:写一段秋天的温馨文案、解释JWT鉴权的核心原理...",
    height=100,
    max_chars=500,
)

col2.subheader("⚙️ 生成参数")
max_length = col2.slider("生成长度", 10, 1000, 150, 10)
temperature = col2.slider("随机性(0.1-1.0)", 0.1, 1.0, 0.7, 0.1)
top_p = col2.slider("采样策略(0.1-1.0)", 0.1, 1.0, 0.9, 0.1)
# Response format; the choice is also kept in the session state.
return_format = col2.radio("返回格式", ("text", "json"), index=0, horizontal=True)
st.session_state.return_format = return_format
# 生成按钮(无变化)
# Generate button: validate the prompt, call the backend, render the outcome.
if st.button("🚀 开始生成", type="primary", use_container_width=True):
    if not prompt.strip():
        st.error("提示词不能为空!")
    else:
        success, result = generate_text(
            prompt,
            max_length,
            temperature,
            top_p,
            return_format
        )
        if not success:
            st.error(result)
        else:
            st.subheader("✅ 生成结果")
            if return_format == "json":
                # In "json" mode the backend returns an object, which
                # response.json() has already parsed into a dict. Calling
                # json.loads() on it raises TypeError, so only parse when the
                # result really is text.
                parsed = result
                if isinstance(result, str):
                    try:
                        parsed = json.loads(result)
                    except ValueError:
                        parsed = None  # not valid JSON: show it as plain text
                if parsed is None:
                    st.markdown(f"> {result}")
                else:
                    st.json(parsed)
            else:
                st.markdown(f"> {result}")
# ====================== 历史记录区域(适配后端格式) ======================
st.divider()
st.subheader("📜 生成历史")
# Render the session history newest-first, one expander per record.
history_items = st.session_state.history
if not history_items:
    st.info("暂无生成记录,开始你的第一次创作吧!\n👉 可点击侧边栏「同步后端历史」获取服务器记录")
else:
    for idx, item in enumerate(reversed(history_items)):
        # Fall back to placeholders when a record lacks the optional fields.
        time_str = item.get("timestamp", "未知时间")
        auth_str = item.get("auth_type", "未知")
        label = f"📝 {time_str} | 鉴权:{auth_str} | ID:{item.get('id', idx+1)}"
        with st.expander(label):
            st.markdown(f"**提示词**:{item['prompt']}")
            st.markdown(f"**生成结果**:{item['result']}")
# model_path = "D:\\modelscope\\hub\\qwen\\Qwen1___5-1___8B-Chat"
# 260103-调用JWT鉴权的接口
# 扩展版:新增生成参数、历史记录、令牌刷新、限流、格式控制
# 导入需要的库
from fastapi import FastAPI, Header, HTTPException, Depends, Query
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from dotenv import load_dotenv
from jose import jwt, JWTError
from datetime import datetime, timedelta
import os
import uvicorn
import json
import time
from typing import Optional, Literal
# ====================== 第一步:加载配置(新增限流配置) ======================
# Load settings once at import time. Secrets come from the environment (or a
# .env file) with fallbacks; the other values are fixed constants.
try:
    load_dotenv()

    # Credentials
    VALID_API_KEY = os.getenv("VALID_API_KEY", "default_key_123")
    JWT_SECRET_KEY = os.getenv("JWT_SECRET_KEY", "my_jwt_secret_123")

    # JWT signing algorithm and lifetimes
    JWT_ALGORITHM = "HS256"
    JWT_EXPIRE_MINUTES = 30
    JWT_REFRESH_EXPIRE_DAYS = 7  # refresh-token lifetime in days

    # Rate limiting: max calls per IP within one window (seconds)
    RATE_LIMIT = 10
    RATE_LIMIT_WINDOW = 60

    # File that stores the generation history
    HISTORY_FILE = "generate_history.json"

    print("✅ 配置加载成功")
except Exception as config_error:
    print(f"❌ 配置加载失败:{str(config_error)}")
    exit(1)
# ====================== 工具函数(新增:限流+历史记录) ======================
# 限流存储(内存级,重启后清空)
rate_limit_store = {}
def check_rate_limit(client_ip: str):
    """Count one call for ``client_ip`` and reject it when over the limit.

    Uses a fixed window of RATE_LIMIT_WINDOW seconds per IP, held in the
    in-memory ``rate_limit_store`` dict.

    Raises:
        HTTPException: 429 when the IP already made RATE_LIMIT calls in the
            current window.
    """
    current_time = time.time()
    # First call from this IP creates its record; later calls reuse it.
    window = rate_limit_store.setdefault(
        client_ip, {"count": 0, "start_time": current_time}
    )
    window_expired = current_time - window["start_time"] > RATE_LIMIT_WINDOW
    if window_expired:
        window.update(count=0, start_time=current_time)
    if window["count"] >= RATE_LIMIT:
        raise HTTPException(
            status_code=429,
            detail=f"❌ 调用频率超限(单IP每分钟最多{RATE_LIMIT}次),请稍后重试"
        )
    window["count"] += 1
def save_generate_history(prompt: str, result: str, auth_type: str, client_ip: str):
    """Append one generation record to the JSON history file.

    The file is read, extended and rewritten in full. An unreadable,
    malformed or non-list file is treated as an empty history rather than
    aborting the request.

    Args:
        prompt: Prompt text that was sent to the model.
        result: Generated output, stored as-is.
        auth_type: Label of the auth scheme used for the call.
        client_ip: IP the call was attributed to.
    """
    history = []
    if os.path.exists(HISTORY_FILE):
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers JSON and text-decoding errors. A bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            loaded = []
        # Valid JSON that is not a list (e.g. an object) cannot be appended
        # to, so start over instead of crashing on ``.append``.
        if isinstance(loaded, list):
            history = loaded
    history.append({
        "id": len(history) + 1,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "prompt": prompt,
        "result": result,
        "auth_type": auth_type,
        "client_ip": client_ip
    })
    with open(HISTORY_FILE, "w", encoding="utf-8") as f:
        json.dump(history, f, ensure_ascii=False, indent=2)
def get_generate_history(limit: int = 10):
    """Return the newest ``limit`` history records, highest ``id`` first.

    Returns an empty list when the history file is missing, unreadable,
    malformed, or does not contain a list of records with an ``id`` key.
    """
    if not os.path.exists(HISTORY_FILE):
        return []
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            history = json.load(f)
        if not isinstance(history, list):
            return []
        # Sort by id descending and keep the first ``limit`` entries.
        return sorted(history, key=lambda x: x["id"], reverse=True)[:limit]
    except (OSError, ValueError, KeyError, TypeError):
        # Named exceptions instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return []
# ====================== 第二步:单例加载模型(无核心变化) ======================
class LocalLLM:
    """Singleton wrapper around a locally stored causal language model.

    ``__new__`` always hands back the same instance and ``__init__`` loads
    the tokenizer and model only on the first construction, so repeated
    ``LocalLLM()`` calls do not load the weights again.
    """

    _instance = None       # the single shared instance
    _initialized = False   # set once the model has been loaded

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Load tokenizer and model from the hard-coded path on first use.

        Any load failure prints a diagnostic and terminates the process with
        exit status 1.
        """
        if LocalLLM._initialized:
            return
        model_path = "D:\\modelscope\\hub\\qwen\\Qwen1___5-1___8B-Chat"
        try:
            print("正在加载本地大模型...")
            if not os.path.exists(model_path):
                raise FileNotFoundError(
                    f"模型文件夹不存在:{model_path}\n解决:1. 运行download_small_model.py下载小模型;2. 确认路径正确")
            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )
            self.model.eval()
            LocalLLM._initialized = True
            print("✅ 模型加载完成!")
        except FileNotFoundError as e:
            print(f"❌ 模型加载失败:{str(e)}")
            exit(1)
        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                print(f"❌ 内存不足,请关闭其他程序(如浏览器/微信),或换更小的模型")
            else:
                print(f"❌ 模型运行错误:{str(e)}")
            exit(1)
        except Exception as e:
            print(f"❌ 模型加载未知错误:{str(e)}")
            exit(1)

    def generate_text(self, prompt, max_length, temperature=0.7, top_p=0.9, return_format="text"):
        """Generate a continuation for ``prompt``.

        Args:
            prompt: Non-blank prompt text, at most 512 tokens once tokenized.
            max_length: Maximum number of newly generated tokens.
            temperature: Sampling temperature forwarded to ``generate``.
            top_p: Nucleus-sampling threshold forwarded to ``generate``.
            return_format: ``"json"`` returns a dict with ``prompt`` and
                ``result`` keys; any other value returns the text.

        Raises:
            HTTPException: 400 for a blank or over-long prompt or an empty
                generation; 500 for any other failure during inference.
        """
        try:
            if not prompt.strip():
                raise ValueError("提示词不能为空")
            # Tokenize without truncation so the length check below can fire.
            # With truncation to 512 the input never exceeded 512 tokens and
            # over-long prompts were silently cut.
            inputs = self.tokenizer(prompt, return_tensors="pt")
            prompt_tokens = inputs.input_ids.shape[1]
            if prompt_tokens > 512:
                raise ValueError(f"提示词过长({prompt_tokens}token),最大支持512token")
            with torch.no_grad():
                # max_new_tokens bounds only the generated part. max_length
                # also counts the prompt, so a prompt longer than the
                # requested length left no room for any output.
                # NOTE(review): temperature/top_p only take effect when
                # sampling is enabled. This relies on the model's own
                # generation config to enable it; confirm for the model used.
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    temperature=temperature,
                    top_p=top_p,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Decode only the tokens after the prompt. Removing the prompt
            # with str.replace is fragile: the decoded prompt may not match
            # the input text exactly, and repeats of it inside the answer
            # would be deleted as well.
            new_tokens = outputs[0][prompt_tokens:]
            result = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
            if not result:
                raise ValueError("模型未生成内容,请换提示词")
            if return_format == "json":
                return {"prompt": prompt, "result": result}
            return result
        except ValueError as e:
            raise HTTPException(status_code=400, detail=f"参数错误:{str(e)}")
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"推理错误:{str(e)}")
# 创建模型实例
# Build the shared model instance at import time; stop the process if the
# constructor raises.
try:
    llm = LocalLLM()
except Exception as init_error:
    print(f"❌ 模型初始化失败:{init_error}")
    exit(1)
# ====================== 第三步:JWT鉴权扩展(新增刷新令牌) ======================
bearer_scheme = HTTPBearer()
# 扩展:生成刷新令牌
def create_refresh_token():
    """Sign a refresh token valid for JWT_REFRESH_EXPIRE_DAYS days.

    Returns:
        The encoded JWT with subject "local_llm_refresh".

    Raises:
        HTTPException: 500 when encoding fails.
    """
    try:
        expires_at = datetime.utcnow() + timedelta(days=JWT_REFRESH_EXPIRE_DAYS)
        claims = {"sub": "local_llm_refresh", "exp": expires_at}
        return jwt.encode(claims, JWT_SECRET_KEY, algorithm=JWT_ALGORITHM)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"生成刷新令牌失败:{str(e)}")
# 原有:生成访问令牌
def create_jwt_token():
    """Sign an access token valid for JWT_EXPIRE_MINUTES minutes.

    Returns:
        The encoded JWT with subject "local_llm_user".
    """
    expires_at = datetime.utcnow() + timedelta(minutes=JWT_EXPIRE_MINUTES)
    claims = {"sub": "local_llm_user", "exp": expires_at}
    return jwt.encode(claims, JWT_SECRET_KEY, algorithm=JWT_ALGORITHM)
# 原有:验证访问令牌
def verify_jwt_token(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)):
    """Validate the Bearer access token and return its subject.

    Returns:
        The ``sub`` claim of the token.

    Raises:
        HTTPException: 401 when the token is expired, malformed, wrongly
            signed, or is not an access token.
    """
    # Local import keeps this fix self-contained; jose is already a
    # dependency of this file.
    from jose.exceptions import ExpiredSignatureError

    try:
        payload = jwt.decode(
            credentials.credentials, JWT_SECRET_KEY, algorithms=[JWT_ALGORITHM]
        )
        subject = payload.get("sub")
        # Refresh tokens are signed with the same key and differ only in
        # their subject. Without this check a 7-day refresh token would be
        # accepted as an access token.
        if subject != "local_llm_user":
            raise HTTPException(status_code=401, detail="❌ JWT令牌无效/签名错误\n解决:检查令牌是否正确,或重新获取")
        return subject
    except HTTPException:
        # Pass our own 401 through instead of re-wrapping it below.
        raise
    except ExpiredSignatureError:
        # jose checks ``exp`` during decode and raises this JWTError
        # subclass. It must be caught before JWTError, otherwise an expired
        # token is reported as "invalid" and the client never sees the
        # refresh hint.
        raise HTTPException(status_code=401, detail="❌ JWT令牌已过期\n解决:调用/refresh-token刷新令牌")
    except JWTError:
        raise HTTPException(status_code=401, detail="❌ JWT令牌无效/签名错误\n解决:检查令牌是否正确,或重新获取")
    except Exception as e:
        raise HTTPException(status_code=401, detail=f"❌ 令牌验证失败:{str(e)}")
# 扩展:验证刷新令牌并生成新的访问令牌
def verify_refresh_token(refresh_token: str):
    """Validate a refresh token and mint a new access token.

    Args:
        refresh_token: Encoded JWT taken from the Refresh-Token header.

    Returns:
        A newly signed access token.

    Raises:
        HTTPException: 401 when the token is expired, malformed, wrongly
            signed, or is not a refresh token.
    """
    # Local import keeps this fix self-contained; jose is already a
    # dependency of this file.
    from jose.exceptions import ExpiredSignatureError

    try:
        payload = jwt.decode(refresh_token, JWT_SECRET_KEY, algorithms=[JWT_ALGORITHM])
        # Accept only real refresh tokens. Otherwise a 30-minute access
        # token could be used to refresh itself indefinitely.
        if payload.get("sub") != "local_llm_refresh":
            raise HTTPException(status_code=401, detail="❌ 刷新令牌无效/签名错误")
        return create_jwt_token()
    except HTTPException:
        # Pass our own 401 through instead of re-wrapping it below.
        raise
    except ExpiredSignatureError:
        # Raised by jose during decode; must be caught before JWTError so
        # the caller gets the "expired" message, not "invalid".
        raise HTTPException(status_code=401, detail="❌ 刷新令牌已过期,请重新获取访问令牌")
    except JWTError:
        raise HTTPException(status_code=401, detail="❌ 刷新令牌无效/签名错误")
    except Exception as e:
        raise HTTPException(status_code=401, detail=f"❌ 刷新令牌验证失败:{str(e)}")
# ====================== 第四步:FastAPI接口扩展 ======================
app = FastAPI(title="本地大模型API(扩展版)", description="新增参数、历史、限流、刷新令牌")
# 扩展:请求体模型(新增temperature、top_p、return_format)
class LLMRequest(BaseModel):
    # Request body shared by both text-generation endpoints. Every field is
    # range-checked by the Field constraints before the handler runs.
    prompt: str = Field(..., description="提示词", max_length=500)
    max_length: int = Field(100, description="生成长度", ge=10, le=1000)
    # Description now states the enforced 0.1-1.0 range (it said 0-1).
    temperature: float = Field(0.7, description="随机性(0.1-1.0,值越高越随机)", ge=0.1, le=1.0)
    top_p: float = Field(0.9, description="采样策略(0.1-1.0)", ge=0.1, le=1.0)
    return_format: Literal["text", "json"] = Field("text", description="返回格式")
# 1. 扩展:获取JWT令牌(新增刷新令牌返回)
@app.post("/get-token", summary="获取JWT令牌(含刷新令牌)")
def get_jwt_token():
    """Issue a new access token together with a refresh token."""
    response_body = {
        "code": 200,
        "message": "令牌生成成功(访问令牌30分钟,刷新令牌7天)",
    }
    # Access token first, then refresh token, matching the response layout.
    response_body["access_token"] = create_jwt_token()
    response_body["refresh_token"] = create_refresh_token()
    response_body["token_type"] = "bearer"
    return response_body
# 2. 新增:刷新JWT令牌接口
@app.post("/refresh-token", summary="刷新JWT访问令牌")
def refresh_jwt_token(refresh_token: str = Header(None)):
    """Exchange the Refresh-Token header for a new access token.

    Raises:
        HTTPException: 400 when the header is missing or empty; errors from
            token verification propagate unchanged.
    """
    if refresh_token:
        return {
            "code": 200,
            "message": "令牌刷新成功(新令牌有效期30分钟)",
            "access_token": verify_refresh_token(refresh_token),
            "token_type": "bearer"
        }
    raise HTTPException(status_code=400, detail="❌ 请传入Refresh-Token请求头")
# 3. 扩展:API Key鉴权生成接口(新增参数、限流、历史记录)
@app.post("/generate-text-apikey", summary="API Key鉴权-文本生成(扩展版)")
def generate_by_apikey(
        request: LLMRequest,
        x_api_key: str = Header(None),
        client_ip: str = Header(None, alias="X-Real-IP")  # client IP as set by a reverse proxy
):
    """Generate text for a caller authenticated by the X-API-Key header.

    Steps: check the key, apply the per-IP rate limit, run the model, then
    append the call to the history file.

    Raises:
        HTTPException: 401 for a missing or wrong key; 429 and generation
            errors propagate from the helpers.
    """
    import hmac  # function-scope import: only needed for the key comparison

    # 1. Authentication
    if not x_api_key:
        raise HTTPException(status_code=401, detail="❌ 请传入X-API-Key")
    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Encoding to bytes lets compare_digest accept non-ASCII
    # input.
    supplied_key = x_api_key.strip().encode("utf-8")
    if not hmac.compare_digest(supplied_key, VALID_API_KEY.encode("utf-8")):
        raise HTTPException(status_code=401, detail="❌ API Key错误")
    # 2. Rate limiting
    # NOTE(review): X-Real-IP is a client-controllable header unless a
    # trusted proxy overwrites it, and all callers without it share the
    # 127.0.0.1 bucket. Confirm the deployment sits behind such a proxy.
    real_ip = client_ip or "127.0.0.1"
    check_rate_limit(real_ip)
    # 3. Generation
    result = llm.generate_text(
        request.prompt,
        request.max_length,
        request.temperature,
        request.top_p,
        request.return_format
    )
    # 4. History
    save_generate_history(request.prompt, result, "api_key", real_ip)
    return {"code": 200, "message": "生成成功", "data": {"prompt": request.prompt, "result": result}}
# 4. 扩展:JWT鉴权生成接口(新增参数、限流、历史记录)
@app.post("/generate-text-jwt", summary="JWT鉴权-文本生成(扩展版)")
def generate_by_jwt(
        request: LLMRequest,
        username: str = Depends(verify_jwt_token),
        client_ip: str = Header(None, alias="X-Real-IP")
):
    """Generate text for a caller authenticated by a Bearer JWT.

    The token is verified by the ``verify_jwt_token`` dependency before this
    body runs. The handler then applies the per-IP rate limit, runs the
    model, and appends the call to the history file.
    """
    # Callers that send no X-Real-IP header are attributed to 127.0.0.1.
    real_ip = client_ip if client_ip else "127.0.0.1"
    check_rate_limit(real_ip)

    generation_args = (
        request.prompt,
        request.max_length,
        request.temperature,
        request.top_p,
        request.return_format,
    )
    result = llm.generate_text(*generation_args)

    save_generate_history(request.prompt, result, "jwt", real_ip)
    data = {"prompt": request.prompt, "result": result}
    return {"code": 200, "message": f"用户{username}生成成功", "data": data}
# 5. 新增:查询生成历史接口
@app.get("/get-history", summary="查询生成历史记录")
def get_history(limit: int = Query(10, ge=1, le=100)):
    """Return up to ``limit`` of the most recent history records.

    ``limit`` is validated by FastAPI to lie between 1 and 100.
    """
    records = get_generate_history(limit)
    return dict(
        code=200,
        message=f"获取最近{limit}条记录成功",
        data=records,
    )
# ====================== 第五步:启动服务(无核心变化) ======================
if __name__ == "__main__":
    try:
        # Pass the app object, not the "main:app" import string. When this
        # file runs as a script it is already loaded as ``__main__``; the
        # string form makes uvicorn import it again as ``main``, which
        # re-executes the module and loads the model a second time. It also
        # only works when the file is named main.py.
        uvicorn.run(
            app,
            host="0.0.0.0",
            port=8080,
            reload=False,
            log_level="debug",
            access_log=True
        )
    except OSError as e:
        if "address already in use" in str(e).lower():
            print(f"❌ 服务启动失败:端口8080被占用\n解决:1. 改端口为8090;2. 结束占用8080的程序")
        else:
            print(f"❌ 服务启动失败:{str(e)}")
        exit(1)
    except Exception as e:
        print(f"❌ 服务启动失败:{str(e)}\n解决:检查Python依赖是否安装完整")
        exit(1)
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。