发布时间:2025-10-16
# Install Python, the venv tool, pip, and build prerequisites.
sudo apt update && sudo apt install -y python3 python3-venv python3-pip git gcc

# Create an isolated virtual environment and activate it.
python3 -m venv phi3-env
source phi3-env/bin/activate
# NOTE(review): the activation command is identical on Ubuntu/Debian and
# CentOS — the original repeated the same line for CentOS; deduplicated here.
# Upgrade pip itself first so dependency resolution works with pinned wheels.
pip install --upgrade pip

# Install CPU-only PyTorch wheels (suited to low-spec servers without a GPU).
pip3 install torch==2.1.0+cpu torchvision==0.16.0+cpu torchaudio==2.1.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html

# Model-serving dependencies; flask provides the HTTP API layer.
pip install transformers==4.38.2 accelerate==0.30.1 sentencepiece==0.1.99 flask==2.3.3

# Required by the server script's BitsAndBytesConfig 4-bit quantization
# (was missing from the original install list).
# NOTE(review): bitsandbytes needs a CUDA GPU at load time; with the CPU-only
# torch wheels above, the 4-bit config will fail — confirm target hardware.
pip install bitsandbytes
# Optional: Hugging Face CLI for faster, resumable downloads.
# Quote the extras spec so zsh does not treat the brackets as a glob pattern.
pip install "huggingface-hub[cli]"

# Log in with an access token (create one at
# https://huggingface.co/settings/tokens).
huggingface-cli login

# Download the INT4-quantized snapshot (~4 GB; best fit for 8 GB RAM).
# NOTE: the canonical repo id spells "4k" in lowercase.
huggingface-cli download microsoft/Phi-3-mini-4k-instruct --local-dir phi3-model --local-dir-use-symlinks False --revision main
"""Minimal Flask API server exposing a local Phi-3 model at POST /api/chat."""
import torch  # BUGFIX: was missing — torch.float32 below raised NameError
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

app = Flask(__name__)

# Quantization settings (key to reducing memory footprint).
# NOTE(review): 4-bit bitsandbytes quantization requires the `bitsandbytes`
# package and a CUDA GPU; it will not load on a CPU-only PyTorch install —
# confirm the deployment hardware before relying on this config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,               # enable 4-bit weight quantization
    bnb_4bit_use_double_quant=True,  # nested quantization for extra savings
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32,
)

# Load tokenizer and model from the locally downloaded snapshot directory.
tokenizer = AutoTokenizer.from_pretrained("./phi3-model")
model = AutoModelForCausalLM.from_pretrained(
    "./phi3-model",
    quantization_config=bnb_config,
    device_map="auto",       # let accelerate place layers (CPU-first here)
    trust_remote_code=True,
)


def generate_response(prompt, max_new_tokens=512, temperature=0.7):
    """Generate a reply for *prompt* using the Phi-3 chat markers.

    Returns only the assistant's portion of the decoded output (everything
    after the last "<|assistant|>" marker, stripped of whitespace).
    """
    inputs = tokenizer(
        f"<|user|>\n{prompt}\n<|assistant|>",
        return_tensors="pt",
        truncation=True,
        max_length=4096,     # matches the model's 4k context window
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded.split("<|assistant|>")[-1].strip()


@app.route("/api/chat", methods=["POST"])
def chat_api():
    """HTTP endpoint: accepts {"prompt": ...} JSON, returns {"response": ...}."""
    # get_json(silent=True) avoids a 500 when the body is missing or not JSON;
    # the original `request.json` raised in that case.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "请介绍一下自己")
    response = generate_response(prompt)
    return jsonify({"response": response})


if __name__ == "__main__":
    # Bind to all interfaces on port 5000; debug disabled for production.
    app.run(host="0.0.0.0", port=5000, debug=False)
# Launch the server in the background so it survives the terminal closing.
nohup python phi3_server.py > phi3.log 2>&1 &

# Follow the log to confirm a clean start.
tail -f phi3.log
# Success marker in the log: "Running on http://0.0.0.0:5000"
# Smoke-test the API with curl (replace 你的服务器IP with the server's address).
# Restored the line continuations that were fused in the original paste
# ("\-H" is not valid as written).
curl -X POST http://你的服务器IP:5000/api/chat \
  -H "Content-Type: application/json" \
  -d '{"prompt": "请帮我写一个Python爬虫脚本,爬取网页标题"}'
# Create a 4 GB swap file to cushion memory spikes during model loading.
sudo fallocate -l 4G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile && sudo swapon /swapfile
# NOTE(review): swap enabled this way does not survive a reboot; append
# "/swapfile none swap sw 0 0" to /etc/fstab to make it permanent.
# Serve through gunicorn for a more robust process model than Flask's dev server.
pip install gunicorn
# Two worker processes to match a 2-core CPU.
# NOTE(review): each gunicorn worker loads its own copy of the model, roughly
# doubling RAM use — on an 8 GB box a single worker (-w 1) may be safer.
gunicorn -w 2 -b 0.0.0.0:5000 phi3_server:app