{"id":80257,"date":"2026-03-04T19:41:57","date_gmt":"2026-03-04T11:41:57","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/80257.html"},"modified":"2026-03-04T19:41:57","modified_gmt":"2026-03-04T11:41:57","slug":"%e6%9c%8d%e5%8a%a1%e5%99%a8%e4%b8%8a%e9%83%a8%e7%bd%b2%e5%a4%a7%e6%a8%a1%e5%9e%8b%ef%bc%88ubuntu24-04-3%ef%bc%89","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/80257.html","title":{"rendered":"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09"},"content":{"rendered":"<h3>\u4e00\u3001\u521b\u5efa\u865a\u62df\u73af\u5883<\/h3>\n<p>\u9996\u5148\u521b\u5efa\u865a\u62df\u73af\u5883<\/p>\n<p>conda create -n ai_dev<\/p>\n<p>\u6fc0\u6d3b\u865a\u62df\u73af\u5883<\/p>\n<p>conda activate ai_dev<\/p>\n<p>\u4f7f\u7528conda\u5b89\u88c5pytorch<\/p>\n<p>conda \u4f1a\u81ea\u52a8\u5904\u7406 CUDA \u7248\u672c\u517c\u5bb9\u548c\u4f9d\u8d56&#xff0c;\u907f\u514d pip \u5b89\u88c5\u65f6\u7684\u7248\u672c\u4e0d\u5339\u914d\u95ee\u9898<\/p>\n<p>conda install pytorch&#061;&#061;2.2.0 torchvision&#061;&#061;0.17.0 torchaudio&#061;&#061;2.2.0 pytorch-cuda&#061;12.1 -c pytorch -c nvidia<\/p>\n<p>pytorch\u5b89\u88c5\u6210\u529f<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"1262\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114151-69a81a7f34f41.png\" width=\"2165\" \/><\/p>\n<p>\u5728 ai_dev \u73af\u5883\u4e2d\u6267\u884c\u4ee5\u4e0b\u547d\u4ee4&#xff0c;\u68c0\u67e5 PyTorch \u548c CUDA \u662f\u5426\u53ef\u7528&#xff1a;<\/p>\n<p>python -c &#034;import torch; print(&#039;PyTorch\u7248\u672c:&#039;, torch.__version__); print(&#039;CUDA\u53ef\u7528:&#039;, torch.cuda.is_available())&#034;<\/p>\n<p>\u6b64\u5904\u6211\u62a5\u4e86\u9519\u8bef<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"1114\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114152-69a81a808ebd8.png\" width=\"2180\" \/><\/p>\n<p>\u964d\u7ea7numpy<\/p>\n<p>conda 
\u4f1a\u81ea\u52a8\u5904\u7406\u4f9d\u8d56\u51b2\u7a81&#xff0c;\u5982\u679c opencv-python \u9700\u8981 numpy&gt;&#061;2&#xff0c;\u5b83\u4f1a\u63d0\u793a\u4f60\u662f\u5426\u964d\u7ea7 opencv-python&#xff0c;\u9009\u62e9 y \u5373\u53ef\u3002<\/p>\n<p>conda install &#034;numpy&lt;2&#034;<\/p>\n<p>\u6267\u884c\u4ee5\u4e0b\u547d\u4ee4&#xff0c;\u68c0\u67e5 NumPy \u7248\u672c\u548c PyTorch \u521d\u59cb\u5316\u662f\u5426\u6b63\u5e38&#xff1a;<\/p>\n<p>python -c &#034;<br \/>\nimport numpy<br \/>\nimport torch<br \/>\nprint(&#039;NumPy\u7248\u672c:&#039;, numpy.__version__)<br \/>\nprint(&#039;PyTorch\u7248\u672c:&#039;, torch.__version__)<br \/>\nprint(&#039;CUDA\u53ef\u7528:&#039;, torch.cuda.is_available())<br \/>\n&#034;<\/p>\n<p>\u73af\u5883\u5df2\u7ecf\u5c31\u7eea&#xff1a;<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"747\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114153-69a81a81ca2fd.png\" width=\"1452\" \/><\/p>\n<h3>\u4e8c\u3001\u5927\u6a21\u578b\u90e8\u7f72<\/h3>\n<p>\u5b89\u88c5git-lfs<\/p>\n<p>sudo apt update &amp;&amp; sudo apt install -y git-lfs<\/p>\n<p>\u521b\u5efa\u76ee\u5f55\u5e76\u4e0b\u8f7d\u5f00\u6e90\u6a21\u578b<\/p>\n<p># 1. \u521b\u5efa\u76ee\u5f55\u7ed3\u6784&#xff08;\u5728\u4f60\u7684\u7528\u6237\u76ee\u5f55\u4e0b&#xff09;<br \/>\nmkdir -p ~\/ai_project ~\/models\/llm ~\/models\/sdxl<br \/>\ncd ~\/ai_project<\/p>\n<p># 2. 
\u4e0b\u8f7d Qwen-7B-Chat&#xff08;\u5f00\u6e90LLM&#xff0c;\u6587\u672c\u6838\u5fc3&#xff09;<br \/>\ngit lfs install<br \/>\ngit clone https:\/\/www.modelscope.cn\/qwen\/Qwen-7B-Chat.git \/home\/ubuntu\/models\/llm\/Qwen-7B-Chat<\/p>\n<p>\u67e5\u770b\u6a21\u578b\u90e8\u7f72\u4f4d\u7f6e<\/p>\n<p>ls ~\/models\/llm\/Qwen-7B-Chat\/<\/p>\n<p>\u6a21\u578b\u6743\u91cd\u6587\u4ef6&#xff1a;<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"255\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114154-69a81a822df23.png\" width=\"1996\" \/><\/p>\n<p>\u786e\u4fdd\u5728 ai_project \u76ee\u5f55\u4e0b&#xff1a;<\/p>\n<p>cd ~\/ai_project<\/p>\n<p>\u521b\u5efa llm_deploy.py \u6587\u4ef6<\/p>\n<p>vim llm_deploy.py<\/p>\n<ul>\n<li>\u6309 i \u952e&#xff08;\u5e95\u90e8\u51fa\u73b0 -- INSERT --&#xff09;&#xff0c;\u8fdb\u5165\u7f16\u8f91\u6a21\u5f0f&#xff1b;<\/li>\n<li>\u7c98\u8d34\u4ee3\u7801<\/li>\n<\/ul>\n<p># \u96a7\u9053\u901a\u7528\u95ee\u9898\u95ee\u7b54&#xff08;\u5f7b\u5e95\u622a\u65ad\u65e0\u5173\u5185\u5bb9&#xff09;<br \/>\ndef general_qa(self, query):<br \/>\n    prompt &#061; f&#034;&#034;&#034;<br \/>\n    \u4f60\u662f\u4e13\u4e1a\u7684\u96a7\u9053\u5de5\u7a0b\u54a8\u8be2\u52a9\u624b&#xff0c;\u4ec5\u56de\u7b54\u96a7\u9053\u76f8\u5173\u4e13\u4e1a\u95ee\u9898&#xff0c;\u56de\u7b54\u8981\u7b80\u6d01\u3001\u4e13\u4e1a\u3001\u6613\u61c2\u3002<br \/>\n    \u7528\u6237\u95ee\u9898&#xff1a;{query}<br \/>\n    \u5f3a\u5236\u8981\u6c42&#xff1a;\u53ea\u8f93\u51fa\u95ee\u9898\u7684\u6838\u5fc3\u56de\u7b54&#xff0c;\u7981\u6b62\u7eed\u5199\u4efb\u4f55\u65e0\u5173\u5185\u5bb9\u3001\u6d4b\u8bd5\u9898\u3001\u5176\u4ed6\u9886\u57df\u77e5\u8bc6&#xff01;\u56de\u7b54\u5b8c\u7acb\u5373\u505c\u6b62\u3002<br \/>\n    &#034;&#034;&#034;<br \/>\n    start_time &#061; time.time()<br \/>\n    messages &#061; [{&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: prompt}]<br \/>\n    text &#061; self.tokenizer.apply_chat_template(<br \/>\n        messages,<br 
\/>\n        tokenize&#061;False,<br \/>\n        add_generation_prompt&#061;True<br \/>\n    )<br \/>\n    model_inputs &#061; self.tokenizer([text], return_tensors&#061;&#034;pt&#034;).to(DEVICE)<\/p>\n<p>    # \u79fb\u9664\u4e86\u4e0d\u652f\u6301\u7684 stop_sequence \u53c2\u6570<br \/>\n    generated_ids &#061; self.model.generate(<br \/>\n        **model_inputs,<br \/>\n        generation_config&#061;self.model.generation_config<br \/>\n    )<br \/>\n    generated_ids &#061; [<br \/>\n        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)<br \/>\n    ]<br \/>\n    response &#061; self.tokenizer.batch_decode(generated_ids, skip_special_tokens&#061;True)[0]<\/p>\n<p>    # \u540e\u5904\u7406&#xff1a;\u622a\u65ad\u6240\u6709\u65e0\u5173\u5185\u5bb9&#xff0c;\u53ea\u4fdd\u7559\u6838\u5fc3\u56de\u7b54<br \/>\n    stop_keywords &#061; [<br \/>\n        &#034;\u51c6\u5676\u5c14\u76c6\u5730&#034;, &#034;\u4e0b\u5217\u54ea\u4e2a\u9009\u9879&#034;, &#034;\u4ee5\u4e0b\u54ea\u4e2a\u9009\u9879&#034;, &#034;\u8bba\u6587\u7684\u5173\u952e\u8bcd&#034;,<br \/>\n        &#034;Photoshop&#034;, &#034;\u7f16\u7a0b\u8bed\u8a00&#034;, &#034;\u9009\u62e9\u9898&#034;, &#034;1. &#034;, &#034;2. &#034;, &#034;3. &#034;, &#034;4. 
&#034;,<br \/>\n        &#034;\u5473\u7cbe\u7684\u4e3b\u8981\u6210\u5206\u662f\u4ec0\u4e48&#xff1f;&#034;, &#034;iMore\u8be6\u7ec6\u89e3\u91ca\u4e00\u4e0b&#034;, &#034;\u5b9a\u4e49\u201c\u4eba\u5de5\u667a\u80fd\u201d&#034;,<br \/>\n        &#034;\u7701\u7565\u53f7\u7684\u4f5c\u7528\u662f\u4ec0\u4e48&#034;, &#034;\u6162\u6027\u963b\u585e\u6027\u80ba\u75be\u75c5\u7684\u8bca\u65ad\u4f9d\u636e&#034;, &#034;\u6d77\u5916\u5e02\u573a&#034;,<br \/>\n        &#034;\u667a\u80fd\u5065\u5eb7\u624b\u73af&#034;, &#034;\u4ea7\u54c1\u540d\u79f0&#034;, &#034;\u76ee\u6807\u53d7\u4f17&#034;, &#034;\u9884\u7b97&#034;, &#034;\u65b9\u6cd5&#034;, &#034;\u89e3\u91ca&#034;,<br \/>\n        &#034;\u9884\u8ba1\u6548\u679c&#034;, &#034;\u60f3\u51fa\u4e00\u79cd\u65b0\u7684\u65b9\u5f0f&#034;, &#034;\u63a8\u5e7f\u4e00\u6b3e\u65b0\u4ea7\u54c1&#034;, &#034;\u4e3a\u4ec0\u4e48\u8fd9\u79cd\u65b9\u6cd5\u6709\u6548&#034;<br \/>\n    ]<br \/>\n    for kw in stop_keywords:<br \/>\n        if kw in response:<br \/>\n            response &#061; response.split(kw)[0].strip()<br \/>\n            break<\/p>\n<p>    # \u6e05\u7406\u9996\u5c3e\u591a\u4f59\u7a7a\u683c\/\u6362\u884c<br \/>\n    response &#061; response.strip()<\/p>\n<p>    mem_used &#061; torch.cuda.max_memory_allocated(DEVICE) \/ 1024 \/ 1024 \/ 1024<br \/>\n    print(f&#034;\u901a\u7528\u95ee\u7b54\u63a8\u7406\u8017\u65f6&#xff1a;{time.time()-start_time:.2f}s&#xff0c;\u663e\u5b58\u5360\u7528&#xff1a;{mem_used:.2f}G&#034;)<br \/>\n    return response<\/p>\n<ul>\n<li>\u6309 Esc \u952e\u9000\u51fa\u7f16\u8f91\u6a21\u5f0f&#xff0c;\u8f93\u5165 :wq \u6309\u56de\u8f66&#xff0c;\u4fdd\u5b58\u5e76\u9000\u51fa\u3002<\/li>\n<\/ul>\n<p>\u6267\u884c ls \u547d\u4ee4&#xff0c;\u7ec8\u7aef\u663e\u793a llm_deploy.py \u5c31\u8bf4\u660e\u6587\u4ef6\u521b\u5efa\u6210\u529f<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"139\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114154-69a81a825e477.png\" width=\"944\" 
\/><\/p>\n<p>\u8fd0\u884cLLM<\/p>\n<p>\u6267\u884c\u547d\u4ee4&#xff08;\u6307\u5b9a\u7528\u5361 0&#xff0c;\u907f\u514d\u663e\u5b58\u51b2\u7a81&#xff09;&#xff1a;<\/p>\n<p>CUDA_VISIBLE_DEVICES&#061;0 python llm_deploy.py<\/p>\n<p>\u8fd9\u91cc\u62a5\u9519\u6ca1\u6709\u5b89\u88c5transformers\u4f9d\u8d56&#xff1a;<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"212\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114154-69a81a827317d.png\" width=\"1482\" \/><\/p>\n<p>\u5b89\u88c5transformers&#xff1a;<\/p>\n<p>python -m pip install transformers&#061;&#061;4.37.2 torchvision&#061;&#061;0.17.2 --break-system-packages<\/p>\n<p>\u518d\u6b21\u8fd0\u884cLLM<\/p>\n<p>\u8fd9\u91cc\u62a5\u9519<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"136\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114154-69a81a82942a1.png\" width=\"1779\" \/><\/p>\n<p>\u5b89\u88c5\u8fd9\u4e2a\u5305&#xff1a;<\/p>\n<p>python -m pip install tiktoken --break-system-packages<\/p>\n<p>\u518d\u5b89\u88c5\u53e6\u5916\u4e24\u4e2a\u4f9d\u8d56\u5305<\/p>\n<p>python -m pip install einops transformers_stream_generator --break-system-packages<\/p>\n<p>\u5b89\u88c5torch<\/p>\n<p>python -m pip install torch&#061;&#061;2.3.0 torchvision&#061;&#061;0.18.0 --break-system-packages<\/p>\n<p>\u5b89\u88c5transformers_stream_generator<\/p>\n<p>python -m pip install transformers_stream_generator --break-system-packages<\/p>\n<p>\u56e0\u4e3a\u52a0\u8f7d GPTQ \u91cf\u5316\u6a21\u578b\u9700\u8981 auto-gptq \u5305&#xff0c;\u6211\u4eec\u5148\u628a\u5b83\u88c5\u4e0a&#xff1a;<\/p>\n<p>\u5b89\u88c5gptq<\/p>\n<p>python -m pip install auto-gptq --break-system-packages<\/p>\n<p>\u5b89\u88c5\u4e00\u4e2agptq\u4f9d\u8d56\u5e93optimum<\/p>\n<p>python -m pip install optimum 
&#8211;break-system-packages<\/p>\n<h3>\u4e09\u3001\u5c01\u88c5api<\/h3>\n<p>\u73b0\u5728\u5927\u6a21\u578b\u5df2\u7ecf\u53ef\u4ee5\u542f\u52a8\u4e86&#xff0c;\u6211\u4eec\u5c01\u88c5\u4e00\u4e2aapi\u5b9e\u73b0\u524d\u540e\u7aef\u5206\u79bb\u3002<\/p>\n<p>\u5c01\u88c5fastapi<\/p>\n<p># \u786e\u4fdd\u4f60\u5728 ai_dev \u73af\u5883\u4e2d<br \/>\nconda activate ai_dev<\/p>\n<p># \u4f7f\u7528 conda \u5b89\u88c5 fastapi \u548c uvicorn<br \/>\nconda install fastapi uvicorn -c conda-forge<\/p>\n<p>\u5148\u786e\u4fdd\u4f60\u5728 ~\/ai_project \u8fd9\u4e2a\u6587\u4ef6\u5939\u91cc&#xff1a;<\/p>\n<p>cd ~\/ai_project<\/p>\n<p>\u521b\u5efa\u5e76\u6253\u5f00 tunnel_llm_api.py \u6587\u4ef6&#xff1a;<\/p>\n<p>vim tunnel_llm_api.py<\/p>\n<p>\u7c98\u8d34\u4ee3\u7801<\/p>\n<p>import torch<br \/>\nimport json<br \/>\nimport time<br \/>\nfrom fastapi import FastAPI, HTTPException<br \/>\nfrom pydantic import BaseModel<br \/>\nfrom transformers import AutoModelForCausalLM, AutoTokenizer<br \/>\nfrom transformers.generation import GenerationConfig<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u521d\u59cb\u5316FastAPI &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\napp &#061; FastAPI(title&#061;&#034;\u96a7\u9053LLM API\u670d\u52a1&#034;, version&#061;&#034;1.0&#034;)<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u6a21\u578b\u914d\u7f6e &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nDEVICE &#061; &#034;cuda:0&#034;<br \/>\nMODEL_PATH &#061; &#034;\/home\/ubuntu\/models\/llm\/Qwen-7B-Chat&#034;<br \/>\nTORCH_DTYPE &#061; torch.float16<br \/>\nMAX_NEW_TOKENS &#061; 2048<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u5168\u5c40\u52a0\u8f7d\u6a21\u578b&#xff08;\u542f\u52a8\u65f6\u52a0\u8f7d\u4e00\u6b21&#xff09; &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;\u5f00\u59cb\u52a0\u8f7dLLM\u6a21\u578b&#8230;&#034;)<br \/>\ntokenizer &#061; 
AutoTokenizer.from_pretrained(<br \/>\n    MODEL_PATH, trust_remote_code&#061;True, resume_download&#061;True<br \/>\n)<br \/>\nmodel &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n    MODEL_PATH,<br \/>\n    device_map&#061;DEVICE,<br \/>\n    torch_dtype&#061;TORCH_DTYPE,<br \/>\n    trust_remote_code&#061;True,<br \/>\n    resume_download&#061;True<br \/>\n)<br \/>\n# \u901a\u7528\u95ee\u7b54\u914d\u7f6e<br \/>\ngeneral_gen_config &#061; GenerationConfig(<br \/>\n    temperature&#061;0.7, top_p&#061;0.9, repetition_penalty&#061;1.1, max_new_tokens&#061;MAX_NEW_TOKENS, do_sample&#061;True<br \/>\n)<br \/>\n# \u7ed3\u6784\u5316\u4efb\u52a1\u914d\u7f6e<br \/>\nstructured_gen_config &#061; GenerationConfig(<br \/>\n    temperature&#061;0.01, top_p&#061;0.9, repetition_penalty&#061;1.2, max_new_tokens&#061;MAX_NEW_TOKENS, do_sample&#061;False<br \/>\n)<br \/>\nprint(&#034;\u6a21\u578b\u52a0\u8f7d\u5b8c\u6210&#xff01;&#034;)<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u6570\u636e\u6a21\u578b&#xff08;API\u5165\u53c2&#xff09; &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nclass QueryRequest(BaseModel):<br \/>\n    query: str<br \/>\n    type: str &#061; &#034;general&#034;  # general:\u901a\u7528\u95ee\u7b54, intent:\u610f\u56fe\u8bc6\u522b, warn:\u9884\u8b66, disease:\u75c5\u5bb3, collision:\u78b0\u649e<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u8f85\u52a9\u51fd\u6570 &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\ndef extract_complete_json(text):<br \/>\n    start_idx &#061; text.find(&#039;{&#039;)<br \/>\n    if start_idx &#061;&#061; -1:<br \/>\n        return None<br \/>\n    brace_count &#061; 0<br \/>\n    end_idx &#061; -1<br \/>\n    for i in range(start_idx, len(text)):<br \/>\n        if text[i] &#061;&#061; &#039;{&#039;:<br \/>\n            brace_count &#043;&#061; 1<br \/>\n        elif text[i] &#061;&#061; &#039;}&#039;:<br \/>\n            
brace_count -&#061; 1<br \/>\n            if brace_count &#061;&#061; 0:<br \/>\n                end_idx &#061; i<br \/>\n                break<br \/>\n    if end_idx &#061;&#061; -1:<br \/>\n        return None<br \/>\n    return text[start_idx:end_idx&#043;1]<\/p>\n<p>def general_qa(query):<br \/>\n    prompt &#061; f&#034;&#034;&#034;<br \/>\n    \u4f60\u662f\u4e13\u4e1a\u7684\u96a7\u9053\u5de5\u7a0b\u54a8\u8be2\u52a9\u624b&#xff0c;\u4ec5\u56de\u7b54\u96a7\u9053\u76f8\u5173\u4e13\u4e1a\u95ee\u9898&#xff0c;\u56de\u7b54\u8981\u7b80\u6d01\u3001\u4e13\u4e1a\u3001\u6613\u61c2\u3002<br \/>\n    \u7528\u6237\u95ee\u9898&#xff1a;{query}<br \/>\n    \u5f3a\u5236\u8981\u6c42&#xff1a;\u53ea\u8f93\u51fa\u95ee\u9898\u7684\u6838\u5fc3\u56de\u7b54&#xff0c;\u7981\u6b62\u7eed\u5199\u4efb\u4f55\u65e0\u5173\u5185\u5bb9&#xff01;<br \/>\n    &#034;&#034;&#034;<br \/>\n    messages &#061; [{&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: prompt}]<br \/>\n    text &#061; tokenizer.apply_chat_template(messages, tokenize&#061;False, add_generation_prompt&#061;True)<br \/>\n    model_inputs &#061; tokenizer([text], return_tensors&#061;&#034;pt&#034;).to(DEVICE)<br \/>\n    generated_ids &#061; model.generate(**model_inputs, generation_config&#061;general_gen_config)<br \/>\n    generated_ids &#061; [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]<br \/>\n    response &#061; tokenizer.batch_decode(generated_ids, skip_special_tokens&#061;True)[0]<\/p>\n<p>    # \u622a\u65ad\u65e0\u5173\u5185\u5bb9<br \/>\n    stop_keywords &#061; [&#034;\u51c6\u5676\u5c14\u76c6\u5730&#034;, &#034;\u4e0b\u5217\u54ea\u4e2a\u9009\u9879&#034;, &#034;Photoshop&#034;, &#034;1. 
&#034;]<br \/>\n    for kw in stop_keywords:<br \/>\n        if kw in response:<br \/>\n            response &#061; response.split(kw)[0].strip()<br \/>\n    return response.strip()<\/p>\n<p>def generate_structured(prompt):<br \/>\n    messages &#061; [{&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: prompt}]<br \/>\n    text &#061; tokenizer.apply_chat_template(messages, tokenize&#061;False, add_generation_prompt&#061;True)<br \/>\n    model_inputs &#061; tokenizer([text], return_tensors&#061;&#034;pt&#034;).to(DEVICE)<br \/>\n    generated_ids &#061; model.generate(**model_inputs, generation_config&#061;structured_gen_config)<br \/>\n    generated_ids &#061; [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]<br \/>\n    response &#061; tokenizer.batch_decode(generated_ids, skip_special_tokens&#061;True)[0]<\/p>\n<p>    try:<br \/>\n        json_str &#061; extract_complete_json(response)<br \/>\n        if json_str is None:<br \/>\n            raise ValueError(&#034;\u672a\u627e\u5230\u5b8c\u6574JSON&#034;)<br \/>\n        return json.loads(json_str)<br \/>\n    except Exception as e:<br \/>\n        return {&#034;error&#034;: &#034;\u683c\u5f0f\u9519\u8bef&#034;, &#034;detail&#034;: str(e)}<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; API\u63a5\u53e3 &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n&#064;app.post(&#034;\/api\/chat&#034;)<br \/>\nasync def chat(request: QueryRequest):<br \/>\n    try:<br \/>\n        start_time &#061; time.time()<br \/>\n        if request.type &#061;&#061; &#034;general&#034;:<br \/>\n            # \u901a\u7528\u95ee\u7b54<br \/>\n            result &#061; {&#034;answer&#034;: general_qa(request.query)}<br \/>\n        elif request.type &#061;&#061; &#034;intent&#034;:<br \/>\n            # \u610f\u56fe\u8bc6\u522b<br \/>\n            prompt &#061; f&#034;&#034;&#034;<br \/>\n            
\u89d2\u8272&#xff1a;\u96a7\u9053\u573a\u666f\u610f\u56fe\u8bc6\u522b\u6a21\u578b<br \/>\n            \u8f93\u5165&#xff1a;{request.query}<br \/>\n            \u8f93\u51fa\u683c\u5f0f&#xff1a;{{&#034;intent&#034;:&#034;traffic_query&#034;,&#034;slot&#034;:{{&#034;tunnel_name&#034;:&#034;\u9e4f\u9e44\u96a7\u9053&#034;,&#034;date&#034;:&#034;\u4eca\u5929&#034;}}}}<br \/>\n            &#034;&#034;&#034;<br \/>\n            result &#061; generate_structured(prompt)<br \/>\n        elif request.type &#061;&#061; &#034;warn&#034;:<br \/>\n            # \u9884\u8b66\u751f\u6210&#xff08;\u793a\u4f8b\u53c2\u6570&#xff0c;\u53ef\u4ecequery\u63d0\u53d6&#xff09;<br \/>\n            prompt &#061; f&#034;&#034;&#034;<br \/>\n            \u89d2\u8272&#xff1a;\u96a7\u9053\u9884\u8b66\u751f\u6210\u6a21\u578b<br \/>\n            \u8f93\u5165&#xff1a;\u6307\u6807&#061;\u4e00\u6c27\u5316\u78b3&#xff0c;\u5f53\u524d\u503c&#061;200.22ppm&#xff0c;\u9608\u503c&#061;150ppm<br \/>\n            \u8f93\u51fa\u683c\u5f0f&#xff1a;{{&#034;warn_text&#034;:&#034;&#034;,&#034;suggestion&#034;:&#034;&#034;}}<br \/>\n            &#034;&#034;&#034;<br \/>\n            result &#061; generate_structured(prompt)<br \/>\n        elif request.type &#061;&#061; &#034;disease&#034;:<br \/>\n            # \u75c5\u5bb3\u9884\u6d4b<br \/>\n            prompt &#061; f&#034;&#034;&#034;<br \/>\n            \u89d2\u8272&#xff1a;\u96a7\u9053\u75c5\u5bb3\u9884\u6d4b\u6a21\u578b<br \/>\n            \u8f93\u5165&#xff1a;\u75c5\u5bb3\u7c7b\u578b&#061;\u6a2a\u5411\u88c2\u7f1d&#xff0c;\u5f53\u524d\u9762\u79ef&#061;0.4540\u33a1&#xff0c;\u9884\u6d4b\u65f6\u95f4&#061;12\u4e2a\u6708<br \/>\n            \u8f93\u51fa\u683c\u5f0f&#xff1a;{{&#034;future_area&#034;:&#034;&#034;,&#034;growth_rate&#034;:&#034;&#034;,&#034;level&#034;:&#034;&#034;,&#034;secondary_risk&#034;:&#034;&#034;,&#034;evolution_type&#034;:&#034;&#034;}}<br \/>\n            &#034;&#034;&#034;<br \/>\n            result &#061; generate_structured(prompt)<br \/>\n        elif 
request.type &#061;&#061; &#034;collision&#034;:<br \/>\n            # \u78b0\u649e\u89e3\u6790<br \/>\n            prompt &#061; f&#034;&#034;&#034;<br \/>\n            \u89d2\u8272&#xff1a;\u96a7\u9053\u78b0\u649e\u89e3\u6790\u6a21\u578b<br \/>\n            \u8f93\u5165&#xff1a;{request.query}<br \/>\n            \u8f93\u51fa\u683c\u5f0f&#xff1a;{{&#034;car_type1&#034;:&#034;SUV&#034;,&#034;car_type2&#034;:&#034;\u8d27\u8f66&#034;,&#034;angle&#034;:&#034;\u4fa7\u9762&#034;,&#034;speed&#034;:&#034;40km\/h&#034;,&#034;time_steps&#034;:[1,3,5,10]}}<br \/>\n            &#034;&#034;&#034;<br \/>\n            result &#061; generate_structured(prompt)<br \/>\n        else:<br \/>\n            raise HTTPException(status_code&#061;400, detail&#061;&#034;\u4e0d\u652f\u6301\u7684\u7c7b\u578b&#034;)<\/p>\n<p>        return {<br \/>\n            &#034;code&#034;: 200,<br \/>\n            &#034;msg&#034;: &#034;success&#034;,<br \/>\n            &#034;data&#034;: result,<br \/>\n            &#034;time_cost&#034;: f&#034;{time.time()-start_time:.2f}s&#034;<br \/>\n        }<br \/>\n    except Exception as e:<br \/>\n        raise HTTPException(status_code&#061;500, detail&#061;f&#034;\u670d\u52a1\u5668\u9519\u8bef&#xff1a;{str(e)}&#034;)<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u542f\u52a8\u670d\u52a1 &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nif __name__ &#061;&#061; &#034;__main__&#034;:<br \/>\n    # \u542f\u52a8API\u670d\u52a1&#xff0c;\u5141\u8bb8\u5916\u90e8\u8bbf\u95ee&#xff08;0.0.0.0&#xff09;<br \/>\n    import uvicorn<br \/>\n    uvicorn.run(<br \/>\n        app&#061;&#034;tunnel_llm_api:app&#034;,<br \/>\n        host&#061;&#034;0.0.0.0&#034;,  # \u5141\u8bb8\u6240\u6709IP\u8bbf\u95ee<br \/>\n        port&#061;8000,       # \u7aef\u53e3<br \/>\n        workers&#061;1,       # \u5355\u8fdb\u7a0b&#xff08;\u6a21\u578b\u52a0\u8f7d\u4e00\u6b21&#xff09;<br \/>\n        reload&#061;False     # 
\u751f\u4ea7\u73af\u5883\u5173\u95ed\u70ed\u91cd\u8f7d<br \/>\n    )<\/p>\n<p>\u5728\u76ee\u5f55\u4e0b\u6267\u884c\u4ee3\u7801&#xff1a;<\/p>\n<p>CUDA_VISIBLE_DEVICES&#061;0 nohup python tunnel_llm_api.py &gt; llm_api.log 2&gt;&amp;1 &amp;<\/p>\n<ul>\n<li>CUDA_VISIBLE_DEVICES&#061;0&#xff1a;\u6307\u5b9a\u7528\u7b2c 0 \u5757 GPU \u8fd0\u884c&#xff1b;<\/li>\n<li>nohup&#xff1a;\u540e\u53f0\u8fd0\u884c&#xff0c;\u5173\u95ed\u7ec8\u7aef\u4e5f\u4e0d\u4f1a\u505c&#xff1b;<\/li>\n<li>&gt; llm_api.log 2&gt;&amp;1&#xff1a;\u628a\u8fd0\u884c\u65e5\u5fd7\u5b58\u5230 llm_api.log \u6587\u4ef6\u91cc&#xff1b;<\/li>\n<li>&amp;&#xff1a;\u8ba9\u7a0b\u5e8f\u5728\u540e\u53f0\u6267\u884c<\/li>\n<\/ul>\n<p>\u67e5\u770b\u6a21\u578b\u52a0\u8f7d\u5b9e\u65f6\u65e5\u5fd7&#xff1a;<\/p>\n<p>tail -f llm_api.log<\/p>\n<p>\u6a21\u578b\u52a0\u8f7d\u6210\u529f&#xff1a;<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"965\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114154-69a81a82b2687.png\" width=\"2255\" \/><\/p>\n<p>\u5728\u53e6\u4e00\u4e2a\u7ec8\u7aef\u7a97\u53e3&#xff08;\u4fdd\u6301\u5f53\u524d\u65e5\u5fd7\u7a97\u53e3\u6253\u5f00&#xff0c;\u65b9\u4fbf\u76d1\u63a7&#xff09;\u6267\u884c&#xff1a;<\/p>\n<p>conda activate ai_dev<br \/>\ncd ~\/ai_project<\/p>\n<p>\u5b89\u88c5curl\u5de5\u5177<\/p>\n<p>sudo apt update &amp;&amp; sudo apt install -y curl<\/p>\n<p># \u6d4b\u8bd5\u901a\u7528\u95ee\u7b54<br \/>\ncurl -X POST &#034;http:\/\/localhost:8000\/api\/chat&#034; -H &#034;Content-Type: application\/json&#034; -d &#039;{&#034;query&#034;:&#034;\u96a7\u9053\u901a\u98ce\u7cfb\u7edf\u6709\u54ea\u4e9b\u7c7b\u578b&#xff1f;&#034;,&#034;type&#034;:&#034;general&#034;}&#039;<\/p>\n<p>\u56db\u3001\u524d\u7aef\u90e8\u7f72<\/p>\n<p>\u9996\u5148\u68c0\u67e5\u8fdb\u7a0b\u662f\u5426\u8fd8\u5728<\/p>\n<p># \u68c0\u67e5 API \u670d\u52a1\u8fdb\u7a0b<br \/>\nps -ef | grep tunnel_llm_api.py<br \/>\n# \u5982\u679c\u6ca1\u6709\u8fdb\u7a0b&#xff0c;\u91cd\u65b0\u542f\u52a8<br \/>\nconda 
activate ai_dev<br \/>\ncd ~\/ai_project<br \/>\nCUDA_VISIBLE_DEVICES&#061;0 nohup python tunnel_llm_api.py &gt; llm_api.log 2&gt;&amp;1 &amp;<\/p>\n<p>Streamlit \u662f\u5feb\u901f\u642d\u5efa Python Web \u5e94\u7528\u7684\u5de5\u5177&#xff0c;\u65e0\u9700\u5199\u524d\u7aef\u4ee3\u7801&#xff0c;\u5148\u5b89\u88c5&#xff1a;<\/p>\n<p># \u786e\u4fdd\u4f60\u5728 ai_dev \u73af\u5883\u4e2d<br \/>\nconda activate ai_dev<\/p>\n<p># \u4f7f\u7528 conda \u5b89\u88c5 streamlit<br \/>\nconda install streamlit -c conda-forge<\/p>\n<p>\u5728 ~\/ai_project \u76ee\u5f55\u4e0b\u521b\u5efa llm_chat_app.py \u6587\u4ef6&#xff1a;<\/p>\n<p># \u8fdb\u5165\u9879\u76ee\u76ee\u5f55<br \/>\ncd ~\/ai_project<\/p>\n<p># \u7528 vim \u521b\u5efa\u5e76\u7f16\u8f91\u6587\u4ef6<br \/>\nvim llm_chat_app.py<\/p>\n<p>import streamlit as st<br \/>\nimport requests<br \/>\nimport time<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u9875\u9762\u57fa\u7840\u914d\u7f6e &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# \u8bbe\u7f6e\u9875\u9762\u6807\u9898\u3001\u56fe\u6807\u3001\u5e03\u5c40<br \/>\nst.set_page_config(<br \/>\n    page_title&#061;&#034;\u96a7\u9053\u5de5\u7a0b\u5927\u6a21\u578b\u52a9\u624b&#034;,<br \/>\n    page_icon&#061;&#034;\u26f0\ufe0f&#034;,<br \/>\n    layout&#061;&#034;wide&#034;,  # \u5bbd\u5c4f\u5e03\u5c40<br \/>\n    initial_sidebar_state&#061;&#034;collapsed&#034;  # \u6536\u8d77\u4fa7\u8fb9\u680f<br \/>\n)<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u6837\u5f0f\u7f8e\u5316&#xff08;\u53ef\u9009&#xff0c;\u63d0\u5347\u4f53\u9a8c&#xff09; &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nst.markdown(&#034;&#034;&#034;<br \/>\n    
&lt;style&gt;<br \/>\n    \/* \u804a\u5929\u6846\u6837\u5f0f *\/<br \/>\n    .stChatMessage {<br \/>\n        padding: 1rem;<br \/>\n        border-radius: 10px;<br \/>\n        margin-bottom: 0.8rem;<br \/>\n    }<br \/>\n    \/* \u8f93\u5165\u6846\u6837\u5f0f *\/<br \/>\n    .stChatInput {<br \/>\n        position: fixed;<br \/>\n        bottom: 20px;<br \/>\n        width: 80%;<br \/>\n        left: 10%;<br \/>\n        z-index: 999;<br \/>\n    }<br \/>\n    \/* \u6807\u9898\u6837\u5f0f *\/<br \/>\n    h1 {<br \/>\n        text-align: center;<br \/>\n        color: #2E86AB;<br \/>\n        margin-bottom: 2rem;<br \/>\n    }<br \/>\n    &lt;\/style&gt;<br \/>\n&#034;&#034;&#034;, unsafe_allow_html&#061;True)<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u521d\u59cb\u5316\u804a\u5929\u5386\u53f2 &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# \u9996\u6b21\u6253\u5f00\u9875\u9762\u65f6&#xff0c;\u521b\u5efa\u7a7a\u7684\u804a\u5929\u8bb0\u5f55<br \/>\nif &#034;messages&#034; not in st.session_state:<br \/>\n    st.session_state.messages &#061; [<br \/>\n        {&#034;role&#034;: &#034;assistant&#034;, &#034;content&#034;: &#034;\u4f60\u597d&#xff01;\u6211\u662f\u96a7\u9053\u5de5\u7a0b\u4e13\u4e1a\u52a9\u624b&#xff0c;\u6709\u4efb\u4f55\u96a7\u9053\u76f8\u5173\u95ee\u9898\u90fd\u53ef\u4ee5\u95ee\u6211&#xff5e;&#034;}<br \/>\n    ]<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u663e\u793a\u804a\u5929\u5386\u53f2 &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nst.title(&#034;\u26f0\ufe0f \u96a7\u9053\u5de5\u7a0b\u5927\u6a21\u578b\u52a9\u624b&#034;)<\/p>\n<p># 
\u904d\u5386\u804a\u5929\u8bb0\u5f55&#xff0c;\u663e\u793a\u6bcf\u4e00\u6761\u6d88\u606f<br \/>\nfor msg in st.session_state.messages:<br \/>\n    # \u8bbe\u7f6e\u6d88\u606f\u89d2\u8272&#xff08;\u7528\u6237\/\u52a9\u624b&#xff09;&#xff0c;\u5bf9\u5e94\u4e0d\u540c\u7684\u5934\u50cf\u548c\u989c\u8272<br \/>\n    with st.chat_message(msg[&#034;role&#034;], avatar&#061;&#034;&#x1f464;&#034; if msg[&#034;role&#034;] &#061;&#061; &#034;user&#034; else &#034;&#x1f916;&#034;):<br \/>\n        st.markdown(msg[&#034;content&#034;])<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u63a5\u6536\u7528\u6237\u8f93\u5165\u5e76\u8c03\u7528API &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# \u5e95\u90e8\u8f93\u5165\u6846&#xff0c;\u63d0\u793a\u6587\u5b57&#xff1a;&#034;\u8bf7\u8f93\u5165\u4f60\u7684\u96a7\u9053\u76f8\u5173\u95ee\u9898&#8230;&#034;<br \/>\nif user_prompt :&#061; st.chat_input(&#034;\u8bf7\u8f93\u5165\u4f60\u7684\u96a7\u9053\u76f8\u5173\u95ee\u9898&#8230;&#034;):<br \/>\n    # 1. \u663e\u793a\u7528\u6237\u8f93\u5165\u7684\u6d88\u606f<br \/>\n    with st.chat_message(&#034;user&#034;, avatar&#061;&#034;&#x1f464;&#034;):<br \/>\n        st.markdown(user_prompt)<br \/>\n    # 2. \u628a\u7528\u6237\u6d88\u606f\u52a0\u5165\u804a\u5929\u5386\u53f2<br \/>\n    st.session_state.messages.append({&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: user_prompt})<\/p>\n<p>    # 3. 
\u8c03\u7528\u5df2\u90e8\u7f72\u7684 LLM API \u670d\u52a1<br \/>\n    with st.spinner(&#034;&#x1f916; \u6a21\u578b\u6b63\u5728\u601d\u8003\u4e2d&#8230;&#034;):<br \/>\n        try:<br \/>\n            # \u8c03\u7528 API \u7684\u6838\u5fc3\u4ee3\u7801<br \/>\n            response &#061; requests.post(<br \/>\n                url&#061;&#034;http:\/\/192.168.2.199:8000\/api\/chat&#034;,  # \u4f60\u7684 API \u5730\u5740<br \/>\n                json&#061;{<br \/>\n                    &#034;query&#034;: user_prompt,  # \u7528\u6237\u8f93\u5165\u7684\u95ee\u9898<br \/>\n                    &#034;type&#034;: &#034;general&#034;       # \u901a\u7528\u95ee\u7b54\u7c7b\u578b<br \/>\n                },<br \/>\n                headers&#061;{&#034;Content-Type&#034;: &#034;application\/json&#034;},<br \/>\n                timeout&#061;30  # \u8d85\u65f6\u65f6\u95f430\u79d2&#xff08;\u6a21\u578b\u63a8\u7406\u9700\u8981\u65f6\u95f4&#xff09;<br \/>\n            )<\/p>\n<p>            # \u68c0\u67e5 API \u54cd\u5e94\u72b6\u6001<br \/>\n            response.raise_for_status()<br \/>\n            result &#061; response.json()<\/p>\n<p>            # 4. 
\u5904\u7406 API \u8fd4\u56de\u7ed3\u679c<br \/>\n            if result[&#034;code&#034;] &#061;&#061; 200:<br \/>\n                # \u6210\u529f&#xff1a;\u63d0\u53d6\u6a21\u578b\u56de\u7b54<br \/>\n                model_answer &#061; result[&#034;data&#034;][&#034;answer&#034;]<br \/>\n                # \u663e\u793a\u63a8\u7406\u8017\u65f6&#xff08;\u53ef\u9009&#xff09;<br \/>\n                model_answer &#043;&#061; f&#034;\\n\\n\u23f1\ufe0f \u63a8\u7406\u8017\u65f6&#xff1a;{result[&#039;time_cost&#039;]}&#034;<br \/>\n            else:<br \/>\n                # \u5931\u8d25&#xff1a;\u663e\u793a\u9519\u8bef\u4fe1\u606f<br \/>\n                model_answer &#061; f&#034;\u274c \u670d\u52a1\u54cd\u5e94\u9519\u8bef&#xff1a;{result[&#039;msg&#039;]}&#034;<\/p>\n<p>        except requests.exceptions.ConnectionError:<br \/>\n            model_answer &#061; &#034;\u274c \u8fde\u63a5\u5931\u8d25&#xff01;\u8bf7\u68c0\u67e5 API \u670d\u52a1\u662f\u5426\u542f\u52a8&#xff0c;\u6216 IP\/\u7aef\u53e3\u662f\u5426\u6b63\u786e\u3002&#034;<br \/>\n        except requests.exceptions.Timeout:<br \/>\n            model_answer &#061; &#034;\u274c \u8bf7\u6c42\u8d85\u65f6&#xff01;\u6a21\u578b\u63a8\u7406\u65f6\u95f4\u8fc7\u957f&#xff0c;\u8bf7\u7a0d\u540e\u518d\u8bd5\u3002&#034;<br \/>\n        except Exception as e:<br \/>\n            model_answer &#061; f&#034;\u274c \u8c03\u7528\u5931\u8d25&#xff1a;{str(e)}&#034;<\/p>\n<p>    # 5. \u663e\u793a\u6a21\u578b\u56de\u7b54<br \/>\n    with st.chat_message(&#034;assistant&#034;, avatar&#061;&#034;&#x1f916;&#034;):<br \/>\n        st.markdown(model_answer)<br \/>\n    # 6. 
\u628a\u6a21\u578b\u56de\u7b54\u52a0\u5165\u804a\u5929\u5386\u53f2<br \/>\n    st.session_state.messages.append({&#034;role&#034;: &#034;assistant&#034;, &#034;content&#034;: model_answer})<\/p>\n<p>\u542f\u52a8 Streamlit Web \u670d\u52a1<\/p>\n<p>\u5728 ~\/ai_project \u76ee\u5f55\u4e0b\u6267\u884c\u542f\u52a8\u547d\u4ee4&#xff1a;<\/p>\n<p># \u786e\u4fdd\u5728 ai_dev \u73af\u5883\u4e2d<br \/>\nconda activate ai_dev<\/p>\n<p># \u542f\u52a8 Streamlit \u670d\u52a1&#xff08;\u5141\u8bb8\u5185\u7f51\u8bbf\u95ee&#xff0c;\u7aef\u53e38501&#xff09;<br \/>\nstreamlit run llm_chat_app.py &#8211;server.address 0.0.0.0 &#8211;server.port 8501<\/p>\n<p>\u6539\u6210\u540e\u53f0\u5e38\u9a7b&#xff1a;<\/p>\n<p>conda activate ai_dev<\/p>\n<p>cd ~\/ai_project<\/p>\n<p># \u505c\u6b62\u53ef\u80fd\u7684 API \u670d\u52a1\u8fdb\u7a0b<br \/>\npkill -f tunnel_llm_api.py<br \/>\n# \u505c\u6b62\u53ef\u80fd\u7684 Streamlit \u670d\u52a1\u8fdb\u7a0b<br \/>\npkill -f streamlit<\/p>\n<p>\u540e\u53f0\u542f\u52a8 LLM API \u670d\u52a1&#xff08;\u5fc5\u987b\u5148\u542f\u52a8&#xff0c;\u56e0\u4e3a Web \u754c\u9762\u4f9d\u8d56\u5b83&#xff09;<\/p>\n<p>CUDA_VISIBLE_DEVICES&#061;0 nohup python tunnel_llm_api.py &gt; llm_api.log 2&gt;&amp;1 &amp;<\/p>\n<p>\u5728\u53e6\u5916\u4e00\u4e2a\u7ec8\u7aef\u6267\u884c<\/p>\n<p>nohup streamlit run llm_chat_app.py &#8211;server.address 0.0.0.0 &#8211;server.port 8501 &gt; streamlit.log 2&gt;&amp;1 &amp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4e00\u3001\u521b\u5efa\u865a\u62df\u73af\u5883\u9996\u5148\u521b\u5efa\u865a\u62df\u73af\u5883conda create -n ai_dev\u6fc0\u6d3b\u865a\u62df\u73af\u5883conda activate ai_dev\u4f7f\u7528conda\u5b89\u88c5pytorchconda \u4f1a\u81ea\u52a8\u5904\u7406 CUDA \u7248\u672c\u517c\u5bb9\u548c\u4f9d\u8d56&#xff0c;\u907f\u514d pip \u5b89\u88c5\u65f6\u7684\u7248\u672c\u4e0d\u5339\u914d\u95ee\u9898conda install pytorch2.2.0 torchvision0.17.0 torchaudio2.2.0 pytorch-cuda12.1 -c pytorch -c 
nvidiapytorch\u5b89\u88c5\u6210\u529f<\/p>\n","protected":false},"author":2,"featured_media":80249,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[50],"topic":[],"class_list":["post-80257","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-50"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/80257.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u4e00\u3001\u521b\u5efa\u865a\u62df\u73af\u5883\u9996\u5148\u521b\u5efa\u865a\u62df\u73af\u5883conda create -n ai_dev\u6fc0\u6d3b\u865a\u62df\u73af\u5883conda activate ai_dev\u4f7f\u7528conda\u5b89\u88c5pytorchconda \u4f1a\u81ea\u52a8\u5904\u7406 CUDA \u7248\u672c\u517c\u5bb9\u548c\u4f9d\u8d56&#xff0c;\u907f\u514d pip \u5b89\u88c5\u65f6\u7684\u7248\u672c\u4e0d\u5339\u914d\u95ee\u9898conda install pytorch2.2.0 torchvision0.17.0 torchaudio2.2.0 pytorch-cuda12.1 -c pytorch -c nvidiapytorch\u5b89\u88c5\u6210\u529f\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/80257.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2026-03-04T11:41:57+00:00\" \/>\n<meta 
property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114151-69a81a7f34f41.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"8 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/80257.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/80257.html\",\"name\":\"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2026-03-04T11:41:57+00:00\",\"dateModified\":\"2026-03-04T11:41:57+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/80257.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/80257.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/80257.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e9
1\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/80257.html","og_locale":"zh_CN","og_type":"article","og_title":"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u4e00\u3001\u521b\u5efa\u865a\u62df\u73af\u5883\u9996\u5148\u521b\u5efa\u865a\u62df\u73af\u5883conda create -n ai_dev\u6fc0\u6d3b\u865a\u62df\u73af\u5883conda activate ai_dev\u4f7f\u7528conda\u5b89\u88c5pytorchconda \u4f1a\u81ea\u52a8\u5904\u7406 CUDA \u7248\u672c\u517c\u5bb9\u548c\u4f9d\u8d56&#xff0c;\u907f\u514d pip \u5b89\u88c5\u65f6\u7684\u7248\u672c\u4e0d\u5339\u914d\u95ee\u9898conda install pytorch2.2.0 torchvision0.17.0 torchaudio2.2.0 pytorch-cuda12.1 -c 
pytorch -c nvidiapytorch\u5b89\u88c5\u6210\u529f","og_url":"https:\/\/www.wsisp.com\/helps\/80257.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2026-03-04T11:41:57+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2026\/03\/20260304114151-69a81a7f34f41.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"8 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/80257.html","url":"https:\/\/www.wsisp.com\/helps\/80257.html","name":"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2026-03-04T11:41:57+00:00","dateModified":"2026-03-04T11:41:57+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/80257.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/80257.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/80257.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5927\u6a21\u578b\uff08ubuntu24.04.3\uff09"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemp
late":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/80257","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=80257"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/80257\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/80249"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=80257"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=80257"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=80257"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=80257"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}