{"id":23367,"date":"2025-04-19T07:36:50","date_gmt":"2025-04-18T23:36:50","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/23367.html"},"modified":"2025-04-19T07:36:50","modified_gmt":"2025-04-18T23:36:50","slug":"%e3%80%90%e5%bc%80%e6%ba%90%e5%a4%a7%e6%a8%a1%e5%9e%8b%e9%83%a8%e7%bd%b2%e3%80%91%e5%a6%82%e4%bd%95%e5%9c%a8%e6%9c%8d%e5%8a%a1%e5%99%a8%e4%b8%8a%e9%83%a8%e7%bd%b2%e5%bc%80%e6%ba%90%e5%a4%a7%e6%a8%a1","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/23367.html","title":{"rendered":"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f"},"content":{"rendered":"<p>\u672c\u5730\u670d\u52a1\u5668\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b\u6709\u4e00\u4e2a\u524d\u63d0&#xff0c;\u5c31\u662f\u5f97\u6709 GPU \u663e\u5361\u8d44\u6e90&#xff0c;\u5728\u6211\u4e0b\u9762\u7684\u4f8b\u5b50\u4e2d\u6211\u79df\u7528\u4e86 autodl \u4e2d\u7684\u7b97\u529b\u8d44\u6e90&#xff0c;\u5177\u4f53\u662f\u79df\u7528\u4e86\u4e00\u5f20\u6d88\u8d39\u7ea7\u522b\u7684 RTX 3090 \u663e\u5361\u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233646-6802e20e1da94.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h3>\u73af\u5883\u914d\u7f6e<\/h3>\n<ul>\n<li>\u64cd\u4f5c\u7cfb\u7edf\u53ca\u7248\u672c&#xff1a;ubuntu 22.04<\/li>\n<li>CUDA \u7248\u672c&#xff1a; 12.1<\/li>\n<li>pytorch \u7248\u672c&#xff1a;2.3.0&#043;cu121<\/li>\n<\/ul>\n<h4>pip \u6362\u6e90\u548c\u5b89\u88c5\u4f9d\u8d56\u5305\u3002<\/h4>\n<p># \u5347\u7ea7pip<br \/>\npython -m pip install --upgrade pip<br \/>\n# \u66f4\u6362 pypi \u6e90\u52a0\u901f\u5e93\u7684\u5b89\u88c5<br \/>\npip config set global.index-url https:\/\/pypi.tuna.tsinghua.edu.cn\/simple<\/p>\n<p>pip install fastapi&#061;&#061;0.104.1<br \/>\npip install uvicorn&#061;&#061;0.24.0.post1<br \/>\npip install 
requests&#061;&#061;2.25.1<br \/>\npip install modelscope&#061;&#061;1.9.5<br \/>\npip install transformers&#061;&#061;4.42.4<br \/>\npip install streamlit&#061;&#061;1.24.0<br \/>\npip install sentencepiece&#061;&#061;0.1.99<br \/>\npip install accelerate&#061;&#061;0.24.1<br \/>\npip install tiktoken&#061;&#061;0.7.0<\/p>\n<p>\u8fd9\u91cc\u8981\u6ce8\u610f transformers \u7684\u7248\u672c\u662f 4.42.4<\/p>\n<h3>\u6a21\u578b\u4e0b\u8f7d<\/h3>\n<p>GLM-4-9B-Chat \u6a21\u578b\u5927\u5c0f\u4e3a 18 GB&#xff0c;\u4e0b\u8f7d\u6a21\u578b\u5927\u6982\u9700\u8981 10~20 \u5206\u949f\u3002<\/p>\n<p>\u7531\u4e8e\u540e\u9762\u6211\u4eec\u8981\u4f7f\u7528\u4e00\u4e2a\u5f00\u6e90\u7684 embedding \u6a21\u578b BAAI\/bge-base-zh-v1.5<\/p>\n<p>\u6240\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u4ee3\u7801\u4e0b\u8f7d 2 \u4e2a\u6a21\u578b\u6587\u4ef6\u5230\u672c\u5730\u6587\u4ef6\u7cfb\u7edf&#xff1a;<\/p>\n<p>\u8fd0\u884c python download.py<\/p>\n<p>import torch<br \/>\nfrom modelscope import snapshot_download, AutoModel, AutoTokenizer<br \/>\nimport os<br \/>\nmodel_dir &#061; snapshot_download(&#039;ZhipuAI\/glm-4-9b-chat&#039;, cache_dir&#061;&#039;\/root\/autodl-tmp&#039;, revision&#061;&#039;master&#039;)<br \/>\nembedding_model_dir &#061; snapshot_download(&#039;BAAI\/bge-base-zh-v1.5&#039;, cache_dir&#061;&#039;\/root\/autodl-tmp&#039;, revision&#061;&#039;master&#039;)<\/p>\n<h4>\u6a21\u578b\u6d4b\u8bd5<\/h4>\n<p>GLM \u5f00\u6e90\u6a21\u578b\u5b98\u65b9\u7ed9\u4e86\u4e00\u4e2a Demo \u65b9\u4fbf\u6211\u4eec\u505a\u6d4b\u8bd5&#xff0c;\u4ee5\u4e0b\u662f\u4ee3\u7801&#xff1a;<\/p>\n<p>\u8fd0\u884c python trans_cli_demo.py<\/p>\n<p>&#034;&#034;&#034;<br \/>\nThis script creates a CLI demo with transformers backend for the glm-4-9b model,<br \/>\nallowing users to interact with the model through a command-line interface.<\/p>\n<p>Usage:<br \/>\n&#8211; Run the script to start the CLI demo.<br \/>\n&#8211; Interact with the model by typing questions and receiving responses.<\/p>\n<p>Note: The script 
includes a modification to handle markdown to plain text conversion,<br \/>\nensuring that the CLI interface displays formatted text correctly.<\/p>\n<p>If you use flash attention, you should install the flash-attn and  add attn_implementation&#061;&#034;flash_attention_2&#034; in model loading.<br \/>\n&#034;&#034;&#034;<\/p>\n<p>import os<br \/>\nimport torch<br \/>\nfrom threading import Thread<br \/>\nfrom transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer, AutoModelForCausalLM<\/p>\n<p>MODEL_PATH &#061; os.environ.get(&#039;MODEL_PATH&#039;, &#039;\/root\/autodl-tmp\/ZhipuAI\/glm-4-9b-chat&#039;)<\/p>\n<p>tokenizer &#061; AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code&#061;True)<\/p>\n<p>model &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n    MODEL_PATH,<br \/>\n    trust_remote_code&#061;True,<br \/>\n    device_map&#061;&#034;auto&#034;<br \/>\n).eval()<\/p>\n<p>class StopOnTokens(StoppingCriteria):<br \/>\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -&gt; bool:<br \/>\n        stop_ids &#061; model.config.eos_token_id<br \/>\n        for stop_id in stop_ids:<br \/>\n            if input_ids[0][-1] &#061;&#061; stop_id:<br \/>\n                return True<br \/>\n        return False<\/p>\n<p>if __name__ &#061;&#061; &#034;__main__&#034;:<br \/>\n    history &#061; []<br \/>\n    max_length &#061; 8192<br \/>\n    top_p &#061; 0.8<br \/>\n    temperature &#061; 0.6<br \/>\n    stop &#061; StopOnTokens()<\/p>\n<p>    print(&#034;Welcome to the GLM-4-9B CLI chat. 
Type your messages below.&#034;)<br \/>\n    while True:<br \/>\n        user_input &#061; input(&#034;\\nYou: &#034;)<br \/>\n        if user_input.lower() in [&#034;exit&#034;, &#034;quit&#034;]:<br \/>\n            break<br \/>\n        history.append([user_input, &#034;&#034;])<\/p>\n<p>        messages &#061; []<br \/>\n        for idx, (user_msg, model_msg) in enumerate(history):<br \/>\n            if idx &#061;&#061; len(history) - 1 and not model_msg:<br \/>\n                messages.append({&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: user_msg})<br \/>\n                break<br \/>\n            if user_msg:<br \/>\n                messages.append({&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: user_msg})<br \/>\n            if model_msg:<br \/>\n                messages.append({&#034;role&#034;: &#034;assistant&#034;, &#034;content&#034;: model_msg})<br \/>\n        model_inputs &#061; tokenizer.apply_chat_template(<br \/>\n            messages,<br \/>\n            add_generation_prompt&#061;True,<br \/>\n            tokenize&#061;True,<br \/>\n            return_tensors&#061;&#034;pt&#034;<br \/>\n        ).to(model.device)<br \/>\n        streamer &#061; TextIteratorStreamer(<br \/>\n            tokenizer&#061;tokenizer,<br \/>\n            timeout&#061;60,<br \/>\n            skip_prompt&#061;True,<br \/>\n            skip_special_tokens&#061;True<br \/>\n        )<br \/>\n        generate_kwargs &#061; {<br \/>\n            &#034;input_ids&#034;: model_inputs,<br \/>\n            &#034;streamer&#034;: streamer,<br \/>\n            &#034;max_new_tokens&#034;: max_length,<br \/>\n            &#034;do_sample&#034;: False,  # \u6539\u4e3a False<br \/>\n            &#034;top_p&#034;: top_p,<br \/>\n            &#034;temperature&#034;: temperature,<br \/>\n            &#034;stopping_criteria&#034;: StoppingCriteriaList([stop]),<br \/>\n            &#034;repetition_penalty&#034;: 1.2,<br \/>\n            
&#034;eos_token_id&#034;: model.config.eos_token_id,<br \/>\n        }<br \/>\n        try:<br \/>\n            t &#061; Thread(target&#061;model.generate, kwargs&#061;generate_kwargs)<br \/>\n            t.start()<br \/>\n            print(&#034;GLM-4:&#034;, end&#061;&#034;&#034;, flush&#061;True)<br \/>\n            for new_token in streamer:<br \/>\n                if new_token:<br \/>\n                    print(new_token, end&#061;&#034;&#034;, flush&#061;True)<br \/>\n                    history[-1][1] &#043;&#061; new_token<br \/>\n        except Exception as e:<br \/>\n            print(f&#034;An error occurred: {e}&#034;)<br \/>\n            print(f&#034;Error type: {type(e)}&#034;)<br \/>\n            import traceback<br \/>\n            traceback.print_exc()<\/p>\n<p>        history[-1][1] &#061; history[-1][1].strip()<\/p>\n<p>\u6ce8\u610f\u4ee5\u4e0a\u4ee3\u7801\u548c GLM \u5b98\u65b9\u63d0\u4f9b\u7684\u53ef\u80fd\u4e0d\u592a\u4e00\u6837&#xff0c;\u56e0\u4e3a\u5b98\u65b9\u7684\u6709\u7684\u62a5\u9519&#xff0c;\u6240\u4ee5\u6211\u7565\u4e3a\u4fee\u6539\u4e86\u4e00\u4e0b\u3002<\/p>\n<p>\u76f4\u63a5\u8fd0\u884c trans_cli_demo.py \u5c31\u53ef\u4ee5\u548c\u6a21\u578b\u4ea4\u4e92\u4e86<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233646-6802e20e97c16.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h4>\u5229\u7528 FastApi \u8c03\u7528\u6a21\u578b<\/h4>\n<p>\u8fd0\u884c\u4ee5\u4e0b\u4ee3\u7801\u521b\u5efa\u5e76\u542f\u52a8 Api \u670d\u52a1&#xff1a;<\/p>\n<p>\u8fd0\u884c python api.py<\/p>\n<p>from fastapi import FastAPI, Request<br \/>\nfrom transformers import AutoTokenizer, AutoModelForCausalLM<br \/>\nimport uvicorn<br \/>\nimport json<br \/>\nimport datetime<br \/>\nimport torch<\/p>\n<p># \u8bbe\u7f6e\u8bbe\u5907\u53c2\u6570<br \/>\nDEVICE &#061; &#034;cuda&#034;  # \u4f7f\u7528CUDA<br \/>\nDEVICE_ID &#061; &#034;0&#034;  # 
CUDA\u8bbe\u5907ID&#xff0c;\u5982\u679c\u672a\u8bbe\u7f6e\u5219\u4e3a\u7a7a<br \/>\nCUDA_DEVICE &#061; f&#034;{DEVICE}:{DEVICE_ID}&#034; if DEVICE_ID else DEVICE  # \u7ec4\u5408CUDA\u8bbe\u5907\u4fe1\u606f<\/p>\n<p># \u6e05\u7406GPU\u5185\u5b58\u51fd\u6570<br \/>\ndef torch_gc():<br \/>\n    if torch.cuda.is_available():  # \u68c0\u67e5\u662f\u5426\u53ef\u7528CUDA<br \/>\n        with torch.cuda.device(CUDA_DEVICE):  # \u6307\u5b9aCUDA\u8bbe\u5907<br \/>\n            torch.cuda.empty_cache()  # \u6e05\u7a7aCUDA\u7f13\u5b58<br \/>\n            torch.cuda.ipc_collect()  # \u6536\u96c6CUDA\u5185\u5b58\u788e\u7247<\/p>\n<p># \u521b\u5efaFastAPI\u5e94\u7528<br \/>\napp &#061; FastAPI()<\/p>\n<p># \u5904\u7406POST\u8bf7\u6c42\u7684\u7aef\u70b9<br \/>\n&#064;app.post(&#034;\/&#034;)<br \/>\nasync def create_item(request: Request):<br \/>\n    global model, tokenizer  # \u58f0\u660e\u5168\u5c40\u53d8\u91cf\u4ee5\u4fbf\u5728\u51fd\u6570\u5185\u90e8\u4f7f\u7528\u6a21\u578b\u548c\u5206\u8bcd\u5668<br \/>\n    json_post_raw &#061; await request.json()  # \u83b7\u53d6POST\u8bf7\u6c42\u7684JSON\u6570\u636e<br \/>\n    json_post &#061; json.dumps(json_post_raw)  # \u5c06JSON\u6570\u636e\u8f6c\u6362\u4e3a\u5b57\u7b26\u4e32<br \/>\n    json_post_list &#061; json.loads(json_post)  # \u5c06\u5b57\u7b26\u4e32\u8f6c\u6362\u4e3aPython\u5bf9\u8c61<br \/>\n    prompt &#061; json_post_list.get(&#039;prompt&#039;)  # \u83b7\u53d6\u8bf7\u6c42\u4e2d\u7684\u63d0\u793a<br \/>\n    history &#061; json_post_list.get(&#039;history&#039;)  # \u83b7\u53d6\u8bf7\u6c42\u4e2d\u7684\u5386\u53f2\u8bb0\u5f55<br \/>\n    max_length &#061; json_post_list.get(&#039;max_length&#039;, 2048)  # \u83b7\u53d6\u8bf7\u6c42\u4e2d\u7684\u6700\u5927\u957f\u5ea6<br \/>\n    top_p &#061; json_post_list.get(&#039;top_p&#039;, 0.7)  # \u83b7\u53d6\u8bf7\u6c42\u4e2d\u7684top_p\u53c2\u6570<br \/>\n    temperature &#061; json_post_list.get(&#039;temperature&#039;, 0.95)  # 
\u83b7\u53d6\u8bf7\u6c42\u4e2d\u7684\u6e29\u5ea6\u53c2\u6570<\/p>\n<p>    # \u51c6\u5907\u8f93\u5165<br \/>\n    messages &#061; []<br \/>\n    if history:<br \/>\n        for h in history:<br \/>\n            messages.append({&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: h[0]})<br \/>\n            messages.append({&#034;role&#034;: &#034;assistant&#034;, &#034;content&#034;: h[1]})<br \/>\n    messages.append({&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: prompt})<\/p>\n<p>    input_ids &#061; tokenizer.apply_chat_template(messages, return_tensors&#061;&#034;pt&#034;).to(model.device)<\/p>\n<p>    # \u751f\u6210\u56de\u590d<br \/>\n    with torch.no_grad():<br \/>\n        outputs &#061; model.generate(<br \/>\n            input_ids,<br \/>\n            max_new_tokens&#061;max_length,<br \/>\n            do_sample&#061;True,<br \/>\n            top_p&#061;top_p,<br \/>\n            temperature&#061;temperature,<br \/>\n        )<\/p>\n<p>    response &#061; tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens&#061;True)<\/p>\n<p>    now &#061; datetime.datetime.now()  # \u83b7\u53d6\u5f53\u524d\u65f6\u95f4<br \/>\n    time &#061; now.strftime(&#034;%Y-%m-%d %H:%M:%S&#034;)  # \u683c\u5f0f\u5316\u65f6\u95f4\u4e3a\u5b57\u7b26\u4e32<br \/>\n    # \u6784\u5efa\u54cd\u5e94JSON<br \/>\n    answer &#061; {<br \/>\n        &#034;response&#034;: response,<br \/>\n        &#034;history&#034;: history &#043; [[prompt, response]],<br \/>\n        &#034;status&#034;: 200,<br \/>\n        &#034;time&#034;: time<br \/>\n    }<br \/>\n    # \u6784\u5efa\u65e5\u5fd7\u4fe1\u606f<br \/>\n    log &#061; &#034;[&#034; &#043; time &#043; &#034;] &#034; &#043; &#039;&#034;, prompt:&#034;&#039; &#043; prompt &#043; &#039;&#034;, response:&#034;&#039; &#043; repr(response) &#043; &#039;&#034;&#039;<br \/>\n    print(log)  # \u6253\u5370\u65e5\u5fd7<br \/>\n    torch_gc()  # \u6267\u884cGPU\u5185\u5b58\u6e05\u7406<br \/>\n    return answer  # 
\u8fd4\u56de\u54cd\u5e94<\/p>\n<p># \u4e3b\u51fd\u6570\u5165\u53e3<br \/>\nif __name__ &#061;&#061; &#039;__main__&#039;:<br \/>\n    # \u52a0\u8f7d\u9884\u8bad\u7ec3\u7684\u5206\u8bcd\u5668\u548c\u6a21\u578b<br \/>\n    tokenizer &#061; AutoTokenizer.from_pretrained(&#034;\/root\/autodl-tmp\/ZhipuAI\/glm-4-9b-chat&#034;, trust_remote_code&#061;True)<br \/>\n    model &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n        &#034;\/root\/autodl-tmp\/ZhipuAI\/glm-4-9b-chat&#034;,<br \/>\n        torch_dtype&#061;torch.bfloat16,<br \/>\n        trust_remote_code&#061;True,<br \/>\n        device_map&#061;&#034;auto&#034;,<br \/>\n    )<br \/>\n    model.eval()  # \u8bbe\u7f6e\u6a21\u578b\u4e3a\u8bc4\u4f30\u6a21\u5f0f<br \/>\n    # \u542f\u52a8FastAPI\u5e94\u7528<br \/>\n    # \u75286006\u7aef\u53e3\u53ef\u4ee5\u5c06autodl\u7684\u7aef\u53e3\u6620\u5c04\u5230\u672c\u5730&#xff0c;\u4ece\u800c\u5728\u672c\u5730\u4f7f\u7528api<br \/>\n    uvicorn.run(app, host&#061;&#039;0.0.0.0&#039;, port&#061;6006, workers&#061;1)  # \u5728\u6307\u5b9a\u7aef\u53e3\u548c\u4e3b\u673a\u4e0a\u542f\u52a8\u5e94\u7528<\/p>\n<p>\u6d4b\u8bd5\u670d\u52a1<\/p>\n<p>curl -X POST &#034;http:\/\/127.0.0.1:6006&#034; \\<br \/>\n     -H &#039;Content-Type: application\/json&#039; \\<br \/>\n     -d &#039;{&#034;prompt&#034;: &#034;\u4f60\u597d&#034;, &#034;history&#034;: []}&#039;<\/p>\n<p>\u5229\u7528 FastApi \u540c\u6837\u53ef\u4ee5\u6d4b\u8bd5\u6a21\u578b\u7684\u8c03\u7528\u548c\u4ea4\u4e92\u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233647-6802e20f045da.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" 
\/><\/p>\n<p>\u6ce8\u610f&#xff0c;\u4ee5\u4e0a\u4ee3\u7801\u4f60\u53ef\u80fd\u4f1a\u5728\u7f51\u7edc\u4e0a\u627e\u5230\u7c7b\u4f3c\u7684&#xff0c;\u6211\u5728\u6700\u5f00\u59cb\u4f7f\u7528\u90a3\u4e9b\u4ee3\u7801\u7684\u65f6\u5019\u62a5\u5404\u79cd\u9519&#xff0c;\u539f\u56e0\u5927\u6982\u5305\u62ec\u6a21\u578b\u548c\u4ee3\u7801\u7248\u672c\u4e0d\u517c\u5bb9&#xff0c;\u7ec4\u4ef6\u5e93\u7248\u672c\u95ee\u9898\u7b49\u3002\u6240\u4ee5\u4ee5\u4e0a\u4ee3\u7801\u662f\u7ecf\u8fc7\u6211\u7684\u4fee\u6539\u4e4b\u540e\u53ef\u8fd0\u884c\u7684\u4ee3\u7801<\/p>\n<h3>RAG<\/h3>\n<p>\u5728\u4e4b\u524d\u7684\u6587\u7ae0\u4e2d\u6211\u4eec\u901a\u8fc7 Ollama \u5728\u7b14\u8bb0\u672c\u7535\u8111\u4e0a\u90e8\u7f72\u8fc7\u5927\u6a21\u578b&#xff0c;\u901a\u8fc7\u5927\u6a21\u578b\u4ea7\u54c1\u7684 API \u8c03\u7528\u8fc7\u5927\u6a21\u578b &#xff0c;\u552f\u72ec\u6ca1\u6709\u5728\u670d\u52a1\u5668\u4e0a\u79c1\u6709\u5316\u90e8\u7f72\u4e00\u4e2a\u5927\u6a21\u578b\u3002<\/p>\n<p>\u524d\u6587\u6211\u4eec\u5df2\u7ecf\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u597d\u4e86\u5927\u6a21\u578b glm-4-9b-chat \u8fd9\u662f\u4e00\u4e2a\u62e5\u6709 90 \u4ebf\u53c2\u6570\u7684\u6a21\u578b\u3002\u4e0b\u9762\u6211\u4eec\u4ecb\u7ecd\u5982\u4f55\u5728 llamaindex \u4e2d\u8c03\u7528\u5b83\u3002<\/p>\n<p>\u5f88\u7b80\u5355&#xff0c;\u9996\u5148\u6211\u4eec\u8fd8\u662f\u5148\u81ea\u5b9a\u4e49\u4e00\u4e2aLLM &#xff0c;\u53c2\u8003\u4ee5\u4e0b\u4ee3\u7801&#xff1a;<\/p>\n<p>import logging<br \/>\nfrom typing import Any, List, Optional<br \/>\nfrom llama_index.core.llms import (<br \/>\n    CustomLLM,<br \/>\n    CompletionResponse,<br \/>\n    CompletionResponseGen,<br \/>\n    LLMMetadata,<br \/>\n)<br \/>\nfrom llama_index.core.llms.callbacks import llm_completion_callback<br \/>\nfrom transformers import AutoTokenizer, AutoModelForCausalLM<br \/>\nimport torch<\/p>\n<p># \u8bbe\u7f6e\u65e5\u5fd7<br \/>\nlogging.basicConfig(level&#061;logging.DEBUG)<br \/>\nlogger &#061; logging.getLogger(__name__)<\/p>\n<p>class 
LocalGLM4(CustomLLM):<\/p>\n<p>    context_window: int &#061; 8192  # \u9ed8\u8ba4\u4e0a\u4e0b\u6587\u7a97\u53e3\u5927\u5c0f<br \/>\n    num_output: int &#061; 2048  # \u9ed8\u8ba4\u8f93\u51fa\u7684token\u6570\u91cf<br \/>\n    model_name: str &#061; &#034;glm-4-9b-chat&#034;  # \u6a21\u578b\u540d\u79f0<br \/>\n    tokenizer: object &#061; None  # \u5206\u8bcd\u5668<br \/>\n    model: object &#061; None  # \u6a21\u578b<\/p>\n<p>    def __init__(self, pretrained_model_name_or_path: str):<br \/>\n        super().__init__()<\/p>\n<p>        # GPU\u65b9\u5f0f\u52a0\u8f7d\u6a21\u578b<br \/>\n        self.tokenizer &#061; AutoTokenizer.from_pretrained(<br \/>\n            pretrained_model_name_or_path, trust_remote_code&#061;True<br \/>\n        )<br \/>\n        self.model &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n            pretrained_model_name_or_path,<br \/>\n            torch_dtype&#061;torch.float16,  # \u6216\u8005\u4f7f\u7528 torch.bfloat16<br \/>\n            low_cpu_mem_usage&#061;True,<br \/>\n            trust_remote_code&#061;True,<br \/>\n            device_map&#061;&#034;auto&#034;,<br \/>\n        )<\/p>\n<p>        # CPU\u65b9\u5f0f\u52a0\u8f7d\u6a21\u578b<br \/>\n        # self.tokenizer &#061; AutoTokenizer.from_pretrained(pretrained_model_name_or_path, device_map&#061;&#034;cpu&#034;, trust_remote_code&#061;True)<br \/>\n        # self.model &#061; AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, device_map&#061;&#034;cpu&#034;, trust_remote_code&#061;True)<br \/>\n        # self.model &#061; self.model.float()<\/p>\n<p>        # \u5c1d\u8bd5\u83b7\u53d6\u6a21\u578b\u7684\u5b9e\u9645\u4e0a\u4e0b\u6587\u7a97\u53e3\u5927\u5c0f<br \/>\n        if hasattr(self.model.config, &#039;seq_length&#039;):<br \/>\n            self.context_window &#061; self.model.config.seq_length<br \/>\n        elif hasattr(self.model.config, &#039;max_position_embeddings&#039;):<br \/>\n            self.context_window &#061; 
self.model.config.max_position_embeddings<br \/>\n        logger.info(f&#034;Using context window size: {self.context_window}&#034;)<\/p>\n<p>    &#064;property<br \/>\n    def metadata(self) -&gt; LLMMetadata:<br \/>\n        &#034;&#034;&#034;Get LLM metadata.&#034;&#034;&#034;<br \/>\n        # \u5f97\u5230LLM\u7684\u5143\u6570\u636e<br \/>\n        return LLMMetadata(<br \/>\n            context_window&#061;self.context_window,<br \/>\n            num_output&#061;self.num_output,<br \/>\n            model_name&#061;self.model_name,<br \/>\n        )<\/p>\n<p>    &#064;llm_completion_callback()<br \/>\n    def complete(self, prompt: str, **kwargs: Any) -&gt; CompletionResponse:<br \/>\n        # \u5b8c\u6210\u51fd\u6570<br \/>\n        print(&#034;\u5b8c\u6210\u51fd\u6570&#034;)<\/p>\n<p>        inputs &#061; self.tokenizer.encode(prompt, return_tensors&#061;&#034;pt&#034;).cuda()  # GPU\u65b9\u5f0f<br \/>\n        # inputs &#061; self.tokenizer.encode(prompt, return_tensors&#061;&#039;pt&#039;)  # CPU\u65b9\u5f0f<br \/>\n        outputs &#061; self.model.generate(inputs, max_length&#061;self.num_output)<br \/>\n        response &#061; self.tokenizer.decode(outputs[0])<br \/>\n        return CompletionResponse(text&#061;response)<\/p>\n<p>    &#064;llm_completion_callback()<br \/>\n    def stream_complete(self, prompt: str, **kwargs: Any) -&gt; CompletionResponseGen:<br \/>\n        # \u6d41\u5f0f\u5b8c\u6210\u51fd\u6570<br \/>\n        print(&#034;\u6d41\u5f0f\u5b8c\u6210\u51fd\u6570&#034;)<\/p>\n<p>        inputs &#061; self.tokenizer.encode(prompt, return_tensors&#061;&#034;pt&#034;).cuda()  # GPU\u65b9\u5f0f<br \/>\n        # inputs &#061; self.tokenizer.encode(prompt, return_tensors&#061;&#039;pt&#039;)  # CPU\u65b9\u5f0f<br \/>\n        outputs &#061; self.model.generate(inputs, max_length&#061;self.num_output)<br \/>\n        response &#061; self.tokenizer.decode(outputs[0])<br \/>\n        for token in response:<br \/>\n            yield 
CompletionResponse(text&#061;token, delta&#061;token)<\/p>\n<p>\u5269\u4e0b\u7684\u6b65\u9aa4\u8ddf\u4e4b\u524d\u7684\u8c03\u7528\u65b9\u5f0f\u3001\u4ee3\u7801\u7f16\u7a0b\u6a21\u578b\u51e0\u4e4e\u6ca1\u6709\u4efb\u4f55\u533a\u522b&#xff1a;<\/p>\n<p>    embed_model_path &#061; &#034;\/root\/autodl-tmp\/BAAI\/bge-base-zh-v1.5&#034;<br \/>\n    pretrained_model_name_or_path &#061; r&#034;\/root\/autodl-tmp\/ZhipuAI\/glm-4-9b-chat&#034;<\/p>\n<p>    # \u8bbe\u7f6eLLM\u548c\u5d4c\u5165\u6a21\u578b<br \/>\n    logger.info(&#034;Setting up LLM and embedding model&#034;)<br \/>\n    Settings.llm &#061; LocalGLM4(pretrained_model_name_or_path)<br \/>\n    Settings.embed_model &#061; HuggingFaceEmbedding(<br \/>\n        model_name&#061;f&#034;{embed_model_path}&#034;, device&#061;&#034;cuda&#034;<br \/>\n    )<\/p>\n<p>    # \u4ece\u6307\u5b9a\u76ee\u5f55\u52a0\u8f7d\u6587\u6863\u6570\u636e<br \/>\n    logger.info(&#034;Loading documents&#034;)<br \/>\n    documents &#061; SimpleDirectoryReader(input_files&#061;[&#034;.\/data\/sample.txt&#034;]).load_data()<\/p>\n<p>    # \u521b\u5efa\u7d22\u5f15\u548c\u67e5\u8be2\u5f15\u64ce<br \/>\n    logger.info(&#034;Creating index and query engine&#034;)<br \/>\n    index &#061; VectorStoreIndex.from_documents(documents)<br \/>\n    query_engine &#061; index.as_query_engine(streaming&#061;False)<\/p>\n<p>    # \u6267\u884c\u67e5\u8be2<br \/>\n    logger.info(&#034;Executing query&#034;)<br \/>\n    response &#061; query_engine.query(query)<\/p>\n<p>    # \u5904\u7406\u5e76\u8f93\u51fa\u54cd\u5e94<br \/>\n    if hasattr(response, &#034;response_gen&#034;):<br \/>\n        # \u6d41\u5f0f\u8f93\u51fa<br \/>\n        for text in response.response_gen:<br \/>\n            print(text, end&#061;&#034;&#034;, flush&#061;True)<br \/>\n            sys.stdout.flush()  # \u786e\u4fdd\u7acb\u5373\u8f93\u51fa<br \/>\n    else:<br \/>\n        # \u975e\u6d41\u5f0f\u8f93\u51fa<br \/>\n        print(response.response, end&#061;&#034;&#034;, 
flush&#061;True)<\/p>\n<p>\u76f8\u5173\u4ee3\u7801\u53ef\u4ee5\u5728\u8fd9\u91cc\u67e5\u770b&#xff1a;github.com\/xiaobox\/lla\u2026<\/p>\n<h3>\u603b\u7ed3<\/h3>\n<p>\u5229\u7528\u79df\u7528\u7684 GPU \u8d44\u6e90\u90e8\u7f72\u4e86\u5f00\u6e90\u5927\u6a21\u578b glm-4-9b-chat &#xff0c;\u901a\u8fc7\u719f\u6089\u90e8\u7f72\u65b9\u5f0f\u548c\u6d41\u7a0b&#xff0c;\u4f60\u53ef\u4ee5\u7167\u732b\u753b\u864e\u90e8\u7f72\u5176\u4ed6\u5f00\u6e90\u6a21\u578b\u3002\u63a5\u7740\u6211\u4eec\u5c06\u4e4b\u524d RAG \u9879\u76ee\u4e2d\u5bf9LLM\u7684\u8c03\u7528\u6539\u4e3a\u670d\u52a1\u5668\u90e8\u7f72\u7684\u672c\u5730\u5f00\u6e90\u6a21\u578b&#xff0c;\u5b9e\u73b0\u4e86\u6a21\u578b\u548c\u8c03\u7528\u7684\u79c1\u6709\u5316\u3002\u5e0c\u671b\u8fd9\u7bc7\u6587\u7ae0\u80fd\u591f\u5e2e\u52a9\u5230\u6709\u7c7b\u4f3c\u9700\u6c42\u7684\u670b\u53cb\u3002<\/p>\n<h3>\u5982\u4f55\u7cfb\u7edf\u7684\u53bb\u5b66\u4e60\u5927\u6a21\u578bLLM &#xff1f;<\/h3>\n<p>\u5927\u6a21\u578b\u65f6\u4ee3&#xff0c;\u706b\u7206\u51fa\u5708\u7684LLM\u5927\u6a21\u578b\u8ba9\u7a0b\u5e8f\u5458\u4eec\u5f00\u59cb\u91cd\u65b0\u8bc4\u4f30\u81ea\u5df1\u7684\u672c\u9886\u3002 \u201cAI\u4f1a\u53d6\u4ee3\u54ea\u4e9b\u884c\u4e1a&#xff1f;\u201d\u201c\u8c01\u7684\u996d\u7897\u53c8\u5c06\u4e0d\u4fdd\u4e86&#xff1f;\u201d\u7b49\u95ee\u9898\u70ed\u8bae\u4e0d\u65ad\u3002<\/p>\n<p>\u4e8b\u5b9e\u4e0a&#xff0c;\u62a2\u4f60\u996d\u7897\u7684\u4e0d\u662fAI&#xff0c;\u800c\u662f\u4f1a\u5229\u7528AI\u7684\u4eba\u3002<\/p>\n<p>\u7ee7\u79d1\u5927\u8baf\u98de\u3001\u963f\u91cc\u3001\u534e\u4e3a\u7b49\u5de8\u5934\u516c\u53f8\u53d1\u5e03AI\u4ea7\u54c1\u540e&#xff0c;\u5f88\u591a\u4e2d\u5c0f\u4f01\u4e1a\u4e5f\u9646\u7eed\u8fdb\u573a&#xff01;\u8d85\u9ad8\u5e74\u85aa&#xff0c;\u6316\u6398AI\u5927\u6a21\u578b\u4eba\u624d&#xff01; 
\u5982\u4eca\u5927\u5382\u8001\u677f\u4eec&#xff0c;\u4e5f\u66f4\u503e\u5411\u4e8e\u4f1aAI\u7684\u4eba&#xff0c;\u666e\u901a\u7a0b\u5e8f\u5458&#xff0c;\u8fd8\u6709\u5e94\u5bf9\u7684\u673a\u4f1a\u5417&#xff1f;<\/p>\n<h6>\u4e0e\u5176\u7126\u8651\u2026\u2026<\/h6>\n<p>\u4e0d\u5982\u6210\u4e3a\u300c\u638c\u63e1AI\u5de5\u5177\u7684\u6280\u672f\u4eba\u300d&#xff0c;\u6bd5\u7adfAI\u65f6\u4ee3&#xff0c;\u8c01\u5148\u5c1d\u8bd5&#xff0c;\u8c01\u5c31\u80fd\u5360\u5f97\u5148\u673a&#xff01;<\/p>\n<p>\u4f46\u662fLLM\u76f8\u5173\u7684\u5185\u5bb9\u5f88\u591a&#xff0c;\u73b0\u5728\u7f51\u4e0a\u7684\u8001\u8bfe\u7a0b\u8001\u6559\u6750\u5173\u4e8eLLM\u53c8\u592a\u5c11\u3002\u6240\u4ee5\u73b0\u5728\u5c0f\u767d\u5165\u95e8\u5c31\u53ea\u80fd\u9760\u81ea\u5b66&#xff0c;\u5b66\u4e60\u6210\u672c\u548c\u95e8\u69db\u5f88\u9ad8\u3002<\/p>\n<p>\u9488\u5bf9\u6240\u6709\u81ea\u5b66\u9047\u5230\u56f0\u96be\u7684\u540c\u5b66\u4eec&#xff0c;\u6211\u5e2e\u5927\u5bb6\u7cfb\u7edf\u68b3\u7406\u5927\u6a21\u578b\u5b66\u4e60\u8109\u7edc&#xff0c;\u5c06\u8fd9\u4efd LLM\u5927\u6a21\u578b\u8d44\u6599 \u5206\u4eab\u51fa\u6765&#xff1a;\u5305\u62ecLLM\u5927\u6a21\u578b\u4e66\u7c4d\u3001640\u5957\u5927\u6a21\u578b\u884c\u4e1a\u62a5\u544a\u3001LLM\u5927\u6a21\u578b\u5b66\u4e60\u89c6\u9891\u3001LLM\u5927\u6a21\u578b\u5b66\u4e60\u8def\u7ebf\u3001\u5f00\u6e90\u5927\u6a21\u578b\u5b66\u4e60\u6559\u7a0b\u7b49, &#x1f61d;\u6709\u9700\u8981\u7684\u5c0f\u4f19\u4f34&#xff0c;\u53ef\u4ee5 \u626b\u63cf\u4e0b\u65b9\u4e8c\u7ef4\u7801\u9886\u53d6&#x1f193;\u2193\u2193\u2193<\/p>\n<p>&#x1f449;<font color=\"#FF0000\">CSDN\u5927\u793c\u5305<\/font>&#x1f381;&#xff1a;\u5168\u7f51\u6700\u5168\u300aLLM\u5927\u6a21\u578b\u5165\u95e8&#043;\u8fdb\u9636\u5b66\u4e60\u8d44\u6e90\u5305\u300b\u514d\u8d39\u5206\u4eab<b><font color=\"#177f3e\">&#xff08;\u5b89\u5168\u94fe\u63a5&#xff0c;\u653e\u5fc3\u70b9\u51fb&#xff09;<\/font><\/b>&#x1f448;<\/p>\n<p>\u200b<img decoding=\"async\" 
src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233647-6802e20f5411c.png\" \/><\/p>\n<h3>\u4e00\u3001LLM\u5927\u6a21\u578b\u7ecf\u5178\u4e66\u7c4d<\/h3>\n<p>AI\u5927\u6a21\u578b\u5df2\u7ecf\u6210\u4e3a\u4e86\u5f53\u4eca\u79d1\u6280\u9886\u57df\u7684\u4e00\u5927\u70ed\u70b9&#xff0c;\u90a3\u4ee5\u4e0b\u8fd9\u4e9b\u5927\u6a21\u578b\u4e66\u7c4d\u5c31\u662f\u975e\u5e38\u4e0d\u9519\u7684\u5b66\u4e60\u8d44\u6e90\u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233647-6802e20f8148e.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h3>\u4e8c\u3001640\u5957LLM\u5927\u6a21\u578b\u62a5\u544a\u5408\u96c6<\/h3>\n<p>\u8fd9\u5957\u5305\u542b640\u4efd\u62a5\u544a\u7684\u5408\u96c6&#xff0c;\u6db5\u76d6\u4e86\u5927\u6a21\u578b\u7684\u7406\u8bba\u7814\u7a76\u3001\u6280\u672f\u5b9e\u73b0\u3001\u884c\u4e1a\u5e94\u7528\u7b49\u591a\u4e2a\u65b9\u9762\u3002\u65e0\u8bba\u60a8\u662f\u79d1\u7814\u4eba\u5458\u3001\u5de5\u7a0b\u5e08&#xff0c;\u8fd8\u662f\u5bf9AI\u5927\u6a21\u578b\u611f\u5174\u8da3\u7684\u7231\u597d\u8005&#xff0c;\u8fd9\u5957\u62a5\u544a\u5408\u96c6\u90fd\u5c06\u4e3a\u60a8\u63d0\u4f9b\u5b9d\u8d35\u7684\u4fe1\u606f\u548c\u542f\u793a\u3002(\u51e0\u4e4e\u6db5\u76d6\u6240\u6709\u884c\u4e1a)<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233647-6802e20fd8c93.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h3>\u4e09\u3001LLM\u5927\u6a21\u578b\u7cfb\u5217\u89c6\u9891\u6559\u7a0b<\/h3>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233648-6802e21077344.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h4>\u56db\u3001LLM\u5927\u6a21\u578b\u5f00\u6e90\u6559\u7a0b&#xff08;LLaMA\/Meta\/chatglm\/chatgpt&#xff09;<\/h4>\n<p><img decoding=\"async\" 
src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233649-6802e211467a4.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<h2>LLM\u5927\u6a21\u578b\u5b66\u4e60\u8def\u7ebf \u2193<\/h2>\n<h4>\u9636\u6bb51&#xff1a;AI\u5927\u6a21\u578b\u65f6\u4ee3\u7684\u57fa\u7840\u7406\u89e3<\/h4>\n<ul>\n<li>\n<p>\u76ee\u6807&#xff1a;\u4e86\u89e3AI\u5927\u6a21\u578b\u7684\u57fa\u672c\u6982\u5ff5\u3001\u53d1\u5c55\u5386\u7a0b\u548c\u6838\u5fc3\u539f\u7406\u3002<\/p>\n<\/li>\n<li>\n<p>\u5185\u5bb9&#xff1a;<\/p>\n<ul>\n<li>L1.1 \u4eba\u5de5\u667a\u80fd\u7b80\u8ff0\u4e0e\u5927\u6a21\u578b\u8d77\u6e90<\/li>\n<li>L1.2 \u5927\u6a21\u578b\u4e0e\u901a\u7528\u4eba\u5de5\u667a\u80fd<\/li>\n<li>L1.3 GPT\u6a21\u578b\u7684\u53d1\u5c55\u5386\u7a0b<\/li>\n<li>L1.4 \u6a21\u578b\u5de5\u7a0b<\/li>\n<li>L1.4.1 \u77e5\u8bc6\u5927\u6a21\u578b<\/li>\n<li>L1.4.2 \u751f\u4ea7\u5927\u6a21\u578b<\/li>\n<li>L1.4.3 \u6a21\u578b\u5de5\u7a0b\u65b9\u6cd5\u8bba<\/li>\n<li>L1.4.4 \u6a21\u578b\u5de5\u7a0b\u5b9e\u8df5<\/li>\n<li>L1.5 GPT\u5e94\u7528\u6848\u4f8b<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h4>\u9636\u6bb52&#xff1a;AI\u5927\u6a21\u578bAPI\u5e94\u7528\u5f00\u53d1\u5de5\u7a0b<\/h4>\n<ul>\n<li>\n<p>\u76ee\u6807&#xff1a;\u638c\u63e1AI\u5927\u6a21\u578bAPI\u7684\u4f7f\u7528\u548c\u5f00\u53d1&#xff0c;\u4ee5\u53ca\u76f8\u5173\u7684\u7f16\u7a0b\u6280\u80fd\u3002<\/p>\n<\/li>\n<li>\n<p>\u5185\u5bb9&#xff1a;<\/p>\n<ul>\n<li>L2.1 API\u63a5\u53e3<\/li>\n<li>L2.1.1 OpenAI API\u63a5\u53e3<\/li>\n<li>L2.1.2 Python\u63a5\u53e3\u63a5\u5165<\/li>\n<li>L2.1.3 BOT\u5de5\u5177\u7c7b\u6846\u67b6<\/li>\n<li>L2.1.4 \u4ee3\u7801\u793a\u4f8b<\/li>\n<li>L2.2 Prompt\u6846\u67b6<\/li>\n<li>L2.3 \u6d41\u6c34\u7ebf\u5de5\u7a0b<\/li>\n<li>L2.4 
\u603b\u7ed3\u4e0e\u5c55\u671b<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h4>\u9636\u6bb53&#xff1a;AI\u5927\u6a21\u578b\u5e94\u7528\u67b6\u6784\u5b9e\u8df5<\/h4>\n<ul>\n<li>\n<p>\u76ee\u6807&#xff1a;\u6df1\u5165\u7406\u89e3AI\u5927\u6a21\u578b\u7684\u5e94\u7528\u67b6\u6784&#xff0c;\u5e76\u80fd\u591f\u8fdb\u884c\u79c1\u6709\u5316\u90e8\u7f72\u3002<\/p>\n<\/li>\n<li>\n<p>\u5185\u5bb9&#xff1a;<\/p>\n<ul>\n<li>L3.1 Agent\u6a21\u578b\u6846\u67b6<\/li>\n<li>L3.2 MetaGPT<\/li>\n<li>L3.3 ChatGLM<\/li>\n<li>L3.4 LLAMA<\/li>\n<li>L3.5 \u5176\u4ed6\u5927\u6a21\u578b\u4ecb\u7ecd<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h4>\u9636\u6bb54&#xff1a;AI\u5927\u6a21\u578b\u79c1\u6709\u5316\u90e8\u7f72<\/h4>\n<ul>\n<li>\n<p>\u76ee\u6807&#xff1a;\u638c\u63e1\u591a\u79cdAI\u5927\u6a21\u578b\u7684\u79c1\u6709\u5316\u90e8\u7f72&#xff0c;\u5305\u62ec\u591a\u6a21\u6001\u548c\u7279\u5b9a\u9886\u57df\u6a21\u578b\u3002<\/p>\n<\/li>\n<li>\n<p>\u5185\u5bb9&#xff1a;<\/p>\n<ul>\n<li>L4.1 \u6a21\u578b\u79c1\u6709\u5316\u90e8\u7f72\u6982\u8ff0<\/li>\n<li>L4.2 \u6a21\u578b\u79c1\u6709\u5316\u90e8\u7f72\u7684\u5173\u952e\u6280\u672f<\/li>\n<li>L4.3 \u6a21\u578b\u79c1\u6709\u5316\u90e8\u7f72\u7684\u5b9e\u65bd\u6b65\u9aa4<\/li>\n<li>L4.4 \u6a21\u578b\u79c1\u6709\u5316\u90e8\u7f72\u7684\u5e94\u7528\u573a\u666f<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<p>\u8fd9\u4efd LLM\u5927\u6a21\u578b\u8d44\u6599 \u5305\u62ecLLM\u5927\u6a21\u578b\u4e66\u7c4d\u3001640\u5957\u5927\u6a21\u578b\u884c\u4e1a\u62a5\u544a\u3001LLM\u5927\u6a21\u578b\u5b66\u4e60\u89c6\u9891\u3001LLM\u5927\u6a21\u578b\u5b66\u4e60\u8def\u7ebf\u3001\u5f00\u6e90\u5927\u6a21\u578b\u5b66\u4e60\u6559\u7a0b\u7b49, &#x1f61d;\u6709\u9700\u8981\u7684\u5c0f\u4f19\u4f34&#xff0c;\u53ef\u4ee5 \u626b\u63cf\u4e0b\u65b9\u4e8c\u7ef4\u7801\u9886\u53d6&#x1f193;\u2193\u2193\u2193<\/p>\n<p>&#x1f449;<font 
color=\"#FF0000\">CSDN\u5927\u793c\u5305<\/font>&#x1f381;&#xff1a;\u5168\u7f51\u6700\u5168\u300aLLM\u5927\u6a21\u578b\u5165\u95e8&#043;\u8fdb\u9636\u5b66\u4e60\u8d44\u6e90\u5305\u300b\u514d\u8d39\u5206\u4eab<b><font color=\"#177f3e\">&#xff08;\u5b89\u5168\u94fe\u63a5&#xff0c;\u653e\u5fc3\u70b9\u51fb&#xff09;<\/font><\/b>&#x1f448;<\/p>\n<p>\u200b<img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233647-6802e20f5411c.png\" \/><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb964\u6b21\uff0c\u70b9\u8d5e16\u6b21\uff0c\u6536\u85cf20\u6b21\u3002\u672c\u5730\u670d\u52a1\u5668\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b\u6709\u4e00\u4e2a\u524d\u63d0\uff0c\u5c31\u662f\u5f97\u6709 GPU \u663e\u5361\u8d44\u6e90\uff0c\u5728\u6211\u4e0b\u9762\u7684\u4f8b\u5b50\u4e2d\u6211\u79df\u7528\u4e86 autodl \u4e2d\u7684\u7b97\u529b\u8d44\u6e90\uff0c\u5177\u4f53\u662f\u79df\u7528\u4e86\u4e00\u5f20\u6d88\u8d39\u7ea7\u522b\u7684 RTX 3090 \u663e\u5361\u3002_\u5927\u6a21\u578b\u5728\u670d\u52a1\u5668\u90e8\u7f72<\/p>\n","protected":false},"author":2,"featured_media":23359,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[1789,175,347,1788,50,132,1790],"topic":[],"class_list":["post-23367","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-chatglm","tag-chatgpt","tag-llama","tag-llama3","tag-50","tag-132","tag-1790"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, 
max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/23367.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb964\u6b21\uff0c\u70b9\u8d5e16\u6b21\uff0c\u6536\u85cf20\u6b21\u3002\u672c\u5730\u670d\u52a1\u5668\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b\u6709\u4e00\u4e2a\u524d\u63d0\uff0c\u5c31\u662f\u5f97\u6709 GPU \u663e\u5361\u8d44\u6e90\uff0c\u5728\u6211\u4e0b\u9762\u7684\u4f8b\u5b50\u4e2d\u6211\u79df\u7528\u4e86 autodl \u4e2d\u7684\u7b97\u529b\u8d44\u6e90\uff0c\u5177\u4f53\u662f\u79df\u7528\u4e86\u4e00\u5f20\u6d88\u8d39\u7ea7\u522b\u7684 RTX 3090 \u663e\u5361\u3002_\u5927\u6a21\u578b\u5728\u670d\u52a1\u5668\u90e8\u7f72\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/23367.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-04-18T23:36:50+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233646-6802e20e1da94.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"8 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/23367.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/23367.html\",\"name\":\"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-04-18T23:36:50+00:00\",\"dateModified\":\"2025-04-18T23:36:50+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/23367.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/23367.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/23367.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/23367.html","og_locale":"zh_CN","og_type":"article","og_title":"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb964\u6b21\uff0c\u70b9\u8d5e16\u6b21\uff0c\u6536\u85cf20\u6b21\u3002\u672c\u5730\u670d\u52a1\u5668\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b\u6709\u4e00\u4e2a\u524d\u63d0\uff0c\u5c31\u662f\u5f97\u6709 GPU \u663e\u5361\u8d44\u6e90\uff0c\u5728\u6211\u4e0b\u9762\u7684\u4f8b\u5b50\u4e2d\u6211\u79df\u7528\u4e86 autodl \u4e2d\u7684\u7b97\u529b\u8d44\u6e90\uff0c\u5177\u4f53\u662f\u79df\u7528\u4e86\u4e00\u5f20\u6d88\u8d39\u7ea7\u522b\u7684 RTX 3090 
\u663e\u5361\u3002_\u5927\u6a21\u578b\u5728\u670d\u52a1\u5668\u90e8\u7f72","og_url":"https:\/\/www.wsisp.com\/helps\/23367.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-04-18T23:36:50+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250418233646-6802e20e1da94.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"8 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/23367.html","url":"https:\/\/www.wsisp.com\/helps\/23367.html","name":"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat \u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-04-18T23:36:50+00:00","dateModified":"2025-04-18T23:36:50+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/23367.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/23367.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/23367.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u3010\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u3011\u5982\u4f55\u5728\u670d\u52a1\u5668\u4e0a\u90e8\u7f72\u5f00\u6e90\u5927\u6a21\u578b GLM-4-9B-Chat 
\u5e76\u5e94\u7528\u5230RAG\u5e94\u7528\uff1f"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/23367","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=23367"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/23367\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/23359"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=23367"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/
www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=23367"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=23367"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=23367"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}