{"id":69997,"date":"2026-02-01T10:33:07","date_gmt":"2026-02-01T02:33:07","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/69997.html"},"modified":"2026-02-01T10:33:07","modified_gmt":"2026-02-01T02:33:07","slug":"glm-4-9b-chat-1m%e9%83%a8%e7%bd%b2%e6%95%99%e7%a8%8b%ef%bc%9anvidia-triton%e6%8e%a8%e7%90%86%e6%9c%8d%e5%8a%a1%e5%99%a8%e9%9b%86%e6%88%90glm-4-9b-chat-1m","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/69997.html","title":{"rendered":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M"},"content":{"rendered":"<h2>GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b&#xff1a;NVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M<\/h2>\n<h3>1. \u4e3a\u4ec0\u4e48\u4f60\u9700\u8981\u8fd9\u4e2a\u6a21\u578b\u2014\u2014\u4e0d\u662f\u53c8\u4e00\u4e2a\u201c\u5927\u53c2\u6570\u201d\u5671\u5934<\/h3>\n<p>\u4f60\u6709\u6ca1\u6709\u9047\u5230\u8fc7\u8fd9\u6837\u7684\u573a\u666f&#xff1a; \u4e00\u4efd200\u9875\u7684PDF\u8d22\u62a5&#xff0c;\u9700\u8981\u5feb\u901f\u63d0\u53d6\u5173\u952e\u6761\u6b3e\u3001\u5bf9\u6bd4\u4e09\u5e74\u6570\u636e\u53d8\u5316\u3001\u751f\u6210\u6458\u8981\u5e76\u56de\u7b54\u201c\u73b0\u91d1\u6d41\u662f\u5426\u8fde\u7eed\u4e09\u5e74\u4e3a\u8d1f\u201d\u8fd9\u7c7b\u5177\u4f53\u95ee\u9898&#xff1f; 
\u6216\u8005&#xff0c;\u5ba2\u6237\u53d1\u6765\u4e00\u4efd30\u4e07\u5b57\u7684\u6280\u672f\u767d\u76ae\u4e66&#xff0c;\u8981\u6c4210\u5206\u949f\u5185\u5b8c\u6210\u7ed3\u6784\u5316\u68b3\u7406&#043;\u98ce\u9669\u70b9\u6807\u6ce8&#043;\u95ee\u7b54\u51c6\u5907&#xff1f;<\/p>\n<p>\u4f20\u7edf\u65b9\u6848\u8981\u4e48\u9760\u4eba\u5de5\u786c\u5543&#xff0c;\u8981\u4e48\u7528\u5c0f\u6a21\u578b\u5206\u6bb5\u5904\u7406\u518d\u62fc\u63a5\u2014\u2014\u7ed3\u679c\u662f\u4fe1\u606f\u5272\u88c2\u3001\u4e0a\u4e0b\u6587\u4e22\u5931\u3001\u903b\u8f91\u9519\u4f4d\u3002\u800cGLM-4-9B-Chat-1M&#xff0c;\u5c31\u662f\u4e13\u4e3a\u8fd9\u79cd\u771f\u5b9e\u957f\u6587\u672c\u4efb\u52a1\u8bbe\u8ba1\u7684\u201c\u5355\u5361\u4f01\u4e1a\u7ea7\u89e3\u6cd5\u201d\u3002<\/p>\n<p>\u5b83\u4e0d\u662f\u628a128K\u5f3a\u884c\u62c9\u52301M\u7684\u5de5\u7a0b\u7f1d\u5408\u602a&#xff0c;\u800c\u662f\u901a\u8fc7\u4f4d\u7f6e\u7f16\u7801\u91cd\u8bad\u4e0e\u957f\u5e8f\u5217\u6301\u7eed\u8bad\u7ec3&#xff0c;\u8ba990\u4ebf\u53c2\u6570\u771f\u6b63\u201c\u7406\u89e3\u201d\u767e\u4e07\u7ea7token\u7684\u8bed\u4e49\u8fde\u8d2f\u6027\u3002\u5b9e\u6d4b\u57281M\u957f\u5ea6needle-in-haystack\u4efb\u52a1\u4e2d\u51c6\u786e\u7387100%&#xff0c;LongBench-Chat\u8bc4\u6d4b\u5f97\u52067.82&#xff0c;\u6bd4\u540c\u5c3a\u5bf8Llama-3-8B\u9ad8\u51fa\u8fd10.5\u5206\u3002\u66f4\u5173\u952e\u7684\u662f\u2014\u2014RTX 4090&#xff08;24GB\u663e\u5b58&#xff09;\u8dd1INT4\u91cf\u5316\u7248&#xff0c;\u663e\u5b58\u5360\u7528\u4ec59GB&#xff0c;\u5269\u4f59\u7a7a\u95f4\u8fd8\u80fd\u540c\u65f6\u5f00\u4e2aWebUI\u670d\u52a1\u3002<\/p>\n<p>\u8fd9\u4e0d\u662f\u5b9e\u9a8c\u5ba4\u73a9\u5177&#xff0c;\u800c\u662f\u80fd\u76f4\u63a5\u585e\u8fdb\u4f60\u73b0\u6709GPU\u670d\u52a1\u5668\u3001\u5f53\u5929\u4e0a\u7ebf\u7684\u751f\u4ea7\u7ea7\u5de5\u5177\u3002<\/p>\n<h3>2. 
\u90e8\u7f72\u524d\u5fc5\u77e5\u7684\u4e09\u4ef6\u4e8b&#xff1a;\u786c\u4ef6\u3001\u534f\u8bae\u4e0e\u80fd\u529b\u8fb9\u754c<\/h3>\n<h4>2.1 \u786c\u4ef6\u95e8\u69db&#xff1a;24GB\u663e\u5b58\u771f\u80fd\u8dd1\u6ee11M\u4e0a\u4e0b\u6587&#xff1f;<\/h4>\n<p>\u7b54\u6848\u662f\u80af\u5b9a\u7684&#xff0c;\u4f46\u9700\u660e\u786e\u914d\u7f6e\u7ec4\u5408&#xff1a;<\/p>\n<ul>\n<li>\u6700\u4f4e\u53ef\u884c\u914d\u7f6e&#xff1a;NVIDIA RTX 3090 \/ 4090&#xff08;24GB\u663e\u5b58&#xff09;&#xff0c;\u8fd0\u884cINT4\u91cf\u5316\u6743\u91cd<\/li>\n<li>\u63a8\u8350\u914d\u7f6e&#xff1a;A10 \/ A100&#xff08;40GB\/80GB&#xff09;&#xff0c;\u8fd0\u884cfp16\u5168\u7cbe\u5ea6&#xff08;18GB\u663e\u5b58\u5360\u7528&#xff09;&#xff0c;\u517c\u987e\u901f\u5ea6\u4e0e\u7cbe\u5ea6<\/li>\n<li>\u4e0d\u63a8\u8350\u914d\u7f6e&#xff1a;V100&#xff08;16GB&#xff09;\u3001RTX 3080&#xff08;10GB&#xff09;\u2014\u2014\u5373\u4f7fINT4\u4e5f\u6613OOM&#xff0c;\u5c24\u5176\u5f00\u542f\u591a\u8f6e\u5bf9\u8bdd\u65f6<\/li>\n<\/ul>\n<p>\u6ce8\u610f&#xff1a;1M\u4e0a\u4e0b\u6587\u22601M token\u5b9e\u65f6\u52a0\u8f7d\u3002Triton\u670d\u52a1\u4f1a\u6309\u9700\u5206\u5757prefill&#xff0c;\u5b9e\u9645\u663e\u5b58\u5cf0\u503c\u53d6\u51b3\u4e8emax_num_batched_tokens\u8bbe\u7f6e\u3002\u5b98\u65b9\u5efa\u8bae\u8bbe\u4e3a8192&#xff0c;\u53ef\u964d\u4f4e20%\u663e\u5b58\u538b\u529b\u3002<\/p>\n<h4>2.2 \u5f00\u6e90\u534f\u8bae&#xff1a;\u5546\u7528\u5230\u5e95\u5b89\u4e0d\u5b89\u5168&#xff1f;<\/h4>\n<p>\u5f88\u591a\u56e2\u961f\u5361\u5728\u6700\u540e\u4e00\u6b65\u2014\u2014\u6cd5\u5f8b\u5408\u89c4\u3002GLM-4-9B-Chat-1M\u7684\u534f\u8bae\u8bbe\u8ba1\u975e\u5e38\u52a1\u5b9e&#xff1a;<\/p>\n<ul>\n<li>\u4ee3\u7801\u5c42&#xff1a;Apache 
2.0&#xff08;\u53ef\u81ea\u7531\u4fee\u6539\u3001\u5206\u53d1\u3001\u5546\u7528&#xff09;<\/li>\n<li>\u6743\u91cd\u5c42&#xff1a;OpenRAIL-M&#xff08;\u5141\u8bb8\u5546\u7528&#xff0c;\u4f46\u7981\u6b62\u7528\u4e8e\u9ad8\u98ce\u9669\u573a\u666f\u5982\u81ea\u52a8\u6b66\u5668\u3001\u5927\u89c4\u6a21\u76d1\u63a7&#xff09;<\/li>\n<li>\u7279\u522b\u6761\u6b3e&#xff1a;\u521d\u521b\u516c\u53f8\u5e74\u8425\u6536\u6216\u878d\u8d44\u2264200\u4e07\u7f8e\u5143&#xff0c;\u53ef\u514d\u8d39\u5546\u7528&#xff1b;\u8d85\u9650\u9700\u8054\u7cfb\u667a\u8c31AI\u83b7\u53d6\u6388\u6743<\/li>\n<\/ul>\n<p>\u8fd9\u610f\u5473\u7740&#xff1a;\u4f60\u7528\u5b83\u505a\u5185\u90e8\u77e5\u8bc6\u5e93\u95ee\u7b54\u3001\u5408\u540c\u667a\u80fd\u5ba1\u67e5\u3001\u5ba2\u670d\u5de5\u5355\u6458\u8981&#xff0c;\u5b8c\u5168\u5408\u89c4&#xff1b;\u4f46\u82e5\u8981\u96c6\u6210\u8fdb\u9762\u5411\u91d1\u878d\u98ce\u63a7\u7684SaaS\u4ea7\u54c1&#xff0c;\u5219\u9700\u786e\u8ba4\u5546\u4e1a\u6388\u6743\u3002<\/p>\n<h4>2.3 \u80fd\u529b\u8fb9\u754c&#xff1a;\u5b83\u5f3a\u5728\u54ea&#xff1f;\u5f31\u5728\u54ea&#xff1f;<\/h4>\n<table>\n<thead>\n<tr>\n<th>\u573a\u666f<\/th>\n<th>\u8868\u73b0<\/th>\n<th>\u5b9e\u64cd\u5efa\u8bae<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u8d85\u957f\u6587\u6863\u7406\u89e3&#xff08;PDF\/Word\/\u7f51\u9875&#xff09;<\/td>\n<td>\u539f\u751f\u652f\u6301300\u9875PDF\u6574\u672c\u89e3\u6790&#xff0c;\u5185\u7f6e\u603b\u7ed3\/\u62bd\u53d6\/\u5bf9\u6bd4\u6a21\u677f<\/td>\n<td>\u4e0a\u4f20\u540e\u76f4\u63a5\u9009\u201c\u957f\u6587\u672c\u603b\u7ed3\u201d&#xff0c;\u65e0\u9700\u5207\u7247<\/td>\n<\/tr>\n<tr>\n<td>\u591a\u8f6e\u5de5\u5177\u8c03\u7528&#xff08;Function 
Call&#xff09;<\/td>\n<td>\u652f\u6301\u7f51\u9875\u6d4f\u89c8\u3001\u4ee3\u7801\u6267\u884c\u3001\u81ea\u5b9a\u4e49API\u8c03\u7528<\/td>\n<td>\u5728prompt\u4e2d\u660e\u786e\u5199\u201c\u8bf7\u8c03\u7528tool_web_search\u67e5\u8be2\u6700\u65b0\u653f\u7b56\u201d<\/td>\n<\/tr>\n<tr>\n<td>\u4ee3\u7801\u751f\u6210\u4e0e\u6267\u884c<\/td>\n<td>HumanEval\u901a\u8fc7\u738762.3%&#xff0c;\u4f18\u4e8eLlama-3-8B<\/td>\n<td>\u7528&#096;&lt;<\/td>\n<\/tr>\n<tr>\n<td>\u4f4e\u8d44\u6e90\u591a\u5e76\u53d1<\/td>\n<td>\u5355\u5361RTX 4090\u4e0b&#xff0c;1M\u4e0a\u4e0b\u6587&#043;3\u5e76\u53d1\u8bf7\u6c42&#xff0c;\u5e73\u5747\u5ef6\u8fdf\u22483.2s\/Token<\/td>\n<td>\u5982\u9700\u66f4\u9ad8\u541e\u5410&#xff0c;\u5efa\u8bae\u7528Triton\u7684\u52a8\u6001\u6279\u5904\u7406&#xff08;dynamic batching&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u975e\u62c9\u4e01\u8bed\u7cfb<\/td>\n<td>\u4e2d\/\u82f1\/\u65e5\/\u97e9\/\u5fb7\/\u6cd5\/\u897f\u7b4926\u79cd\u8bed\u8a00\u5747\u901a\u8fc7\u5b98\u65b9\u9a8c\u8bc1<\/td>\n<td>\u4e2d\u6587\u573a\u666f\u4f18\u5148\u7528zh\u8bed\u8a00\u6807\u8bc6&#xff0c;\u63d0\u5347\u5206\u8bcd\u51c6\u786e\u7387<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u5173\u952e\u63d0\u9192&#xff1a;\u5b83\u4e0d\u64c5\u957f\u56fe\u50cf\u7406\u89e3&#xff08;\u7eaf\u6587\u672c\u6a21\u578b&#xff09;\u3001\u4e0d\u652f\u6301\u8bed\u97f3\u8f93\u5165\u3001\u4e0d\u751f\u6210\u89c6\u9891\u3002\u5982\u679c\u4f60\u7684\u9700\u6c42\u662f\u201c\u770b\u56fe\u8bf4\u8bdd\u201d\u6216\u201c\u8bed\u97f3\u8f6c\u4f1a\u8bae\u7eaa\u8981\u201d&#xff0c;\u8bf7\u53e6\u9009\u591a\u6a21\u6001\u6a21\u578b\u3002<\/p>\n<h3>3. 
Triton\u90e8\u7f72\u5b9e\u6218&#xff1a;\u4ece\u96f6\u5230API\u670d\u52a1&#xff08;\u542b\u5b8c\u6574\u547d\u4ee4&#xff09;<\/h3>\n<h4>3.1 \u73af\u5883\u51c6\u5907&#xff1a;5\u5206\u949f\u88c5\u597d\u57fa\u7840\u4f9d\u8d56<\/h4>\n<p>\u6211\u4eec\u91c7\u7528NVIDIA Triton 24.07&#xff08;LTS\u7248\u672c&#xff09;&#xff0c;\u517c\u5bb9CUDA 12.2&#043;&#xff0c;\u9002\u914d\u4e3b\u6d41Ampere\/A100\/H100\u663e\u5361\u3002\u4ee5\u4e0b\u547d\u4ee4\u5728Ubuntu 22.04 LTS\u4e0a\u9a8c\u8bc1\u901a\u8fc7&#xff1a;<\/p>\n<p># 1. \u5b89\u88c5NVIDIA Container Toolkit&#xff08;\u5982\u672a\u5b89\u88c5&#xff09;<br \/>\ncurl -fsSL https:\/\/nvidia.github.io\/libnvidia-container\/gpgkey | sudo gpg --dearmor -o \/usr\/share\/keyrings\/nvidia-container-toolkit-keyring.gpg<br \/>\ncurl -fsSL https:\/\/nvidia.github.io\/libnvidia-container\/stable\/deb\/nvidia-container-toolkit.list | sudo tee \/etc\/apt\/sources.list.d\/nvidia-container-toolkit.list<br \/>\nsudo apt-get update &amp;&amp; sudo apt-get install -y nvidia-container-toolkit<\/p>\n<p># 2. \u62c9\u53d6Triton\u5b98\u65b9\u955c\u50cf&#xff08;\u5e26vLLM\u540e\u7aef\u652f\u6301&#xff09;<br \/>\ndocker pull nvcr.io\/nvidia\/tritonserver:24.07-py3<\/p>\n<p># 3. 
\u521b\u5efa\u6a21\u578b\u4ed3\u5e93\u76ee\u5f55\u7ed3\u6784<br \/>\nmkdir -p .\/triton_models\/glm4_9b_chat_1m\/1<\/p>\n<h4>3.2 \u6a21\u578b\u8f6c\u6362&#xff1a;\u628aHuggingFace\u6743\u91cd\u53d8\u6210Triton\u53ef\u52a0\u8f7d\u683c\u5f0f<\/h4>\n<p>GLM-4-9B-Chat-1M\u539f\u751f\u652f\u6301vLLM&#xff0c;\u4f46Triton\u9700\u5c01\u88c5\u4e3alibtorch\u6216tensorrtllm\u683c\u5f0f\u3002\u6211\u4eec\u9009\u62e9\u8f7b\u91cf\u7ea7\u65b9\u6848\u2014\u2014\u7528vLLM\u4f5c\u4e3aTriton\u7684backend&#xff08;\u5b98\u65b9\u5df2\u63d0\u4f9b\u9002\u914d\u5668&#xff09;&#xff1a;<\/p>\n<p># \u8fdb\u5165\u5bb9\u5668\u6784\u5efa\u73af\u5883<br \/>\ndocker run --rm -it --gpus all -v $(pwd):\/workspace nvcr.io\/nvidia\/tritonserver:24.07-py3 bash<\/p>\n<p># \u5728\u5bb9\u5668\u5185\u5b89\u88c5vLLM\u53ca\u8f6c\u6362\u5de5\u5177<br \/>\npip install vllm&#061;&#061;0.6.3 tritonclient[http]<\/p>\n<p># \u4e0b\u8f7dINT4\u91cf\u5316\u6743\u91cd&#xff08;HuggingFace Hub&#xff09;<br \/>\ngit lfs install<br \/>\ngit clone https:\/\/huggingface.co\/THUDM\/glm-4-9b-chat-1m-int4<\/p>\n<p># \u751f\u6210Triton\u6a21\u578b\u914d\u7f6e&#xff08;config.pbtxt&#xff09;<br \/>\ncat &gt; .\/triton_models\/glm4_9b_chat_1m\/config.pbtxt &lt;&lt; &#039;EOF&#039;<br \/>\nname: &#034;glm4_9b_chat_1m&#034;<br \/>\nplatform: &#034;vllm&#034;<br \/>\nmax_batch_size: 32<br \/>\ninput [<br \/>\n  {<br \/>\n    name: &#034;text_input&#034;<br \/>\n    data_type: TYPE_STRING<br \/>\n    dims: [ -1 ]<br \/>\n  }<br \/>\n]<br \/>\noutput [<br \/>\n  {<br \/>\n    name: &#034;text_output&#034;<br \/>\n    data_type: TYPE_STRING<br \/>\n    dims: [ -1 ]<br \/>\n  }<br \/>\n]<br \/>\nparameters: [<br \/>\n  {<br \/>\n    key: &#034;model&#034;<br \/>\n    value: { string_value: &#034;\/workspace\/glm-4-9b-chat-1m-int4&#034; }<br \/>\n  },<br \/>\n  {<br \/>\n    key: &#034;tokenizer&#034;<br \/>\n    value: { string_value: &#034;\/workspace\/glm-4-9b-chat-1m-int4&#034; }<br \/>\n  },<br \/>\n  {<br \/>\n    key: 
&#034;tensor_parallel_size&#034;<br \/>\n    value: { string_value: &#034;1&#034; }<br \/>\n  },<br \/>\n  {<br \/>\n    key: &#034;enable_chunked_prefill&#034;<br \/>\n    value: { string_value: &#034;True&#034; }<br \/>\n  },<br \/>\n  {<br \/>\n    key: &#034;max_num_batched_tokens&#034;<br \/>\n    value: { string_value: &#034;8192&#034; }<br \/>\n  }<br \/>\n]<br \/>\nEOF<\/p>\n<h4>3.3 \u542f\u52a8Triton\u670d\u52a1&#xff1a;\u4e00\u884c\u547d\u4ee4&#xff0c;API\u5c31\u7eea<\/h4>\n<p>\u9000\u51fa\u5bb9\u5668&#xff0c;\u5728\u5bbf\u4e3b\u673a\u6267\u884c&#xff1a;<\/p>\n<p># \u542f\u52a8Triton\u670d\u52a1&#xff08;\u6620\u5c048000\u7aef\u53e3\u4e3aHTTP&#xff0c;8001\u4e3agRPC&#xff09;<br \/>\ndocker run --gpus&#061;all --rm -p8000:8000 -p8001:8001 -p8002:8002 \\<br \/>\n  -v $(pwd)\/triton_models:\/models \\<br \/>\n  nvcr.io\/nvidia\/tritonserver:24.07-py3 \\<br \/>\n  tritonserver --model-repository&#061;\/models --strict-model-config&#061;false \\<br \/>\n  --log-error&#061;true --log-warning&#061;true --log-info&#061;true \\<br \/>\n  --model-control-mode&#061;explicit --load-model&#061;glm4_9b_chat_1m<\/p>\n<p>\u670d\u52a1\u542f\u52a8\u540e&#xff0c;\u8bbf\u95ee http:\/\/localhost:8000\/v2\/health\/ready \u8fd4\u56de{&#034;ready&#034;:true}\u5373\u6210\u529f\u3002<\/p>\n<h4>3.4 \u6d4b\u8bd5API&#xff1a;\u7528curl\u53d1\u7b2c\u4e00\u4e2a\u8bf7\u6c42<\/h4>\n<p>curl -X POST &#034;http:\/\/localhost:8000\/v2\/models\/glm4_9b_chat_1m\/infer&#034; \\<br \/>\n  -H &#034;Content-Type: application\/json&#034; \\<br \/>\n  -d &#039;{<br \/>\n    &#034;text_input&#034;: [&#034;&lt;|user|&gt;\u8bf7\u7528\u4e09\u53e5\u8bdd\u603b\u7ed3\u300a2023\u5e74\u4e2d\u56fd\u4eba\u5de5\u667a\u80fd\u53d1\u5c55\u767d\u76ae\u4e66\u300b\u6838\u5fc3\u89c2\u70b9&lt;|assistant|&gt;&#034;]<br \/>\n  }&#039; | jq &#039;.text_output&#039;<\/p>\n<p>\u54cd\u5e94\u793a\u4f8b&#xff1a;<\/p>\n<p>[&#034;1. 
\u653f\u7b56\u5c42\u9762\u52a0\u901f\u6784\u5efaAI\u6cbb\u7406\u4f53\u7cfb&#xff0c;\u91cd\u70b9\u76d1\u7ba1\u751f\u6210\u5f0fAI\u5185\u5bb9\u5b89\u5168&#xff1b;\\\\n2. \u4ea7\u4e1a\u5e94\u7528\u4ece\u5355\u70b9\u7a81\u7834\u8f6c\u5411\u7cfb\u7edf\u96c6\u6210&#xff0c;\u91d1\u878d\u3001\u533b\u7597\u3001\u5236\u9020\u9886\u57df\u843d\u5730\u6848\u4f8b\u589e\u957f120%&#xff1b;\\\\n3. \u57fa\u7840\u8bbe\u65bd\u5411\u201c\u7b97\u529b&#043;\u6570\u636e&#043;\u7b97\u6cd5\u201d\u4e00\u4f53\u5316\u6f14\u8fdb&#xff0c;\u56fd\u4ea7\u82af\u7247\u9002\u914d\u7387\u63d0\u5347\u81f368%\u3002&#034;]<\/p>\n<p>\u6210\u529f\u6807\u5fd7&#xff1a;\u54cd\u5e94\u65f6\u95f4&lt;5\u79d2&#xff0c;\u8f93\u51fa\u4e2d\u6587\u6d41\u7545\u65e0\u4e71\u7801&#xff0c;\u652f\u6301\u591a\u8f6e\u5bf9\u8bdd&#xff08;\u5728text_input\u4e2d\u8ffd\u52a0\u5386\u53f2\u6d88\u606f\u5373\u53ef&#xff09;\u3002<\/p>\n<h3>4. \u751f\u4ea7\u7ea7\u4f18\u5316&#xff1a;\u8ba91M\u4e0a\u4e0b\u6587\u771f\u6b63\u201c\u5feb\u7a33\u7701\u201d<\/h3>\n<h4>4.1 \u663e\u5b58\u518d\u538b\u964d20%&#xff1a;\u542f\u7528chunked prefill<\/h4>\n<p>\u9ed8\u8ba4\u60c5\u51b5\u4e0b&#xff0c;Triton\u5bf91M\u4e0a\u4e0b\u6587\u4f1a\u4e00\u6b21\u6027\u52a0\u8f7d\u5168\u90e8KV Cache&#xff0c;\u663e\u5b58\u5cf0\u503c\u6781\u9ad8\u3002\u5f00\u542fenable_chunked_prefill\u540e&#xff0c;vLLM\u5c06\u63098192 token\u5206\u5757prefill&#xff0c;\u5b9e\u6d4b\u663e\u5b58\u4e0b\u964d20%&#xff0c;\u9996token\u5ef6\u8fdf\u964d\u4f4e35%&#xff1a;<\/p>\n<p># \u4fee\u6539config.pbtxt\u4e2d\u7684\u53c2\u6570<br \/>\n{<br \/>\n  key: &#034;enable_chunked_prefill&#034;<br \/>\n  value: { string_value: &#034;True&#034; }<br \/>\n},<br \/>\n{<br \/>\n  key: &#034;max_num_batched_tokens&#034;<br \/>\n  value: { string_value: &#034;8192&#034; }<br \/>\n}<\/p>\n<h4>4.2 \u541e\u5410\u7ffb\u500d&#xff1a;\u52a8\u6001\u6279\u5904\u7406&#xff08;Dynamic 
Batching&#xff09;<\/h4>\n<p>Triton\u9ed8\u8ba4\u9759\u6001\u6279\u5904\u7406&#xff0c;\u5bf9\u957f\u6587\u672c\u8bf7\u6c42\u4e0d\u53cb\u597d\u3002\u542f\u7528\u52a8\u6001\u6279\u5904\u7406\u540e&#xff0c;\u4e0d\u540c\u957f\u5ea6\u8bf7\u6c42\u53ef\u6df7\u5408\u8c03\u5ea6&#xff1a;<\/p>\n<p># \u542f\u52a8\u65f6\u6dfb\u52a0\u53c2\u6570<br \/>\n--auto-complete-config \\<br \/>\n--pinned-memory-pool-byte-size&#061;268435456 \\<br \/>\n--cuda-memory-pool-byte-size&#061;0:268435456<\/p>\n<p>\u5b9e\u6d4b\u5728RTX 4090\u4e0a&#xff0c;3\u5e76\u53d1\u8bf7\u6c42&#xff08;\u5e73\u5747\u957f\u5ea6500K token&#xff09;\u541e\u5410\u8fbe18 tokens\/sec&#xff0c;\u662f\u9759\u6001\u6279\u5904\u7406\u76842.3\u500d\u3002<\/p>\n<h4>4.3 \u9ad8\u53ef\u7528\u4fdd\u969c&#xff1a;\u5065\u5eb7\u68c0\u67e5\u4e0e\u81ea\u52a8\u91cd\u542f<\/h4>\n<p>\u5728docker-compose.yml\u4e2d\u52a0\u5165\u5065\u5eb7\u68c0\u67e5&#xff1a;<\/p>\n<p>services:<br \/>\n  triton:<br \/>\n    image: nvcr.io\/nvidia\/tritonserver:24.07-py3<br \/>\n    # &#8230; \u5176\u4ed6\u914d\u7f6e<br \/>\n    healthcheck:<br \/>\n      test: [&#034;CMD&#034;, &#034;curl&#034;, &#034;-f&#034;, &#034;http:\/\/localhost:8000\/v2\/health\/ready&#034;]<br \/>\n      interval: 30s<br \/>\n      timeout: 10s<br \/>\n      retries: 3<br \/>\n      start_period: 40s<\/p>\n<p>\u914d\u5408restart: unless-stopped&#xff0c;\u670d\u52a1\u5d29\u6e83\u540e\u81ea\u52a8\u6062\u590d&#xff0c;\u907f\u514d\u4eba\u5de5\u5e72\u9884\u3002<\/p>\n<h3>5. 
\u4e0e\u73b0\u6709\u7cfb\u7edf\u96c6\u6210&#xff1a;\u4e0d\u53ea\u662fAPI&#xff0c;\u66f4\u662f\u5de5\u4f5c\u6d41\u8282\u70b9<\/h3>\n<h4>5.1 \u5bf9\u63a5RAG\u7cfb\u7edf&#xff1a;\u628a1M\u4e0a\u4e0b\u6587\u53d8\u6210\u4f60\u7684\u77e5\u8bc6\u5f15\u64ce<\/h4>\n<p>\u591a\u6570RAG\u7cfb\u7edf\u53d7\u9650\u4e8echunk size&#xff08;\u901a\u5e38512-2048 token&#xff09;&#xff0c;\u5bfc\u81f4\u8de8\u6bb5\u903b\u8f91\u65ad\u88c2\u3002\u800cGLM-4-9B-Chat-1M\u53ef\u76f4\u63a5\u4f5c\u4e3aRAG\u7684\u201c\u91cd\u6392\u5e8f&#043;\u751f\u6210\u201d\u53cc\u6a21\u5757&#xff1a;<\/p>\n<p># \u793a\u4f8b&#xff1a;LangChain\u4e2d\u66ff\u6362LLM<br \/>\nfrom langchain_community.llms import TritonLLM<\/p>\n<p>llm &#061; TritonLLM(<br \/>\n    model_name&#061;&#034;glm4_9b_chat_1m&#034;,<br \/>\n    server_url&#061;&#034;http:\/\/localhost:8000&#034;,<br \/>\n    max_tokens&#061;2048,<br \/>\n    temperature&#061;0.3<br \/>\n)<\/p>\n<p># \u6784\u5efa\u68c0\u7d22\u94fe&#xff08;\u65e0\u9700\u5207\u7247&#xff0c;\u76f4\u63a5\u4f20\u5165\u6574\u4efdPDF\u6587\u672c&#xff09;<br \/>\nretriever &#061; VectorStoreRetriever(vectorstore&#061;your_db)<br \/>\nchain &#061; RetrievalQA.from_chain_type(<br \/>\n    llm&#061;llm,<br \/>\n    chain_type&#061;&#034;stuff&#034;,  # \u5173\u952e&#xff1a;\u7528stuff\u800c\u975emap_reduce&#xff0c;\u4fdd\u7559\u5168\u6587\u4e0a\u4e0b\u6587<br \/>\n    retriever&#061;retriever<br \/>\n)<\/p>\n<h4>5.2 \u5d4c\u5165\u4e1a\u52a1\u7cfb\u7edf&#xff1a;\u7528Function Call\u81ea\u52a8\u8c03\u7528\u5185\u90e8API<\/h4>\n<p>\u6a21\u578b\u539f\u751f\u652f\u6301Function Call&#xff0c;\u53ef\u5b9a\u4e49JSON Schema\u8ba9\u5176\u81ea\u4e3b\u8c03\u7528\u4f60\u7684\u4e1a\u52a1\u63a5\u53e3&#xff1a;<\/p>\n<p>{<br \/>\n  &#034;name&#034;: &#034;get_contract_clause&#034;,<br \/>\n  &#034;description&#034;: &#034;\u6839\u636e\u5408\u540c\u7f16\u53f7\u548c\u6761\u6b3e\u7c7b\u578b&#xff0c;\u67e5\u8be2\u6700\u65b0\u7248\u6761\u6b3e\u5185\u5bb9&#034;,<br \/>\n  &#034;parameters&#034;: {<br \/>\n    &#034;type&#034;: 
&#034;object&#034;,<br \/>\n    &#034;properties&#034;: {<br \/>\n      &#034;contract_id&#034;: {&#034;type&#034;: &#034;string&#034;, &#034;description&#034;: &#034;\u5408\u540c\u552f\u4e00\u7f16\u53f7&#034;},<br \/>\n      &#034;clause_type&#034;: {&#034;type&#034;: &#034;string&#034;, &#034;enum&#034;: [&#034;payment&#034;, &#034;liability&#034;, &#034;termination&#034;]}<br \/>\n    }<br \/>\n  }<br \/>\n}<\/p>\n<p>\u5728prompt\u4e2d\u5199&#xff1a;<\/p>\n<p>&lt;|user|&gt;\u8bf7\u67e5\u8be2\u5408\u540cCN2024-001\u4e2d\u5173\u4e8e\u201c\u7ec8\u6b62\u6761\u6b3e\u201d\u7684\u6700\u65b0\u5185\u5bb9&#xff0c;\u5e76\u5bf9\u6bd42023\u7248\u5dee\u5f02\u3002<br \/>\n&lt;|assistant|&gt;<br \/>\n{&#034;name&#034;: &#034;get_contract_clause&#034;, &#034;arguments&#034;: {&#034;contract_id&#034;: &#034;CN2024-001&#034;, &#034;clause_type&#034;: &#034;termination&#034;}}<\/p>\n<p>Triton\u670d\u52a1\u4f1a\u81ea\u52a8\u89e3\u6790JSON\u5e76\u89e6\u53d1\u4f60\u7684Webhook&#xff0c;\u8fd4\u56de\u7ed3\u679c\u540e\u7ee7\u7eed\u751f\u6210\u5206\u6790\u62a5\u544a\u3002<\/p>\n<h3>6. 
\u603b\u7ed3&#xff1a;\u8fd9\u4e0d\u4ec5\u662f\u90e8\u7f72&#xff0c;\u66f4\u662f\u957f\u6587\u672c\u5904\u7406\u8303\u5f0f\u7684\u5207\u6362<\/h3>\n<h4>6.1 \u4f60\u771f\u6b63\u83b7\u5f97\u7684\u80fd\u529b<\/h4>\n<ul>\n<li>\u4e00\u6b21\u8bfb\u5b8c200\u4e07\u5b57&#xff1a;\u4e0d\u662f\u5206\u6bb5\u62fc\u63a5&#xff0c;\u800c\u662f\u5168\u5c40\u7406\u89e3\u2014\u2014\u5408\u540c\u91cc\u7684\u9690\u85cf\u8d23\u4efb\u6761\u6b3e\u3001\u8d22\u62a5\u4e2d\u7684\u5f02\u5e38\u73b0\u91d1\u6d41\u6a21\u5f0f\u3001\u6280\u672f\u6587\u6863\u91cc\u7684\u77db\u76fe\u63cf\u8ff0&#xff0c;\u90fd\u80fd\u88ab\u7cbe\u51c6\u6355\u83b7\u3002<\/li>\n<li>\u5355\u5361\u627f\u8f7d\u4f01\u4e1a\u7ea7\u8d1f\u8f7d&#xff1a;RTX 4090\u4e0a&#xff0c;1M\u4e0a\u4e0b\u6587&#043;3\u5e76\u53d1&#043;Function Call&#xff0c;\u7a33\u5b9a\u5ef6\u8fdf&lt;4\u79d2&#xff0c;\u65e0\u9700\u96c6\u7fa4\u8c03\u5ea6\u590d\u6742\u5ea6\u3002<\/li>\n<li>\u5f00\u7bb1\u5373\u7528\u7684\u751f\u4ea7\u529b\u6a21\u677f&#xff1a;\u5185\u7f6e\u603b\u7ed3\/\u62bd\u53d6\/\u5bf9\u6bd4\u6307\u4ee4&#xff0c;\u4e0a\u4f20PDF\u540e\u70b9\u51fb\u5373\u7528&#xff0c;\u4e0d\u7528\u5199\u4e00\u884cprompt\u5de5\u7a0b\u4ee3\u7801\u3002<\/li>\n<li>\u5408\u89c4\u53ef\u63a7\u7684\u5546\u7528\u8def\u5f84&#xff1a;Apache 2.0\u4e0eOpenRAIL-M\u53cc\u534f\u8bae&#043;\u521d\u521b\u53cb\u597d\u6761\u6b3e&#xff0c;\u6280\u672f\u56e2\u961f\u53ef\u4e13\u6ce8\u4e1a\u52a1&#xff0c;\u6cd5\u52a1\u56e2\u961f\u65e0\u9700\u8fde\u591c\u5ba1\u534f\u8bae\u3002<\/li>\n<\/ul>\n<h4>6.2 \u4e0b\u4e00\u6b65\u884c\u52a8\u5efa\u8bae<\/h4>\n<ul>\n<li>\u7acb\u5373\u9a8c\u8bc1&#xff1a;\u7528\u4f60\u624b\u5934\u6700\u957f\u7684\u4e00\u4efdPDF&#xff08;\u5efa\u8bae&gt;100\u9875&#xff09;&#xff0c;\u6d4b\u8bd5\u201c\u5168\u6587\u603b\u7ed3\u201d\u548c\u201c\u6307\u5b9a\u95ee\u9898\u95ee\u7b54\u201d\u6548\u679c<\/li>\n<li>\u96c6\u6210\u5230\u5de5\u4f5c\u6d41&#xff1a;\u5728\u73b0\u6709RAG\u7cfb\u7edf\u4e2d\u66ff\u6362LLM&#xff0c;\u5173\u95edchunking&#xff0c;\u89c2\u5bdf\u51c6\u786e\u7387\u63d0\u5347<\/li>\n<li>\u63a2\u7d22Function 
Call&#xff1a;\u5b9a\u4e491\u4e2a\u5185\u90e8API&#xff08;\u5982\u67e5\u8ba2\u5355\u72b6\u6001&#xff09;&#xff0c;\u7528\u81ea\u7136\u8bed\u8a00\u89e6\u53d1&#xff0c;\u9a8c\u8bc1\u81ea\u52a8\u5316\u6f5c\u529b<\/li>\n<\/ul>\n<p>\u957f\u6587\u672c\u5904\u7406\u7684\u74f6\u9888&#xff0c;\u4ece\u6765\u4e0d\u662f\u7b97\u529b&#xff0c;\u800c\u662f\u6a21\u578b\u80fd\u5426\u771f\u6b63\u201c\u8bb0\u4f4f\u5e76\u7406\u89e3\u201d\u6574\u7bc7\u5185\u5bb9\u3002GLM-4-9B-Chat-1M\u628a1M token\u4ece\u7406\u8bba\u6307\u6807\u53d8\u6210\u53ef\u843d\u5730\u7684\u751f\u4ea7\u529b\u5355\u5143\u2014\u2014\u73b0\u5728&#xff0c;\u8f6e\u5230\u4f60\u628a\u5b83\u63a5\u5165\u771f\u5b9e\u4e1a\u52a1\u4e86\u3002<\/p>\n<hr \/>\n<p>\u83b7\u53d6\u66f4\u591aAI\u955c\u50cf<\/p>\n<p>\u60f3\u63a2\u7d22\u66f4\u591aAI\u955c\u50cf\u548c\u5e94\u7528\u573a\u666f&#xff1f;\u8bbf\u95ee CSDN\u661f\u56fe\u955c\u50cf\u5e7f\u573a&#xff0c;\u63d0\u4f9b\u4e30\u5bcc\u7684\u9884\u7f6e\u955c\u50cf&#xff0c;\u8986\u76d6\u5927\u6a21\u578b\u63a8\u7406\u3001\u56fe\u50cf\u751f\u6210\u3001\u89c6\u9891\u751f\u6210\u3001\u6a21\u578b\u5fae\u8c03\u7b49\u591a\u4e2a\u9886\u57df&#xff0c;\u652f\u6301\u4e00\u952e\u90e8\u7f72\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b&#xff1a;NVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M<br \/>\n1. 
\u4e3a\u4ec0\u4e48\u4f60\u9700\u8981\u8fd9\u4e2a\u6a21\u578b\u2014\u2014\u4e0d\u662f\u53c8\u4e00\u4e2a\u201c\u5927\u53c2\u6570\u201d\u5671\u5934<br \/>\n\u4f60\u6709\u6ca1\u6709\u9047\u5230\u8fc7\u8fd9\u6837\u7684\u573a\u666f&#xff1a; \u4e00\u4efd200\u9875\u7684PDF\u8d22\u62a5&#xff0c;\u9700\u8981\u5feb\u901f\u63d0\u53d6\u5173\u952e\u6761\u6b3e\u3001\u5bf9\u6bd4\u4e09\u5e74\u6570\u636e\u53d8\u5316\u3001\u751f\u6210\u6458\u8981\u5e76\u56de\u7b54\u201c\u73b0\u91d1\u6d41\u662f\u5426\u8fde\u7eed\u4e09\u5e74\u4e3a\u8d1f\u201d\u8fd9\u7c7b\u5177\u4f53\u95ee\u9898&#xff1f; \u6216\u8005&#xff0c;\u5ba2\u6237\u53d1\u6765\u4e00\u4efd30\u4e07\u5b57\u7684\u6280\u672f\u767d\u76ae\u4e66&#xff0c;\u8981\u6c4210\u5206\u949f\u5185\u5b8c\u6210\u7ed3\u6784\u5316\u68b3\u7406\u98ce<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[7354,7355,7353,956],"topic":[],"class_list":["post-69997","post","type-post","status-publish","format-standard","hentry","category-server","tag-glm-4","tag-7355","tag-7353","tag-956"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/69997.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta 
property=\"og:description\" content=\"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b&#xff1a;NVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M 1. \u4e3a\u4ec0\u4e48\u4f60\u9700\u8981\u8fd9\u4e2a\u6a21\u578b\u2014\u2014\u4e0d\u662f\u53c8\u4e00\u4e2a\u201c\u5927\u53c2\u6570\u201d\u5671\u5934 \u4f60\u6709\u6ca1\u6709\u9047\u5230\u8fc7\u8fd9\u6837\u7684\u573a\u666f&#xff1a; \u4e00\u4efd200\u9875\u7684PDF\u8d22\u62a5&#xff0c;\u9700\u8981\u5feb\u901f\u63d0\u53d6\u5173\u952e\u6761\u6b3e\u3001\u5bf9\u6bd4\u4e09\u5e74\u6570\u636e\u53d8\u5316\u3001\u751f\u6210\u6458\u8981\u5e76\u56de\u7b54\u201c\u73b0\u91d1\u6d41\u662f\u5426\u8fde\u7eed\u4e09\u5e74\u4e3a\u8d1f\u201d\u8fd9\u7c7b\u5177\u4f53\u95ee\u9898&#xff1f; \u6216\u8005&#xff0c;\u5ba2\u6237\u53d1\u6765\u4e00\u4efd30\u4e07\u5b57\u7684\u6280\u672f\u767d\u76ae\u4e66&#xff0c;\u8981\u6c4210\u5206\u949f\u5185\u5b8c\u6210\u7ed3\u6784\u5316\u68b3\u7406\u98ce\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/69997.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2026-02-01T02:33:07+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"5 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/69997.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/69997.html\",\"name\":\"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2026-02-01T02:33:07+00:00\",\"dateModified\":\"2026-02-01T02:33:07+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/69997.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/69997.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/69997.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/69997.html","og_locale":"zh_CN","og_type":"article","og_title":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b&#xff1a;NVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M 1. 
\u4e3a\u4ec0\u4e48\u4f60\u9700\u8981\u8fd9\u4e2a\u6a21\u578b\u2014\u2014\u4e0d\u662f\u53c8\u4e00\u4e2a\u201c\u5927\u53c2\u6570\u201d\u5671\u5934 \u4f60\u6709\u6ca1\u6709\u9047\u5230\u8fc7\u8fd9\u6837\u7684\u573a\u666f&#xff1a; \u4e00\u4efd200\u9875\u7684PDF\u8d22\u62a5&#xff0c;\u9700\u8981\u5feb\u901f\u63d0\u53d6\u5173\u952e\u6761\u6b3e\u3001\u5bf9\u6bd4\u4e09\u5e74\u6570\u636e\u53d8\u5316\u3001\u751f\u6210\u6458\u8981\u5e76\u56de\u7b54\u201c\u73b0\u91d1\u6d41\u662f\u5426\u8fde\u7eed\u4e09\u5e74\u4e3a\u8d1f\u201d\u8fd9\u7c7b\u5177\u4f53\u95ee\u9898&#xff1f; \u6216\u8005&#xff0c;\u5ba2\u6237\u53d1\u6765\u4e00\u4efd30\u4e07\u5b57\u7684\u6280\u672f\u767d\u76ae\u4e66&#xff0c;\u8981\u6c4210\u5206\u949f\u5185\u5b8c\u6210\u7ed3\u6784\u5316\u68b3\u7406\u98ce","og_url":"https:\/\/www.wsisp.com\/helps\/69997.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2026-02-01T02:33:07+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"5 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/69997.html","url":"https:\/\/www.wsisp.com\/helps\/69997.html","name":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2026-02-01T02:33:07+00:00","dateModified":"2026-02-01T02:33:07+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/69997.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/69997.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/69997.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"GLM-4-9B-Chat-1M\u90e8\u7f72\u6559\u7a0b\uff1aNVIDIA Triton\u63a8\u7406\u670d\u52a1\u5668\u96c6\u6210GLM-4-9B-Chat-1M"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/69997","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=69997"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/69997\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=69997"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=69997"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=69997"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=69997"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}