{"id":38868,"date":"2025-05-21T12:46:12","date_gmt":"2025-05-21T04:46:12","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/38868.html"},"modified":"2025-05-21T12:46:12","modified_gmt":"2025-05-21T04:46:12","slug":"%e3%80%90github%e5%bc%80%e6%ba%90%e9%a1%b9%e7%9b%ae%e5%ae%9e%e6%88%98%e3%80%91fastchat-%e5%ae%9e%e6%88%98%e8%a7%a3%e6%9e%90%ef%bc%9a%e5%a4%9a%e6%a8%a1%e5%9e%8b-llm-chat-api-%e6%9c%8d%e5%8a%a1%e5%99%a8","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/38868.html","title":{"rendered":"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357"},"content":{"rendered":"<h2>FastChat \u5b9e\u6218\u89e3\u6790&#xff1a;\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357<\/h2>\n<h3>\u5173\u952e\u8bcd&#xff1a;<\/h3>\n<p>FastChat\u3001LLM \u670d\u52a1\u6846\u67b6\u3001OpenAI API \u66ff\u4ee3\u3001Chat Completion\u3001LLM Inference Server\u3001\u6a21\u578b\u90e8\u7f72\u3001\u5e76\u53d1\u4f18\u5316\u3001vLLM\u3001\u591a\u7528\u6237\u591a\u6a21\u578b\u3001\u4f01\u4e1a\u7ea7\u843d\u5730<\/p>\n<h2>\u6458\u8981&#xff1a;<\/h2>\n<p>FastChat \u662f\u7531 LM-SYS \u56e2\u961f\u5f00\u6e90\u7684\u8f7b\u91cf\u7ea7\u5927\u8bed\u8a00\u6a21\u578b&#xff08;LLM&#xff09;\u670d\u52a1\u6846\u67b6&#xff0c;\u65e8\u5728\u63d0\u4f9b\u4e00\u4e2a\u4e0e OpenAI API \u5b8c\u5168\u517c\u5bb9\u7684\u591a\u6a21\u578b Chat \u63a5\u53e3\u670d\u52a1\u5e73\u53f0\u3002\u8be5\u6846\u67b6\u539f\u751f\u652f\u6301\u591a\u4e2a\u70ed\u95e8\u6a21\u578b&#xff08;\u5982 LLaMA\u3001ChatGLM\u3001Qwen\u3001Mistral \u7b49&#xff09;&#xff0c;\u5e76\u53ef\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u3002\u9879\u76ee\u63d0\u4f9b\u5b8c\u6574\u7684 Chat \u670d\u52a1\u3001\u7ba1\u7406\u540e\u7aef\u3001Web UI \u548c CLI 
\u5de5\u5177&#xff0c;\u5e7f\u6cdb\u5e94\u7528\u4e8e\u6a21\u578b\u5fae\u8c03\u6d4b\u8bd5\u3001\u591a\u6a21\u578b\u5bf9\u6bd4\u8bc4\u4f30\u3001\u4f01\u4e1a\u5185\u90e8\u5927\u6a21\u578b\u670d\u52a1\u6784\u5efa\u573a\u666f\u3002\u672c\u6587\u5c06\u7cfb\u7edf\u68b3\u7406 FastChat \u7684\u67b6\u6784\u8bbe\u8ba1\u3001\u90e8\u7f72\u6d41\u7a0b\u4e0e\u6027\u80fd\u4f18\u5316\u5b9e\u8df5&#xff0c;\u5e76\u4ee5\u771f\u5b9e\u6848\u4f8b\u63a2\u7d22\u5176\u5728\u4f01\u4e1a\u7ea7\u63a8\u7406\u5e73\u53f0\u4e2d\u7684\u843d\u5730\u8def\u5f84\u3002<\/p>\n<h3>\u76ee\u5f55&#xff1a;<\/h3>\n<p>\u7b2c\u4e00\u7ae0&#xff1a;\u9879\u76ee\u80cc\u666f\u4e0e\u67b6\u6784\u6982\u89c8&#xff08;\u9644 GitHub \u5730\u5740&#xff09; \u7b2c\u4e8c\u7ae0&#xff1a;\u652f\u6301\u6a21\u578b\u5217\u8868\u4e0e API \u63a5\u53e3\u517c\u5bb9\u6027\u5206\u6790 \u7b2c\u4e09\u7ae0&#xff1a;FastChat \u542f\u52a8\u6a21\u5f0f\u4e0e\u6838\u5fc3\u7ec4\u4ef6\u8be6\u89e3 \u7b2c\u56db\u7ae0&#xff1a;\u81ea\u5b9a\u4e49\u6a21\u578b\u63a5\u5165\u4e0e\u591a\u6a21\u578b\u8def\u7531\u7b56\u7565\u5b9e\u73b0 \u7b2c\u4e94\u7ae0&#xff1a;\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u90e8\u7f72\u5b9e\u8df5 \u7b2c\u516d\u7ae0&#xff1a;Chat Completion \u4e0e Streaming API \u5e76\u53d1\u6027\u80fd\u4f18\u5316 \u7b2c\u4e03\u7ae0&#xff1a;\u591a\u7528\u6237\u4efb\u52a1\u7ba1\u7406\u4e0e\u8eab\u4efd\u8ba4\u8bc1\u673a\u5236\u8bbe\u8ba1 \u7b2c\u516b\u7ae0&#xff1a;\u524d\u7aef Web UI \/ CLI \u5de5\u5177\u94fe\u5b9e\u6218\u5e94\u7528 \u7b2c\u4e5d\u7ae0&#xff1a;\u4f01\u4e1a\u7ea7\u573a\u666f\u843d\u5730\u6848\u4f8b\u4e0e\u5b89\u5168\u7b56\u7565\u5206\u6790 \u7b2c\u5341\u7ae0&#xff1a;\u6a21\u578b\u8bc4\u6d4b\u3001\u5bf9\u8bdd\u65e5\u5fd7\u4e0e\u63a8\u7406\u76d1\u63a7\u7cfb\u7edf\u5efa\u8bbe\u65b9\u6848<\/p>\n<h3>\u7b2c\u4e00\u7ae0&#xff1a;\u9879\u76ee\u80cc\u666f\u4e0e\u67b6\u6784\u6982\u89c8&#xff08;\u9644 GitHub \u5730\u5740&#xff09;<\/h3>\n<p>\u9879\u76ee\u5730\u5740&#xff1a;https:\/\/github.com\/lm-sys\/FastChat<\/p>\n<p>FastChat 
\u662f\u7531 UC Berkeley\u3001UCSD \u4e0e CMU \u7684\u8054\u5408\u7814\u7a76\u56e2\u961f LM-SYS \u63a8\u51fa\u7684\u5f00\u6e90\u9879\u76ee&#xff0c;\u521d\u8877\u662f\u4e3a\u5927\u8bed\u8a00\u6a21\u578b&#xff08;LLM&#xff09;\u6784\u5efa\u4e00\u4e2a\u7edf\u4e00\u3001\u7075\u6d3b\u4e14\u9ad8\u6027\u80fd\u7684 Chat API \u670d\u52a1\u5e73\u53f0\u3002\u5176\u6838\u5fc3\u76ee\u6807\u662f\u63d0\u4f9b\u4e0e OpenAI API \u8bed\u4e49\u5b8c\u5168\u517c\u5bb9\u7684 chat\/completions \u548c completions \u63a5\u53e3&#xff0c;\u540c\u65f6\u652f\u6301\u591a\u6a21\u578b\u52a0\u8f7d\u3001Chat UI \u5c55\u793a\u3001CLI \u4f7f\u7528\u3001\u6a21\u578b\u8bc4\u6d4b\u7b49\u529f\u80fd\u3002<\/p>\n<p>FastChat \u9879\u76ee\u5728\u5f00\u6e90\u793e\u533a\u4e2d\u7684\u5e94\u7528\u4e3b\u8981\u805a\u7126\u4e8e\u4ee5\u4e0b\u51e0\u4e2a\u65b9\u5411&#xff1a;<\/p>\n<ul>\n<li>\u63d0\u4f9b\u672c\u5730\u90e8\u7f72\u7684 LLM Chat API&#xff0c;\u66ff\u4ee3 OpenAI \u4e91\u670d\u52a1&#xff1b;<\/li>\n<li>\u652f\u6301\u591a\u79cd\u5f00\u6e90\u5927\u6a21\u578b\u63a5\u5165\u5e76\u7edf\u4e00\u7ba1\u7406&#xff1b;<\/li>\n<li>\u4e0e vLLM \u8054\u52a8\u63d0\u4f9b\u9ad8\u541e\u5410\u3001\u4f4e\u5ef6\u8fdf\u63a8\u7406\u670d\u52a1&#xff1b;<\/li>\n<li>\u4f5c\u4e3a\u5fae\u8c03\u540e\u6a21\u578b\u6548\u679c\u9a8c\u8bc1\u5e73\u53f0&#xff1b;<\/li>\n<li>\u4f01\u4e1a\u5185\u90e8\u6784\u5efa LLM Gateway \u4e0e\u79c1\u6709 Chat \u670d\u52a1\u3002<\/li>\n<\/ul>\n<p>FastChat \u7684\u6574\u4f53\u67b6\u6784\u6a21\u5757\u5982\u4e0b\u6240\u793a&#xff1a;<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6a21\u5757<\/th>\n<th>\u63cf\u8ff0<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>fastchat.serve.controller<\/td>\n<td>\u63a7\u5236\u8282\u70b9&#xff0c;\u8d1f\u8d23\u8def\u7531\u8bf7\u6c42\u3001\u7ba1\u7406\u6a21\u578b\u5de5\u4f5c\u5668\u72b6\u6001<\/td>\n<\/tr>\n<tr>\n<td>fastchat.serve.model_worker<\/td>\n<td>\u6a21\u578b\u5de5\u4f5c\u5668&#xff0c;\u6bcf\u4e2a\u6a21\u578b\u5bf9\u5e94\u4e00\u4e2a\u72ec\u7acb\u8fdb\u7a0b<\/td>\n<\/tr>\n<tr>\n<td>fastchat.serve.openai_api_server<\/td>\n<td>\u63d0\u4f9b\u4e0e OpenAI 
API \u5b8c\u5168\u517c\u5bb9\u7684\u63a5\u53e3\u670d\u52a1<\/td>\n<\/tr>\n<tr>\n<td>fastchat.serve.gradio_web_server<\/td>\n<td>\u63d0\u4f9b\u57fa\u7840 Web UI&#xff0c;\u7528\u4e8e\u5bf9\u8bdd\u5c55\u793a\u4e0e\u6a21\u578b\u6d4b\u8bd5<\/td>\n<\/tr>\n<tr>\n<td>fastchat.serve.cli<\/td>\n<td>\u63d0\u4f9b\u547d\u4ee4\u884c\u4ea4\u4e92\u65b9\u5f0f&#xff08;\u672c\u5730\u6d4b\u8bd5\u3001\u751f\u6210\u811a\u672c\u7b49&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u5176\u8fd0\u884c\u539f\u7406\u7c7b\u4f3c\u4e8e\u8f7b\u91cf\u7ea7\u7684\u5fae\u670d\u52a1\u8c03\u5ea6\u7cfb\u7edf&#xff0c;\u901a\u8fc7\u63a7\u5236\u5668\u534f\u8c03\u6a21\u578b Worker \u5bf9\u5916\u63d0\u4f9b\u63a8\u7406\u670d\u52a1&#xff0c;\u5177\u5907\u5982\u4e0b\u7279\u6027&#xff1a;<\/p>\n<ul>\n<li>\u652f\u6301\u591a\u6a21\u578b\u90e8\u7f72&#xff0c;\u7edf\u4e00\u670d\u52a1\u5165\u53e3&#xff1b;<\/li>\n<li>\u6bcf\u4e2a Worker \u72ec\u7acb\u8fdb\u7a0b&#xff0c;\u652f\u6301\u591a\u5361\u5e76\u884c\u6216\u5f02\u6784\u6a21\u578b\u90e8\u7f72&#xff1b;<\/li>\n<li>Chat API \u4e0e OpenAI \u4fdd\u6301\u9ad8\u5ea6\u4e00\u81f4&#xff0c;\u652f\u6301 Streaming\u3001Stop Token\u3001Temperature\u3001Top-p \u7b49\u53c2\u6570&#xff1b;<\/li>\n<li>\u4e0e vLLM \u7b49\u63a8\u7406\u4f18\u5316\u5f15\u64ce\u517c\u5bb9\u826f\u597d&#xff0c;\u9002\u5408\u9ad8\u5e76\u53d1\u573a\u666f\u3002<\/li>\n<\/ul>\n<p>\u5f97\u76ca\u4e8e\u5176\u6a21\u5757\u5316\u8bbe\u8ba1&#xff0c;FastChat \u975e\u5e38\u9002\u5408\u4f5c\u4e3a\u4f01\u4e1a\u7ea7\u79c1\u6709\u5927\u6a21\u578b\u670d\u52a1\u5e73\u53f0\u7684\u6838\u5fc3\u7ec4\u4ef6\u4e4b\u4e00\u3002<\/p>\n<hr \/>\n<h3>\u7b2c\u4e8c\u7ae0&#xff1a;\u652f\u6301\u6a21\u578b\u5217\u8868\u4e0e API \u63a5\u53e3\u517c\u5bb9\u6027\u5206\u6790<\/h3>\n<p>FastChat \u5728\u4e3b\u7ebf\u7248\u672c\u4e2d\u5df2\u652f\u6301\u5305\u62ec Meta LLaMA \u7cfb\u5217\u3001ChatGLM\u3001Qwen\u3001Baichuan\u3001Mistral\u3001InternLM 
\u7b49\u591a\u4e2a\u56fd\u5185\u5916\u4e3b\u6d41\u5f00\u6e90\u5927\u8bed\u8a00\u6a21\u578b&#xff0c;\u5747\u53ef\u901a\u8fc7\u914d\u7f6e\u76f4\u63a5\u52a0\u8f7d\u6216\u5fae\u8c03\u540e\u70ed\u542f\u52a8\u63a5\u5165\u3002<\/p>\n<h4>1. \u5b98\u65b9\u652f\u6301\u7684\u4e3b\u6d41\u6a21\u578b<\/h4>\n<table>\n<tr>\u6a21\u578b\u540d\u79f0\u67b6\u6784\u517c\u5bb9\u6027\u8bf4\u660e<\/tr>\n<tbody>\n<tr>\n<td>LLaMA \/ LLaMA 2<\/td>\n<td>\u539f\u751f\u652f\u6301&#xff0c;Meta \u7ed3\u6784<\/td>\n<\/tr>\n<tr>\n<td>ChatGLM \/ GLM3<\/td>\n<td>\u5b8c\u6574\u517c\u5bb9&#xff0c;\u9700\u8bbe\u7f6e tokenizer patch<\/td>\n<\/tr>\n<tr>\n<td>Qwen \u7cfb\u5217<\/td>\n<td>\u652f\u6301 7B \/ 14B&#xff0c;\u53ef\u7528\u4e8e chat\/completion<\/td>\n<\/tr>\n<tr>\n<td>Baichuan \u7cfb\u5217<\/td>\n<td>\u652f\u6301 baichuan-13B\u3001baichuan2 \u7b49<\/td>\n<\/tr>\n<tr>\n<td>Mistral \/ Mixtral<\/td>\n<td>\u652f\u6301\u6a21\u578b\u5408\u5e76\u63a8\u7406&#xff0c;\u7ed3\u6784\u590d\u7528<\/td>\n<\/tr>\n<tr>\n<td>InternLM \u7cfb\u5217<\/td>\n<td>\u652f\u6301 7B \/ 20B&#xff0c;\u53ef\u5b9a\u5236\u7cfb\u7edf\u63d0\u793a\u4e0e\u591a\u8f6e\u7ed3\u6784<\/td>\n<\/tr>\n<tr>\n<td>Falcon \/ Vicuna<\/td>\n<td>HuggingFace \u517c\u5bb9\u6a21\u578b\u5747\u53ef\u52a0\u8f7d<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u6b64\u5916&#xff0c;\u5bf9\u4e8e HuggingFace \u4e0a\u4efb\u610f\u652f\u6301 AutoModelForCausalLM \u7684\u6a21\u578b&#xff0c;\u4e5f\u53ef\u4ee5\u901a\u8fc7 &#8211;model-path \u53c2\u6570\u8fdb\u884c\u975e\u5b98\u65b9\u6a21\u578b\u6ce8\u518c\u3002<\/p>\n<h4>2. 
API \u63a5\u53e3\u517c\u5bb9\u6027<\/h4>\n<p>FastChat \u63d0\u4f9b\u4ee5\u4e0b\u4e09\u79cd\u6807\u51c6\u63a5\u53e3&#xff0c;\u5747\u4e0e OpenAI API \u4fdd\u6301\u9ad8\u5ea6\u4e00\u81f4&#xff1a;<\/p>\n<h5>\/v1\/chat\/completions<\/h5>\n<p><span class=\"token constant\">POST<\/span> <span class=\"token operator\">\/<\/span>v1<span class=\"token operator\">\/<\/span>chat<span class=\"token operator\">\/<\/span>completions<br \/>\n<span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;chatglm3&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;messages&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token punctuation\">[<\/span><br \/>\n    <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;system&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;\u4f60\u662f\u4e00\u4e2a\u6709\u5e2e\u52a9\u7684\u52a9\u624b&#034;<\/span><span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">,<\/span><br \/>\n    <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;user&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;\u4f60\u597d&#xff0c;\u8bf7\u4ecb\u7ecd\u4e00\u4e0bFastChat&#034;<\/span><span class=\"token punctuation\">}<\/span><br \/>\n  <span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token 
string\">&#034;temperature&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token number\">0.7<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;stream&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token boolean\">true<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<ul>\n<li>\u5b8c\u6574\u652f\u6301 OpenAI \u89c4\u8303&#xff0c;\u5305\u62ec\u591a\u8f6e\u5bf9\u8bdd\u683c\u5f0f&#xff1b;<\/li>\n<li>\u652f\u6301 Streaming \u957f\u8fde\u63a5\u8f93\u51fa&#xff1b;<\/li>\n<li>\u652f\u6301 stop \u5b57\u7b26\u3001top_p\u3001presence_penalty \u7b49\u8c03\u63a7\u53c2\u6570\u3002<\/li>\n<\/ul>\n<h5>\/v1\/completions<\/h5>\n<p>\u652f\u6301\u4f20\u7edf Prompt-based \u7684 LLM \u5b8c\u6574\u8f93\u51fa&#xff1a;<\/p>\n<p><span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;qwen-7b&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;prompt&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;\u8bf7\u5199\u4e00\u9996\u8bd7&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;temperature&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token number\">0.9<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<h5>\/v1\/embeddings<\/h5>\n<p>FastChat \u5df2\u652f\u6301\u901a\u8fc7\u6a21\u578b\u7f16\u7801\u5668\u8f93\u51fa Token\/Embedding \u5411\u91cf\u8868\u793a&#xff0c;\u7528\u4e8e\u4e0e\u68c0\u7d22\u7cfb\u7edf\u3001Agent Memory \u7b49\u4e0b\u6e38\u573a\u666f\u96c6\u6210\u3002<\/p>\n<h4>3. 
\u591a\u6a21\u578b\u5207\u6362\u7b56\u7565<\/h4>\n<p>\u901a\u8fc7\u63a7\u5236\u5668\u53c2\u6570 &#8211;model-list-mode&#061;auto \u6216\u7528\u6237\u8bf7\u6c42\u4f53\u4e2d\u7684 model \u5b57\u6bb5\u6307\u5b9a\u4f7f\u7528\u6a21\u578b\u3002<\/p>\n<p>\u793a\u4f8b&#xff1a;<\/p>\n<p><span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;chatglm3&#034;<\/span><\/p>\n<p>\u652f\u6301\u52a8\u6001\u6ce8\u518c\u4e0e\u8fd0\u884c\u65f6\u70ed\u5207\u6362\u3002<\/p>\n<h3>\u7b2c\u4e09\u7ae0&#xff1a;FastChat \u542f\u52a8\u6a21\u5f0f\u4e0e\u6838\u5fc3\u7ec4\u4ef6\u8be6\u89e3<\/h3>\n<p>FastChat \u4ee5\u6a21\u5757\u5316\u3001\u8fdb\u7a0b\u7ea7\u89e3\u8026\u7684\u65b9\u5f0f\u8fd0\u884c\u5404\u9879\u670d\u52a1\u7ec4\u4ef6\u3002\u4e00\u4e2a\u5b8c\u6574\u7684\u670d\u52a1\u90e8\u7f72\u901a\u5e38\u81f3\u5c11\u5305\u542b\u4e09\u7c7b\u6838\u5fc3\u8fdb\u7a0b&#xff1a;controller \u63a7\u5236\u5668\u3001model_worker \u6a21\u578b\u5de5\u4f5c\u8fdb\u7a0b&#xff0c;\u4ee5\u53ca openai_api_server \u6216 web_server \u670d\u52a1\u524d\u7aef\u3002\u6bcf\u7c7b\u8fdb\u7a0b\u5747\u53ef\u72ec\u7acb\u8fd0\u884c&#xff0c;\u5f7c\u6b64\u901a\u8fc7 RESTful API \u6216\u5f02\u6b65\u961f\u5217\u901a\u4fe1&#xff0c;\u5177\u5907\u826f\u597d\u7684\u6c34\u5e73\u6269\u5c55\u80fd\u529b\u3002<\/p>\n<h4>1. 
\u542f\u52a8\u7ec4\u4ef6\u6982\u89c8<\/h4>\n<table>\n<tr>\u7ec4\u4ef6\u63cf\u8ff0<\/tr>\n<tbody>\n<tr>\n<td>controller<\/td>\n<td>\u4e2d\u592e\u8c03\u5ea6\u5668&#xff0c;\u7ef4\u62a4\u6a21\u578b worker \u72b6\u6001\u3001\u7ba1\u7406\u8def\u7531\u7b56\u7565<\/td>\n<\/tr>\n<tr>\n<td>model_worker<\/td>\n<td>\u5b9e\u9645\u52a0\u8f7d\u5927\u6a21\u578b\u5e76\u63d0\u4f9b\u63a8\u7406\u670d\u52a1\u7684\u8fdb\u7a0b<\/td>\n<\/tr>\n<tr>\n<td>openai_api_server<\/td>\n<td>\u5bf9\u5916\u66b4\u9732 OpenAI API \u63a5\u53e3&#xff0c;\u7edf\u4e00\u5165\u53e3<\/td>\n<\/tr>\n<tr>\n<td>gradio_web_server<\/td>\n<td>\u63d0\u4f9b Web UI \u53ef\u89c6\u5316\u4f53\u9a8c\u754c\u9762<\/td>\n<\/tr>\n<tr>\n<td>cli \/ playground<\/td>\n<td>\u547d\u4ee4\u884c\u4e0e\u7f51\u9875\u5bf9\u8bdd\u6d4b\u8bd5\u5de5\u5177<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4>2. \u542f\u52a8\u4e00\u4e2a\u5b8c\u6574\u670d\u52a1\u7684\u6700\u5c0f\u6d41\u7a0b<\/h4>\n<p>\u5047\u8bbe\u4f7f\u7528 ChatGLM3 \u6a21\u578b&#xff0c;\u547d\u4ee4\u5982\u4e0b&#xff1a;<\/p>\n<p><span class=\"token comment\"># \u542f\u52a8 controller<\/span><br \/>\npython3 -m fastchat.serve.controller<\/p>\n<p><span class=\"token comment\"># \u542f\u52a8\u6a21\u578b Worker<\/span><br \/>\npython3 -m fastchat.serve.model_worker <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;model-path \/path\/to\/chatglm3 <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;model-name chatglm3 <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;device cuda:0<\/p>\n<p><span class=\"token comment\"># \u542f\u52a8 API \u670d\u52a1<\/span><br \/>\npython3 -m fastchat.serve.openai_api_server <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;host <span class=\"token number\">0.0<\/span>.0.0 <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;port <span class=\"token number\">8000<\/span><\/p>\n<p>\u6a21\u578b Worker \u4f1a\u5728\u542f\u52a8\u65f6\u5411 controller \u6ce8\u518c\u81ea\u8eab\u80fd\u529b&#xff0c;controller 
\u4f1a\u5c06\u6765\u81ea API Server \u7684\u8bf7\u6c42\u8f6c\u53d1\u81f3\u53ef\u7528 Worker \u8fdb\u884c\u63a8\u7406\u5904\u7406\u3002<\/p>\n<h4>3. \u6a21\u578b Worker \u591a\u8fdb\u7a0b\u4e0e\u591a\u5361\u90e8\u7f72<\/h4>\n<p>FastChat \u652f\u6301\u4e00\u673a\u591a Worker \u6216\u591a\u5361\u90e8\u7f72&#xff1a;<\/p>\n<p><span class=\"token assign-left variable\">CUDA_VISIBLE_DEVICES<\/span><span class=\"token operator\">&#061;<\/span><span class=\"token number\">0<\/span> python3 -m fastchat.serve.model_worker <span class=\"token punctuation\">..<\/span>.<br \/>\n<span class=\"token assign-left variable\">CUDA_VISIBLE_DEVICES<\/span><span class=\"token operator\">&#061;<\/span><span class=\"token number\">1<\/span> python3 -m fastchat.serve.model_worker <span class=\"token punctuation\">..<\/span>.<\/p>\n<p>\u4e5f\u53ef\u5728\u4e00\u5f20\u5361\u4e0a\u8fd0\u884c\u591a\u4e2a\u5c0f\u6a21\u578b&#xff0c;\u6216\u5728\u591a\u5f20\u5361\u4e0a\u8fd0\u884c\u4e00\u4e2a\u5927\u6a21\u578b&#xff08;\u5982 ChatGLM3-6B \u591a\u5f20 24GB \u663e\u5b58\u5361\u5e76\u884c&#xff09;\u3002<\/p>\n<hr \/>\n<h3>\u7b2c\u56db\u7ae0&#xff1a;\u81ea\u5b9a\u4e49\u6a21\u578b\u63a5\u5165\u4e0e\u591a\u6a21\u578b\u8def\u7531\u7b56\u7565\u5b9e\u73b0<\/h3>\n<p>FastChat \u9ed8\u8ba4\u96c6\u6210\u4e86\u4e3b\u6d41\u5927\u6a21\u578b\u7684\u52a0\u8f7d\u6a21\u677f&#xff0c;\u4f46\u5728\u4f01\u4e1a\u5b9e\u8df5\u4e2d&#xff0c;\u5f80\u5f80\u9700\u8981\u5bf9\u81ea\u7814\u6a21\u578b\u6216\u7ecf\u8fc7\u5fae\u8c03\u7684\u79c1\u6709\u6743\u91cd\u8fdb\u884c\u63a5\u5165\u3002\u8be5\u6846\u67b6\u652f\u6301\u57fa\u4e8e HuggingFace \u7ed3\u6784\u7684\u4efb\u610f\u81ea\u5b9a\u4e49\u6a21\u578b\u6ce8\u518c&#xff0c;\u5173\u952e\u5728\u4e8e &#8211;model-path \u53c2\u6570\u4e0e &#8211;model-name \u6807\u8bc6\u7684\u552f\u4e00\u7ed1\u5b9a\u3002<\/p>\n<h4>1. 
\u5feb\u901f\u63a5\u5165\u4e00\u4e2a\u672c\u5730\u5fae\u8c03\u6a21\u578b<\/h4>\n<p>python3 -m fastchat.serve.model_worker <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;model-path .\/models\/finetuned-qwen <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;model-name qwen-custom <span class=\"token punctuation\">\\\\<\/span><br \/>\n  &#8211;device cuda:0<\/p>\n<ul>\n<li>&#8211;model-path \u53ef\u4e3a HuggingFace \u8def\u5f84\u3001\u672c\u5730\u6587\u4ef6\u5939\u6216 S3 \u5730\u5740&#xff1b;<\/li>\n<li>\u6a21\u578b\u76ee\u5f55\u9700\u5305\u542b tokenizer \u914d\u7f6e\u4e0e\u9884\u8bad\u7ec3\u6743\u91cd&#xff1b;<\/li>\n<li>\u542f\u52a8\u540e controller \u4f1a\u81ea\u52a8\u767b\u8bb0 qwen-custom \u4e3a\u53ef\u8c03\u5ea6\u6a21\u578b\u3002<\/li>\n<\/ul>\n<h4>2. \u591a\u6a21\u578b\u90e8\u7f72\u7ed3\u6784\u5efa\u8bae<\/h4>\n<p>\u5e38\u89c1\u591a\u6a21\u578b\u90e8\u7f72\u7ed3\u6784\u5982\u4e0b&#xff1a;<\/p>\n<p>LLM Controller<br \/>\n   \u251c\u2500\u2500 chatglm-worker (chatglm3)<br \/>\n   \u251c\u2500\u2500 qwen-worker (qwen-7b)<br \/>\n   \u251c\u2500\u2500 llama2-worker (llama2-13b)<br \/>\n   \u2514\u2500\u2500 openai_api_server (\u63a5\u6536\u8bf7\u6c42&#xff0c;\u5206\u53d1\u8def\u7531)<\/p>\n<p>\u901a\u8fc7\u6307\u5b9a\u8bf7\u6c42\u4e2d\u7684 model \u5b57\u6bb5\u9009\u62e9\u540e\u7aef Worker&#xff1a;<\/p>\n<p><span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;qwen-7b&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;messages&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token operator\">&#8230;<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;temperature&#034;<\/span><span class=\"token 
operator\">:<\/span> <span class=\"token number\">0.9<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<p>Controller \u4f1a\u6839\u636e\u6a21\u578b\u540d\u81ea\u52a8\u8f6c\u53d1\u5230\u5bf9\u5e94\u8fdb\u7a0b\u3002\u82e5\u8be5\u6a21\u578b\u5bf9\u5e94\u7684 Worker \u5b95\u673a\u6216\u4e0d\u5b58\u5728&#xff0c;\u5219\u8fd4\u56de 404 \u9519\u8bef\u3002<\/p>\n<h4>3. \u81ea\u5b9a\u4e49\u6a21\u578b\u540d\u6620\u5c04\u89c4\u5219<\/h4>\n<p>FastChat \u652f\u6301\u5728 model_worker \u542f\u52a8\u53c2\u6570\u4e2d\u914d\u7f6e\u522b\u540d\u6620\u5c04&#xff1a;<\/p>\n<p>&#8211;model-name my_internal_chat &#8211;model-path \/models\/chatglm3<\/p>\n<p>\u8fd9\u6837\u7528\u6237\u4f7f\u7528\u65f6\u4ec5\u9700\u6307\u5b9a&#xff1a;<\/p>\n<p><span class=\"token punctuation\">{<\/span> <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;my_internal_chat&#034;<\/span> <span class=\"token punctuation\">}<\/span><\/p>\n<p>\u9002\u5408\u5728\u4e0d\u540c\u79df\u6237\u4e0b\u8fdb\u884c\u6a21\u578b\u9694\u79bb&#xff0c;\u4fbf\u4e8e\u505a\u4f01\u4e1a\u7ea7\u591a\u5b9e\u4f8b\u7ba1\u7406\u3002<\/p>\n<h4>4. 
\u6a21\u578b Worker \u5f02\u6784\u914d\u7f6e\u5b9e\u8df5<\/h4>\n<ul>\n<li>GPU \u7ed1\u5b9a&#xff1a;\u4f7f\u7528 CUDA_VISIBLE_DEVICES \u660e\u786e\u8bbe\u5907&#xff1b;<\/li>\n<li>\u7cbe\u5ea6\u4f18\u5316&#xff1a;\u652f\u6301\u901a\u8fc7 &#8211;load-8bit\u3001&#8211;load-4bit \u7b49\u53c2\u6570\u52a0\u8f7d\u4f4e\u7cbe\u5ea6\u6a21\u578b&#xff1b;<\/li>\n<li>\u5185\u5b58\u9650\u5236&#xff1a;\u5927\u6a21\u578b\u90e8\u7f72\u524d\u5efa\u8bae\u901a\u8fc7 torchrun \u542f\u52a8\u591a\u8fdb\u7a0b\u5e76\u5206\u914d\u663e\u5b58\u3002<\/li>\n<\/ul>\n<p>FastChat \u591a\u6a21\u578b\u67b6\u6784\u5929\u7136\u652f\u6301\u5f02\u6784\u6a21\u578b\u3001\u6309\u9700\u6269\u7f29\u5bb9\u548c\u63a5\u53e3\u7edf\u4e00\u5316&#xff0c;\u662f\u76ee\u524d\u652f\u6301\u8303\u56f4\u6700\u5e7f\u7684\u5f00\u6e90 LLM \u670d\u52a1\u6846\u67b6\u4e4b\u4e00\u3002<\/p>\n<h3>\u7b2c\u4e94\u7ae0&#xff1a;\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u90e8\u7f72\u5b9e\u8df5<\/h3>\n<p>FastChat \u539f\u751f\u517c\u5bb9 vLLM \u63a8\u7406\u5f15\u64ce&#xff0c;\u53ef\u5c06\u5176\u4f5c\u4e3a Model Worker \u7684\u66ff\u4ee3\u540e\u7aef&#xff0c;\u663e\u8457\u63d0\u5347\u63a8\u7406\u541e\u5410\u3001\u5e76\u53d1\u5904\u7406\u80fd\u529b\u4e0e\u4e0a\u4e0b\u6587\u7f13\u5b58\u6548\u7387\u3002vLLM \u91c7\u7528 PagedAttention \u6280\u672f\u4f18\u5316 KV Cache \u7ba1\u7406&#xff0c;\u76f8\u8f83\u4e8e Transformers \u63a8\u7406\u901f\u5ea6\u63d0\u5347 2&#xff5e;4 \u500d&#xff0c;\u7279\u522b\u9002\u5408\u6784\u5efa\u957f\u4e0a\u4e0b\u6587\u3001\u591a\u8fde\u63a5\u3001\u9ad8 QPS \u7684\u4f01\u4e1a\u7ea7 LLM \u670d\u52a1\u3002<\/p>\n<h4>1. 
\u5b89\u88c5 vLLM \u4e0e FastChat<\/h4>\n<p>FastChat \u9879\u76ee\u63d0\u4f9b\u4e13\u5c5e\u7684 vLLM \u96c6\u6210\u7248\u672c&#xff1a;<\/p>\n<p><span class=\"token function\">git<\/span> clone https:\/\/github.com\/lm-sys\/FastChat<br \/>\n<span class=\"token builtin class-name\">cd<\/span> FastChat<br \/>\npip <span class=\"token function\">install<\/span> -e <span class=\"token string\">&#034;.[vllm]&#034;<\/span><\/p>\n<p>\u540c\u65f6\u5b89\u88c5 vLLM&#xff1a;<\/p>\n<p>pip <span class=\"token function\">install<\/span> vllm<\/p>\n<p>\u786e\u4fdd GPU \u73af\u5883\u4e3a\u652f\u6301 FlashAttention2 \u7684 A100\u3001H100 \u7b49\u4e3b\u6d41\u8bbe\u5907&#xff08;V100 \u4e3a Volta \u67b6\u6784&#xff0c;\u4e0d\u652f\u6301 FlashAttention2&#xff09;&#xff0c;CUDA \u2265 11.8\u3002<\/p>\n<h4>2. \u4f7f\u7528 vLLM \u542f\u52a8 FastChat \u6a21\u578b Worker<\/h4>\n<p>python3 -m fastchat.serve.vllm_worker <span class=\"token punctuation\">\\<\/span><br \/>\n  --model-path \/models\/chatglm3 <span class=\"token punctuation\">\\<\/span><br \/>\n  --model-name chatglm3-vllm <span class=\"token punctuation\">\\<\/span><br \/>\n  --device cuda:0 <span class=\"token punctuation\">\\<\/span><br \/>\n  --max-model-len <span class=\"token number\">8192<\/span> <span class=\"token punctuation\">\\<\/span><br \/>\n  --dtype auto<\/p>\n<p>\u8be5\u8fdb\u7a0b\u4f1a\u6ce8\u518c\u81f3 Controller&#xff0c;\u5e76\u4ee5 vLLM \u540e\u7aef\u63d0\u4f9b OpenAI \u63a5\u53e3\u7ea7\u63a8\u7406\u80fd\u529b&#xff0c;\u652f\u6301&#xff1a;<\/p>\n<ul>\n<li>\u9ad8\u6548 KV Cache \u7ba1\u7406&#xff1b;<\/li>\n<li>Streaming Token \u8f93\u51fa&#xff1b;<\/li>\n<li>Prompt \u62fc\u63a5\u4f18\u5316&#xff1b;<\/li>\n<li>\u591a\u8fde\u63a5\u5171\u4eab\u4e0a\u4e0b\u6587\u3002<\/li>\n<\/ul>\n<h4>3. 
\u9002\u914d\u591a\u6a21\u578b\u90e8\u7f72\u5efa\u8bae<\/h4>\n<p>\u53ef\u901a\u8fc7\u4ee5\u4e0b\u65b9\u5f0f\u4e3a\u591a\u4e2a\u6a21\u578b\u542f\u52a8\u591a\u4e2a vLLM Worker&#xff1a;<\/p>\n<p><span class=\"token comment\"># \u6a21\u578b A<\/span><br \/>\n<span class=\"token assign-left variable\">CUDA_VISIBLE_DEVICES<\/span><span class=\"token operator\">&#061;<\/span><span class=\"token number\">0<\/span> python3 -m fastchat.serve.vllm_worker &#8211;model-name model-a <span class=\"token punctuation\">..<\/span>.<br \/>\n<span class=\"token comment\"># \u6a21\u578b B<\/span><br \/>\n<span class=\"token assign-left variable\">CUDA_VISIBLE_DEVICES<\/span><span class=\"token operator\">&#061;<\/span><span class=\"token number\">1<\/span> python3 -m fastchat.serve.vllm_worker &#8211;model-name model-b <span class=\"token punctuation\">..<\/span>.<\/p>\n<p>\u9ed8\u8ba4\u60c5\u51b5\u4e0b&#xff0c;\u6bcf\u4e2a\u8fdb\u7a0b\u72ec\u5360\u4e00\u5757\u663e\u5361&#xff0c;\u5e76\u4e3a\u8be5\u6a21\u578b\u5206\u914d\u72ec\u7acb\u7684 KV Cache \u7ba1\u7406\u5668\u3002<\/p>\n<h4>4. 
\u6027\u80fd\u5bf9\u6bd4\u4e0e\u90e8\u7f72\u5efa\u8bae<\/h4>\n<table>\n<thead>\n<tr>\n<th>\u6307\u6807<\/th>\n<th>Transformers Worker<\/th>\n<th>vLLM Worker<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>Streaming \u9996 token \u5ef6\u8fdf<\/td>\n<td>\u9ad8&#xff08;&gt;300ms&#xff09;<\/td>\n<td>\u4f4e&#xff08;&lt;100ms&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u5e76\u53d1\u8fde\u63a5\u6570<\/td>\n<td>&lt;50<\/td>\n<td>100&#043;<\/td>\n<\/tr>\n<tr>\n<td>KV Cache \u5229\u7528\u7387<\/td>\n<td>\u4f4e&#xff0c;\u5197\u4f59\u62f7\u8d1d\u591a<\/td>\n<td>\u9ad8&#xff0c;\u52a8\u6001\u5206\u914d<\/td>\n<\/tr>\n<tr>\n<td>\u591a\u79df\u6237\u652f\u6301<\/td>\n<td>\u5dee&#xff0c;\u9700\u624b\u52a8\u9694\u79bb<\/td>\n<td>\u5f3a&#xff0c;\u652f\u6301 Context Window \u7ba1\u7406<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u5efa\u8bae\u90e8\u7f72\u7b56\u7565&#xff1a;<\/p>\n<ul>\n<li>\u9ad8\u5e76\u53d1\u670d\u52a1 \u2192 vLLM&#xff1b;<\/li>\n<li>\u591a\u8f6e\u95ee\u7b54\u3001\u4e0a\u4e0b\u6587\u957f \u2192 vLLM&#xff1b;<\/li>\n<li>\u5c0f\u6a21\u578b\u8bc4\u4f30 \u2192 \u9ed8\u8ba4 Transformers Worker \u8db3\u77e3&#xff1b;<\/li>\n<li>\u5355\u673a\u591a\u6a21\u578b\u90e8\u7f72 \u2192 \u6bcf\u6a21\u578b\u7ed1\u5b9a\u72ec\u7acb vLLM \u5b9e\u4f8b\u3002<\/li>\n<\/ul>\n<hr \/>\n<h3>\u7b2c\u516d\u7ae0&#xff1a;Chat Completion \u4e0e Streaming API \u5e76\u53d1\u6027\u80fd\u4f18\u5316<\/h3>\n<p>\u5728\u5927\u6a21\u578b\u63a8\u7406\u8fc7\u7a0b\u4e2d&#xff0c;API \u7684\u5ef6\u8fdf\u4e0e\u541e\u5410\u662f\u6838\u5fc3\u6027\u80fd\u74f6\u9888\u4e4b\u4e00\u3002FastChat \u652f\u6301\u6807\u51c6 OpenAI \u98ce\u683c\u7684 \/v1\/chat\/completions \u63a5\u53e3&#xff0c;\u540c\u65f6\u5185\u7f6e Streaming \u63a8\u7406\u673a\u5236&#xff0c;\u652f\u6301 Token-by-Token \u5b9e\u65f6\u8f93\u51fa&#xff0c;\u6709\u6548\u63d0\u5347\u7528\u6237\u4ea4\u4e92\u4f53\u9a8c\u3002<\/p>\n<h4>1. 
\u975e Streaming \u6a21\u5f0f\u5178\u578b\u74f6\u9888<\/h4>\n<p>\u975e Streaming \u6a21\u5f0f\u4e0b&#xff0c;\u7528\u6237\u9700\u7b49\u5f85\u5b8c\u6574\u56de\u590d\u751f\u6210\u540e\u624d\u8fd4\u56de\u7ed3\u679c&#xff0c;\u6613\u51fa\u73b0\u4ee5\u4e0b\u95ee\u9898&#xff1a;<\/p>\n<ul>\n<li>\u7b49\u5f85\u65f6\u95f4\u957f&#xff08;\u5c24\u5176\u662f\u751f\u6210 800&#043; Token&#xff09;&#xff1b;<\/li>\n<li>\u6a21\u578b\u54cd\u5e94\u963b\u585e\u4e25\u91cd&#xff1b;<\/li>\n<li>\u9ad8\u5e76\u53d1\u4e0b QPS \u6025\u5267\u4e0b\u964d\u3002<\/li>\n<\/ul>\n<p>\u9002\u5408\u5d4c\u5165\u751f\u6210\u3001\u6458\u8981\u4efb\u52a1\u7b49\u975e\u4ea4\u4e92\u6027\u573a\u666f\u3002<\/p>\n<h4>2. \u542f\u7528 Streaming \u5b9e\u65f6\u8f93\u51fa\u6a21\u5f0f<\/h4>\n<p>FastChat \u63d0\u4f9b\u5b8c\u6574\u7684 stream&#061;true \u652f\u6301&#xff1a;<\/p>\n<p><span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;qwen-7b&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;messages&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token operator\">&#8230;<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;stream&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token boolean\">true<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<p>\u8fd4\u56de\u683c\u5f0f&#xff1a;<\/p>\n<p>data: {&#034;id&#034;:&#034;chatcmpl-xxx&#034;, &#034;choices&#034;:[{&#034;delta&#034;:{&#034;content&#034;:&#034;\u4f60\u597d&#034;}}]}<br \/>\ndata: {&#034;id&#034;:&#034;chatcmpl-xxx&#034;, &#034;choices&#034;:[{&#034;delta&#034;:{&#034;content&#034;:&#034;&#xff0c;\u6b22\u8fce\u4f7f\u7528 FastChat&#034;}}]}<\/p>\n<p>\u5ba2\u6237\u7aef\u9700\u5b9e\u73b0 SSE 
\u76d1\u542c\u673a\u5236&#xff0c;\u5e38\u89c1\u6846\u67b6\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>Python: sseclient<\/li>\n<li>JavaScript: EventSource<\/li>\n<li>Postman \/ curl: curl \u9700\u52a0 -N \u53c2\u6570&#xff0c;Postman \u9700\u4f7f\u7528\u8f83\u65b0\u7248\u672c&#xff0c;\u65b9\u53ef\u5b9e\u65f6\u63a5\u6536\u6d41\u5f0f\u8f93\u51fa<\/li>\n<\/ul>\n<h4>3. \u5e76\u53d1\u6027\u80fd\u8c03\u4f18\u7b56\u7565<\/h4>\n<table>\n<thead>\n<tr>\n<th>\u7b56\u7565<\/th>\n<th>\u6548\u679c<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u4f7f\u7528 vLLM \u66ff\u4ee3\u539f\u59cb\u63a8\u7406\u5185\u6838<\/td>\n<td>\u63d0\u5347 3&#xff5e;4 \u500d\u5e76\u53d1\u80fd\u529b<\/td>\n<\/tr>\n<tr>\n<td>\u964d\u4f4e max_new_tokens<\/td>\n<td>\u7f29\u77ed\u54cd\u5e94\u65f6\u95f4<\/td>\n<\/tr>\n<tr>\n<td>\u9650\u5236\u6700\u5927 context &#043; completion \u957f\u5ea6<\/td>\n<td>\u63a7\u5236\u663e\u5b58\u4f7f\u7528&#xff0c;\u63d0\u5347\u7a33\u5b9a\u6027<\/td>\n<\/tr>\n<tr>\n<td>\u542f\u7528 batch size \/ prompt \u62fc\u63a5<\/td>\n<td>\u63d0\u5347\u541e\u5410&#xff0c;\u51cf\u5c11\u6a21\u578b\u8c03\u7528\u6b21\u6570<\/td>\n<\/tr>\n<tr>\n<td>\u5c06 tokenizer \u63d0\u524d\u5b8c\u6210<\/td>\n<td>\u51cf\u5c11\u8bf7\u6c42\u603b\u8017\u65f6<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u5efa\u8bae\u5728 Streaming \u573a\u666f\u4e0b&#xff1a;<\/p>\n<ul>\n<li>\u8bbe\u7f6e max_tokens \u4e0d\u8d85\u8fc7 1024&#xff1b;<\/li>\n<li>\u663e\u5f0f\u8bbe\u7f6e stop \u6807\u8bb0&#xff1b;<\/li>\n<li>\u7ed3\u5408 temperature&#061;0.7~1.0 \u63a7\u5236\u6587\u672c\u957f\u5ea6\u6ce2\u52a8\u3002<\/li>\n<\/ul>\n<h4>4. 
\u63a5\u53e3\u54cd\u5e94\u4f18\u5316\u5b9e\u8df5&#xff08;\u914d\u7f6e\u7ea7&#xff09;<\/h4>\n<ul>\n<li>\u63a7\u5236\u53c2\u6570 context-window&#xff1a;\u9632\u6b62\u8d85\u957f\u4e0a\u4e0b\u6587\u56de\u6eaf&#xff1b;<\/li>\n<li>\u8bbe\u7f6e controller.timeout&#xff0c;\u9632\u6b62\u8def\u7531\u8d85\u65f6&#xff1b;<\/li>\n<li>\u542f\u7528 FastAPI \u7684 Uvicorn \u591a worker \u5e76\u53d1\u6a21\u578b\u63d0\u5347 API \u63a5\u5165\u5c42\u5904\u7406\u80fd\u529b\u3002<\/li>\n<\/ul>\n<p>\u901a\u8fc7\u5408\u7406\u7684 Streaming \u7b56\u7565\u3001\u5185\u6838\u4f18\u5316\u4e0e\u4e0a\u4e0b\u6587\u7ba1\u7406&#xff0c;FastChat \u53ef\u652f\u6491\u4f01\u4e1a\u7ea7\u7528\u6237\u5bf9\u8bdd\u7cfb\u7edf\u5b9e\u73b0\u7a33\u5b9a\u3001\u4f4e\u5ef6\u8fdf\u7684\u5927\u89c4\u6a21\u670d\u52a1\u843d\u5730\u3002<\/p>\n<h3>\u7b2c\u4e03\u7ae0&#xff1a;\u591a\u7528\u6237\u4efb\u52a1\u7ba1\u7406\u4e0e\u8eab\u4efd\u8ba4\u8bc1\u673a\u5236\u8bbe\u8ba1<\/h3>\n<p>\u5728\u4f01\u4e1a\u7ea7\u5927\u6a21\u578b\u670d\u52a1\u90e8\u7f72\u4e2d&#xff0c;\u5c24\u5176\u662f\u591a\u79df\u6237 SaaS \u573a\u666f\u4e0b&#xff0c;FastChat \u7684\u591a\u6a21\u578b\u80fd\u529b\u9700\u7ed3\u5408\u7528\u6237\u8eab\u4efd\u7ba1\u7406\u3001\u8d44\u6e90\u9694\u79bb\u4e0e\u4efb\u52a1\u5206\u6d41\u7b56\u7565\u8fdb\u884c\u6269\u5c55\u3002\u672c\u7ae0\u91cd\u70b9\u5256\u6790 FastChat \u5728\u591a\u7528\u6237\u63a5\u5165\u4e0b\u7684\u8eab\u4efd\u9274\u6743\u673a\u5236\u6269\u5c55\u8def\u5f84\u3002<\/p>\n<h4>1. 
API \u5c42\u5f15\u5165\u8eab\u4efd\u8ba4\u8bc1\u673a\u5236<\/h4>\n<p>FastChat \u539f\u59cb OpenAI \u63a5\u53e3\u9ed8\u8ba4\u672a\u542f\u7528\u8eab\u4efd\u8ba4\u8bc1\u673a\u5236&#xff0c;\u4f01\u4e1a\u53ef\u57fa\u4e8e FastAPI \u6846\u67b6\u5f15\u5165 JWT \u6216 API Key&#xff1a;<\/p>\n<p><span class=\"token keyword\">from<\/span> fastapi <span class=\"token keyword\">import<\/span> Depends<span class=\"token punctuation\">,<\/span> Request<span class=\"token punctuation\">,<\/span> HTTPException<br \/>\n<span class=\"token keyword\">from<\/span> fastapi<span class=\"token punctuation\">.<\/span>security <span class=\"token keyword\">import<\/span> HTTPAuthorizationCredentials<span class=\"token punctuation\">,<\/span> HTTPBearer<br \/>\n<span class=\"token keyword\">from<\/span> jose <span class=\"token keyword\">import<\/span> jwt<\/p>\n<p>security <span class=\"token operator\">&#061;<\/span> HTTPBearer<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\nSECRET_KEY <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;enterprise_secret_key&#034;<\/span><\/p>\n<p><span class=\"token decorator annotation punctuation\">&#064;app<span class=\"token punctuation\">.<\/span>post<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\/v1\/chat\/completions&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">async<\/span> <span class=\"token keyword\">def<\/span> <span class=\"token function\">chat_completion<\/span><span class=\"token punctuation\">(<\/span>request<span class=\"token punctuation\">:<\/span> Request<span class=\"token punctuation\">,<\/span> token<span class=\"token punctuation\">:<\/span> HTTPAuthorizationCredentials <span class=\"token operator\">&#061;<\/span> Depends<span class=\"token punctuation\">(<\/span>security<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    payload <span class=\"token operator\">&#061;<\/span> jwt<span 
class=\"token punctuation\">.<\/span>decode<span class=\"token punctuation\">(<\/span>token<span class=\"token punctuation\">.<\/span>credentials<span class=\"token punctuation\">,<\/span> SECRET_KEY<span class=\"token punctuation\">)<\/span><br \/>\n    user_id <span class=\"token operator\">&#061;<\/span> payload<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;sub&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">if<\/span> user_id <span class=\"token keyword\">not<\/span> <span class=\"token keyword\">in<\/span> allowed_users<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">raise<\/span> HTTPException<span class=\"token punctuation\">(<\/span>status_code<span class=\"token operator\">&#061;<\/span><span class=\"token number\">403<\/span><span class=\"token punctuation\">,<\/span> detail<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;Unauthorized&#034;<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<ul>\n<li>\u652f\u6301\u81ea\u5b9a\u4e49\u79df\u6237\u4fe1\u606f\u6ce8\u5165&#xff1b;<\/li>\n<li>\u53ef\u63a5\u5165 OAuth\u3001LDAP\u3001\u4f01\u4e1a SSO&#xff1b;<\/li>\n<li>\u914d\u5408\u4e2d\u95f4\u4ef6\u8bb0\u5f55 trace_id&#xff0c;\u5b9e\u73b0\u8c03\u7528\u8ffd\u8e2a\u3002<\/li>\n<\/ul>\n<h4>2. 
\u591a\u7528\u6237\u6a21\u578b\u6620\u5c04\u7b56\u7565<\/h4>\n<p>\u4e3a\u4e0d\u540c\u7528\u6237\u7ed1\u5b9a\u4e0d\u540c\u6a21\u578b\u5b9e\u4f8b&#xff0c;\u53ef\u91c7\u7528\u4ee5\u4e0b\u4e24\u79cd\u65b9\u5f0f&#xff1a;<\/p>\n<p>\u65b9\u5f0f\u4e00&#xff1a;\u8bf7\u6c42\u4f53\u4e2d\u6307\u5b9a\u6a21\u578b\u5b57\u6bb5<\/p>\n<p><span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;chatglm3-tenant-a&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;messages&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token operator\">&#8230;<\/span><span class=\"token punctuation\">]<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<p>\u7ed3\u5408 Controller \u8def\u7531\u7b56\u7565&#xff0c;\u5c06\u4e0d\u540c\u6a21\u578b\u7ed1\u5b9a\u7279\u5b9a Worker\u3002<\/p>\n<p>\u65b9\u5f0f\u4e8c&#xff1a;\u5728\u8eab\u4efd Token \u4e2d\u6ce8\u5165\u9ed8\u8ba4\u6a21\u578b\u914d\u7f6e<\/p>\n<p><span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;sub&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;tenant-a&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;default_model&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token string\">&#034;qwen-7b-tenant-a&#034;<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<p>\u5728\u670d\u52a1\u7aef\u8fdb\u884c\u6620\u5c04&#xff1a;<\/p>\n<p>model <span class=\"token operator\">&#061;<\/span> payload<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;default_model&#034;<\/span><span class=\"token 
punctuation\">)<\/span><\/p>\n<p>\u9002\u7528\u4e8e\u9650\u5236\u7528\u6237\u6a21\u578b\u8bbf\u95ee\u8303\u56f4\u6216\u6309\u5957\u9910\u6388\u6743\u8c03\u7528\u3002<\/p>\n<h4>3. \u4efb\u52a1\u9694\u79bb\u4e0e\u9650\u989d\u7ba1\u7406<\/h4>\n<p>\u901a\u8fc7 API \u5c42\u5b9e\u73b0\u7528\u6237\u7ea7\u9650\u989d&#xff1a;<\/p>\n<ul>\n<li>\u6bcf\u65e5\u6700\u5927 Token&#xff1b;<\/li>\n<li>\u5e76\u53d1\u8fde\u63a5\u6570&#xff1b;<\/li>\n<li>\u5355\u6b21\u6700\u5927\u751f\u6210\u957f\u5ea6\u3002<\/li>\n<\/ul>\n<p>\u5efa\u8bae\u4f7f\u7528 Redis \u8bb0\u5f55\u7528\u6237\u72b6\u6001&#xff0c;\u5e76\u7ed3\u5408 Lua \u811a\u672c\u5b9e\u73b0\u8f7b\u91cf\u9650\u6d41\u7b56\u7565&#xff1a;<\/p>\n<p><span class=\"token comment\">-- \u4f2a\u4ee3\u7801&#xff1a;\u6bcf\u65e5 Token \u9650\u989d 100,000<\/span><br \/>\n<span class=\"token keyword\">if<\/span> redis<span class=\"token punctuation\">.<\/span><span class=\"token function\">call<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;GET&#034;<\/span><span class=\"token punctuation\">,<\/span> user_id<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#043;<\/span> token_count <span class=\"token operator\">&gt;<\/span> <span class=\"token number\">100000<\/span> <span class=\"token keyword\">then<\/span><br \/>\n  <span class=\"token keyword\">return<\/span> reject<br \/>\n<span class=\"token keyword\">end<\/span><\/p>\n<p>\u7ed3\u5408 Prometheus \/ Grafana \u53ef\u89c6\u5316\u6bcf\u7528\u6237\u8c03\u7528\u9891\u7387\u3001Token \u4f7f\u7528\u7387\u3001\u9519\u8bef\u7387\u7b49\u6307\u6807\u3002<\/p>\n<h4>4. 
\u65e5\u5fd7\u8ffd\u8e2a\u4e0e\u5ba1\u8ba1\u6269\u5c55<\/h4>\n<ul>\n<li>\u4e3a\u6bcf\u4e2a\u8bf7\u6c42\u6ce8\u5165 UUID&#xff08;X-Request-ID&#xff09;&#xff1b;<\/li>\n<li>\u6240\u6709 API \u8bf7\u6c42\u5199\u5165\u72ec\u7acb\u65e5\u5fd7&#xff1b;<\/li>\n<li>\u652f\u6301 trace_id \u2192 \u6a21\u578b\u8f93\u51fa\u56de\u6eaf&#xff1b;<\/li>\n<li>\u6574\u5408 ELK\u3001ClickHouse \u6784\u5efa\u5ba1\u8ba1\u7cfb\u7edf\u3002<\/li>\n<\/ul>\n<p>FastChat \u6a21\u5757\u5316\u67b6\u6784\u4e0e\u6807\u51c6 FastAPI \u670d\u52a1\u6613\u4e8e\u5d4c\u5165\u4e0a\u8ff0\u6269\u5c55&#xff0c;\u9002\u5408\u4f01\u4e1a\u4ece\u5355\u79df\u6237\u6d4b\u8bd5\u6269\u5c55\u81f3\u591a\u79df\u6237\u7a33\u5b9a\u670d\u52a1\u5e73\u53f0\u3002<\/p>\n<hr \/>\n<h3>\u7b2c\u516b\u7ae0&#xff1a;\u524d\u7aef Web UI \/ CLI \u5de5\u5177\u94fe\u5b9e\u6218\u5e94\u7528<\/h3>\n<p>FastChat \u9664\u63d0\u4f9b OpenAI \u98ce\u683c API \u5916&#xff0c;\u8fd8\u96c6\u6210\u4e86\u8f7b\u91cf\u7684 Gradio Web \u524d\u7aef\u3001\u547d\u4ee4\u884c\u5de5\u5177\u4e0e\u53ef\u89c6\u5316 Chat \u5386\u53f2\u56de\u653e\u80fd\u529b&#xff0c;\u9002\u5408\u5f00\u53d1\u8005\u8c03\u8bd5\u3001\u81ea\u6d4b\u5fae\u8c03\u6a21\u578b\u3001\u8bc4\u4f30\u8f93\u51fa\u4e00\u81f4\u6027\u3002<\/p>\n<h4>1. \u542f\u52a8 Web UI \u524d\u7aef\u670d\u52a1<\/h4>\n<p>python3 -m fastchat.serve.gradio_web_server<\/p>\n<p>\u9ed8\u8ba4\u76d1\u542c\u7aef\u53e3\u4e3a 7860&#xff0c;\u652f\u6301&#xff1a;<\/p>\n<ul>\n<li>\u6a21\u578b\u9009\u62e9&#xff1b;<\/li>\n<li>\u804a\u5929\u8bb0\u5f55\u56de\u770b&#xff1b;<\/li>\n<li>API \u53c2\u6570\u914d\u7f6e&#xff08;\u6e29\u5ea6\u3001top_p&#xff09;&#xff1b;<\/li>\n<li>Streaming \/ \u975e Streaming \u5207\u6362&#xff1b;<\/li>\n<li>\u591a\u6a21\u578b\u5bf9\u8bdd\u7a97\u53e3\u3002<\/li>\n<\/ul>\n<p>\u652f\u6301\u5d4c\u5165\u4f01\u4e1a\u5e73\u53f0\u5185\u5d4c iframe \u9875\u9762&#xff0c;\u9002\u914d\u79fb\u52a8\u7aef\u8c03\u8bd5\u7b49\u573a\u666f\u3002<\/p>\n<h4>2. 
\u547d\u4ee4\u884c\u4ea4\u4e92\u5de5\u5177<\/h4>\n<p>CLI \u662f FastChat \u63d0\u4f9b\u7684\u53e6\u4e00\u7c7b\u91cd\u8981\u5de5\u5177&#xff0c;\u652f\u6301\u5feb\u901f\u4ea4\u4e92&#xff1a;<\/p>\n<p>python3 -m fastchat.serve.cli <span class=\"token punctuation\">\\<\/span><br \/>\n  --model-name qwen-7b <span class=\"token punctuation\">\\<\/span><br \/>\n  --controller-address http:\/\/localhost:21001<\/p>\n<p>\u53ef\u7528\u4e8e&#xff1a;<\/p>\n<ul>\n<li>\u672c\u5730\u8fd0\u884c\u6a21\u578b\u5feb\u901f\u6d4b\u8bd5&#xff1b;<\/li>\n<li>\u6279\u91cf\u811a\u672c\u8c03\u7528&#xff1b;<\/li>\n<li>\u9a8c\u8bc1 controller-worker \u8def\u7531\u72b6\u6001&#xff1b;<\/li>\n<li>\u7ed3\u5408 shell \u6d41\u6c34\u7ebf\u5b9e\u73b0\u6587\u672c\u6279\u5904\u7406\u3002<\/li>\n<\/ul>\n<h4>3. \u591a\u6a21\u578b\u6a2a\u5411\u8bc4\u6d4b\u5de5\u5177&#xff08;\u5bf9\u8bdd\u6bd4\u5bf9&#xff09;<\/h4>\n<p>FastChat \u63d0\u4f9b fastchat.llm_judge \u6a21\u5757&#xff0c;\u7528\u4e8e\u591a\u6a21\u578b\u8f93\u51fa\u8bc4\u4f30&#xff1a;<\/p>\n<p>python3 -m fastchat.llm_judge.score <span class=\"token punctuation\">\\<\/span><br \/>\n  --model-names chatglm3 qwen-7b mistral <span class=\"token punctuation\">\\<\/span><br \/>\n  --input-file examples.json<\/p>\n<p>\u8f93\u51fa\u5305\u542b&#xff1a;<\/p>\n<ul>\n<li>\u6bcf\u8f6e\u5bf9\u8bdd\u6bcf\u6a21\u578b\u54cd\u5e94&#xff1b;<\/li>\n<li>\u8bc4\u5206\u7ef4\u5ea6&#xff08;\u5185\u5bb9\u5b8c\u6574\u6027\u3001\u8bed\u8a00\u6d41\u7545\u5ea6&#xff09;&#xff1b;<\/li>\n<li>\u53ef\u89c6\u5316\u5bf9\u6bd4\u5206\u5e03\u56fe\u8868\u3002<\/li>\n<\/ul>\n<p>\u9002\u7528\u4e8e\u6a21\u578b\u66f4\u65b0\u524d\u540e\u7684 A\/B \u6d4b\u8bd5&#xff0c;\u4e5f\u9002\u5408\u9a8c\u8bc1 Agent Prompt \u5dee\u5f02\u6548\u679c\u3002<\/p>\n<h4>4. 
\u524d\u7aef\u6269\u5c55\u5efa\u8bae<\/h4>\n<ul>\n<li>\u53ef\u66ff\u6362 Gradio \u4e3a\u4f01\u4e1a\u5b9a\u5236 UI&#xff1b;<\/li>\n<li>\u63a5\u5165 LangChain \/ RAG \u524d\u7aef\u6d41\u7a0b&#xff1b;<\/li>\n<li>\u63a5\u5165 WebSocket \u5b9e\u73b0\u5b9e\u65f6\u8fd4\u56de&#xff1b;<\/li>\n<li>\u63a5\u5165 Webhook \u5b9e\u73b0\u540e\u53f0\u56de\u8c03\u3002<\/li>\n<\/ul>\n<p>FastChat \u524d\u7aef\u5de5\u5177\u94fe\u5b8c\u6574\u3001\u8f7b\u91cf\u3001\u53ef\u6269\u5c55&#xff0c;\u662f\u4f01\u4e1a\u5185\u6d4b\u548c\u4ea7\u54c1\u96c6\u6210\u524d\u7aef\u5f00\u53d1\u7684\u826f\u597d\u8d77\u70b9\u3002<\/p>\n<h3>\u7b2c\u4e5d\u7ae0&#xff1a;\u4f01\u4e1a\u7ea7\u573a\u666f\u843d\u5730\u6848\u4f8b\u4e0e\u5b89\u5168\u7b56\u7565\u5206\u6790<\/h3>\n<p>FastChat \u4f5c\u4e3a\u5f00\u6e90 LLM \u63a5\u53e3\u7f51\u5173&#xff0c;\u5df2\u5728\u591a\u7c7b\u4f01\u4e1a\u4e2d\u5b9e\u73b0\u79c1\u6709\u90e8\u7f72\u4e0e\u5b9a\u5236\u6269\u5c55&#xff0c;\u6db5\u76d6\u6a21\u578b\u670d\u52a1\u3001\u667a\u80fd\u5ba2\u670d\u3001AI \u52a9\u7406\u3001\u5185\u90e8\u95ee\u7b54\u5e73\u53f0\u7b49\u573a\u666f\u3002\u672c\u7ae0\u805a\u7126\u4e24\u4e2a\u5178\u578b\u843d\u5730\u8def\u5f84&#xff0c;\u89e3\u6790\u5176\u7cfb\u7edf\u8bbe\u8ba1\u3001\u6a21\u578b\u7ba1\u7406\u4e0e\u5b89\u5168\u7b56\u7565\u843d\u5730\u65b9\u5f0f\u3002<\/p>\n<h4>1. 
\u6848\u4f8b\u4e00&#xff1a;\u67d0\u5927\u578b\u4e92\u8054\u7f51\u516c\u53f8\u6784\u5efa\u79c1\u6709 ChatGPT \u670d\u52a1\u5e73\u53f0<\/h4>\n<p>\u80cc\u666f\u9700\u6c42&#xff1a;<\/p>\n<ul>\n<li>\u652f\u6301 ChatGLM3\u3001Qwen\u3001Baichuan2 \u7b49\u56fd\u5185\u6a21\u578b&#xff1b;<\/li>\n<li>\u517c\u5bb9 OpenAI Chat Completion API&#xff1b;<\/li>\n<li>\u5185\u7f51\u53ef\u63a7\u73af\u5883\u90e8\u7f72&#xff0c;\u4fdd\u969c\u6570\u636e\u4e0d\u51fa\u5883&#xff1b;<\/li>\n<li>\u7528\u6237\u5206\u7ec4\u6743\u9650\u63a7\u5236\u4e0e\u8c03\u7528\u65e5\u5fd7\u5ba1\u8ba1\u3002<\/li>\n<\/ul>\n<p>\u90e8\u7f72\u7ed3\u6784&#xff1a;<\/p>\n<ul>\n<li>\u6bcf\u4e2a\u6a21\u578b\u914d\u7f6e\u72ec\u7acb Worker&#xff1b;<\/li>\n<li>\u63a7\u5236\u5668\u96c6\u4e2d\u8c03\u5ea6&#xff0c;API Server \u90e8\u7f72\u5728 DMZ&#xff1b;<\/li>\n<li>\u4f7f\u7528 vLLM Worker \u7ed1\u5b9a\u9ad8\u6027\u80fd A100 \u670d\u52a1\u5668&#xff0c;\u5904\u7406\u957f\u4e0a\u4e0b\u6587\u8bf7\u6c42&#xff1b;<\/li>\n<li>\u914d\u5408 Nginx \u7f51\u5173 &#043; JWT \u5b9e\u73b0\u63a5\u53e3\u7ea7\u6743\u9650\u7ba1\u7406&#xff1b;<\/li>\n<li>\u65e5\u5fd7\u63a5\u5165 ELK \u5ba1\u8ba1\u5e73\u53f0\u3002<\/li>\n<\/ul>\n<p>\u5b89\u5168\u7b56\u7565&#xff1a;<\/p>\n<ul>\n<li>\u63a5\u53e3\u8c03\u7528\u524d\u9274\u6743&#xff1b;<\/li>\n<li>\u8bf7\u6c42\u65e5\u5fd7\u8bb0\u5f55\u8f93\u5165\u8f93\u51fa Token \u957f\u5ea6\u3001\u54cd\u5e94\u65f6\u95f4&#xff1b;<\/li>\n<li>\u660e\u786e\u6a21\u578b\u4f7f\u7528\u8303\u56f4&#xff0c;\u4e0d\u66b4\u9732\u654f\u611f\u7cfb\u7edf\u8c03\u7528\u3002<\/li>\n<\/ul>\n<p>\u4f18\u5316\u70b9&#xff1a;<\/p>\n<ul>\n<li>\u8bbe\u7f6e\u6a21\u578b\u8f93\u51fa\u4e0a\u9650&#xff1a;max_new_tokens \u2264 1024&#xff1b;<\/li>\n<li>\u6bcf\u7528\u6237\u9650\u6d41&#xff0c;\u5355 IP \u6700\u5927\u5e76\u53d1\u6570&#xff1b;<\/li>\n<li>\u542f\u7528 Streaming \u7f13\u89e3\u54cd\u5e94\u6162\u95ee\u9898\u3002<\/li>\n<\/ul>\n<h4>2. 
\u6848\u4f8b\u4e8c&#xff1a;SaaS \u591a\u79df\u6237\u667a\u80fd\u95ee\u7b54\u5e73\u53f0<\/h4>\n<p>\u6838\u5fc3\u8981\u6c42&#xff1a;<\/p>\n<ul>\n<li>\u652f\u6301\u591a\u79df\u6237\u9694\u79bb&#xff0c;\u6bcf\u4e2a\u5ba2\u6237\u53ef\u914d\u7f6e\u72ec\u7acb\u6a21\u578b&#xff1b;<\/li>\n<li>\u7ed3\u5408\u6587\u6863\u68c0\u7d22\u6784\u5efa RAG \u7ba1\u7ebf&#xff1b;<\/li>\n<li>\u652f\u6301\u79df\u6237\u5b9a\u5236 Prompt \u6a21\u677f&#xff1b;<\/li>\n<li>\u63d0\u4f9b\u6d41\u91cf\u7edf\u8ba1\u3001Token \u7528\u91cf\u62a5\u544a\u3002<\/li>\n<\/ul>\n<p>\u6280\u672f\u65b9\u6848&#xff1a;<\/p>\n<ul>\n<li>\u63a7\u5236\u5668\u4e0e Worker \u652f\u6301\u52a8\u6001\u6ce8\u518c\u4e0e\u81ea\u52a8\u53d1\u73b0&#xff1b;<\/li>\n<li>\u79df\u6237\u5728\u8bf7\u6c42\u4e2d\u4f20\u5165 X-Tenant-ID&#xff0c;\u540e\u7aef\u8def\u7531\u5bf9\u5e94\u6a21\u578b&#xff1b;<\/li>\n<li>\u6bcf\u4e2a\u79df\u6237\u53ef\u901a\u8fc7\u914d\u7f6e\u6587\u4ef6\u8bbe\u7f6e\u9ed8\u8ba4 Prompt \u524d\u7f00&#xff1b;<\/li>\n<li>\u63a5\u53e3\u5c42\u52a0\u5165 Token \u7528\u91cf\u8ffd\u8e2a\u6a21\u5757&#xff1b;<\/li>\n<li>\u4f7f\u7528 Loki &#043; Grafana \u663e\u793a\u79df\u6237 QPS\u3001\u5ef6\u8fdf\u3001\u8c03\u7528\u5f02\u5e38\u7b49\u56fe\u8868\u3002<\/li>\n<\/ul>\n<p>\u6570\u636e\u9694\u79bb&#xff1a;<\/p>\n<ul>\n<li>\u6a21\u578b Worker \u4e0d\u5171\u4eab\u4e0a\u4e0b\u6587&#xff1b;<\/li>\n<li>\u6bcf\u4e2a\u79df\u6237\u7684\u68c0\u7d22\u5411\u91cf\u5e93\u72ec\u7acb\u90e8\u7f72&#xff1b;<\/li>\n<li>\u652f\u6301\u591a\u79df\u6237 embedding \u6a21\u578b\u914d\u7f6e&#xff0c;\u5982 tenant A \u7528 Qwen&#xff0c;B \u7528 Baichuan\u3002<\/li>\n<\/ul>\n<p>\u8fd9\u4e9b\u6848\u4f8b\u5145\u5206\u8bf4\u660e FastChat \u6846\u67b6\u5728\u843d\u5730\u8fc7\u7a0b\u4e2d\u7684\u7075\u6d3b\u6027\u4e0e\u6269\u5c55\u6027&#xff0c;\u7279\u522b\u9002\u5408\u6570\u636e\u79c1\u6709\u5316\u3001\u9ad8\u5b89\u5168\u9700\u6c42\u3001\u591a\u6a21\u578b\u7ba1\u7406\u4e0e\u4f01\u4e1a\u5e73\u53f0\u96c6\u6210\u7b49\u573a\u666f\u3002<\/p>\n<hr 
\/>\n<h3>\u7b2c\u5341\u7ae0&#xff1a;\u6a21\u578b\u8bc4\u6d4b\u3001\u5bf9\u8bdd\u65e5\u5fd7\u4e0e\u63a8\u7406\u76d1\u63a7\u7cfb\u7edf\u5efa\u8bbe\u65b9\u6848<\/h3>\n<p>\u4f01\u4e1a\u5728\u90e8\u7f72\u5927\u6a21\u578b\u670d\u52a1\u5e73\u53f0\u540e&#xff0c;\u9664\u4e86\u63a5\u53e3\u53ef\u7528\u6027\u4fdd\u969c\u5916&#xff0c;\u8fd8\u9700\u6784\u5efa\u5b8c\u6574\u7684\u8bc4\u4f30\u4e0e\u76d1\u63a7\u4f53\u7cfb&#xff0c;\u5305\u62ec\u6a21\u578b\u6548\u679c\u8bc4\u4f30\u3001\u5bf9\u8bdd\u65e5\u5fd7\u5ba1\u8ba1\u3001\u63a8\u7406\u5f02\u5e38\u544a\u8b66\u3001Token \u7528\u91cf\u8ba1\u91cf\u7b49\u3002<\/p>\n<h4>1. \u6a21\u578b\u8bc4\u6d4b\u4f53\u7cfb\u6784\u5efa<\/h4>\n<p>FastChat \u63d0\u4f9b fastchat.llm_judge \u6a21\u5757&#xff0c;\u53ef\u5bf9\u591a\u6a21\u578b\u56de\u7b54\u8fdb\u884c\u6807\u51c6\u5316\u6bd4\u5bf9\u8bc4\u4f30&#xff1a;<\/p>\n<p>python3 -m fastchat.llm_judge.score <span class=\"token punctuation\">\\<\/span><br \/>\n  --input-file data\/eval.json <span class=\"token punctuation\">\\<\/span><br \/>\n  --model-names qwen-7b chatglm3 mistral<\/p>\n<p>\u8f93\u51fa\u7ed3\u679c\u5305\u542b&#xff1a;<\/p>\n<ul>\n<li>\u5404\u6a21\u578b\u5728\u4e0d\u540c\u4efb\u52a1\u4e0b\u5f97\u5206&#xff1b;<\/li>\n<li>\u4eba\u7c7b\u8bc4\u5206\u6216 GPT4 \u5bf9\u6bd4\u8bc4\u5206&#xff1b;<\/li>\n<li>\u53ef\u89c6\u5316\u8f93\u51fa\u56fe\u8868\u9002\u5408 A\/B \u6d4b\u8bd5\u3002<\/li>\n<\/ul>\n<p>\u53ef\u7528\u4e8e&#xff1a;<\/p>\n<ul>\n<li>Prompt \u4f18\u5316\u524d\u540e\u5bf9\u6bd4&#xff1b;<\/li>\n<li>\u591a\u6a21\u578b\u9009\u62e9\u8bc4\u4f30&#xff1b;<\/li>\n<li>\u5fae\u8c03\u7ed3\u679c\u9a8c\u8bc1\u3002<\/li>\n<\/ul>\n<h4>2. 
\u5bf9\u8bdd\u65e5\u5fd7\u7cfb\u7edf\u4e0e\u8c03\u7528\u94fe\u8bb0\u5f55<\/h4>\n<p>\u5efa\u8bae\u5728 API Server \u5c42\u52a0\u5165\u65e5\u5fd7\u4e2d\u95f4\u4ef6&#xff0c;\u8bb0\u5f55\u5982\u4e0b\u5173\u952e\u5b57\u6bb5&#xff1a;<\/p>\n<ul>\n<li>request_id\u3001user_id\u3001tenant_id&#xff1b;<\/li>\n<li>\u8bf7\u6c42\u65f6\u95f4\u3001\u54cd\u5e94\u65f6\u957f\u3001Token \u4f7f\u7528\u91cf&#xff1b;<\/li>\n<li>Prompt \u539f\u6587\u4e0e\u6a21\u578b\u56de\u590d\u6587\u672c&#xff08;\u53ef\u9009\u8131\u654f&#xff09;&#xff1b;<\/li>\n<li>\u9519\u8bef\u7801\u4e0e\u5931\u8d25\u539f\u56e0\u3002<\/li>\n<\/ul>\n<p>\u5b58\u50a8\u5efa\u8bae&#xff1a;<\/p>\n<ul>\n<li>\u4f7f\u7528 ClickHouse \u8fdb\u884c\u7ed3\u6784\u5316\u5206\u6790&#xff1b;<\/li>\n<li>\u4f7f\u7528 ELK \u7cfb\u7edf\u5b9e\u73b0\u5168\u6587\u68c0\u7d22&#xff1b;<\/li>\n<li>Kafka \u4f5c\u4e3a\u65e5\u5fd7\u4e2d\u8f6c\u901a\u9053\u3002<\/li>\n<\/ul>\n<h4>3. Token \u4f7f\u7528\u8ba1\u91cf\u4e0e\u6210\u672c\u7edf\u8ba1<\/h4>\n<ul>\n<li>\u5728\u6bcf\u6b21\u54cd\u5e94\u540e\u8fd4\u56de usage \u5b57\u6bb5&#xff1b;<\/li>\n<li>\u652f\u6301\u6309 tenant\u3001user\u3001model \u6c47\u603b&#xff1b;<\/li>\n<li>\u652f\u6301\u6bcf\u65e5\/\u6bcf\u5468\/\u6bcf\u6708\u8d26\u5355\u5316\u5bfc\u51fa&#xff1b;<\/li>\n<li>\u4e0e\u8ba1\u8d39\u5e73\u53f0\u5bf9\u63a5\u751f\u6210 Token \u6d88\u8d39\u62a5\u544a\u3002<\/li>\n<\/ul>\n<p>\u793a\u4f8b\u8fd4\u56de\u5b57\u6bb5&#xff1a;<\/p>\n<p><span class=\"token string\">&#034;usage&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token punctuation\">{<\/span><br \/>\n  <span class=\"token string\">&#034;prompt_tokens&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token number\">350<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token string\">&#034;completion_tokens&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token number\">750<\/span><span class=\"token punctuation\">,<\/span><br \/>\n  <span class=\"token 
string\">&#034;total_tokens&#034;<\/span><span class=\"token operator\">:<\/span> <span class=\"token number\">1100<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<h4>4. \u63a8\u7406\u5f02\u5e38\u76d1\u63a7\u4e0e\u544a\u8b66\u7cfb\u7edf<\/h4>\n<p>\u6307\u6807\u91c7\u96c6&#xff1a;<\/p>\n<ul>\n<li>\u8bf7\u6c42\u603b\u6570\u3001\u9519\u8bef\u6570\u3001\u8d85\u65f6\u6570&#xff1b;<\/li>\n<li>\u5404\u6a21\u578b\u5e73\u5747\u54cd\u5e94\u65f6\u95f4&#xff1b;<\/li>\n<li>GPU \u5185\u5b58\u4f7f\u7528\u3001Worker \u5b58\u6d3b\u72b6\u6001\u3002<\/li>\n<\/ul>\n<p>\u5de5\u5177\u63a8\u8350&#xff1a;<\/p>\n<ul>\n<li>Prometheus &#043; Grafana&#xff1a;\u6027\u80fd\u6307\u6807&#xff1b;<\/li>\n<li>Loki &#043; Grafana&#xff1a;\u6587\u672c\u65e5\u5fd7&#xff1b;<\/li>\n<li>AlertManager&#xff1a;\u81ea\u52a8\u5f02\u5e38\u544a\u8b66\u3002<\/li>\n<\/ul>\n<p>\u901a\u8fc7\u7cfb\u7edf\u5316\u7684\u65e5\u5fd7\u5ba1\u8ba1\u3001\u8c03\u7528\u6307\u6807\u4e0e\u8bc4\u4f30\u673a\u5236&#xff0c;\u4f01\u4e1a\u53ef\u6301\u7eed\u8ffd\u8e2a\u5927\u6a21\u578b\u670d\u52a1\u8d28\u91cf&#xff0c;\u4fdd\u969c\u4e1a\u52a1\u7a33\u5b9a\u4e0e\u6570\u636e\u5b89\u5168&#xff0c;\u6700\u7ec8\u5f62\u6210\u4e00\u5957\u81ea\u7814 LLM \u670d\u52a1\u5e73\u53f0\u7684\u95ed\u73af\u76d1\u63a7\u4e0e\u4f18\u5316\u4f53\u7cfb\u3002<\/p>\n<p>\u4e2a\u4eba\u7b80\u4ecb <img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250521044604-682d5a8c7fc88.jpg\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/> \u4f5c\u8005\u7b80\u4ecb&#xff1a;\u5168\u6808\u7814\u53d1&#xff0c;\u5177\u5907\u7aef\u5230\u7aef\u7cfb\u7edf\u843d\u5730\u80fd\u529b&#xff0c;\u4e13\u6ce8\u4eba\u5de5\u667a\u80fd\u9886\u57df\u3002 \u4e2a\u4eba\u4e3b\u9875&#xff1a;\u89c2\u71b5 \u4e2a\u4eba\u90ae\u7bb1&#xff1a;privatexxxx&#064;163.com 
\u5ea7\u53f3\u94ed&#xff1a;\u613f\u79d1\u6280\u4e4b\u5149&#xff0c;\u4e0d\u6b62\u7167\u4eae\u667a\u80fd&#xff0c;\u4e5f\u7167\u4eae\u4eba\u5fc3&#xff01;<\/p>\n<h4>\u4e13\u680f\u5bfc\u822a<\/h4>\n<p>\u89c2\u71b5\u7cfb\u5217\u4e13\u680f\u5bfc\u822a&#xff1a; AI\u524d\u6cbf\u63a2\u7d22&#xff1a;\u4ece\u5927\u6a21\u578b\u8fdb\u5316\u3001\u591a\u6a21\u6001\u4ea4\u4e92\u3001AIGC\u5185\u5bb9\u751f\u6210&#xff0c;\u5230AI\u5728\u884c\u4e1a\u4e2d\u7684\u843d\u5730\u5e94\u7528&#xff0c;\u6211\u4eec\u5c06\u6df1\u5165\u5256\u6790\u6700\u524d\u6cbf\u7684AI\u6280\u672f&#xff0c;\u5206\u4eab\u5b9e\u7528\u7684\u5f00\u53d1\u7ecf\u9a8c&#xff0c;\u5e76\u63a2\u8ba8AI\u672a\u6765\u7684\u53d1\u5c55\u8d8b\u52bf AI\u5f00\u6e90\u6846\u67b6\u5b9e\u6218&#xff1a;\u9762\u5411 AI \u5de5\u7a0b\u5e08\u7684\u5927\u6a21\u578b\u6846\u67b6\u5b9e\u6218\u6307\u5357&#xff0c;\u8986\u76d6\u8bad\u7ec3\u3001\u63a8\u7406\u3001\u90e8\u7f72\u4e0e\u8bc4\u4f30\u7684\u5168\u94fe\u8def\u6700\u4f73\u5b9e\u8df5 \u8ba1\u7b97\u673a\u89c6\u89c9&#xff1a;\u805a\u7126\u8ba1\u7b97\u673a\u89c6\u89c9\u524d\u6cbf\u6280\u672f&#xff0c;\u6db5\u76d6\u56fe\u50cf\u8bc6\u522b\u3001\u76ee\u6807\u68c0\u6d4b\u3001\u81ea\u52a8\u9a7e\u9a76\u3001\u533b\u7597\u5f71\u50cf\u7b49\u9886\u57df\u7684\u6700\u65b0\u8fdb\u5c55\u548c\u5e94\u7528\u6848\u4f8b \u56fd\u4ea7\u5927\u6a21\u578b\u90e8\u7f72\u5b9e\u6218&#xff1a;\u6301\u7eed\u66f4\u65b0\u7684\u56fd\u4ea7\u5f00\u6e90\u5927\u6a21\u578b\u90e8\u7f72\u5b9e\u6218\u6559\u7a0b&#xff0c;\u8986\u76d6\u4ece \u6a21\u578b\u9009\u578b \u2192 \u73af\u5883\u914d\u7f6e \u2192 \u672c\u5730\u63a8\u7406 \u2192 API\u5c01\u88c5 \u2192 \u9ad8\u6027\u80fd\u90e8\u7f72 \u2192 \u591a\u6a21\u578b\u7ba1\u7406 \u7684\u5b8c\u6574\u5168\u6d41\u7a0b Agentic AI\u67b6\u6784\u5b9e\u6218\u5168\u6d41\u7a0b&#xff1a;\u4e00\u7ad9\u5f0f\u638c\u63e1 Agentic AI 
\u67b6\u6784\u6784\u5efa\u6838\u5fc3\u8def\u5f84&#xff1a;\u4ece\u534f\u8bae\u5230\u8c03\u5ea6&#xff0c;\u4ece\u63a8\u7406\u5230\u6267\u884c&#xff0c;\u5b8c\u6574\u590d\u523b\u4f01\u4e1a\u7ea7\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u843d\u5730\u65b9\u6848&#xff01; \u4e91\u539f\u751f\u5e94\u7528\u6258\u7ba1\u4e0e\u5927\u6a21\u578b\u878d\u5408\u5b9e\u6218\u6307\u5357 \u667a\u80fd\u6570\u636e\u6316\u6398\u5de5\u7a0b\u5b9e\u8df5 Kubernetes \u00d7 AI\u5de5\u7a0b\u5b9e\u6218 TensorFlow \u5168\u6808\u5b9e\u6218&#xff1a;\u4ece\u5efa\u6a21\u5230\u90e8\u7f72&#xff1a;\u8986\u76d6\u6a21\u578b\u6784\u5efa\u3001\u8bad\u7ec3\u4f18\u5316\u3001\u8de8\u5e73\u53f0\u90e8\u7f72\u4e0e\u5de5\u7a0b\u4ea4\u4ed8&#xff0c;\u5e2e\u52a9\u5f00\u53d1\u8005\u638c\u63e1\u4ece\u539f\u578b\u5230\u4e0a\u7ebf\u7684\u5b8c\u6574 AI \u5f00\u53d1\u6d41\u7a0b PyTorch \u5168\u6808\u5b9e\u6218\u4e13\u680f&#xff1a; PyTorch \u6846\u67b6\u7684\u5168\u6808\u5b9e\u6218\u5e94\u7528&#xff0c;\u6db5\u76d6\u4ece\u6a21\u578b\u8bad\u7ec3\u3001\u4f18\u5316\u3001\u90e8\u7f72\u5230\u7ef4\u62a4\u7684\u5b8c\u6574\u6d41\u7a0b \u6df1\u5165\u7406\u89e3 TensorRT&#xff1a;\u6df1\u5165\u89e3\u6790 TensorRT \u7684\u6838\u5fc3\u673a\u5236\u4e0e\u90e8\u7f72\u5b9e\u8df5&#xff0c;\u52a9\u529b\u6784\u5efa\u9ad8\u6027\u80fd AI \u63a8\u7406\u7cfb\u7edf Megatron-LM \u5b9e\u6218\u7b14\u8bb0&#xff1a;\u805a\u7126\u4e8e Megatron-LM \u6846\u67b6\u7684\u5b9e\u6218\u5e94\u7528&#xff0c;\u6db5\u76d6\u4ece\u9884\u8bad\u7ec3\u3001\u5fae\u8c03\u5230\u90e8\u7f72\u7684\u5168\u6d41\u7a0b AI Agent&#xff1a;\u7cfb\u7edf\u5b66\u4e60\u5e76\u4eb2\u624b\u6784\u5efa\u4e00\u4e2a\u5b8c\u6574\u7684 AI Agent \u7cfb\u7edf&#xff0c;\u4ece\u57fa\u7840\u7406\u8bba\u3001\u7b97\u6cd5\u5b9e\u6218\u3001\u6846\u67b6\u5e94\u7528&#xff0c;\u5230\u79c1\u6709\u90e8\u7f72\u3001\u591a\u7aef\u96c6\u6210 DeepSeek \u5b9e\u6218\u4e0e\u89e3\u6790&#xff1a;\u805a\u7126 DeepSeek 
\u7cfb\u5217\u6a21\u578b\u539f\u7406\u89e3\u6790\u4e0e\u5b9e\u6218\u5e94\u7528&#xff0c;\u6db5\u76d6\u90e8\u7f72\u3001\u63a8\u7406\u3001\u5fae\u8c03\u4e0e\u591a\u573a\u666f\u96c6\u6210&#xff0c;\u52a9\u4f60\u9ad8\u6548\u4e0a\u624b\u56fd\u4ea7\u5927\u6a21\u578b \u7aef\u4fa7\u5927\u6a21\u578b&#xff1a;\u805a\u7126\u5927\u6a21\u578b\u5728\u79fb\u52a8\u8bbe\u5907\u4e0a\u7684\u90e8\u7f72\u4e0e\u4f18\u5316&#xff0c;\u63a2\u7d22\u7aef\u4fa7\u667a\u80fd\u7684\u5b9e\u73b0\u8def\u5f84 \u884c\u4e1a\u5927\u6a21\u578b \u00b7 \u6570\u636e\u5168\u6d41\u7a0b\u6307\u5357&#xff1a;\u5927\u6a21\u578b\u9884\u8bad\u7ec3\u6570\u636e\u7684\u8bbe\u8ba1\u3001\u91c7\u96c6\u3001\u6e05\u6d17\u4e0e\u5408\u89c4\u6cbb\u7406&#xff0c;\u805a\u7126\u884c\u4e1a\u573a\u666f&#xff0c;\u4ece\u9700\u6c42\u5b9a\u4e49\u5230\u6570\u636e\u95ed\u73af&#xff0c;\u5e2e\u52a9\u60a8\u6784\u5efa\u4e13\u5c5e\u7684\u667a\u80fd\u6570\u636e\u57fa\u5ea7 \u673a\u5668\u4eba\u7814\u53d1\u5168\u6808\u8fdb\u9636\u6307\u5357&#xff1a;\u4eceROS\u5230AI\u667a\u80fd\u63a7\u5236&#xff1a;\u673a\u5668\u4eba\u7cfb\u7edf\u67b6\u6784\u3001\u611f\u77e5\u5efa\u56fe\u3001\u8def\u5f84\u89c4\u5212\u3001\u63a7\u5236\u7cfb\u7edf\u3001AI\u667a\u80fd\u51b3\u7b56\u3001\u7cfb\u7edf\u96c6\u6210\u7b49\u6838\u5fc3\u80fd\u529b\u6a21\u5757 \u4eba\u5de5\u667a\u80fd\u4e0b\u7684\u7f51\u7edc\u5b89\u5168&#xff1a;\u901a\u8fc7\u5b9e\u6218\u6848\u4f8b\u548c\u7cfb\u7edf\u5316\u65b9\u6cd5&#xff0c;\u5e2e\u52a9\u5f00\u53d1\u8005\u548c\u5b89\u5168\u5de5\u7a0b\u5e08\u8bc6\u522b\u98ce\u9669\u3001\u6784\u5efa\u9632\u5fa1\u673a\u5236&#xff0c;\u786e\u4fdd AI \u7cfb\u7edf\u7684\u7a33\u5b9a\u4e0e\u5b89\u5168 \u667a\u80fd DevOps \u5de5\u5382&#xff1a;AI \u9a71\u52a8\u7684\u6301\u7eed\u4ea4\u4ed8\u5b9e\u8df5&#xff1a;\u6784\u5efa\u4ee5 AI \u4e3a\u6838\u5fc3\u7684\u667a\u80fd DevOps \u5e73\u53f0&#xff0c;\u6db5\u76d6\u4ece CI\/CD \u6d41\u6c34\u7ebf\u3001AIOps\u3001MLOps \u5230 DevSecOps \u7684\u5168\u6d41\u7a0b\u5b9e\u8df5\u3002 
C&#043;&#043;\u5b66\u4e60\u7b14\u8bb0&#xff1f;&#xff1a;\u805a\u7126\u4e8e\u73b0\u4ee3 C&#043;&#043; \u7f16\u7a0b\u7684\u6838\u5fc3\u6982\u5ff5\u4e0e\u5b9e\u8df5&#xff0c;\u6db5\u76d6 STL \u6e90\u7801\u5256\u6790\u3001\u5185\u5b58\u7ba1\u7406\u3001\u6a21\u677f\u5143\u7f16\u7a0b\u7b49\u5173\u952e\u6280\u672f AI \u00d7 Quant \u7cfb\u7edf\u5316\u843d\u5730\u5b9e\u6218&#xff1a;\u4ece\u6570\u636e\u3001\u7b56\u7565\u5230\u5b9e\u76d8&#xff0c;\u6253\u9020\u5168\u6808\u667a\u80fd\u91cf\u5316\u4ea4\u6613\u7cfb\u7edf \u5927\u6a21\u578b\u8fd0\u8425\u4e13\u5bb6\u7684Prompt\u4fee\u70bc\u4e4b\u8def&#xff1a;\u672c\u4e13\u680f\u805a\u7126\u5f00\u53d1 \/ \u6d4b\u8bd5\u4eba\u5458\u7684\u5b9e\u9645\u8f6c\u578b\u8def\u5f84&#xff0c;\u57fa\u4e8e OpenAI\u3001DeepSeek\u3001\u6296\u97f3\u7b49\u771f\u5b9e\u8d44\u6599&#xff0c;\u62c6\u89e3 \u4ece\u5165\u95e8\u5230\u4e13\u4e1a\u843d\u5730\u7684\u5173\u952e\u4e3b\u9898&#xff0c;\u6db5\u76d6 Prompt \u7f16\u5199\u8303\u5f0f\u3001\u7ed3\u6784\u8f93\u51fa\u63a7\u5236\u3001\u6a21\u578b\u884c\u4e3a\u8bc4\u4f30\u3001\u7cfb\u7edf\u63a5\u5165\u4e0e DevOps \u7ba1\u7406\u3002\u6bcf\u4e00\u7bc7\u90fd\u4e0d\u8bb2\u6982\u5ff5\u7a7a\u8bdd&#xff0c;\u53ea\u505a\u5b9e\u6218\u7ecf\u9a8c\u6c89\u6dc0&#xff0c;\u8ba9\u4f60\u4e00\u6b65\u6b65\u6210\u4e3a\u771f\u6b63\u7684\u6a21\u578b\u8fd0\u8425\u4e13\u5bb6\u3002<\/p>\n<hr \/>\n<h3>&#x1f31f; \u5982\u679c\u672c\u6587\u5bf9\u4f60\u6709\u5e2e\u52a9&#xff0c;\u6b22\u8fce\u4e09\u8fde\u652f\u6301&#xff01;<\/h3>\n<p>&#x1f44d; \u70b9\u4e2a\u8d5e&#xff0c;\u7ed9\u6211\u4e00\u4e9b\u53cd\u9988\u52a8\u529b \u2b50 \u6536\u85cf\u8d77\u6765&#xff0c;\u65b9\u4fbf\u4e4b\u540e\u590d\u4e60\u67e5\u9605 &#x1f514; \u5173\u6ce8\u6211&#xff0c;\u540e\u7eed\u8fd8\u6709\u66f4\u591a\u5b9e\u6218\u5185\u5bb9\u6301\u7eed\u66f4\u65b0<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb876\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf16\u6b21\u3002FastChat \u662f\u7531 LM-SYS 
\u56e2\u961f\u5f00\u6e90\u7684\u8f7b\u91cf\u7ea7\u5927\u8bed\u8a00\u6a21\u578b\uff08LLM\uff09\u670d\u52a1\u6846\u67b6\uff0c\u65e8\u5728\u63d0\u4f9b\u4e00\u4e2a\u4e0e OpenAI API \u5b8c\u5168\u517c\u5bb9\u7684\u591a\u6a21\u578b Chat \u63a5\u53e3\u670d\u52a1\u5e73\u53f0\u3002\u8be5\u6846\u67b6\u539f\u751f\u652f\u6301\u591a\u4e2a\u70ed\u95e8\u6a21\u578b\uff08\u5982 LLaMA\u3001ChatGLM\u3001Qwen\u3001Mistral \u7b49\uff09\uff0c\u5e76\u53ef\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u3002\u9879\u76ee\u63d0\u4f9b\u5b8c\u6574\u7684 Chat \u670d\u52a1\u3001\u7ba1\u7406\u540e\u7aef\u3001Web UI \u548c CLI \u5de5\u5177\uff0c\u5e7f\u6cdb\u5e94\u7528\u4e8e\u6a21\u578b\u5fae\u8c03\u6d4b\u8bd5\u3001\u591a\u6a21\u578b\u5bf9\u6bd4\u8bc4\u4f30\u3001\u4f01\u4e1a\u5185\u90e8\u5927\u6a21\u578b\u670d\u52a1\u6784\u5efa\u573a\u666f\u3002\u672c\u6587\u5c06\u7cfb\u7edf\u68b3\u7406 FastChat \u7684\u67b6\u6784\u8bbe\u8ba1\u3001\u90e8\u7f72\u6d41\u7a0b\u4e0e\u6027\u80fd\u4f18\u5316\u5b9e\u8df5\uff0c\u5e76\u4ee5\u771f\u5b9e\u6848<\/p>\n","protected":false},"author":2,"featured_media":38867,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[92,3451,50,227,43],"topic":[],"class_list":{"0":"post-38868","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","6":"hentry","7":"category-server","8":"tag-github","10":"tag-50","11":"tag-227","12":"tag-43"},"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, 
max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/38868.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb876\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf16\u6b21\u3002FastChat \u662f\u7531 LM-SYS \u56e2\u961f\u5f00\u6e90\u7684\u8f7b\u91cf\u7ea7\u5927\u8bed\u8a00\u6a21\u578b\uff08LLM\uff09\u670d\u52a1\u6846\u67b6\uff0c\u65e8\u5728\u63d0\u4f9b\u4e00\u4e2a\u4e0e OpenAI API \u5b8c\u5168\u517c\u5bb9\u7684\u591a\u6a21\u578b Chat \u63a5\u53e3\u670d\u52a1\u5e73\u53f0\u3002\u8be5\u6846\u67b6\u539f\u751f\u652f\u6301\u591a\u4e2a\u70ed\u95e8\u6a21\u578b\uff08\u5982 LLaMA\u3001ChatGLM\u3001Qwen\u3001Mistral \u7b49\uff09\uff0c\u5e76\u53ef\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u3002\u9879\u76ee\u63d0\u4f9b\u5b8c\u6574\u7684 Chat \u670d\u52a1\u3001\u7ba1\u7406\u540e\u7aef\u3001Web UI \u548c CLI \u5de5\u5177\uff0c\u5e7f\u6cdb\u5e94\u7528\u4e8e\u6a21\u578b\u5fae\u8c03\u6d4b\u8bd5\u3001\u591a\u6a21\u578b\u5bf9\u6bd4\u8bc4\u4f30\u3001\u4f01\u4e1a\u5185\u90e8\u5927\u6a21\u578b\u670d\u52a1\u6784\u5efa\u573a\u666f\u3002\u672c\u6587\u5c06\u7cfb\u7edf\u68b3\u7406 FastChat \u7684\u67b6\u6784\u8bbe\u8ba1\u3001\u90e8\u7f72\u6d41\u7a0b\u4e0e\u6027\u80fd\u4f18\u5316\u5b9e\u8df5\uff0c\u5e76\u4ee5\u771f\u5b9e\u6848\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/38868.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-05-21T04:46:12+00:00\" 
\/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250521044604-682d5a8c7fc88.jpg\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"8 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/38868.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/38868.html\",\"name\":\"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-05-21T04:46:12+00:00\",\"dateModified\":\"2025-05-21T04:46:12+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/38868.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/38868.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/38868.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API 
\u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/38868.html","og_locale":"zh_CN","og_type":"article","og_title":"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb876\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf16\u6b21\u3002FastChat \u662f\u7531 LM-SYS \u56e2\u961f\u5f00\u6e90\u7684\u8f7b\u91cf\u7ea7\u5927\u8bed\u8a00\u6a21\u578b\uff08LLM\uff09\u670d\u52a1\u6846\u67b6\uff0c\u65e8\u5728\u63d0\u4f9b\u4e00\u4e2a\u4e0e OpenAI API \u5b8c\u5168\u517c\u5bb9\u7684\u591a\u6a21\u578b Chat \u63a5\u53e3\u670d\u52a1\u5e73\u53f0\u3002\u8be5\u6846\u67b6\u539f\u751f\u652f\u6301\u591a\u4e2a\u70ed\u95e8\u6a21\u578b\uff08\u5982 LLaMA\u3001ChatGLM\u3001Qwen\u3001Mistral \u7b49\uff09\uff0c\u5e76\u53ef\u642d\u914d vLLM \u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u3002\u9879\u76ee\u63d0\u4f9b\u5b8c\u6574\u7684 Chat \u670d\u52a1\u3001\u7ba1\u7406\u540e\u7aef\u3001Web UI \u548c CLI \u5de5\u5177\uff0c\u5e7f\u6cdb\u5e94\u7528\u4e8e\u6a21\u578b\u5fae\u8c03\u6d4b\u8bd5\u3001\u591a\u6a21\u578b\u5bf9\u6bd4\u8bc4\u4f30\u3001\u4f01\u4e1a\u5185\u90e8\u5927\u6a21\u578b\u670d\u52a1\u6784\u5efa\u573a\u666f\u3002\u672c\u6587\u5c06\u7cfb\u7edf\u68b3\u7406 FastChat 
\u7684\u67b6\u6784\u8bbe\u8ba1\u3001\u90e8\u7f72\u6d41\u7a0b\u4e0e\u6027\u80fd\u4f18\u5316\u5b9e\u8df5\uff0c\u5e76\u4ee5\u771f\u5b9e\u6848","og_url":"https:\/\/www.wsisp.com\/helps\/38868.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-05-21T04:46:12+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250521044604-682d5a8c7fc88.jpg"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"8 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/38868.html","url":"https:\/\/www.wsisp.com\/helps\/38868.html","name":"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API \u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-05-21T04:46:12+00:00","dateModified":"2025-05-21T04:46:12+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/38868.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/38868.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/38868.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u3010GitHub\u5f00\u6e90\u9879\u76ee\u5b9e\u6218\u3011FastChat \u5b9e\u6218\u89e3\u6790\uff1a\u591a\u6a21\u578b LLM Chat API 
\u670d\u52a1\u5668\u6784\u5efa\u4e0e\u63a8\u7406\u90e8\u7f72\u5168\u6d41\u7a0b\u6307\u5357"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/38868","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=38868"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/38868\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/38867"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=38868"}],"wp:term":[{"taxonomy":"c
ategory","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=38868"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=38868"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=38868"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}