{"id":66161,"date":"2026-01-26T11:05:04","date_gmt":"2026-01-26T03:05:04","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/66161.html"},"modified":"2026-01-26T11:05:04","modified_gmt":"2026-01-26T03:05:04","slug":"sglang-%e6%9c%8d%e5%8a%a1%e5%99%a8%e5%90%af%e5%8a%a8%e5%8f%82%e6%95%b0%e5%ae%8c%e6%95%b4%e6%80%bb%e7%bb%93","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/66161.html","title":{"rendered":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3"},"content":{"rendered":"<h2>SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3<\/h2>\n<p>\u4ee5\u4e0b\u6839\u636e SGLang \u5b98\u65b9\u6587\u6863 \u6574\u7406\u7684\u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u5217\u8868&#xff0c;\u6309\u529f\u80fd\u5206\u7c7b&#xff0c;\u63d0\u4f9b\u7528\u9014\u8bf4\u660e\u548c\u63a8\u8350\u503c\u3002<\/p>\n<hr \/>\n<h3>&#x1f4e6; \u6a21\u578b\u4e0e Tokenizer<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;model-path \/ &#8211;model<\/td>\n<td>\u6307\u5b9a\u6a21\u578b\u6743\u91cd\u8def\u5f84&#xff08;\u672c\u5730\u76ee\u5f55\u6216 Hugging Face repo ID&#xff09;<\/td>\n<td>meta-llama\/Meta-Llama-3-8B-Instruct&#xff08;\u5fc5\u586b&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;tokenizer-path<\/td>\n<td>\u6307\u5b9a tokenizer \u8def\u5f84&#xff08;\u82e5\u4e0e\u6a21\u578b\u4e0d\u540c&#xff09;<\/td>\n<td>\u9ed8\u8ba4\u540c &#8211;model-path<\/td>\n<\/tr>\n<tr>\n<td>&#8211;tokenizer-mode<\/td>\n<td>Tokenizer \u6a21\u5f0f&#xff1a;auto&#xff08;\u9ed8\u8ba4&#xff0c;\u4f18\u5148 fast tokenizer&#xff09;\u3001slow&#xff08;\u603b\u662f\u7528\u6162 tokenizer&#xff09;<\/td>\n<td>auto<\/td>\n<\/tr>\n<tr>\n<td>&#8211;load-format<\/td>\n<td>\u6a21\u578b\u6743\u91cd\u52a0\u8f7d\u683c\u5f0f&#xff1a;auto&#xff08;\u9ed8\u8ba4&#xff0c;\u4f18\u5148 safetensors&#xff09;\u3001safetensors\u3001pt\u3001gguf\u3001bitsandbytes<\/td>\n<td>auto<\/td>\n<\/tr>\n<tr>\n<td>&#8211;trust-remote-code<\/td>\n<td>\u662f\u5426\u4fe1\u4efb\u8fdc\u7a0b\u4ee3\u7801&#xff08;\u7528\u4e8e\u81ea\u5b9a\u4e49\u6a21\u578b&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u975e\u5b98\u65b9\u6a21\u578b\u8bbe\u4e3a true<\/td>\n<\/tr>\n<tr>\n<td>&#8211;context-length<\/td>\n<td>\u5f3a\u5236\u8bbe\u7f6e\u6a21\u578b\u6700\u5927\u4e0a\u4e0b\u6587\u957f\u5ea6<\/td>\n<td>\u4e0d\u5efa\u8bae\u8bbe\u7f6e&#xff0c;\u7531\u6a21\u578b config \u81ea\u52a8\u51b3\u5b9a<\/td>\n<\/tr>\n<tr>\n<td>&#8211;is-embedding<\/td>\n<td>\u662f\u5426\u5c06\u6a21\u578b\u7528\u4f5c\u5d4c\u5165\u6a21\u578b<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-multimodal<\/td>\n<td>\u542f\u7528\u591a\u6a21\u6001\u529f\u80fd&#xff08;\u9700\u6a21\u578b\u652f\u6301&#xff09;<\/td>\n<td>true&#xff08;\u82e5\u6a21\u578b\u4e3a\u591a\u6a21\u6001&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;limit-mm-data-per-request<\/td>\n<td>\u9650\u5236\u5355\u4e2a\u8bf7\u6c42\u7684\u591a\u6a21\u6001\u8f93\u5165\u6570\u91cf<\/td>\n<td>{&#034;image&#034;: 1, &#034;video&#034;: 1, &#034;audio&#034;: 1}&#xff08;\u793a\u4f8b&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;model-impl<\/td>\n<td>\u6a21\u578b\u5b9e\u73b0\u65b9\u5f0f&#xff1a;auto&#xff08;\u9ed8\u8ba4&#xff0c;\u4f18\u5148 SGLang \u5b9e\u73b0&#xff09;\u3001sglang\u3001transformers<\/td>\n<td>auto<\/td>\n<\/tr>\n<tr>\n<td>&#8211;skip-tokenizer-init<\/td>\n<td>\u8df3\u8fc7 tokenizer \u521d\u59cb\u5316&#xff08;\u9700\u5728\u8bf7\u6c42\u4e2d\u63d0\u4f9b input_ids&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u9ad8\u6027\u80fd\u573a\u666f\u53ef\u8bbe\u4e3a true<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f310; HTTP \u670d\u52a1<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;host<\/td>\n<td>HTTP \u670d\u52a1\u76d1\u542c\u5730\u5740<\/td>\n<td>0.0.0.0&#xff08;\u5bf9\u5916\u66b4\u9732&#xff09;\u6216 127.0.0.1&#xff08;\u4ec5\u672c\u5730&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;port<\/td>\n<td>HTTP \u670d\u52a1\u7aef\u53e3<\/td>\n<td>30000&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;api-key<\/td>\n<td>\u8bbe\u7f6e API \u8bbf\u95ee\u5bc6\u94a5&#xff08;OpenAI \u517c\u5bb9&#xff09;<\/td>\n<td>\u751f\u4ea7\u73af\u5883\u5efa\u8bae\u8bbe\u7f6e<\/td>\n<\/tr>\n<tr>\n<td>&#8211;served-model-name<\/td>\n<td>\u81ea\u5b9a\u4e49 \/v1\/models \u8fd4\u56de\u7684\u6a21\u578b\u540d<\/td>\n<td>\u53ef\u7528\u4e8e\u9690\u85cf\u771f\u5b9e\u6a21\u578b\u540d<\/td>\n<\/tr>\n<tr>\n<td>&#8211;skip-server-warmup<\/td>\n<td>\u8df3\u8fc7\u670d\u52a1\u5668\u9884\u70ed<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;warmups<\/td>\n<td>\u81ea\u5b9a\u4e49\u9884\u70ed\u51fd\u6570&#xff08;\u9017\u53f7\u5206\u9694&#xff09;<\/td>\n<td>warmup_name1,warmup_name2<\/td>\n<\/tr>\n<tr>\n<td>&#8211;nccl-port<\/td>\n<td>NCCL \u5206\u5e03\u5f0f\u73af\u5883\u7aef\u53e3<\/td>\n<td>\u901a\u5e38\u4e0d\u8bbe\u7f6e&#xff0c;\u7531\u7cfb\u7edf\u81ea\u52a8\u5206\u914d<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>\u2699\ufe0f \u5e76\u884c\u4e0e\u5206\u5e03\u5f0f<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;tensor-parallel-size \/ &#8211;tp-size<\/td>\n<td>\u5f20\u91cf\u5e76\u884c GPU \u6570\u91cf<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u6839\u636e GPU \u6570\u91cf\u8bbe\u7f6e&#xff08;\u5982 &#8211;tp 2&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;pipeline-parallel-size \/ &#8211;pp-size<\/td>\n<td>\u6d41\u6c34\u7ebf\u5e76\u884c\u9636\u6bb5\u6570<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u957f\u4e0a\u4e0b\u6587\u573a\u666f\u53ef\u8bbe\u4e3a 2-4<\/td>\n<\/tr>\n<tr>\n<td>&#8211;data-parallel-size \/ &#8211;dp-size<\/td>\n<td>\u6570\u636e\u5e76\u884c GPU \u6570\u91cf<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u5185\u5b58\u5145\u8db3\u65f6\u8bbe\u4e3a 2-4<\/td>\n<\/tr>\n<tr>\n<td>&#8211;nnodes<\/td>\n<td>\u5206\u5e03\u5f0f\u8282\u70b9\u6570\u91cf<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u591a\u8282\u70b9\u90e8\u7f72\u8bbe\u4e3a 2&#043;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;node-rank<\/td>\n<td>\u8282\u70b9\u6392\u540d<\/td>\n<td>0&#xff08;\u4e3b\u8282\u70b9&#xff09;&#xff0c;\u5176\u4ed6\u8282\u70b9\u8bbe\u4e3a 1,2,&#8230;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;dist-init-addr \/ &#8211;nccl-init-addr<\/td>\n<td>\u5206\u5e03\u5f0f\u521d\u59cb\u5316\u5730\u5740&#xff08;\u5982 192.168.0.2:25000&#xff09;<\/td>\n<td>\u4e3b\u8282\u70b9 IP \u548c\u7aef\u53e3<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-p2p-check<\/td>\n<td>\u542f\u7528 GPU P2P \u68c0\u67e5&#xff08;\u89e3\u51b3\u591a GPU \u95ee\u9898&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u82e5\u62a5\u9519 \u201cpeer access not supported\u201d \u8bbe\u4e3a true<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f4be; \u5185\u5b58\u4e0e\u8c03\u5ea6<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;mem-fraction-static<\/td>\n<td>\u9759\u6001\u5185\u5b58&#xff08;\u6743\u91cd &#043; KV Cache&#xff09;\u5360\u6bd4<\/td>\n<td>0.9&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;OOM \u65f6\u964d\u81f3 0.7<\/td>\n<\/tr>\n<tr>\n<td>&#8211;max-running-requests<\/td>\n<td>\u6700\u5927\u5e76\u53d1\u8bf7\u6c42\u6570<\/td>\n<td>\u81ea\u52a8\u8ba1\u7b97&#xff0c;\u901a\u5e38\u4e0d\u8bbe<\/td>\n<\/tr>\n<tr>\n<td>&#8211;max-queued-requests<\/td>\n<td>\u6700\u5927\u6392\u961f\u8bf7\u6c42\u6570<\/td>\n<td>\u81ea\u52a8\u8ba1\u7b97&#xff0c;\u901a\u5e38\u4e0d\u8bbe<\/td>\n<\/tr>\n<tr>\n<td>&#8211;chunked-prefill-size<\/td>\n<td>\u5206\u5757\u9884\u586b\u5145\u5927\u5c0f&#xff08;\u9632\u957f prompt OOM&#xff09;<\/td>\n<td>4096&#xff08;\u63a8\u8350&#xff09;&#xff0c;\u8bbe\u4e3a -1 \u7981\u7528<\/td>\n<\/tr>\n<tr>\n<td>&#8211;max-prefill-tokens<\/td>\n<td>\u9884\u586b\u5145\u6700\u5927 token \u6570<\/td>\n<td>16384&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;schedule-policy<\/td>\n<td>\u8bf7\u6c42\u8c03\u5ea6\u7b56\u7565&#xff1a;fcfs&#xff08;\u9ed8\u8ba4&#xff09;\u3001lpm\u3001random\u3001dfs-weight\u3001lof\u3001priority<\/td>\n<td>fcfs&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u9ad8\u4f18\u5148\u7ea7\u7528 priority<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-priority-scheduling<\/td>\n<td>\u542f\u7528\u4f18\u5148\u7ea7\u8c03\u5ea6<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u9700\u914d\u5408 &#8211;schedule-policy priority<\/td>\n<\/tr>\n<tr>\n<td>&#8211;schedule-low-priority-values-first<\/td>\n<td>\u4f4e\u4f18\u5148\u7ea7\u8bf7\u6c42\u4f18\u5148\u8c03\u5ea6<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u9ed8\u8ba4\u9ad8\u4f18\u5148\u7ea7\u5148\u8c03\u5ea6<\/td>\n<\/tr>\n<tr>\n<td>&#8211;page-size<\/td>\n<td>KV \u7f13\u5b58\u9875\u9762\u5927\u5c0f<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f522; \u91cf\u5316\u4e0e\u6570\u636e\u7c7b\u578b<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;dtype<\/td>\n<td>\u6a21\u578b\u8ba1\u7b97\u7cbe\u5ea6&#xff1a;auto&#xff08;\u9ed8\u8ba4&#xff0c;FP16\/BF16&#xff09;\u3001half\u3001float16\u3001bfloat16\u3001float\u3001float32<\/td>\n<td>auto&#xff08;\u63a8\u8350&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;quantization<\/td>\n<td>\u91cf\u5316\u65b9\u6cd5&#xff1a;awq\u3001fp8\u3001gptq\u3001marlin\u3001bitsandbytes\u3001gguf<\/td>\n<td>fp8&#xff08;\u663e\u5b58\u8282\u7701&#xff0c;\u9700 CUDA 11.8&#043;&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;kv-cache-dtype<\/td>\n<td>KV \u7f13\u5b58\u6570\u636e\u7c7b\u578b&#xff1a;auto\u3001fp8_e5m2\u3001fp8_e4m3<\/td>\n<td>fp8_e5m2&#xff08;\u63a8\u8350&#xff0c;\u8282\u7701\u663e\u5b58&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-fp32-lm-head<\/td>\n<td>LM \u5934\u8f93\u51fa\u4e3a FP32&#xff08;\u63d0\u5347\u7cbe\u5ea6&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;torchao-config<\/td>\n<td>torchao \u91cf\u5316\u914d\u7f6e&#xff1a;int4wo-128\u3001int8wo\u3001fp8wo<\/td>\n<td>int4wo-128&#xff08;\u5b9e\u9a8c\u6027&#xff0c;\u5c0f\u6a21\u578b\u6709\u6548&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f680; \u6027\u80fd\u4f18\u5316<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;attention-backend<\/td>\n<td>\u6ce8\u610f\u529b\u5185\u6838\u540e\u7aef&#xff1a;triton\u3001torch_native\u3001flex_attention\u3001flashinfer\u3001fa3\u3001fa4<\/td>\n<td>flashinfer&#xff08;\u9ad8\u6027\u80fd&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;prefill-attention-backend<\/td>\n<td>\u9884\u586b\u5145\u6ce8\u610f\u529b\u540e\u7aef<\/td>\n<td>\u901a\u5e38\u540c &#8211;attention-backend<\/td>\n<\/tr>\n<tr>\n<td>&#8211;decode-attention-backend<\/td>\n<td>\u89e3\u7801\u6ce8\u610f\u529b\u540e\u7aef<\/td>\n<td>\u901a\u5e38\u540c &#8211;attention-backend<\/td>\n<\/tr>\n<tr>\n<td>&#8211;sampling-backend<\/td>\n<td>\u91c7\u6837\u5185\u6838\u540e\u7aef&#xff1a;flashinfer\u3001pytorch\u3001ascend<\/td>\n<td>flashinfer&#xff08;\u63a8\u8350&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-torch-compile<\/td>\n<td>\u542f\u7528 torch.compile \u52a0\u901f<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u5c0f\u6a21\u578b &#043; \u5c0f batch \u6709\u6548<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-deterministic-inference<\/td>\n<td>\u542f\u7528\u786e\u5b9a\u6027\u63a8\u7406&#xff08;\u7ed3\u679c\u53ef\u590d\u73b0&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u8c03\u8bd5\u65f6\u542f\u7528<\/td>\n<\/tr>\n<tr>\n<td>&#8211;disable-cuda-graph<\/td>\n<td>\u7981\u7528 CUDA Graph&#xff08;\u89e3\u51b3\u591a\u8282\u70b9\u6b7b\u9501&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u591a\u8282\u70b9 TP \u6b7b\u9501\u65f6\u8bbe\u4e3a true<\/td>\n<\/tr>\n<tr>\n<td>&#8211;num-continuous-decode-steps<\/td>\n<td>\u8fde\u7eed\u89e3\u7801\u6b65\u9aa4\u6570<\/td>\n<td>1&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u8bbe\u4e3a 2-4 \u63d0\u5347\u541e\u5410<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-mixed-chunk<\/td>\n<td>\u542f\u7528\u6df7\u5408\u5206\u5757&#xff08;prefill &#043; decode&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f4ca; \u65e5\u5fd7\u4e0e\u76d1\u63a7<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;log-level<\/td>\n<td>\u65e5\u5fd7\u7ea7\u522b&#xff1a;debug\u3001info\u3001warning\u3001error<\/td>\n<td>info&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;log-requests<\/td>\n<td>\u8bb0\u5f55\u8bf7\u6c42\u8f93\u5165\u8f93\u51fa<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u8c03\u8bd5\u65f6\u8bbe\u4e3a true<\/td>\n<\/tr>\n<tr>\n<td>&#8211;log-requests-level<\/td>\n<td>\u8bf7\u6c42\u65e5\u5fd7\u8be6\u7ec6\u7ea7\u522b&#xff1a;0&#xff08;\u5143\u6570\u636e&#xff09;\u30011&#xff08;\u91c7\u6837\u53c2\u6570&#xff09;\u30012&#xff08;\u90e8\u5206\u8f93\u5165\/\u8f93\u51fa&#xff09;\u30013&#xff08;\u5b8c\u6574\u8f93\u5165\/\u8f93\u51fa&#xff09;<\/td>\n<td>2&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-metrics<\/td>\n<td>\u542f\u7528 Prometheus \u6307\u6807<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u751f\u4ea7\u73af\u5883\u5efa\u8bae\u542f\u7528<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-metrics-for-all-schedulers<\/td>\n<td>\u4e3a\u6240\u6709\u8c03\u5ea6\u5668\u542f\u7528\u6307\u6807<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;DP \u6ce8\u610f\u529b\u542f\u7528\u65f6\u8bbe\u4e3a true<\/td>\n<\/tr>\n<tr>\n<td>&#8211;crash-dump-folder<\/td>\n<td>\u5d29\u6e83\u524d 5 \u5206\u949f\u8bf7\u6c42 dump \u76ee\u5f55<\/td>\n<td>\u7528\u4e8e\u6545\u969c\u6392\u67e5<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-trace<\/td>\n<td>\u542f\u7528 OpenTelemetry \u8ddf\u8e2a<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f9e0; \u9ad8\u7ea7\u529f\u80fd<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;enable-lora<\/td>\n<td>\u542f\u7528 LoRA \u652f\u6301<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u9700\u914d\u5408 &#8211;lora-paths<\/td>\n<\/tr>\n<tr>\n<td>&#8211;lora-paths<\/td>\n<td>LoRA \u9002\u914d\u5668\u8def\u5f84\u5217\u8868<\/td>\n<td>[&#034;lora_path1&#034;, &#034;lora_path2&#034;]<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-hierarchical-cache<\/td>\n<td>\u542f\u7528\u5206\u5c42 KV \u7f13\u5b58&#xff08;CPU-GPU&#xff09;<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;&#xff0c;\u663e\u5b58\u4e0d\u8db3\u65f6\u542f\u7528<\/td>\n<\/tr>\n<tr>\n<td>&#8211;hicache-ratio<\/td>\n<td>\u4e3b\u673a KV \u7f13\u5b58\u5185\u5b58\u6c60\u6bd4\u4f8b<\/td>\n<td>2.0&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;speculative-algorithm<\/td>\n<td>\u63a8\u6d4b\u89e3\u7801\u7b97\u6cd5&#xff1a;EAGLE\u3001EAGLE3\u3001NEXTN\u3001STANDALONE\u3001NGRAM<\/td>\n<td>EAGLE&#xff08;\u63a8\u8350&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;speculative-draft-model-path<\/td>\n<td>\u8349\u7a3f\u6a21\u578b\u8def\u5f84<\/td>\n<td>meta-llama\/Meta-Llama-3-8B&#xff08;\u9700\u642d\u914d\u4e3b\u6a21\u578b&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-lmcache<\/td>\n<td>\u4f7f\u7528 LMCache \u4f5c\u4e3a\u5206\u5c42\u7f13\u5b58<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-double-sparsity<\/td>\n<td>\u542f\u7528\u53cc\u7a00\u758f\u6ce8\u610f\u529b<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-pdmux<\/td>\n<td>\u542f\u7528 PD-Multiplexing<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>&#x1f4c4; \u914d\u7f6e\u6587\u4ef6\u4e0e\u90e8\u7f72<\/h3>\n<table>\n<tr>\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae<\/tr>\n<tbody>\n<tr>\n<td>&#8211;config<\/td>\n<td>\u4ece YAML \u914d\u7f6e\u6587\u4ef6\u52a0\u8f7d\u53c2\u6570<\/td>\n<td>config.yaml&#xff08;\u63a8\u8350\u751f\u4ea7\u73af\u5883\u4f7f\u7528&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;enable-priority-scheduling<\/td>\n<td>\u542f\u7528\u4f18\u5148\u7ea7\u8c03\u5ea6<\/td>\n<td>false&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;max-loras-per-batch<\/td>\n<td>\u5355\u6279\u6700\u5927 LoRA \u6570<\/td>\n<td>8&#xff08;\u9ed8\u8ba4&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>&#8211;max-loaded-loras<\/td>\n<td>\u6700\u5927\u52a0\u8f7d\u7684 LoRA \u6570<\/td>\n<td>&gt;&#061; &#8211;max-loras-per-batch<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<hr \/>\n<h3>\u26a0\ufe0f \u91cd\u8981\u63d0\u793a<\/h3>\n<li>OOM \u95ee\u9898&#xff1a;\u82e5\u51fa\u73b0\u5185\u5b58\u4e0d\u8db3\u9519\u8bef&#xff0c;\u4f18\u5148\u5c1d\u8bd5&#xff1a;\n<ul>\n<li>\u964d\u4f4e &#8211;mem-fraction-static&#xff08;\u5982 0.7&#xff09;<\/li>\n<li>\u542f\u7528 &#8211;chunked-prefill-size 4096<\/li>\n<li>\u51cf\u5c11 &#8211;max-running-requests<\/li>\n<\/ul>\n<\/li>\n<li>\u591a GPU \u90e8\u7f72&#xff1a;\n<ul>\n<li>\u5148\u7528 &#8211;tp&#xff08;\u5f20\u91cf\u5e76\u884c&#xff09;&#xff0c;\u518d\u8003\u8651 &#8211;dp&#xff08;\u6570\u636e\u5e76\u884c&#xff09;<\/li>\n<li>\u591a\u8282\u70b9\u90e8\u7f72\u9700\u914d\u5408 &#8211;nnodes\u3001&#8211;node-rank \u548c &#8211;dist-init-addr<\/li>\n<\/ul>\n<\/li>\n<li>\u751f\u4ea7\u73af\u5883\u5efa\u8bae&#xff1a;\n<ul>\n<li>\u542f\u7528 &#8211;enable-metrics<\/li>\n<li>\u8bbe\u7f6e &#8211;api-key<\/li>\n<li>\u4f7f\u7528 &#8211;config \u914d\u7f6e\u6587\u4ef6\u7ba1\u7406\u53c2\u6570<\/li>\n<li>\u9002\u5f53\u8c03\u6574 &#8211;mem-fraction-static \u4ee5\u5e73\u8861\u541e\u5410\u548c\u5185\u5b58<\/li>\n<\/ul>\n<\/li>\n<li>\u6027\u80fd\u4f18\u5316&#xff1a;\n<ul>\n<li>\u5bf9\u4e8e\u5927\u6a21\u578b&#xff0c;\u4f18\u5148\u4f7f\u7528 &#8211;attention-backend flashinfer<\/li>\n<li>\u5c0f\u6a21\u578b &#043; \u5c0f batch \u53ef\u5c1d\u8bd5 &#8211;enable-torch-compile<\/li>\n<li>\u957f\u4e0a\u4e0b\u6587\u573a\u666f\u8003\u8651 &#8211;enable-hierarchical-cache<\/li>\n<\/ul>\n<\/li>\n<p>&#x1f4cc; \u6ce8\u610f&#xff1a;\u4ee5\u4e0a\u53c2\u6570\u503c\u4e3a\u63a8\u8350\u503c&#xff0c;\u5b9e\u9645\u4f7f\u7528\u4e2d\u9700\u6839\u636e\u5177\u4f53\u786c\u4ef6\u3001\u6a21\u578b\u548c\u8d1f\u8f7d\u8fdb\u884c\u8c03\u6574\u3002\u5efa\u8bae\u4ece\u9ed8\u8ba4\u53c2\u6570\u5f00\u59cb&#xff0c;\u9010\u6b65\u4f18\u5316\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3<br \/>\n\u4ee5\u4e0b\u6839\u636e SGLang \u5b98\u65b9\u6587\u6863 \u6574\u7406\u7684\u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u5217\u8868&#xff0c;\u6309\u529f\u80fd\u5206\u7c7b&#xff0c;\u63d0\u4f9b\u7528\u9014\u8bf4\u660e\u548c\u63a8\u8350\u503c\u3002 &#x1f4e6; \u6a21\u578b\u4e0e Tokenizer<br \/>\n\u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae&#8211;model-path \/ &#8211;model\u6307\u5b9a\u6a21\u578b\u6743\u91cd\u8def\u5f84&#xff08;\u672c\u5730\u76ee\u5f55\u6216 Hugging Face repo ID&#xff09;meta-llama\/Meta-Llama-3-8B-Instruct&#xff08;\u5fc5\u586b<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[6994,43,44],"topic":[],"class_list":["post-66161","post","type-post","status-publish","format-standard","hentry","category-server","tag-sglang","tag-43","tag-44"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/66161.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 \u4ee5\u4e0b\u6839\u636e SGLang \u5b98\u65b9\u6587\u6863 \u6574\u7406\u7684\u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u5217\u8868&#xff0c;\u6309\u529f\u80fd\u5206\u7c7b&#xff0c;\u63d0\u4f9b\u7528\u9014\u8bf4\u660e\u548c\u63a8\u8350\u503c\u3002 &#x1f4e6; \u6a21\u578b\u4e0e Tokenizer \u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae--model-path \/ --model\u6307\u5b9a\u6a21\u578b\u6743\u91cd\u8def\u5f84&#xff08;\u672c\u5730\u76ee\u5f55\u6216 Hugging Face repo ID&#xff09;meta-llama\/Meta-Llama-3-8B-Instruct&#xff08;\u5fc5\u586b\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/66161.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2026-01-26T03:05:04+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"3 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/66161.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/66161.html\",\"name\":\"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2026-01-26T03:05:04+00:00\",\"dateModified\":\"2026-01-26T03:05:04+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/66161.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/66161.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/66161.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/66161.html","og_locale":"zh_CN","og_type":"article","og_title":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 \u4ee5\u4e0b\u6839\u636e SGLang \u5b98\u65b9\u6587\u6863 \u6574\u7406\u7684\u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u5217\u8868&#xff0c;\u6309\u529f\u80fd\u5206\u7c7b&#xff0c;\u63d0\u4f9b\u7528\u9014\u8bf4\u660e\u548c\u63a8\u8350\u503c\u3002 &#x1f4e6; \u6a21\u578b\u4e0e Tokenizer \u53c2\u6570\u7528\u9014\u8bf4\u660e\u63a8\u8350\u503c\/\u4f7f\u7528\u5efa\u8bae--model-path \/ --model\u6307\u5b9a\u6a21\u578b\u6743\u91cd\u8def\u5f84&#xff08;\u672c\u5730\u76ee\u5f55\u6216 Hugging Face repo ID&#xff09;meta-llama\/Meta-Llama-3-8B-Instruct&#xff08;\u5fc5\u586b","og_url":"https:\/\/www.wsisp.com\/helps\/66161.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2026-01-26T03:05:04+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"3 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/66161.html","url":"https:\/\/www.wsisp.com\/helps\/66161.html","name":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2026-01-26T03:05:04+00:00","dateModified":"2026-01-26T03:05:04+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/66161.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/66161.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/66161.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"SGLang \u670d\u52a1\u5668\u542f\u52a8\u53c2\u6570\u5b8c\u6574\u603b\u7ed3"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/66161","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=66161"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/66161\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=66161"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=66161"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=66161"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=66161"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}