{"id":26542,"date":"2025-04-19T16:01:11","date_gmt":"2025-04-19T08:01:11","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/26542.html"},"modified":"2025-04-19T16:01:11","modified_gmt":"2025-04-19T08:01:11","slug":"%e5%9f%ba%e4%ba%8etriton%e6%8e%a8%e7%90%86%e6%9c%8d%e5%8a%a1%e5%99%a8%e7%9a%84%e6%80%a7%e8%83%bd%e4%bc%98%e5%8c%96%e5%ae%9e%e8%b7%b5","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/26542.html","title":{"rendered":"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5"},"content":{"rendered":"<\/p>\n<h4>\u6587\u7ae0\u76ee\u5f55<\/h4>\n<ul>\n<li>1. \u52a8\u6001\u6279\u5904\u7406&#xff08;Dynamic Batching&#xff09;<\/li>\n<li>2. \u6a21\u578b\u5e76\u53d1&#xff08;Model Concurrency&#xff09;<\/li>\n<li>3. TensorRT\u52a0\u901f<\/li>\n<li>4. \u6d4b\u8bd5<\/li>\n<li>\n<ul>\n<li>\u539f\u59cb\u6a21\u578b<\/li>\n<li>\u52a8\u6001\u6279\u6b21<\/li>\n<li>2\u4e2a\u6a21\u578b\u5b9e\u4f8b<\/li>\n<li>TensorRT\u4f18\u5316<\/li>\n<\/ul>\n<\/li>\n<li>5. 
\u6a21\u578b\u5206\u6790<\/li>\n<\/ul>\n<p> \u53c2\u8003&#xff1a; <\/p>\n<p>https:\/\/github.com\/triton-inference-server\/tutorials\/tree\/main\/Conceptual_Guide\/Part_2-improving_resource_utilization <\/p>\n<p>https:\/\/github.com\/triton-inference-server\/tutorials\/tree\/main\/Conceptual_Guide\/Part_3-optimizing_triton_configuration<\/p>\n<p>NVIDIA\u7684Triton\u63a8\u7406\u670d\u52a1\u5668\u901a\u8fc7\u52a8\u6001\u6279\u5904\u7406\u3001\u6a21\u578b\u5e76\u53d1\u548cTensorRT\u52a0\u901f\u7b49\u591a\u79cd\u4f18\u5316\u7b56\u7565&#xff0c;\u4e0d\u4ec5\u5145\u5206\u53d1\u6325GPU\u7684\u5e76\u884c\u8ba1\u7b97\u80fd\u529b&#xff0c;\u8fd8\u80fd\u6839\u636e\u4e1a\u52a1\u573a\u666f\u7075\u6d3b\u8c03\u6574\u914d\u7f6e&#xff0c;\u4ece\u800c\u5b9e\u73b0\u9ad8\u541e\u5410\u3001\u4f4e\u5ef6\u8fdf\u7684\u63a8\u7406\u670d\u52a1\u3002\u672c\u6587\u5c06\u6df1\u5165\u89e3\u6790Triton\u4e2d\u7684\u5173\u952e\u4f18\u5316\u6280\u672f&#xff0c;\u5e76\u7ed3\u5408\u8be6\u7ec6\u7684\u6d4b\u8bd5\u6570\u636e\u548c\u547d\u4ee4\u884c\u89e3\u6790&#xff0c;\u4e3a\u8bfb\u8005\u5c55\u793a\u5982\u4f55\u5229\u7528\u8fd9\u4e9b\u6280\u672f\u63d0\u5347\u63a8\u7406\u6027\u80fd\u3002<\/p>\n<h2>1. 
\u52a8\u6001\u6279\u5904\u7406&#xff08;Dynamic Batching&#xff09;<\/h2>\n<p>\u5728\u5b9e\u9645\u63a8\u7406\u573a\u666f\u4e2d&#xff0c;\u5355\u4e2a\u8bf7\u6c42\u5f80\u5f80\u65e0\u6cd5\u5145\u5206\u5229\u7528GPU\u7684\u8ba1\u7b97\u80fd\u529b\u3002\u52a8\u6001\u6279\u5904\u7406\u6280\u672f\u901a\u8fc7\u5c06\u591a\u4e2a\u63a8\u7406\u8bf7\u6c42\u5408\u5e76\u6210\u4e00\u4e2a\u6279\u6b21\u8fdb\u884c\u5904\u7406&#xff0c;\u6709\u6548\u63d0\u5347\u4e86GPU\u7684\u5e76\u884c\u8fd0\u7b97\u6548\u7387\u3002\u5173\u952e\u5728\u4e8e\u5e73\u8861\u541e\u5410\u91cf\u548c\u5ef6\u8fdf&#xff1a;<\/p>\n<ul>\n<li>\n<p>\u541e\u5410\u91cf&#xff1a;\u5408\u5e76\u8bf7\u6c42\u540e\u53ef\u5728\u4e00\u6b21\u63a8\u7406\u4e2d\u5904\u7406\u66f4\u591a\u6570\u636e&#xff0c;\u4ece\u800c\u63d0\u9ad8\u6bcf\u79d2\u5b8c\u6210\u7684\u63a8\u7406\u6570\u91cf\u3002<\/p>\n<\/li>\n<li>\n<p>\u5ef6\u8fdf&#xff1a;\u7b49\u5f85\u6279\u6b21\u7ec4\u5efa\u65f6\u95f4\u53ef\u80fd\u5e26\u6765\u4e00\u5b9a\u7684\u5ef6\u8fdf&#xff0c;\u901a\u8fc7\u8c03\u6574max_queue_delay_microseconds\u53c2\u6570\u53ef\u4ee5\u63a7\u5236\u6700\u5927\u7b49\u5f85\u65f6\u957f&#xff0c;\u5b9e\u73b0\u4f4e\u5ef6\u8fdf\u4e0e\u9ad8\u541e\u5410\u95f4\u7684\u6298\u4e2d\u3002<\/p>\n<\/li>\n<li>\n<p>\u4f18\u52bf&#xff1a;\u51cf\u5c11GPU\u7a7a\u95f2\u65f6\u95f4&#xff0c;\u63d0\u5347\u541e\u5410\u91cf\u3002<\/p>\n<\/li>\n<li>\n<p>\u9002\u7528\u573a\u666f&#xff1a;\u9ad8\u5e76\u53d1\u63a8\u7406\u4efb\u52a1\u3002<\/p>\n<\/li>\n<\/ul>\n<p>\u9700\u5728\u6a21\u578b\u914d\u7f6e\u4e2d\u52a0\u4e0a\u4ee5\u4e0b\u5185\u5bb9<\/p>\n<p>dynamic_batching <span class=\"token punctuation\">{<\/span><br \/>\n    max_queue_delay_microseconds: <span class=\"token number\">100<\/span>  <span class=\"token comment\"># \u6700\u5927\u6392\u961f\u7b49\u5f85\u65f6\u95f4&#xff08;\u5fae\u79d2&#xff09;<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><\/p>\n<h2>2. 
\u6a21\u578b\u5e76\u53d1&#xff08;Model Concurrency&#xff09;<\/h2>\n<p>\u5728\u5b9e\u9645\u90e8\u7f72\u65f6&#xff0c;\u5f80\u5f80\u4f1a\u5b58\u5728\u591a\u4e2a\u6a21\u578b\u540c\u65f6\u8fd0\u884c\u7684\u60c5\u51b5\u3002Triton\u901a\u8fc7\u5728\u540c\u4e00GPU\u4e0a\u8fd0\u884c\u591a\u4e2a\u6a21\u578b\u5b9e\u4f8b&#xff08;instance&#xff09;&#xff0c;\u5b9e\u73b0\u6a21\u578b\u5e76\u53d1\u3002\u901a\u8fc7\u5408\u7406\u7684\u8d44\u6e90\u5206\u914d&#xff0c;\u53ef\u4ee5\u9632\u6b62\u5355\u4e00\u6a21\u578b\u72ec\u5360GPU\u8d44\u6e90&#xff0c;\u8fdb\u800c\u6ee1\u8db3\u591a\u4efb\u52a1\u5e76\u884c\u63a8\u7406\u9700\u6c42\u3002<\/p>\n<ul>\n<li>\u4f18\u52bf&#xff1a;\u63d0\u9ad8GPU\u5229\u7528\u7387&#xff0c;\u51cf\u5c11\u8d44\u6e90\u6d6a\u8d39\u3002<\/li>\n<li>\u9002\u7528\u573a\u666f&#xff1a;\u591a\u6a21\u578b\u63a8\u7406\u4efb\u52a1\u3002<\/li>\n<\/ul>\n<p>\u9700\u5728\u6a21\u578b\u914d\u7f6e\u4e2d\u52a0\u4e0a\u4ee5\u4e0b\u5185\u5bb9<\/p>\n<p>instance_group <span class=\"token punctuation\">[<\/span><br \/>\n    <span class=\"token punctuation\">{<\/span><br \/>\n        count: <span class=\"token number\">2<\/span>  <span class=\"token comment\"># \u6a21\u578b\u5b9e\u4f8b\u7684\u6570\u91cf<\/span><br \/>\n        kind: KIND_GPU  <span class=\"token comment\"># \u4f7f\u7528GPU<\/span><br \/>\n        gpus: <span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span>  <span class=\"token comment\"># \u6307\u5b9aGPU\u8bbe\u5907ID<\/span><br \/>\n    <span class=\"token punctuation\">}<\/span><br \/>\n<span class=\"token punctuation\">]<\/span><\/p>\n<h2>3. 
TensorRT\u52a0\u901f<\/h2>\n<p>TensorRT \u662fNVIDIA\u4e13\u4e3a\u6df1\u5ea6\u5b66\u4e60\u63a8\u7406\u8bbe\u8ba1\u7684\u9ad8\u6027\u80fd\u4f18\u5316\u5e93\u3002\u901a\u8fc7\u5c06\u6a21\u578b\u8f6c\u6362\u4e3aTensorRT\u5f15\u64ce&#xff0c;\u80fd\u591f\u501f\u52a9FP16\u7b49\u4f4e\u7cbe\u5ea6\u8fd0\u7b97\u6a21\u5f0f&#xff0c;\u663e\u8457\u63d0\u5347\u63a8\u7406\u901f\u5ea6&#xff0c;\u5e76\u964d\u4f4e\u5ef6\u8fdf\u3002\u4f46\u9700\u8981\u6ce8\u610f\u7684\u662f&#xff0c;\u5f15\u64ce\u7684\u7f16\u8bd1\u8fc7\u7a0b\u53ef\u80fd\u8f83\u8017\u65f6&#xff08;\u672c\u6587\u4e2d\u5927\u7ea615\u5206\u949f&#xff09;<\/p>\n<p>\u9700\u5728\u6a21\u578b\u914d\u7f6e\u4e2d\u52a0\u4e0a\u4ee5\u4e0b\u5185\u5bb9<\/p>\n<p>optimization <span class=\"token punctuation\">{<\/span><br \/>\nexecution_accelerators <span class=\"token punctuation\">{<\/span><br \/>\ngpu_execution_accelerator <span class=\"token punctuation\">:<\/span> <span class=\"token punctuation\">[<\/span> <span class=\"token punctuation\">{<\/span><br \/>\nname <span class=\"token punctuation\">:<\/span> &#034;tensorrt&#034;<br \/>\nparameters <span class=\"token punctuation\">{<\/span> key<span class=\"token punctuation\">:<\/span> &#034;precision_mode&#034; value<span class=\"token punctuation\">:<\/span> &#034;FP16&#034; <span class=\"token punctuation\">}<\/span><br \/>\nparameters <span class=\"token punctuation\">{<\/span> key<span class=\"token punctuation\">:<\/span> &#034;max_workspace_size_bytes&#034; value<span class=\"token punctuation\">:<\/span> &#034;<span class=\"token number\">1073741824<\/span>&#034; <span class=\"token punctuation\">}<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">]<\/span><br \/>\n<span class=\"token punctuation\">}<\/span><br \/>\n<span class=\"token 
punctuation\">}<\/span><\/p>\n<ul>\n<li>\u4f18\u52bf&#xff1a;\u663e\u8457\u63d0\u9ad8\u63a8\u7406\u901f\u5ea6&#xff0c;\u964d\u4f4e\u8ba1\u7b97\u5ef6\u8fdf&#xff0c;\u540c\u65f6\u5145\u5206\u5229\u7528GPU\u786c\u4ef6\u7279\u6027\u3002<\/li>\n<li>\u9002\u7528\u573a\u666f&#xff1a;\u5bf9\u5ef6\u8fdf\u654f\u611f\u4e14\u8981\u6c42\u9ad8\u541e\u5410\u7684\u63a8\u7406\u4efb\u52a1&#xff0c;\u5c24\u5176\u662f\u5728\u5bf9\u7cbe\u5ea6\u8981\u6c42\u5141\u8bb8\u9002\u5f53\u964d\u4f4e\u65f6\u3002<\/li>\n<\/ul>\n<h2>4. \u6d4b\u8bd5<\/h2>\n<p>\u5bf9 \u4f7f\u7528triton\u90e8\u7f72OCR\u670d\u52a1&#xff08;\u4e00&#xff09; \u4e2d\u7684 \u6587\u672c\u8bc6\u522b\u6a21\u578b&#xff0c;\u5bfc\u51fa\u52a8\u6001\u6279\u6b21\u7684onnx\u6a21\u578b<\/p>\n<p>trace_input <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>randn<span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">1<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">32<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">100<\/span><span class=\"token punctuation\">)<\/span><br \/>\ntorch<span class=\"token punctuation\">.<\/span>onnx<span class=\"token punctuation\">.<\/span>export<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">,<\/span> trace_input<span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;str.onnx&#034;<\/span><span class=\"token punctuation\">,<\/span> verbose<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span> dynamic_axes<span class=\"token operator\">&#061;<\/span><span class=\"token punctuation\">{<\/span><span class=\"token string\">&#039;input.1&#039;<\/span><span class=\"token punctuation\">:<\/span><span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span 
class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span><span class=\"token string\">&#039;308&#039;<\/span><span class=\"token punctuation\">:<\/span><span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419080110-680358467a690.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<p>\u542f\u52a8 tritonserver<\/p>\n<p>tritonserver <span class=\"token operator\">-<\/span><span class=\"token operator\">-<\/span>model<span class=\"token operator\">-<\/span>repository<span class=\"token operator\">&#061;<\/span><span class=\"token punctuation\">.<\/span><span class=\"token operator\">\/<\/span>model_repository<span class=\"token operator\">\/<\/span><\/p>\n<p>\u8fdb\u5165\u63a8\u7406\u5ba2\u6237\u7aef\u7684\u5bb9\u5668<\/p>\n<p>docker run <span class=\"token operator\">-<\/span>it <span class=\"token operator\">-<\/span><span class=\"token operator\">-<\/span>net<span class=\"token operator\">&#061;<\/span>host <span class=\"token operator\">-<\/span>v $<span class=\"token punctuation\">{<\/span>PWD<span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">:<\/span><span class=\"token operator\">\/<\/span>workspace<span class=\"token operator\">\/<\/span> nvcr<span class=\"token punctuation\">.<\/span>io<span class=\"token operator\">\/<\/span>nvidia<span class=\"token operator\">\/<\/span>tritonserver<span class=\"token punctuation\">:<\/span><span class=\"token number\">24.10<\/span><span class=\"token operator\">-<\/span>py3<span class=\"token operator\">-<\/span>sdk bash<\/p>\n<p>\u8fdb\u884c\u6d4b\u8bd5&#xff0c;\u6d4b\u8bd5\u547d\u4ee4\u5982\u4e0b&#xff1a;<\/p>\n<p>perf_analyzer <span 
class=\"token operator\">-<\/span>m text_recognition <span class=\"token operator\">-<\/span>b <span class=\"token number\">2<\/span> <span class=\"token operator\">-<\/span><span class=\"token operator\">-<\/span>shape <span class=\"token builtin\">input<\/span><span class=\"token number\">.1<\/span><span class=\"token punctuation\">:<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">,<\/span><span class=\"token number\">32<\/span><span class=\"token punctuation\">,<\/span><span class=\"token number\">100<\/span> <span class=\"token operator\">-<\/span><span class=\"token operator\">-<\/span>concurrency<span class=\"token operator\">-<\/span><span class=\"token builtin\">range<\/span> <span class=\"token number\">2<\/span><span class=\"token punctuation\">:<\/span><span class=\"token number\">16<\/span><span class=\"token punctuation\">:<\/span><span class=\"token number\">2<\/span> <span class=\"token operator\">-<\/span><span class=\"token operator\">-<\/span>percentile<span class=\"token operator\">&#061;<\/span><span class=\"token number\">95<\/span><\/p>\n<ul>\n<li>\u53c2\u6570\u8bf4\u660e&#xff1a;\n<ul>\n<li>-m text_recognition&#xff1a;\u6307\u5b9a\u8981\u6d4b\u8bd5\u7684\u6a21\u578b\u540d\u79f0\u3002<\/li>\n<li>-b 2&#xff1a;\u6307\u5b9abatch size\u4e3a2\u3002<\/li>\n<li>--shape input.1:1,32,100&#xff1a;\u8bbe\u7f6e\u8f93\u5165\u6570\u636e\u7684\u5f62\u72b6\u3002<\/li>\n<li>--concurrency-range 2:16:2&#xff1a;\u6d4b\u8bd5\u5e76\u53d1\u8bf7\u6c42\u6570\u4ece2\u523016&#xff0c;\u6b65\u957f\u4e3a2\u3002<\/li>\n<li>--percentile&#061;95&#xff1a;\u7edf\u8ba195%\u5206\u4f4d\u5ef6\u8fdf&#xff08;p95 latency&#xff09;&#xff0c;\u53cd\u6620\u5927\u90e8\u5206\u8bf7\u6c42\u7684\u5ef6\u8fdf\u60c5\u51b5\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<p>\u5728\u6d4b\u8bd5\u8f93\u51fa\u4e2d&#xff0c;\u5e38\u89c1\u6307\u6807\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>Throughput 
(infer\/sec)&#xff1a;\u6bcf\u79d2\u5904\u7406\u7684\u63a8\u7406\u6570\u91cf&#xff0c;\u8d8a\u9ad8\u8868\u793a\u541e\u5410\u91cf\u8d8a\u597d\u3002<\/li>\n<li>p95 Latency (usec)&#xff1a;95\u767e\u5206\u4f4d\u5ef6\u8fdf&#xff0c;\u53cd\u6620\u5927\u90e8\u5206\u8bf7\u6c42\u7684\u54cd\u5e94\u65f6\u95f4\u3002<\/li>\n<li>Avg HTTP Time (usec)&#xff1a;\u5e73\u5747HTTP\u54cd\u5e94\u65f6\u95f4&#xff0c;\u5305\u62ec\u8bf7\u6c42\u53d1\u9001\u3001\u7b49\u5f85\u548c\u63a5\u6536\u6570\u636e\u7684\u603b\u8017\u65f6\u3002<\/li>\n<li>Queue\u3001Compute Infer&#xff1a;\u5206\u522b\u4ee3\u8868\u8bf7\u6c42\u6392\u961f\u7b49\u5f85\u548c\u5b9e\u9645\u8ba1\u7b97\u63a8\u7406\u7684\u8017\u65f6&#xff0c;\u5e2e\u52a9\u5b9a\u4f4d\u74f6\u9888\u3002<\/li>\n<\/ul>\n<p><span class=\"token operator\">**<\/span><span class=\"token operator\">*<\/span> Measurement Settings <span class=\"token operator\">**<\/span><span class=\"token operator\">*<\/span><br \/>\n  Batch size<span class=\"token punctuation\">:<\/span> <span class=\"token number\">2<\/span><br \/>\n  Service Kind<span class=\"token punctuation\">:<\/span> TRITON<br \/>\n  Using <span class=\"token string\">&#034;time_windows&#034;<\/span> mode <span class=\"token keyword\">for<\/span> stabilization<br \/>\n  Stabilizing using p95latency <span class=\"token keyword\">and<\/span> throughput<br \/>\n  Measurement window<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5000<\/span> msec<br \/>\n  Latency limit<span class=\"token punctuation\">:<\/span> <span class=\"token number\">0<\/span> msec<br \/>\n  Concurrency limit<span class=\"token punctuation\">:<\/span> <span class=\"token number\">16<\/span> concurrent requests<br \/>\n  Using synchronous calls <span class=\"token keyword\">for<\/span> inference<\/p>\n<p>Request concurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">2<\/span><br \/>\n  Client<span class=\"token punctuation\">:<\/span><br \/>\n    Request count<span class=\"token 
punctuation\">:<\/span> <span class=\"token number\">6112<\/span><br \/>\n    Throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">678.775<\/span> infer<span class=\"token operator\">\/<\/span>sec<br \/>\n    p50 latency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5880<\/span> usec<br \/>\n    p90 latency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5935<\/span> usec<br \/>\n    p95 latency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5959<\/span> usec<br \/>\n    p99 latency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">6042<\/span> usec<br \/>\n    Avg HTTP time<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5884<\/span> usec <span class=\"token punctuation\">(<\/span>send<span class=\"token operator\">\/<\/span>recv <span class=\"token number\">17<\/span> usec <span class=\"token operator\">&#043;<\/span> response wait <span class=\"token number\">5867<\/span> usec<span class=\"token punctuation\">)<\/span><br \/>\n  Server<span class=\"token punctuation\">:<\/span><br \/>\n    Inference count<span class=\"token punctuation\">:<\/span> <span class=\"token number\">12226<\/span><br \/>\n    Execution count<span class=\"token punctuation\">:<\/span> <span class=\"token number\">6113<\/span><br \/>\n    Successful request count<span class=\"token punctuation\">:<\/span> <span class=\"token number\">6113<\/span><br \/>\n    Avg request latency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">5806<\/span> usec <span class=\"token punctuation\">(<\/span>overhead <span class=\"token number\">9<\/span> usec <span class=\"token operator\">&#043;<\/span> queue <span class=\"token number\">2866<\/span> usec <span class=\"token operator\">&#043;<\/span> compute <span class=\"token builtin\">input<\/span> <span class=\"token number\">8<\/span> usec <span class=\"token 
operator\">&#043;<\/span> compute infer <span class=\"token number\">2916<\/span> usec <span class=\"token operator\">&#043;<\/span> compute output <span class=\"token number\">5<\/span> usec<span class=\"token punctuation\">)<\/span><\/p>\n<p>\u7701\u7565\u5176\u4ed6\u5e76\u53d1\u6570<span class=\"token number\">4<\/span>\u3001<span class=\"token number\">6<\/span>\u3001<span class=\"token number\">8<\/span>\u3001\u3002\u3002\u3002<span class=\"token number\">16<\/span> \u7684\u7ed3\u679c<\/p>\n<p>Inferences<span class=\"token operator\">\/<\/span>Second vs<span class=\"token punctuation\">.<\/span> Client p95 Batch Latency<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">2<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">678.775<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">5959<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">4<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">679.345<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">11859<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">6<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">677.233<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">17827<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">8<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token 
punctuation\">:<\/span> <span class=\"token number\">674.897<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">24056<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">10<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">675.966<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">29906<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">12<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">676.23<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">35703<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">14<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">667.567<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">43738<\/span> usec<br \/>\nConcurrency<span class=\"token punctuation\">:<\/span> <span class=\"token number\">16<\/span><span class=\"token punctuation\">,<\/span> throughput<span class=\"token punctuation\">:<\/span> <span class=\"token number\">676.548<\/span> infer<span class=\"token operator\">\/<\/span>sec<span class=\"token punctuation\">,<\/span> latency <span class=\"token number\">47543<\/span> usec<\/p>\n<h3>\u539f\u59cb\u6a21\u578b<\/h3>\n<ul>\n<li>\u968f\u7740\u5e76\u53d1\u6570\u7684\u589e\u52a0&#xff0c;\u867d\u7136Throughput\u57fa\u672c\u4fdd\u6301\u7a33\u5b9a&#xff0c;\u4f46p95 
Latency\u660e\u663e\u4e0a\u5347&#xff0c;\u8bf4\u660e\u9ad8\u5e76\u53d1\u4e0b\u8bf7\u6c42\u7b49\u5f85\u65f6\u95f4\u589e\u52a0\u3002\n<ul>\n<li>Queue\u65f6\u95f4\u5728\u5e76\u53d1\u6570\u8f83\u9ad8\u65f6\u6025\u5267\u589e\u5927&#xff0c;\u6210\u4e3a\u6027\u80fd\u74f6\u9888\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>Concurrency<\/th>\n<th>Throughput (infer\/sec)<\/th>\n<th>p95 Latency (usec)<\/th>\n<th>Avg HTTP Time (usec)<\/th>\n<th>Inference Count<\/th>\n<th>Execution Count<\/th>\n<th>Successful Request Count<\/th>\n<th>Queue (usec)<\/th>\n<th>Compute Infer (usec)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>2<\/td>\n<td>678.775<\/td>\n<td>5959<\/td>\n<td>5884<\/td>\n<td>12226<\/td>\n<td>6113<\/td>\n<td>6113<\/td>\n<td>2866<\/td>\n<td>2916<\/td>\n<\/tr>\n<tr>\n<td>4<\/td>\n<td>679.345<\/td>\n<td>11859<\/td>\n<td>11765<\/td>\n<td>12232<\/td>\n<td>6116<\/td>\n<td>6116<\/td>\n<td>8743<\/td>\n<td>2915<\/td>\n<\/tr>\n<tr>\n<td>6<\/td>\n<td>677.233<\/td>\n<td>17827<\/td>\n<td>17705<\/td>\n<td>12196<\/td>\n<td>6098<\/td>\n<td>6098<\/td>\n<td>14670<\/td>\n<td>2924<\/td>\n<\/tr>\n<tr>\n<td>8<\/td>\n<td>674.897<\/td>\n<td>24056<\/td>\n<td>23689<\/td>\n<td>12152<\/td>\n<td>6076<\/td>\n<td>6076<\/td>\n<td>20629<\/td>\n<td>2932<\/td>\n<\/tr>\n<tr>\n<td>10<\/td>\n<td>675.966<\/td>\n<td>29906<\/td>\n<td>29567<\/td>\n<td>12174<\/td>\n<td>6087<\/td>\n<td>6087<\/td>\n<td>26515<\/td>\n<td>2929<\/td>\n<\/tr>\n<tr>\n<td>12<\/td>\n<td>676.23<\/td>\n<td>35703<\/td>\n<td>35466<\/td>\n<td>12176<\/td>\n<td>6088<\/td>\n<td>6088<\/td>\n<td>32416<\/td>\n<td>2928<\/td>\n<\/tr>\n<tr>\n<td>14<\/td>\n<td>667.567<\/td>\n<td>43738<\/td>\n<td>41920<\/td>\n<td>12020<\/td>\n<td>6010<\/td>\n<td>6010<\/td>\n<td>38786<\/td>\n<td>2957<\/td>\n<\/tr>\n<tr>\n<td>16<\/td>\n<td>676.548<\/td>\n<td>47543<\/td>\n<td>47274<\/td>\n<td>12182<\/td>\n<td>6091<\/td>\n<td>6091<\/td>\n<td>44231<\/td>\n<td>2927<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>\u52a8\u6001\u6279\u6b21<\/h3>\n<ul>\n<li>\u52a8\u6001\u6279\u5904\u7406\u5728\u4f4e\u5e76\u53d1\u65f6\u4e0e\u539f\u59cb\u6a21\u578b\u76f8\u8fd1&#xff0c;\u4f46\u5728\u4e2d\u9ad8\u5e76\u5
3d1\u65f6\u660e\u663e\u63d0\u9ad8\u4e86\u541e\u5410\u91cf&#xff08;\u4f8b\u5982\u5e76\u53d14\u65f6\u4ece679 infer\/sec\u63d0\u5347\u5230955 infer\/sec&#xff09;&#xff0c;\u540c\u65f6\u63a7\u5236\u5ef6\u8fdf\u5728\u4e00\u5b9a\u8303\u56f4\u5185\u3002\n<ul>\n<li>\u6570\u636e\u8868\u660e\u52a8\u6001\u6279\u6b21\u80fd\u591f\u5728\u8bf7\u6c42\u5408\u5e76\u4e0a\u8d77\u5230\u5e73\u6ed1\u6548\u5e94&#xff0c;\u4ece\u800c\u6539\u5584\u6392\u961f\u65f6\u95f4\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>Concurrency<\/th>\n<th>Throughput (infer\/sec)<\/th>\n<th>p95 Latency (usec)<\/th>\n<th>Avg HTTP Time (usec)<\/th>\n<th>Inference Count<\/th>\n<th>Execution Count<\/th>\n<th>Successful Request Count<\/th>\n<th>Queue (usec)<\/th>\n<th>Compute Infer (usec)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>2<\/td>\n<td>672.357<\/td>\n<td>6082<\/td>\n<td>5940<\/td>\n<td>12108<\/td>\n<td>6054<\/td>\n<td>6054<\/td>\n<td>2881<\/td>\n<td>2940<\/td>\n<\/tr>\n<tr>\n<td>4<\/td>\n<td>955.458<\/td>\n<td>8460<\/td>\n<td>8361<\/td>\n<td>17210<\/td>\n<td>4303<\/td>\n<td>8605<\/td>\n<td>3496<\/td>\n<td>4710<\/td>\n<\/tr>\n<tr>\n<td>6<\/td>\n<td>1089.44<\/td>\n<td>11112<\/td>\n<td>11005<\/td>\n<td>19624<\/td>\n<td>3271<\/td>\n<td>9812<\/td>\n<td>4923<\/td>\n<td>5892<\/td>\n<\/tr>\n<tr>\n<td>8<\/td>\n<td>1165.81<\/td>\n<td>13866<\/td>\n<td>13712<\/td>\n<td>20988<\/td>\n<td>2624<\/td>\n<td>10494<\/td>\n<td>6722<\/td>\n<td>6785<\/td>\n<\/tr>\n<tr>\n<td>10<\/td>\n<td>1167.68<\/td>\n<td>20604<\/td>\n<td>17115<\/td>\n<td>21032<\/td>\n<td>2629<\/td>\n<td>10516<\/td>\n<td>10139<\/td>\n<td>6779<\/td>\n<\/tr>\n<tr>\n<td>12<\/td>\n<td>1166.44<\/td>\n<td>20706<\/td>\n<td>20565<\/td>\n<td>21008<\/td>\n<td>2626<\/td>\n<td>10504<\/td>\n<td>13574<\/td>\n<td>6785<\/td>\n<\/tr>\n<tr>\n<td>14<\/td>\n<td>1166.79<\/td>\n<td>27497<\/td>\n<td>23986<\/td>\n<td>21016<\/td>\n<td>2627<\/td>\n<td>10508<\/td>\n<td>16986<\/td>\n<td>6785<\/td>\n<\/tr>\n<tr>\n<td>16<\/td>\n<td>1163.36<\/td>\n<td>27643<\/td>\n<td>27482<\/td>\n<td>20960<\/td>\n<td>2620<\/td>\n<td>10480<\/td>\n<td>20474<\/td>\n<td>6801<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>2\u4e2a\u
6a21\u578b\u5b9e\u4f8b<\/h3>\n<ul>\n<li>\u901a\u8fc7\u5728\u540c\u4e00GPU\u4e0a\u542f\u7528\u591a\u5b9e\u4f8b&#xff0c;\u4f7f\u5f97\u5728\u4e00\u5b9a\u7a0b\u5ea6\u4e0a\u5206\u644a\u4e86\u8bf7\u6c42\u538b\u529b&#xff0c;\u4f46\u7531\u4e8e\u5b9e\u4f8b\u95f4\u8d44\u6e90\u5171\u4eab&#xff0c;\u90e8\u5206\u6307\u6807&#xff08;\u5982p95 Latency&#xff09;\u4ecd\u6709\u63d0\u5347\u3002\n<ul>\n<li>\u5bf9\u4e8e\u9700\u8981\u540c\u65f6\u670d\u52a1\u591a\u4e2a\u6a21\u578b\u7684\u573a\u666f&#xff0c;\u8be5\u914d\u7f6e\u6709\u52a9\u4e8e\u6574\u4f53\u8d44\u6e90\u5229\u7528\u7387\u7684\u5747\u8861\u8c03\u5ea6\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>Concurrency<\/th>\n<th>Throughput (infer\/sec)<\/th>\n<th>p95 Latency (usec)<\/th>\n<th>Avg HTTP Time (usec)<\/th>\n<th>Inference Count<\/th>\n<th>Execution Count<\/th>\n<th>Successful Request Count<\/th>\n<th>Queue (usec)<\/th>\n<th>Compute Infer (usec)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>2<\/td>\n<td>569.779<\/td>\n<td>8289<\/td>\n<td>7008<\/td>\n<td>10260<\/td>\n<td>5014<\/td>\n<td>5130<\/td>\n<td>44<\/td>\n<td>6802<\/td>\n<\/tr>\n<tr>\n<td>4<\/td>\n<td>752.538<\/td>\n<td>13205<\/td>\n<td>10618<\/td>\n<td>13552<\/td>\n<td>5084<\/td>\n<td>6776<\/td>\n<td>3343<\/td>\n<td>7081<\/td>\n<\/tr>\n<tr>\n<td>6<\/td>\n<td>1028.35<\/td>\n<td>14059<\/td>\n<td>11660<\/td>\n<td>18524<\/td>\n<td>4639<\/td>\n<td>9262<\/td>\n<td>3631<\/td>\n<td>7818<\/td>\n<\/tr>\n<tr>\n<td>8<\/td>\n<td>1207.52<\/td>\n<td>15719<\/td>\n<td>13240<\/td>\n<td>21748<\/td>\n<td>4077<\/td>\n<td>10874<\/td>\n<td>4144<\/td>\n<td>8860<\/td>\n<\/tr>\n<tr>\n<td>10<\/td>\n<td>1258.8<\/td>\n<td>17625<\/td>\n<td>15872<\/td>\n<td>22678<\/td>\n<td>3405<\/td>\n<td>11339<\/td>\n<td>5016<\/td>\n<td>10611<\/td>\n<\/tr>\n<tr>\n<td>12<\/td>\n<td>1323.27<\/td>\n<td>20152<\/td>\n<td>18125<\/td>\n<td>23834<\/td>\n<td>2989<\/td>\n<td>11917<\/td>\n<td>5942<\/td>\n<td>11931<\/td>\n<\/tr>\n<tr>\n<td>14<\/td>\n<td>1324.09<\/td>\n<td>25018<\/td>\n<td>21130<\/td>\n<td>23848<\/td>\n<td>2986<\/td>\n<td>11924<\/td>\n<td>8936<\/td>\n<td>11935<\/td>\n<\/tr>\n<tr>\n<td>16<\/td>\n<td>1329.37<\/td>\n<td>25325<\/td>
\n<td>24053<\/td>\n<td>23944<\/td>\n<td>2993<\/td>\n<td>11972<\/td>\n<td>11890<\/td>\n<td>11918<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>TensorRT\u4f18\u5316<\/h3>\n<ul>\n<li>TensorRT\u52a0\u901f\u663e\u8457\u63d0\u5347\u4e86Throughput&#xff0c;\u6700\u4f4e\u5e76\u53d1\u60c5\u51b5\u4e0b\u541e\u5410\u91cf\u8fbe\u5230 2901 infer\/sec&#xff0c;\u4e14p95 Latency\u5927\u5e45\u964d\u4f4e\u3002\n<ul>\n<li>\u8fd9\u79cd\u4f18\u5316\u5bf9\u9700\u8981\u6781\u81f4\u6027\u80fd\u7684\u573a\u666f\u5c24\u4e3a\u91cd\u8981&#xff0c;\u4f46\u9700\u989d\u5916\u8003\u8651\u5f15\u64ce\u7f16\u8bd1\u65f6\u95f4&#xff08;\u9996\u6b21\u542f\u52a8\u65f6&#xff09;<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>Concurrency<\/th>\n<th>Throughput (infer\/sec)<\/th>\n<th>p95 Latency (usec)<\/th>\n<th>Avg HTTP Time (usec)<\/th>\n<th>Inference Count<\/th>\n<th>Execution Count<\/th>\n<th>Successful Request Count<\/th>\n<th>Queue (usec)<\/th>\n<th>Compute Infer (usec)<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>2<\/td>\n<td>2901.05<\/td>\n<td>1446<\/td>\n<td>1370<\/td>\n<td>104628<\/td>\n<td>52175<\/td>\n<td>52314<\/td>\n<td>11<\/td>\n<td>1260<\/td>\n<\/tr>\n<tr>\n<td>4<\/td>\n<td>3927.45<\/td>\n<td>2430<\/td>\n<td>2029<\/td>\n<td>141700<\/td>\n<td>53153<\/td>\n<td>70850<\/td>\n<td>601<\/td>\n<td>1339<\/td>\n<\/tr>\n<tr>\n<td>6<\/td>\n<td>5176.24<\/td>\n<td>2725<\/td>\n<td>2311<\/td>\n<td>186950<\/td>\n<td>46747<\/td>\n<td>93475<\/td>\n<td>646<\/td>\n<td>1555<\/td>\n<\/tr>\n<tr>\n<td>8<\/td>\n<td>5741.21<\/td>\n<td>3052<\/td>\n<td>2778<\/td>\n<td>207494<\/td>\n<td>38908<\/td>\n<td>103747<\/td>\n<td>813<\/td>\n<td>1829<\/td>\n<\/tr>\n<tr>\n<td>10<\/td>\n<td>6273.42<\/td>\n<td>3417<\/td>\n<td>3183<\/td>\n<td>227526<\/td>\n<td>34139<\/td>\n<td>113763<\/td>\n<td>934<\/td>\n<td>2083<\/td>\n<\/tr>\n<tr>\n<td>12<\/td>\n<td>6750.45<\/td>\n<td>3740<\/td>\n<td>3546<\/td>\n<td>246574<\/td>\n<td>30826<\/td>\n<td>123355<\/td>\n<td>1082<\/td>\n<td>2290<\/td>\n<\/tr>\n<tr>\n<td>14<\/td>\n<td>6640.54<\/td>\n<td>4752<\/td>\n<td>4209<\/td>\n<td>241978<\/td>\n<td>30252<\/td>\n<td>120989<\/td>\n<td>1711<\/td>\n<td>2315<\/td>\n<\/tr>\n<tr>\n<td>16<
\/td>\n<td>5978.15<\/td>\n<td>4789<\/td>\n<td>5328<\/td>\n<td>230884<\/td>\n<td>28865<\/td>\n<td>115442<\/td>\n<td>2707<\/td>\n<td>2361<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u901a\u8fc7\u4e0a\u8ff0\u5bf9\u6bd4\u53ef\u4ee5\u770b\u51fa&#xff1a;<\/p>\n<ul>\n<li>\u541e\u5410\u91cf&#xff1a;TensorRT\u4f18\u5316\u5728\u6240\u6709\u914d\u7f6e\u4e2d\u5747\u5927\u5e45\u9886\u5148&#xff1b;\u52a8\u6001\u6279\u6b21\u5728\u4e2d\u7b49\u5e76\u53d1\u4e0b\u4e5f\u6709\u8f83\u597d\u7684\u8868\u73b0&#xff1b;<\/li>\n<li>\u5ef6\u8fdf&#xff1a;TensorRT\u4e0d\u4ec5\u63d0\u5347\u4e86\u541e\u5410\u91cf&#xff0c;\u8fd8\u4f7f\u5ef6\u8fdf\u964d\u81f3\u6700\u4f4e&#xff1b;\u539f\u59cb\u6a21\u578b\u5728\u5e76\u53d1\u589e\u5927\u65f6\u5ef6\u8fdf\u6025\u5267\u4e0a\u5347&#xff0c;\u52a8\u6001\u6279\u6b21\u5219\u8f83\u4e3a\u5e73\u7a33&#xff1b;<\/li>\n<li>\u961f\u5217\u7b49\u5f85&#xff1a;\u52a8\u6001\u6279\u5904\u7406\u548cTensorRT\u4f18\u5316\u5747\u6709\u6548\u964d\u4f4e\u4e86\u8bf7\u6c42\u6392\u961f\u65f6\u95f4\u3002<\/li>\n<\/ul>\n<h2>5. 
\u6a21\u578b\u5206\u6790<\/h2>\n<p>\u4e3a\u4e86\u5168\u9762\u8bc4\u4f30\u4e0d\u540c\u914d\u7f6e\u4e0b\u7684\u6a21\u578b\u8868\u73b0&#xff0c;NVIDIA\u63d0\u4f9b\u4e86triton-model-analyzer \u5de5\u5177\u3002\u8be5\u5de5\u5177\u53ef\u4ee5\u81ea\u52a8\u5316\u6267\u884c\u591a\u7ec4\u53c2\u6570\u7684\u7f51\u683c\u641c\u7d22&#xff0c;\u5e76\u751f\u6210\u8be6\u7ec6\u62a5\u544a\u3002<\/p>\n<p>pip install triton<span class=\"token operator\">-<\/span>model<span class=\"token operator\">-<\/span>analyzer<\/p>\n<p>\u7f16\u5199 perf.yaml \u6d4b\u8bd5\u914d\u7f6e&#xff0c;\u53ef\u4ee5\u6307\u5b9a\u4f18\u5316\u76ee\u6807\u3001\u4f18\u5316\u7ea6\u675f \u53c2\u8003&#xff1a;https:\/\/github.com\/triton-inference-server\/model_analyzer\/blob\/main\/docs\/config.md#config-options-for-profile<\/p>\n<p>run_config_search_disable: False<br \/>\nprofile_models:<br \/>\n  text_recognition:<br \/>\n    perf_analyzer_flags:<br \/>\n      percentile: <span class=\"token number\">99<\/span><br \/>\n    model_config_parameters:<br \/>\n      max_batch_size: <span class=\"token number\">2<\/span><br \/>\n      dynamic_batching:<br \/>\n        max_queue_delay_microseconds: <span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span>, <span class=\"token number\">200<\/span><span class=\"token punctuation\">]<\/span><br \/>\n      instance_group:<br \/>\n        - - kind: KIND_GPU<br \/>\n            count: <span class=\"token number\">1<\/span><br \/>\n        - - kind: KIND_CPU<br \/>\n            count: <span class=\"token number\">1<\/span><br \/>\n    parameters:<br \/>\n      concurrency:<br \/>\n        start: <span class=\"token number\">2<\/span><br \/>\n        stop: <span class=\"token number\">10<\/span><br \/>\n        step: <span class=\"token number\">2<\/span><br \/>\n      batch_sizes: <span class=\"token number\">1,2<\/span>,3<br \/>\nobjectives:<br \/>\n  - perf_latency_p99<br \/>\n  - gpu_utilization<br 
\/>\nconstraints:<br \/>\n  perf_latency_p99:<br \/>\n    max: <span class=\"token number\">10<\/span><br \/>\n  perf_throughput:<br \/>\n    min: <span class=\"token number\">3000<\/span><\/p>\n<p>\u6267\u884c\u53c2\u6570\u7f51\u683c\u641c\u7d22&#xff0c;\u5e76\u7edf\u8ba1\u6a21\u578b\u7684\u8868\u73b0<\/p>\n<p>model-analyzer profile --model-repository .\/model_repository  <span class=\"token punctuation\">\\<\/span><br \/>\n--triton-launch-mode<span class=\"token operator\">&#061;<\/span>local <span class=\"token punctuation\">\\<\/span><br \/>\n--output-model-repository-path .\/model_analyzer_output\/ <span class=\"token punctuation\">\\<\/span><br \/>\n<span class=\"token parameter variable\">-f<\/span> .\/Part_2-improving_resource_utilization\/perf.yaml <span class=\"token punctuation\">\\<\/span><br \/>\n--override-output-model-repository <span class=\"token punctuation\">\\<\/span><br \/>\n--latency-budget <span class=\"token number\">10<\/span><\/p>\n<p>\u4f1a\u751f\u6210\u4e00\u4e2a\u62a5\u544a&#xff0c;\u62a5\u544a\u663e\u793a\u5404\u914d\u7f6e\u7684\u6a21\u578b\u7684\u8868\u73b0 <img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419080110-680358469f975.png\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<p>\u751f\u6210\u5404\u4e2a\u914d\u7f6e\u6a21\u578b\u7684\u8be6\u7ec6\u62a5\u544a<\/p>\n<p>model-analyzer report --report-model-configs text_recognition_config_0,text_recognition_config_1,text_recognition_config_2,text_recognition_config_3,text_recognition_config_default <span class=\"token punctuation\">\\<\/span><br \/>\n--export-path .\/model_analyzer_report <span class=\"token punctuation\">\\<\/span><br \/>\n--config-file 
.\/Part_2-improving_resource_utilization\/perf.yaml<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.2k\u6b21\uff0c\u70b9\u8d5e18\u6b21\uff0c\u6536\u85cf14\u6b21\u3002NVIDIA\u7684Triton\u63a8\u7406\u670d\u52a1\u5668\u901a\u8fc7\u52a8\u6001\u6279\u5904\u7406\u3001\u6a21\u578b\u5e76\u53d1\u548cTensorRT\u52a0\u901f\u7b49\u591a\u79cd\u4f18\u5316\u7b56\u7565\uff0c\u4e0d\u4ec5\u5145\u5206\u53d1\u6325GPU\u7684\u5e76\u884c\u8ba1\u7b97\u80fd\u529b\uff0c\u8fd8\u80fd\u6839\u636e\u4e1a\u52a1\u573a\u666f\u7075\u6d3b\u8c03\u6574\u914d\u7f6e\uff0c\u4ece\u800c\u5b9e\u73b0\u9ad8\u541e\u5410\u3001\u4f4e\u5ef6\u8fdf\u7684\u63a8\u7406\u670d\u52a1\u3002\u672c\u6587\u5c06\u6df1\u5165\u89e3\u6790Triton\u4e2d\u7684\u5173\u952e\u4f18\u5316\u6280\u672f\uff0c\u5e76\u7ed3\u5408\u8be6\u7ec6\u7684\u6d4b\u8bd5\u6570\u636e\u548c\u547d\u4ee4\u884c\u89e3\u6790\uff0c\u4e3a\u8bfb\u8005\u5c55\u793a\u5982\u4f55\u5229\u7528\u8fd9\u4e9b\u6280\u672f\u63d0\u5347\u63a8\u7406\u6027\u80fd\u3002_nvidia triton<\/p>\n","protected":false},"author":2,"featured_media":26540,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[2111,2112,56,2113],"topic":[],"class_list":["post-26542","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-llm-ai","tag-triton","tag-56","tag-2113"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/26542.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" 
content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.2k\u6b21\uff0c\u70b9\u8d5e18\u6b21\uff0c\u6536\u85cf14\u6b21\u3002NVIDIA\u7684Triton\u63a8\u7406\u670d\u52a1\u5668\u901a\u8fc7\u52a8\u6001\u6279\u5904\u7406\u3001\u6a21\u578b\u5e76\u53d1\u548cTensorRT\u52a0\u901f\u7b49\u591a\u79cd\u4f18\u5316\u7b56\u7565\uff0c\u4e0d\u4ec5\u5145\u5206\u53d1\u6325GPU\u7684\u5e76\u884c\u8ba1\u7b97\u80fd\u529b\uff0c\u8fd8\u80fd\u6839\u636e\u4e1a\u52a1\u573a\u666f\u7075\u6d3b\u8c03\u6574\u914d\u7f6e\uff0c\u4ece\u800c\u5b9e\u73b0\u9ad8\u541e\u5410\u3001\u4f4e\u5ef6\u8fdf\u7684\u63a8\u7406\u670d\u52a1\u3002\u672c\u6587\u5c06\u6df1\u5165\u89e3\u6790Triton\u4e2d\u7684\u5173\u952e\u4f18\u5316\u6280\u672f\uff0c\u5e76\u7ed3\u5408\u8be6\u7ec6\u7684\u6d4b\u8bd5\u6570\u636e\u548c\u547d\u4ee4\u884c\u89e3\u6790\uff0c\u4e3a\u8bfb\u8005\u5c55\u793a\u5982\u4f55\u5229\u7528\u8fd9\u4e9b\u6280\u672f\u63d0\u5347\u63a8\u7406\u6027\u80fd\u3002_nvidia triton\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/26542.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-04-19T08:01:11+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419080110-680358467a690.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"4 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/26542.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/26542.html\",\"name\":\"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-04-19T08:01:11+00:00\",\"dateModified\":\"2025-04-19T08:01:11+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/26542.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/26542.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/26542.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/26542.html","og_locale":"zh_CN","og_type":"article","og_title":"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.2k\u6b21\uff0c\u70b9\u8d5e18\u6b21\uff0c\u6536\u85cf14\u6b21\u3002NVIDIA\u7684Triton\u63a8\u7406\u670d\u52a1\u5668\u901a\u8fc7\u52a8\u6001\u6279\u5904\u7406\u3001\u6a21\u578b\u5e76\u53d1\u548cTensorRT\u52a0\u901f\u7b49\u591a\u79cd\u4f18\u5316\u7b56\u7565\uff0c\u4e0d\u4ec5\u5145\u5206\u53d1\u6325GPU\u7684\u5e76\u884c\u8ba1\u7b97\u80fd\u529b\uff0c\u8fd8\u80fd\u6839\u636e\u4e1a\u52a1\u573a\u666f\u7075\u6d3b\u8c03\u6574\u914d\u7f6e\uff0c\u4ece\u800c\u5b9e\u73b0\u9ad8\u541e\u5410\u3001\u4f4e\u5ef6\u8fdf\u7684\u63a8\u7406\u670d\u52a1\u3002\u672c\u6587\u5c06\u6df1\u5165\u89e3\u6790Triton\u4e2d\u7684\u5173\u952e\u4f18\u5316\u6280\u672f\uff0c\u5e76\u7ed3\u5408\u8be6\u7ec6\u7684\u6d4b\u8bd5\u6570\u636e\u548c\u547d\u4ee4\u884c\u89e3\u6790\uff0c\u4e3a\u8bfb\u8005\u5c55\u793a\u5982\u4f55\u5229\u7528\u8fd9\u4e9b\u6280\u672f\u63d0\u5347\u63a8\u7406\u6027\u80fd\u3002_nvidia triton","og_url":"https:\/\/www.wsisp.com\/helps\/26542.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-04-19T08:01:11+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419080110-680358467a690.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"4 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/26542.html","url":"https:\/\/www.wsisp.com\/helps\/26542.html","name":"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-04-19T08:01:11+00:00","dateModified":"2025-04-19T08:01:11+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/26542.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/26542.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/26542.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u57fa\u4e8eTriton\u63a8\u7406\u670d\u52a1\u5668\u7684\u6027\u80fd\u4f18\u5316\u5b9e\u8df5"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/26542","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=26542"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/26542\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/26540"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=26542"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=26542"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=26542"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=26542"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}