{"id":47974,"date":"2025-07-30T10:12:40","date_gmt":"2025-07-30T02:12:40","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/47974.html"},"modified":"2025-07-30T10:12:40","modified_gmt":"2025-07-30T02:12:40","slug":"%e5%91%8a%e5%88%abgpu%e7%84%a6%e8%99%91%ef%bc%9a%e5%a6%82%e4%bd%95%e5%9c%a8%e7%ba%afcpu%e6%9c%8d%e5%8a%a1%e5%99%a8%e4%b8%8a%ef%bc%8c%e6%89%93%e9%80%a0%e9%ab%98%e6%80%a7%e8%83%bdembedding%e6%9c%8d","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/47974.html","title":{"rendered":"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f"},"content":{"rendered":"<p>Hi&#xff0c;\u5927\u5bb6\u597d&#xff0c;\u6211\u662fezl1fe\u3002<\/p>\n<p>\u6700\u8fd1\u63a5\u624b\u4e00\u4e2a\u9879\u76ee&#xff0c;\u8981\u6c42\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\u90e8\u7f72Embedding\u6a21\u578b\u670d\u52a1\u3002\u5144\u5f1f\u4eec\u90fd\u61c2&#xff0c;GPU\u5b83\u9999\u554a&#xff0c;\u4f46\u5b83\u4e5f\u8d35\u554a&#xff01;\u5f88\u591a\u65f6\u5019&#xff0c;\u54b1\u53ea\u80fd\u5728\u6709\u9650\u7684\u8d44\u6e90\u91cc\u60f3\u529e\u6cd5\u3002<\/p>\n<p>\u4e00\u5f00\u59cb&#xff0c;\u6211\u4eec\u56fe\u65b9\u4fbf&#xff0c;\u76f4\u63a5\u4eceHugging Face\u4e0a\u62c9\u4e86\u5f53\u65f6\u6548\u679c\u6700\u597d\u7684BAAI\/bge-m3\u6a21\u578b&#xff0c;\u7528transformers\u5e93\u4e00\u628a\u68ad\u3002\u7ed3\u679c\u5462&#xff1f;\u90e8\u7f72\u5230\u4e00\u53f08\u683816G\u7684\u670d\u52a1\u5668\u4e0a&#xff0c;\u7cbe\u5ea6\u662f\u9ad8&#xff0c;\u4f46\u6027\u80fd\u4e5f\u662f\u771f\u7684\u201c\u611f\u4eba\u201d&#xff0c;\u5355\u4e2a\u8bf7\u6c42\u54cd\u5e94\u89813-4\u79d2\u3002<\/p>\n<p>\u6211\u4eec\u7684\u7b2c\u4e00\u53cd\u5e94\u662f\u505a\u6a21\u578b\u91cf\u5316\u3002\u628a\u9ad8\u7cbe\u5ea6\u7684FP32\u6a21\u578b\u6362\u6210\u4e86INT8\u7684\u7248\u672c&#xff0c;\u5185\u5b58\u5360\u7528\u4e0b\u6765\u4e86&#xff0c;\u901f\u5ea6\u4e5f\u5feb\u4e86\u4e00\u4e9b&#xff0c;\u4f46\u79bb\u6211\u4eec\u7684\u76ee\u6807\u8fd8\u5dee\u5f97\u8fdc&#xff0c;\u5e76\u53d1\u7a0d\u5fae\u4e00\u9ad8&#xff0c;CPU\u8fd8\u662f\u76f4\u63a5\u5e72\u6ee1\u3002<\/p>\n<p>\u8fd9\u65f6\u5019\u6211\u610f\u8bc6\u5230&#xff0c;\u74f6\u9888\u4e0d\u53ea\u5728\u6a21\u578b\u672c\u8eab&#xff0c;\u66f4\u5728\u4e8e\u63a8\u7406\u5f15\u64ce\u3002Hugging Face\u7684transformers\u5e93\u975e\u5e38\u9002\u5408\u5feb\u901f\u539f\u578b\u9a8c\u8bc1&#xff0c;\u4f46\u5728\u9ad8\u5e76\u53d1\u7684\u751f\u4ea7\u73af\u5883\u4e0b&#xff0c;\u5176Python\u539f\u751f\u7684\u6267\u884c\u903b\u8f91\u548c\u7ebf\u7a0b\u7ba1\u7406\u5728CPU\u4e0a\u5e76\u4e0d\u662f\u6700\u4f18\u89e3\u3002\u4e3a\u4e86\u771f\u6b63\u69a8\u5e72\u786c\u4ef6\u6027\u80fd&#xff0c;\u6211\u4eec\u5fc5\u987b\u5f97\u4e0a\u4e13\u4e1a\u7684\u63a8\u7406\u5f15\u64ce\u2014\u2014ONNX Runtime\u3002<\/p>\n<p>\u75db\u5b9a\u601d\u75db&#xff0c;\u7ecf\u8fc7\u4e00\u756a\u6298\u817e&#xff0c;\u6700\u7ec8\u628a\u670d\u52a1\u6027\u80fd\u786c\u751f\u751f\u63d0\u5347\u4e863\u500d\u591a&#xff0c;\u7a33\u5b9a\u652f\u6301200&#043;\u5e76\u53d1&#xff0c;\u5e73\u5747\u54cd\u5e94\u65f6\u95f4\u538b\u7f29\u5230150ms\u3002<\/p>\n<p>\u4e0d\u5e9f\u8bdd&#xff0c;\u76f4\u63a5\u4e0a\u65b0\u65e7\u65b9\u6848\u7684\u6027\u80fd\u5bf9\u6bd4&#xff0c;\u4f60\u6ca1\u770b\u9519&#xff1a;<\/p>\n<table>\n<tr>\u5e76\u53d1\u6570\u539f\u59cbHF\u65b9\u6848 (req\/s)\u4f18\u5316\u540e\u65b9\u6848 (req\/s)\u63d0\u5347\u500d\u6570<\/tr>\n<tbody>\n<tr>\n<td>1<\/td>\n<td>15.2<\/td>\n<td>32.5<\/td>\n<td>2.1x<\/td>\n<\/tr>\n<tr>\n<td>10<\/td>\n<td>24.1<\/td>\n<td>78.3<\/td>\n<td>3.2x<\/td>\n<\/tr>\n<tr>\n<td>50<\/td>\n<td>27.5<\/td>\n<td>92.1<\/td>\n<td>3.3x<\/td>\n<\/tr>\n<tr>\n<td>100<\/td>\n<td>28.2<\/td>\n<td>95.6<\/td>\n<td>3.4x<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u770b\u5230\u8fd9\u4e2a\u5dee\u8ddd&#xff0c;\u6211\u89c9\u5f97\u8fd9\u6ce2\u6298\u817e\u503c\u4e86&#xff01;\u4e0b\u9762\u6211\u628a\u6574\u4e2a\u4f18\u5316\u8fc7\u7a0b\u4e2d\u7684\u51e0\u4e2a\u6838\u5fc3\u601d\u8def\u63b0\u5f00\u63c9\u788e\u4e86\u5206\u4eab\u7ed9\u5927\u5bb6&#xff0c;\u5e0c\u671b\u80fd\u7ed9\u6b63\u5728\u6216\u5373\u5c06\u9762\u4e34\u7c7b\u4f3c\u95ee\u9898\u7684\u670b\u53cb\u4e00\u4e9b\u542f\u53d1\u3002<\/p>\n<h4>&#x1f4a1; \u7834\u5c40\u601d\u8def&#xff1a;\u73af\u73af\u76f8\u6263\u7684\u4e94\u5927\u4f18\u5316<\/h4>\n<h5>&#x1f680; 1. \u7ec8\u6781\u6b66\u5668&#xff1a;\u62e5\u62b1\u91cf\u5316\u6a21\u578b &#043; ONNX Runtime<\/h5>\n<p>\u8fd9\u662f\u6574\u4e2a\u6027\u80fd\u63d0\u5347\u7684\u57fa\u77f3\u3002\u5355\u72ec\u7684\u6a21\u578b\u91cf\u5316\u662f\u7b2c\u4e00\u6b65&#xff0c;\u4f46\u8981\u53d1\u6325\u5b83\u7684\u5168\u90e8\u5a01\u529b&#xff0c;\u5fc5\u987b\u642d\u914dONNX Runtime\u8fd9\u4e2a\u5927\u6740\u5668\u3002<\/p>\n<p>\u5982\u4f55\u83b7\u53d6ONNX\u6a21\u578b&#xff1f;<\/p>\n<p>\u4f60\u53ef\u80fd\u4f1a\u60f3&#xff0c;\u628aPyTorch\u6a21\u578b\u8f6c\u6210ONNX\u662f\u4e0d\u662f\u5f88\u590d\u6742&#xff1f;\u5176\u5b9e\u6709\u4e24\u6761\u8def&#xff0c;\u4e30\u4fed\u7531\u4eba&#xff1a;<\/p>\n<li>\u81ea\u5df1\u52a8\u624b&#xff0c;\u4e30\u8863\u8db3\u98df&#xff1a;\u53ef\u4ee5\u4f7f\u7528torch.onnx.export\u6216\u8005Hugging Face\u5b98\u65b9\u7684optimum\u5e93\u6765\u8fdb\u884c\u8f6c\u6362\u548c\u91cf\u5316\u3002\u8fd9\u4e2a\u8fc7\u7a0b\u80fd\u8ba9\u4f60\u5bf9\u6a21\u578b\u7ed3\u6784\u6709\u66f4\u6df1\u7684\u7406\u89e3\u3002<\/li>\n<li>\u62e5\u62b1\u793e\u533a&#xff0c;\u201c\u62ff\u6765\u5427\u4f60\u201d (\u63a8\u8350) &#xff1a;Hugging Face Hub\u672c\u8eab\u5c31\u662f\u4e00\u4e2a\u5de8\u5927\u7684\u5b9d\u5e93\u3002\u4f60\u53ea\u8981\u5728Hub\u4e0a\u641c\u7d22bge-m3 onnx&#xff0c;\u5c31\u80fd\u627e\u5230\u5927\u91cf\u7531\u793e\u533a\u8d21\u732e\u3001\u5df2\u7ecf\u8f6c\u6362\u597d\u7684ONNX\u683c\u5f0f\u6a21\u578b&#xff0c;\u5f88\u591a\u751a\u81f3\u76f4\u63a5\u63d0\u4f9b\u4e86INT8\u91cf\u5316\u7248\u672c&#xff0c;\u5f00\u7bb1\u5373\u7528&#xff0c;\u6781\u5927\u7b80\u5316\u4e86\u6211\u4eec\u7684\u5de5\u4f5c\u3002<\/li>\n<p>\u69a8\u5e72CPU\u7684\u914d\u7f6e<\/p>\n<p>\u62ff\u5230ONNX\u6a21\u578b\u540e&#xff0c;\u771f\u6b63\u7684\u7cbe\u9ad3\u5728\u4e8eONNX Runtime\u7684\u4f1a\u8bdd\u914d\u7f6e\u3002\u6211\u53d1\u73b0\u4e0b\u9762\u8fd9\u51e0\u4e2a\u53c2\u6570\u5bf9\u6027\u80fd\u5f71\u54cd\u5de8\u5927&#xff1a;<\/p>\n<p>import onnxruntime as ort<\/p>\n<p>sess_options &#061; ort.SessionOptions()<\/p>\n<p># 1. \u7ebf\u7a0b\u7ba1\u7406&#xff1a;\u522b\u7528\u9ed8\u8ba4\u7684&#xff0c;\u6839\u636eworker\u6570\u52a8\u6001\u7b97&#xff0c;\u907f\u514d\u7ebf\u7a0b\u6253\u67b6<br \/>\n#    \u5047\u8bbe8\u6838CPU&#xff0c;\u5f004\u4e2aworker&#xff0c;\u90a3\u6bcf\u4e2aONNX\u4f1a\u8bdd\u5185\u90e8\u52062\u4e2a\u7ebf\u7a0b\u5c31\u591f\u4e86<br \/>\nauto_threads &#061; max(2, cpu_cores \/\/ workers_count)<br \/>\nsess_options.intra_op_num_threads &#061; auto_threads<\/p>\n<p># 2. \u5185\u5b58\u4f18\u5316&#xff1a;\u5fc5\u987b\u5f00&#xff01;\u80fd\u663e\u8457\u964d\u4f4e\u5185\u5b58\u5360\u7528\u548c\u63d0\u9ad8\u7f13\u5b58\u547d\u4e2d<br \/>\nsess_options.enable_mem_pattern &#061; True<br \/>\nsess_options.enable_cpu_mem_arena &#061; True<\/p>\n<p># 3. \u56fe\u4f18\u5316&#xff1a;\u6709\u591a\u72e0\u5f00\u591a\u72e0&#xff0c;\u76f4\u63a5\u62c9\u6ee1\u5230\u6700\u9ad8\u7ea7\u522b<br \/>\nsess_options.graph_optimization_level &#061; ort.GraphOptimizationLevel.ORT_ENABLE_ALL<\/p>\n<p># \u7528\u8fd9\u4e9b\u914d\u7f6e\u521b\u5efa\u4f1a\u8bdd<br \/>\nsession &#061; ort.InferenceSession(&#034;path\/to\/your\/model.onnx&#034;, sess_options)<\/p>\n<p>\u4ec5\u4ec5\u8fd9\u4e00\u6ce2\u64cd\u4f5c&#xff0c;\u670d\u52a1\u7684\u5355\u8bf7\u6c42\u6027\u80fd\u5c31\u76f4\u63a5\u7ffb\u4e86\u4e00\u500d\u591a\u3002\u9999&#xff01;<\/p>\n<h5>&#x1f9e0; 2. \u667a\u80fd\u7ba1\u5bb6&#xff1a;\u591a\u6a21\u578b\u7ba1\u7406\u7684 LRU \u7f13\u5b58<\/h5>\n<p>\u524d\u9762\u63d0\u5230&#xff0c;\u4e1a\u52a1\u9700\u8981\u652f\u6301\u591a\u4e2a\u6a21\u578b\u3002\u4e3a\u4e86\u4e0d\u8ba9\u5185\u5b58\u7206\u70b8&#xff0c;\u6211\u8bbe\u8ba1\u4e86\u4e00\u4e2a\u57fa\u4e8eLRU&#xff08;\u6700\u8fd1\u6700\u5c11\u4f7f\u7528&#xff09;\u7b56\u7565\u7684\u6a21\u578b\u7ba1\u7406\u5668\u3002<\/p>\n<p>\u6838\u5fc3\u601d\u8def\u662f&#xff1a;\u5185\u5b58\u91cc\u53ea\u4fdd\u7559\u51e0\u4e2a\u6700\u5e38\u7528\u7684\u201c\u70ed\u6a21\u578b\u201d&#xff0c;\u5f53\u65b0\u6a21\u578b\u8bf7\u6c42\u8fdb\u6765\u4e14\u5185\u5b58\u4e0d\u8db3\u65f6&#xff0c;\u81ea\u52a8\u628a\u6700\u4e45\u6ca1\u88ab\u201c\u7ffb\u724c\u5b50\u201d\u7684\u90a3\u4e2a\u6a21\u578b\u7ed9\u8bf7\u51fa\u53bb\u3002<\/p>\n<p>\u4ee3\u7801\u5b9e\u73b0\u4e0a&#xff0c;OrderedDict &#043; threading.RLock \u662f\u4e2a\u4e0d\u9519\u7684\u9009\u62e9&#xff1a;<\/p>\n<p>from collections import OrderedDict<br \/>\nimport threading<\/p>\n<p>class ModelManager:<br \/>\n    def __init__(self, max_loaded_models: int &#061; 3):<br \/>\n        # OrderedDict\u5929\u751f\u5c31\u9002\u5408\u5b9e\u73b0LRU<br \/>\n        self.loaded_models &#061; OrderedDict()<br \/>\n        self.max_loaded_models &#061; max_loaded_models<br \/>\n        # \u5fc5\u987b\u4fdd\u8bc1\u7ebf\u7a0b\u5b89\u5168<br \/>\n        self._lock &#061; threading.RLock()<\/p>\n<p>    async def get_model_components(self, model_name: str):<br \/>\n        with self._lock:<br \/>\n            # 1. \u7f13\u5b58\u547d\u4e2d&#xff1f;\u592a\u68d2\u4e86&#xff01;\u628a\u5b83\u632a\u5230\u961f\u5c3e&#xff0c;\u8868\u793a\u521a\u88ab\u7528\u8fc7<br \/>\n            if model_name in self.loaded_models:<br \/>\n                self.loaded_models.move_to_end(model_name)<br \/>\n                return self.loaded_models[model_name]<\/p>\n<p>        # 2. \u7f13\u5b58\u672a\u547d\u4e2d&#xff0c;\u8bf4\u660e\u662f\u4e2a\u201c\u65b0\u670b\u53cb\u201d&#xff0c;\u8d70\u52a0\u8f7d\u6d41\u7a0b<br \/>\n        return await self._load_model(model_name)<\/p>\n<p>    async def _load_model(self, model_name: str):<br \/>\n        with self._lock:<br \/>\n            # \u52a0\u8f7d\u524d&#xff0c;\u5148\u68c0\u67e5\u4e0b\u201c\u5ba2\u6808\u201d\u662f\u4e0d\u662f\u6ee1\u4e86<br \/>\n            self._ensure_memory_limit()<br \/>\n            # &#8230; \u6b64\u5904\u7701\u7565\u6a21\u578b\u52a0\u8f7d\u7684IO\u64cd\u4f5c &#8230;<br \/>\n            # \u52a0\u8f7d\u6210\u529f&#xff0c;\u767b\u8bb0\u5165\u4f4f<br \/>\n            self.loaded_models[model_name] &#061; loaded_model_instance<br \/>\n            return loaded_model_instance<\/p>\n<p>    def _ensure_memory_limit(self):<br \/>\n        # \u5ba2\u623f\u6ee1\u4e86&#xff0c;\u628a\u7761\u5728\u95e8\u53e3\u6700\u4e45\u6ca1\u52a8\u7684\u90a3\u4e2a&#xff08;\u961f\u9996&#xff09;\u8bf7\u8d70<br \/>\n        while len(self.loaded_models) &gt;&#061; self.max_loaded_models:<br \/>\n            # popitem(last&#061;False)\u79fb\u9664\u5e76\u8fd4\u56de\u6700\u65e9\u63d2\u5165\u7684\u9879<br \/>\n            oldest_model_name, _ &#061; self.loaded_models.popitem(last&#061;False)<br \/>\n            print(f&#034;\u5185\u5b58\u4e0d\u8db3&#xff0c;\u5378\u8f7d\u6a21\u578b: {oldest_model_name}&#034;)<br \/>\n            # &#8230; \u6b64\u5904\u7701\u7565\u6a21\u578b\u5378\u8f7d\u3001\u8d44\u6e90\u91ca\u653e\u7684\u64cd\u4f5c &#8230;<\/p>\n<p>\u8fd9\u5957\u673a\u5236\u4e0a\u7ebf\u540e&#xff0c;\u6548\u679c\u62d4\u7fa4&#xff1a;<\/p>\n<ul>\n<li>\u9ad8\u9891\u6a21\u578b\u5e38\u9a7b\u5185\u5b58&#xff0c;\u54cd\u5e94\u98de\u5feb\u3002<\/li>\n<li>\u4f4e\u9891\u6a21\u578b\u81ea\u52a8\u6dd8\u6c70&#xff0c;\u5185\u5b58\u5360\u7528\u53ef\u63a7\u3002<\/li>\n<li>\u7ebf\u4e0a\u70ed\u5207\u6362\u6a21\u578b&#xff0c;\u518d\u4e5f\u4e0d\u7528\u534a\u591c\u8d77\u6765\u91cd\u542f\u670d\u52a1\u4e86\u3002<\/li>\n<li>\u5b9e\u6d4b\u7f13\u5b58\u547d\u4e2d\u7387\u7a33\u5b9a\u5728 90% \u4ee5\u4e0a\u3002<\/li>\n<\/ul>\n<h5>&#x1f6a6; 3. \u7cbe\u7ec6\u8c03\u5ea6&#xff1a;\u80fd\u6297\u80fd\u6253\u7684\u5e76\u53d1\u63a7\u5236<\/h5>\n<p>\u9ad8\u5e76\u53d1\u4e0b&#xff0c;\u8bf7\u6c42\u5c31\u50cf\u6d2a\u6c34\u731b\u517d\u3002\u5982\u679c\u6ca1\u6709\u4e00\u4e2a\u597d\u7684\u201c\u6c34\u575d\u201d&#xff0c;\u670d\u52a1\u5206\u5206\u949f\u5c31\u88ab\u51b2\u57ae\u3002\u6211\u7684\u201c\u6c34\u575d\u201d\u7531 asyncio.Semaphore&#xff08;\u4fe1\u53f7\u91cf&#xff09;&#043; \u7b49\u5f85\u961f\u5217 \u6784\u6210\u3002<\/p>\n<p>\u6838\u5fc3\u601d\u8def\u662f&#xff1a;<\/p>\n<li>\u7528\u4fe1\u53f7\u91cf\u63a7\u5236\u6b63\u5728\u5904\u7406\u7684\u8bf7\u6c42\u603b\u6570&#xff08;\u6bd4\u5982200\u4e2a&#xff09;\u3002<\/li>\n<li>\u5bf9\u90a3\u4e9b\u62a2\u4e0d\u5230\u4fe1\u53f7\u91cf\u7684\u8bf7\u6c42&#xff0c;\u4e0d\u662f\u76f4\u63a5\u62d2\u7edd&#xff0c;\u800c\u662f\u8ba9\u5b83\u4eec\u5728\u7b49\u5f85\u961f\u5217\u91cc\u6392\u961f\u3002<\/li>\n<li>\u7b49\u5f85\u961f\u5217\u4e5f\u8bbe\u4e2a\u4e0a\u9650&#xff08;\u6bd4\u5982100\u4e2a&#xff09;&#xff0c;\u6ee1\u4e86\u624d\u8fd4\u56de 429 Service Overloaded\u3002<\/li>\n<li>\u7ed9\u6392\u961f\u52a0\u4e0a\u8d85\u65f6&#xff0c;\u4e0d\u80fd\u8ba9\u7528\u6237\u65e0\u9650\u7b49\u4e0b\u53bb\u3002<\/li>\n<p>import asyncio<br \/>\nfrom contextlib import asynccontextmanager<\/p>\n<p># \u5168\u5c40\u5e76\u53d1\u201c\u8bb8\u53ef\u8bc1\u201d&#xff0c;\u6700\u591a200\u4e2a<br \/>\nMAX_CONCURRENT_REQUESTS &#061; 200<br \/>\n_request_semaphore &#061; asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)<\/p>\n<p># \u7b49\u5f85\u961f\u5217\u201c\u7f13\u51b2\u533a\u201d&#xff0c;\u6700\u591a100\u4e2a<br \/>\nMAX_WAITING_QUEUE &#061; 100<br \/>\n_waiting_queue_count &#061; 0<br \/>\n_waiting_queue_lock &#061; asyncio.Lock() # \u4fdd\u62a4\u8ba1\u6570\u5668<\/p>\n<p>&#064;asynccontextmanager<br \/>\nasync def request_limiter(max_wait_time&#061;60):<br \/>\n    global _waiting_queue_count<\/p>\n<p>    # \u8bf7\u6c42\u6765\u4e86&#xff0c;\u5148\u5230\u7b49\u5019\u533a\u62ff\u4e2a\u53f7<br \/>\n    async with _waiting_queue_lock:<br \/>\n        if _waiting_queue_count &gt;&#061; MAX_WAITING_QUEUE:<br \/>\n            raise HTTPException(status_code&#061;429, detail&#061;&#034;\u8bf7\u6c42\u8fc7\u8f7d&#xff0c;\u8bf7\u7a0d\u540e\u518d\u8bd5&#034;)<br \/>\n        _waiting_queue_count &#043;&#061; 1<\/p>\n<p>    acquired &#061; False<br \/>\n    try:<br \/>\n        # \u5c1d\u8bd5\u83b7\u53d6\u4e00\u4e2a\u201c\u8bb8\u53ef\u8bc1\u201d&#xff0c;\u5e26\u8d85\u65f6<br \/>\n        await asyncio.wait_for(<br \/>\n            _request_semaphore.acquire(),<br \/>\n            timeout&#061;max_wait_time<br \/>\n        )<br \/>\n        acquired &#061; True<br \/>\n        # \u62ff\u5230\u8bb8\u53ef\u8bc1\u4e86&#xff01;\u79bb\u5f00\u7b49\u5019\u533a&#xff0c;\u8fdb\u5165\u5904\u7406\u533a<br \/>\n        async with _waiting_queue_lock:<br \/>\n            _waiting_queue_count -&#061; 1<br \/>\n        yield # \u628a\u63a7\u5236\u6743\u4ea4\u7ed9\u4e1a\u52a1\u903b\u8f91<br \/>\n    except asyncio.TimeoutError:<br \/>\n        raise HTTPException(status_code&#061;504, detail&#061;&#034;\u8bf7\u6c42\u7b49\u5f85\u8d85\u65f6&#034;)<br \/>\n    finally:<br \/>\n        # \u5904\u7406\u5b8c\u6bd5&#xff0c;\u5f52\u8fd8\u201c\u8bb8\u53ef\u8bc1\u201d<br \/>\n        if acquired:<br \/>\n            _request_semaphore.release()<\/p>\n<p>\u4e3a\u4e86\u65b9\u4fbf\u6392\u67e5\u95ee\u9898&#xff0c;\u6211\u8fd8\u5728\u54cd\u5e94\u5934\u91cc\u52a0\u4e86\u51e0\u4e2a\u81ea\u5b9a\u4e49\u5b57\u6bb5&#xff0c;\u4e00\u773c\u5c31\u80fd\u770b\u51fa\u670d\u52a1\u7684\u8d1f\u8f7d\u60c5\u51b5&#xff0c;\u8fd9\u4e2a\u5c0f\u6280\u5de7\u975e\u5e38\u5b9e\u7528&#xff1a;<\/p>\n<p># \u5728\u54cd\u5e94\u4e2d\u95f4\u4ef6\u91cc\u52a0\u4e0a\u8fd9\u4e9b\u5934<br \/>\nresponse.headers[&#034;X-Current-Concurrency&#034;] &#061; str(MAX_CONCURRENT_REQUESTS &#8211; _request_semaphore._value)<br \/>\nresponse.headers[&#034;X-Waiting-Queue-Size&#034;] &#061; str(_waiting_queue_count)<br \/>\nresponse.headers[&#034;X-Request-ID&#034;] &#061; str(uuid.uuid4())<\/p>\n<h5>&#x1f6e0;\ufe0f 4. \u89e3\u653e\u53cc\u624b&#xff1a;\u81ea\u9002\u5e94\u7684\u8d44\u6e90\u914d\u7f6e<\/h5>\n<p>\u6bcf\u6b21\u6362\u4e2a\u673a\u5668\u90fd\u8981\u624b\u52a8\u8c03\u4e00\u5806\u53c2\u6570&#xff1f;\u592a\u4f4e\u6548\u4e86&#xff01;\u6211\u5199\u4e86\u4e2a\u542f\u52a8\u811a\u672c&#xff0c;\u8ba9\u670d\u52a1\u53d8\u5f97\u66f4\u201c\u806a\u660e\u201d&#xff0c;\u80fd\u6839\u636e\u5f53\u524d\u73af\u5883\u7684CPU\u548c\u5185\u5b58&#xff0c;\u81ea\u52a8\u8ba1\u7b97\u51fa\u4e00\u5957\u5408\u7406\u7684 worker \u6570\u91cf\u548cONNX\u7ebf\u7a0b\u6570\u3002<\/p>\n<p>\u52a8\u6001\u8ba1\u7b97 worker \u6570\u91cf&#xff08;\u57fa\u4e8e\u7ecf\u9a8c\u7684\u542f\u53d1\u5f0f\u89c4\u5219&#xff09;&#xff1a;<\/p>\n<p># \u81ea\u52a8\u8ba1\u7b97Uvicorn worker\u6570\u91cf<br \/>\nCPU_CORES&#061;$(nproc)<br \/>\nTOTAL_MEMORY_MB&#061;$(grep MemTotal \/proc\/meminfo | awk &#039;{print $2\/1024}&#039;)<\/p>\n<p># \u9ed8\u8ba4\u7b56\u7565&#xff1a;CPU\u6838\u5fc3\u6570\u7684\u4e00\u534a&#xff0c;\u4f46\u6700\u591a\u4e0d\u8d85\u8fc76\u4e2a&#xff0c;\u907f\u514d\u4e0a\u4e0b\u6587\u5207\u6362\u5f00\u9500\u8fc7\u5927<br \/>\nAUTO_WORKERS&#061;$(( CPU_CORES \/ 2 ))<br \/>\nAUTO_WORKERS&#061;$(( AUTO_WORKERS &gt; 6 ? 6 : AUTO_WORKERS ))<\/p>\n<p># \u5185\u5b58\u9650\u5236&#xff1a;\u5982\u679c\u5185\u5b58\u5c0f\u4e8e16G&#xff0c;\u6309\u6bcf4G\u5185\u5b58\u5206\u914d1\u4e2aworker\u6765\u7b97&#xff0c;\u53d6\u66f4\u5c0f\u7684\u503c<br \/>\nMEM_GB&#061;$((TOTAL_MEMORY_MB \/ 1024))<br \/>\nif [ $MEM_GB -lt 16 ]; then<br \/>\n    MEM_LIMITED_WORKERS&#061;$((MEM_GB \/ 4))<br \/>\n    if [ $MEM_LIMITED_WORKERS -lt $AUTO_WORKERS ]; then<br \/>\n        AUTO_WORKERS&#061;$MEM_LIMITED_WORKERS<br \/>\n    fi<br \/>\nfi<br \/>\n# \u6700\u7ec8worker\u6570&#xff0c;\u6700\u5c0f\u4e3a1<br \/>\nUVICORN_WORKERS&#061;${UVICORN_WORKERS:-$(( AUTO_WORKERS &gt; 0 ? AUTO_WORKERS : 1 ))}<\/p>\n<p>\u52a8\u6001\u8ba1\u7b97 ONNX \u5185\u90e8\u7ebf\u7a0b\u6570&#xff1a;<\/p>\n<p># \u81ea\u52a8\u914d\u7f6eONNX\u5185\u90e8\u7ebf\u7a0b\u6c60\u5927\u5c0f<br \/>\n# \u6838\u5fc3\u601d\u8def&#xff1a;\u603bCPU\u6838\u5fc3 \/ worker\u6570&#xff0c;\u4fdd\u8bc1\u6bcf\u4e2aworker\u5206\u5230\u5408\u7406\u7684\u7ebf\u7a0b<br \/>\n# \u540c\u65f6\u8bbe\u7f6e\u4e0a\u4e0b\u9650&#xff0c;\u907f\u514d\u6781\u7aef\u60c5\u51b5<br \/>\nif [ -z &#034;$ORT_THREAD_POOL_SIZE&#034; ]; then<br \/>\n    AUTO_THREADS&#061;$(( CPU_CORES \/ UVICORN_WORKERS ))<br \/>\n    AUTO_THREADS&#061;$(( AUTO_THREADS &lt; 1 ? 1 : AUTO_THREADS )) # \u81f3\u5c111\u4e2a<br \/>\n    AUTO_THREADS&#061;$(( AUTO_THREADS &gt; 4 ? 4 : AUTO_THREADS )) # \u7ecf\u9a8c\u503c&#xff0c;\u6bcf\u4e2aworker\u5185\u8d85\u8fc74\u4e2a\u8ba1\u7b97\u7ebf\u7a0b\u6536\u76ca\u9012\u51cf<br \/>\n    export ORT_THREAD_POOL_SIZE&#061;$AUTO_THREADS<br \/>\nfi<\/p>\n<p>\u6709\u4e86\u8fd9\u5957\u811a\u672c&#xff0c;\u90e8\u7f72\u65b0\u73af\u5883\u65f6&#xff0c;\u8fd0\u7ef4\u540c\u5b66\u518d\u4e5f\u4e0d\u7528\u8ffd\u7740\u6211\u95ee\u53c2\u6570\u8be5\u600e\u4e48\u8c03\u4e86&#xff0c;\u771f\u6b63\u5b9e\u73b0\u4e86\u4e00\u952e\u542f\u52a8\u3002<\/p>\n<h5>&#x1f4a7; 5. \u9531\u94e2\u5fc5\u8f83&#xff1a;\u7cbe\u7ec6\u5316\u7684\u5185\u5b58\u7ba1\u7406<\/h5>\n<p>CPU\u670d\u52a1\u5668\u7684\u5185\u5b58\u8d44\u6e90\u5bf8\u571f\u5bf8\u91d1&#xff0c;\u4e00\u70b9\u90fd\u4e0d\u80fd\u6d6a\u8d39\u3002<\/p>\n<p>\u9996\u5148&#xff0c;\u6362\u4e2a\u66f4\u731b\u7684\u5185\u5b58\u5206\u914d\u5668 TCMalloc&#xff1a;<\/p>\n<p>TCMalloc \u662f Google \u5f00\u53d1\u7684\u4e00\u6b3e\u9ad8\u6027\u80fd\u5185\u5b58\u5206\u914d\u5668&#xff0c;\u5b9e\u6d4b\u5728\u591a\u7ebf\u7a0b\u9ad8\u5e76\u53d1\u573a\u666f\u4e0b&#xff0c;\u5b83\u6bd4\u7cfb\u7edf\u9ed8\u8ba4\u7684 glibc malloc \u6027\u80fd\u66f4\u597d&#xff0c;\u4e14\u80fd\u66f4\u6709\u6548\u5730\u51cf\u5c11\u5185\u5b58\u788e\u7247\u3002<\/p>\n<p># \u5728\u542f\u52a8\u811a\u672c\u6216Dockerfile\u4e2d\u8bbe\u7f6e<br \/>\n# \u542f\u7528Google\u7684TCMalloc<br \/>\nexport LD_PRELOAD&#061;\/usr\/lib\/x86_64-linux-gnu\/libtcmalloc.so.4<br \/>\n# \u8ba9TCMalloc\u66f4\u9891\u7e41\u3001\u66f4\u79ef\u6781\u5730\u5c06\u7a7a\u95f2\u5185\u5b58\u8fd8\u7ed9\u64cd\u4f5c\u7cfb\u7edf&#xff0c;\u5bf9\u5185\u5b58\u654f\u611f\u578b\u670d\u52a1\u5c24\u4e3a\u91cd\u8981<br \/>\nexport TCMALLOC_RELEASE_RATE&#061;10<\/p>\n<p>\u6ce8\u610f&#xff1a;\u53ea\u5173\u6ce8TCMalloc\u81ea\u8eab\u7684\u8c03\u4f18\u53c2\u6570\u5373\u53ef&#xff0c;\u65e0\u9700\u518d\u6df7\u5408\u8bbe\u7f6eglibc malloc\u7684\u73af\u5883\u53d8\u91cf&#xff0c;\u4ee5\u514d\u9020\u6210\u6df7\u4e71\u3002<\/p>\n<p>\u8fd9\u4e9b\u53c2\u6570\u7ec4\u5408\u5728\u4e00\u8d77&#xff0c;\u8ba9\u670d\u52a1\u7684\u5185\u5b58\u4f7f\u7528\u66f2\u7ebf\u53d8\u5f97\u975e\u5e38\u5e73\u6ed1&#xff0c;OOM&#xff08;Out of Memory&#xff09;\u7684\u5669\u68a6\u57fa\u672c\u6210\u4e3a\u5386\u53f2\u3002<\/p>\n<h4>&#x1f914; \u5c55\u671b\u672a\u6765&#xff1a;\u8fd8\u80fd\u4f18\u5316\u7684\u51e0\u4e2a\u70b9<\/h4>\n<p>\u5b66\u65e0\u6b62\u5883&#xff0c;\u8fd9\u5957\u65b9\u6848\u867d\u7136\u80fd\u6253&#xff0c;\u4f46\u80af\u5b9a\u8fd8\u6709\u4f18\u5316\u7684\u7a7a\u95f4\u3002\u5728\u8fd9\u91cc\u629b\u7816\u5f15\u7389&#xff0c;\u5217\u51fa\u51e0\u4e2a\u6211\u672a\u6765\u53ef\u80fd\u63a2\u7d22\u7684\u65b9\u5411&#xff1a;<\/p>\n<li>\u52a8\u6001\u6279\u5904\u7406 (Dynamic Batching) &#xff1a;\u76ee\u524d\u6211\u4eec\u662f\u6309\u8bf7\u6c42\u5e76\u53d1&#xff0c;\u4f46\u6a21\u578b\u63a8\u7406\u65f6&#xff0c;\u4e00\u6b21\u5904\u7406\u4e00\u4e2a\u6279\u6b21(Batch)\u7684\u6548\u7387\u8fdc\u9ad8\u4e8e\u5355\u4e2a\u5904\u7406\u3002\u540e\u7eed\u53ef\u4ee5\u5f15\u5165\u4e00\u4e2a\u4e2d\u95f4\u5c42&#xff0c;\u5728\u77ed\u65f6\u95f4\u5185&#xff08;\u6bd4\u598210-20ms&#xff09;\u5c06\u5e76\u53d1\u6765\u7684\u6563\u88c5\u8bf7\u6c42\u201c\u62fc\u201d\u6210\u4e00\u4e2a\u6279\u6b21&#xff0c;\u518d\u9001\u5165\u6a21\u578b\u3002\u8fd9\u662f\u8fdb\u4e00\u6b65\u63d0\u5347\u541e\u5410\u91cf\u7684\u5173\u952e\u4e00\u6b65\u3002<\/li>\n<li>\u786c\u4ef6\u7ea7\u52a0\u901f&#xff1a;\u5982\u679c\u670d\u52a1\u5668\u662f\u82f1\u7279\u5c14\u7684CPU&#xff0c;\u53ef\u4ee5\u5c1d\u8bd5\u4f7f\u7528OpenVINO\u4f5c\u4e3aONNX Runtime\u7684\u540e\u7aef\u6267\u884c\u5f15\u64ce&#xff08;Execution Provider&#xff09;&#xff0c;\u636e\u8bf4\u80fd\u5229\u7528\u7279\u5b9a\u7684\u786c\u4ef6\u6307\u4ee4\u96c6&#xff08;\u5982AVX-512&#xff09;\u5e26\u6765\u989d\u5916\u7684\u6027\u80fd\u63d0\u5347\u3002<\/li>\n<li>\u66f4\u667a\u80fd\u7684\u8c03\u5ea6&#xff1a;\u73b0\u5728\u7684\u7b49\u5f85\u961f\u5217\u662f\u5148\u8fdb\u5148\u51fa\u3002\u672a\u6765\u53ef\u4ee5\u8bbe\u8ba1\u66f4\u667a\u80fd\u7684\u8c03\u5ea6\u7b56\u7565&#xff0c;\u6bd4\u5982\u4f18\u5148\u5904\u7406\u9884\u4f30\u8ba1\u7b97\u91cf\u5c0f\u7684\u77ed\u6587\u672c\u8bf7\u6c42&#xff0c;\u628a\u957f\u6587\u6863\u8bf7\u6c42\u7684\u4f18\u5148\u7ea7\u653e\u4f4e&#xff0c;\u4ece\u800c\u4f18\u5316\u6574\u4f53\u7684\u5e73\u5747\u54cd\u5e94\u65f6\u95f4\u3002<\/li>\n<p>\u8fd9\u4e9b\u53ea\u662f\u6211\u7684\u4e00\u4e9b\u4e0d\u6210\u719f\u7684\u60f3\u6cd5&#xff0c;\u80af\u5b9a\u6709\u5f88\u591a\u8003\u8651\u4e0d\u5468\u7684\u5730\u65b9\u3002\u6280\u672f\u4e4b\u8def\u6f2b\u6f2b&#xff0c;\u552f\u6709\u4e0d\u65ad\u5b66\u4e60\u548c\u5b9e\u8df5\u3002<\/p>\n<h4>\u270d\ufe0f \u590d\u76d8\u603b\u7ed3<\/h4>\n<p>\u56de\u5934\u770b&#xff0c;\u8fd9\u6b21\u4f18\u5316\u4e4b\u65c5\u6536\u83b7\u6ee1\u6ee1\u3002\u6211\u4eec\u6ca1\u6709\u4f9d\u8d56\u4efb\u4f55\u201c\u9ed1\u79d1\u6280\u201d&#xff0c;\u53ea\u662f\u5c06\u4e00\u4e9b\u6210\u719f\u7684\u6280\u672f\u548c\u8bbe\u8ba1\u601d\u60f3\u505a\u4e86\u7cbe\u5de7\u7684\u7ec4\u5408&#xff0c;\u6700\u7ec8\u5728\u6709\u9650\u7684CPU\u8d44\u6e90\u4e0a\u5b9e\u73b0\u4e86\u975e\u5e38\u4e0d\u9519\u7684\u6548\u679c\u3002<\/p>\n<p>\u6211\u628a\u6838\u5fc3\u7684\u51e0\u4e2a\u8981\u70b9\u518d\u603b\u7ed3\u4e00\u4e0b&#xff1a;<\/p>\n<li>\u5f15\u64ce\u6362\u5fc3&#xff1a;\u679c\u65ad\u4ece\u539f\u751ftransformers\u5e93\u5207\u6362\u5230 INT8\u91cf\u5316\u6a21\u578b &#043; ONNX Runtime&#xff0c;\u8fd9\u662f\u6027\u80fd\u7a81\u7834\u7684\u57fa\u77f3\u3002<\/li>\n<li>\u667a\u80fd\u8c03\u5ea6&#xff1a;\u7528 LRU \u7f13\u5b58\u4f18\u96c5\u5730\u89e3\u51b3\u4e86\u591a\u6a21\u578b\u5171\u5b58\u7684\u5185\u5b58\u96be\u9898\u3002<\/li>\n<li>\u6d41\u91cf\u7ba1\u63a7&#xff1a;\u4fe1\u53f7\u91cf &#043; \u7b49\u5f85\u961f\u5217 \u7684\u7ec4\u5408\u62f3&#xff0c;\u8ba9\u670d\u52a1\u5728\u9ad8\u5e76\u53d1\u4e0b\u65e2\u7a33\u5982\u6cf0\u5c71&#xff0c;\u53c8\u80fd\u6700\u5927\u5316\u541e\u5410\u3002<\/li>\n<li>\u81ea\u52a8\u9002\u914d&#xff1a;\u901a\u8fc7\u542f\u52a8\u811a\u672c\u5b9e\u73b0\u8d44\u6e90\u914d\u7f6e\u7684\u81ea\u52a8\u5316&#xff0c;\u6781\u5927\u964d\u4f4e\u4e86\u8fd0\u7ef4\u6210\u672c\u3002<\/li>\n<li>\u7cbe\u6253\u7ec6\u7b97&#xff1a;\u4ece TCMalloc \u5230\u7cfb\u7edf\u53c2\u6570&#xff0c;\u628a\u5185\u5b58\u7ba1\u7406\u7684\u7ec6\u8282\u62a0\u5230\u6781\u81f4\u3002<\/li>\n<p>\u8fd9\u5957\u7ec4\u5408\u62f3\u6253\u4e0b\u6765&#xff0c;\u4e0d\u4ec5\u89e3\u51b3\u4e86\u5f53\u524d\u7684\u6027\u80fd\u74f6\u9888&#xff0c;\u4e5f\u4e3a\u672a\u6765\u66f4\u591a\u7684AI\u670d\u52a1\u4e0aCPU\u5e73\u53f0\u6c89\u6dc0\u4e86\u4e00\u5957\u53ef\u590d\u7528\u7684\u65b9\u6cd5\u8bba\u3002<\/p>\n<p>\u8fd9\u7bc7\u6587\u7ae0\u603b\u7ed3\u4e86\u6211\u8fd1\u671f\u7684\u5b66\u4e60\u548c\u5b9e\u8df5&#xff0c;\u96be\u514d\u6709\u758f\u6f0f\u548c\u9519\u8bef&#xff0c;\u8bda\u6073\u5730\u6b22\u8fce\u5404\u4f4d\u5927\u4f6c\u6279\u8bc4\u6307\u6b63\u3002\u5982\u679c\u5927\u5bb6\u6709\u66f4\u591a\u69a8\u5e72CPU\u6027\u80fd\u7684\u201c\u9a9a\u64cd\u4f5c\u201d&#xff0c;\u4e5f\u975e\u5e38\u6b22\u8fce\u5728\u8bc4\u8bba\u533a\u4ea4\u6d41\u5206\u4eab&#xff01;<\/p>\n<p>&#xff08;\u5b8c&#xff09;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.1k\u6b21\uff0c\u70b9\u8d5e27\u6b21\uff0c\u6536\u85cf10\u6b21\u3002\u672c\u6587\u5206\u4eab\u4e86\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\u4f18\u5316Embedding\u6a21\u578b\u90e8\u7f72\u7684\u7ecf\u9a8c\u3002\u539f\u65b9\u6848\u4f7f\u7528Hugging Face\u7684transformers\u5e93\u90e8\u7f72BAAI\/bge-m3\u6a21\u578b\uff0c\u5355\u4e2a\u8bf7\u6c42\u54cd\u5e94\u97003-4\u79d2\u3002\u901a\u8fc7\u4e94\u5927\u4f18\u5316\u63aa\u65bd\u5c06\u6027\u80fd\u63d0\u53473\u500d\u591a\uff1a1\uff09\u91c7\u7528\u6a21\u578b\u91cf\u5316\u4e0eONNX Runtime\u7ed3\u5408\uff0c\u4f18\u5316\u7ebf\u7a0b\u7ba1\u7406\u548c\u56fe\u4f18\u5316\uff1b2\uff09\u8bbe\u8ba1LRU\u7f13\u5b58\u7b56\u7565\u7ba1\u7406\u591a\u6a21\u578b\uff1b3\uff09\u4f7f\u7528asyncio.Semaphore\u63a7\u5236\u5e76\u53d1\u8bf7\u6c42\uff1b4\uff09\u7ed3\u5408FastAPI\u5f02\u6b65\u7279\u6027\uff1b5\uff09\u5176\u4ed6\u7ec6\u7c92\u5ea6\u4f18\u5316\u3002\u6700\u7ec8\u5b9e\u73b0200+\u5e76\u53d1\u4e0b\u5e73\u5747\u54cd\u5e94150ms\u7684\u663e\u8457\u63d0\u5347\u3002_bge-m3 onnx<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[346,50,292],"topic":[],"class_list":["post-47974","post","type-post","status-publish","format-standard","hentry","category-server","tag-embedding","tag-50","tag-292"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/47974.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.1k\u6b21\uff0c\u70b9\u8d5e27\u6b21\uff0c\u6536\u85cf10\u6b21\u3002\u672c\u6587\u5206\u4eab\u4e86\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\u4f18\u5316Embedding\u6a21\u578b\u90e8\u7f72\u7684\u7ecf\u9a8c\u3002\u539f\u65b9\u6848\u4f7f\u7528Hugging Face\u7684transformers\u5e93\u90e8\u7f72BAAI\/bge-m3\u6a21\u578b\uff0c\u5355\u4e2a\u8bf7\u6c42\u54cd\u5e94\u97003-4\u79d2\u3002\u901a\u8fc7\u4e94\u5927\u4f18\u5316\u63aa\u65bd\u5c06\u6027\u80fd\u63d0\u53473\u500d\u591a\uff1a1\uff09\u91c7\u7528\u6a21\u578b\u91cf\u5316\u4e0eONNX Runtime\u7ed3\u5408\uff0c\u4f18\u5316\u7ebf\u7a0b\u7ba1\u7406\u548c\u56fe\u4f18\u5316\uff1b2\uff09\u8bbe\u8ba1LRU\u7f13\u5b58\u7b56\u7565\u7ba1\u7406\u591a\u6a21\u578b\uff1b3\uff09\u4f7f\u7528asyncio.Semaphore\u63a7\u5236\u5e76\u53d1\u8bf7\u6c42\uff1b4\uff09\u7ed3\u5408FastAPI\u5f02\u6b65\u7279\u6027\uff1b5\uff09\u5176\u4ed6\u7ec6\u7c92\u5ea6\u4f18\u5316\u3002\u6700\u7ec8\u5b9e\u73b0200+\u5e76\u53d1\u4e0b\u5e73\u5747\u54cd\u5e94150ms\u7684\u663e\u8457\u63d0\u5347\u3002_bge-m3 onnx\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/47974.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-07-30T02:12:40+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"5 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/47974.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/47974.html\",\"name\":\"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-07-30T02:12:40+00:00\",\"dateModified\":\"2025-07-30T02:12:40+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/47974.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/47974.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/47974.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/47974.html","og_locale":"zh_CN","og_type":"article","og_title":"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1.1k\u6b21\uff0c\u70b9\u8d5e27\u6b21\uff0c\u6536\u85cf10\u6b21\u3002\u672c\u6587\u5206\u4eab\u4e86\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\u4f18\u5316Embedding\u6a21\u578b\u90e8\u7f72\u7684\u7ecf\u9a8c\u3002\u539f\u65b9\u6848\u4f7f\u7528Hugging Face\u7684transformers\u5e93\u90e8\u7f72BAAI\/bge-m3\u6a21\u578b\uff0c\u5355\u4e2a\u8bf7\u6c42\u54cd\u5e94\u97003-4\u79d2\u3002\u901a\u8fc7\u4e94\u5927\u4f18\u5316\u63aa\u65bd\u5c06\u6027\u80fd\u63d0\u53473\u500d\u591a\uff1a1\uff09\u91c7\u7528\u6a21\u578b\u91cf\u5316\u4e0eONNX Runtime\u7ed3\u5408\uff0c\u4f18\u5316\u7ebf\u7a0b\u7ba1\u7406\u548c\u56fe\u4f18\u5316\uff1b2\uff09\u8bbe\u8ba1LRU\u7f13\u5b58\u7b56\u7565\u7ba1\u7406\u591a\u6a21\u578b\uff1b3\uff09\u4f7f\u7528asyncio.Semaphore\u63a7\u5236\u5e76\u53d1\u8bf7\u6c42\uff1b4\uff09\u7ed3\u5408FastAPI\u5f02\u6b65\u7279\u6027\uff1b5\uff09\u5176\u4ed6\u7ec6\u7c92\u5ea6\u4f18\u5316\u3002\u6700\u7ec8\u5b9e\u73b0200+\u5e76\u53d1\u4e0b\u5e73\u5747\u54cd\u5e94150ms\u7684\u663e\u8457\u63d0\u5347\u3002_bge-m3 onnx","og_url":"https:\/\/www.wsisp.com\/helps\/47974.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-07-30T02:12:40+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"5 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/47974.html","url":"https:\/\/www.wsisp.com\/helps\/47974.html","name":"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-07-30T02:12:40+00:00","dateModified":"2025-07-30T02:12:40+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/47974.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/47974.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/47974.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u544a\u522bGPU\u7126\u8651\uff1a\u5982\u4f55\u5728\u7eafCPU\u670d\u52a1\u5668\u4e0a\uff0c\u6253\u9020\u9ad8\u6027\u80fdEmbedding\u670d\u52a1\uff1f"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/47974","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=47974"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/47974\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=47974"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=47974"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=47974"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=47974"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}