{"id":55496,"date":"2025-08-13T23:44:32","date_gmt":"2025-08-13T15:44:32","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/55496.html"},"modified":"2025-08-13T23:44:32","modified_gmt":"2025-08-13T15:44:32","slug":"lora%e5%be%ae%e8%b0%83%e5%ae%9e%e6%88%98%ef%bc%9a%e4%b8%87%e5%ad%97%e6%b7%b1%e5%ba%a6%e8%a7%a3%e6%9e%90","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/55496.html","title":{"rendered":"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790"},"content":{"rendered":"<p>\u6b22\u8fce\u6765\u5230\u557e\u557e\u7684\u535a\u5ba2&#x1f431;\u3002 \u8bb0\u5f55\u5b66\u4e60\u70b9\u6ef4\u3002\u5206\u4eab\u5de5\u4f5c\u601d\u8003\u548c\u5b9e\u7528\u6280\u5de7&#xff0c;\u5076\u5c14\u4e5f\u5206\u4eab\u4e00\u4e9b\u6742\u8c08&#x1f4ac;\u3002 \u6709\u5f88\u591a\u5f88\u591a\u4e0d\u8db3\u7684\u5730\u65b9&#xff0c;\u6b22\u8fce\u8bc4\u8bba\u4ea4\u6d41&#xff0c;\u611f\u8c22\u60a8\u7684\u9605\u8bfb\u548c\u8bc4\u8bba&#x1f604;\u3002<\/p>\n<\/p>\n<h4>\u76ee\u5f55<\/h4>\n<ul>\n<li>\u5f15\u8a00<\/li>\n<li>1 \u9884\u8bad\u7ec3\u6a21\u578b\u7684\u6289\u62e9<\/li>\n<li>2 \u5fae\u8c03\u65b9\u6848\u9009\u62e9<\/li>\n<li>3 PEFT\u5b9e\u6218<\/li>\n<li>\n<ul>\n<li>3.1 \u7b2c\u4e00\u6b65&#xff1a;\u6570\u636e\u51c6\u5907<\/li>\n<li>\n<ul>\n<li>3.1.1 \u6570\u636e\u589e\u5f3a<\/li>\n<li>3.1.2 \u6982\u5ff5\u8865\u5145<\/li>\n<li>3.1.3 \u6570\u636e\u51c6\u5907Demo<\/li>\n<\/ul>\n<\/li>\n<li>3.2 \u7b2c\u4e8c\u6b65&#xff1a;\u5fae\u8c03<\/li>\n<li>\n<ul>\n<li>3.2.1 \u5fae\u8c03Demo<\/li>\n<li>3.2.2 \u6982\u5ff5\u8865\u5145<\/li>\n<li>\n<ul>\n<li>3.2.2.1 SFT&#xff08;\u76d1\u7763\u5fae\u8c03&#xff09;<\/li>\n<\/ul>\n<\/li>\n<li>3.2.3 DPO<\/li>\n<li>\n<ul>\n<li>3.2.3.1 LoRA&#xff08;\u4f4e\u79e9\u9002\u914d&#xff09;<\/li>\n<li>3.2.3.2 trl<\/li>\n<li>\n<ul>\n<li>3.2.3.2.1 SFTTrainer<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>3.2.4 \u8bad\u7ec3\u6d41\u7a0b<\/li>\n<\/ul>\n<\/li>\n<li>3.3 
\u7b2c\u4e09\u6b65&#xff1a;\u5bf9\u6bd4\u6d4b\u8bd5<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h2>\u5f15\u8a00<\/h2>\n<p>\u5728\u4e0a\u4e00\u4e2a\u7bc7\u7ae0\u4e2d&#xff0c;\u6211\u4eec\u6709\u4e86\u89e3\u5230\u5fae\u8c03\u6548\u679c\u6bd4Prompt\u66f4\u597d&#xff0c;\u4e14\u9002\u5408\u5904\u7406\u5782\u76f4\u9886\u57df\u3001\u5b9a\u5236\u5316\u7b49\u9700\u6c42\u3002 \u672c\u7bc7&#xff0c;\u6211\u4eec\u5c06\u66f4\u6df1\u5165\u5730\u4e86\u89e3\u53c2\u6570\u5fae\u8c03PEFT\u3002<\/p>\n<p>PEFT&#xff08;Parameter-Efficient Fine-Tuning&#xff0c;\u53c2\u6570\u9ad8\u6548\u5fae\u8c03&#xff09;\u662f\u4e00\u7c7b\u901a\u8fc7\u4ec5\u66f4\u65b0\u5c11\u91cf\u53c2\u6570\u5373\u53ef\u4f7f\u9884\u8bad\u7ec3\u8bed\u8a00\u6a21\u578b\u9002\u5e94\u4e0b\u6e38\u4efb\u52a1\u7684\u6280\u672f\u3002\u76f8\u8f83\u4e8e\u5168\u53c2\u6570\u5fae\u8c03&#xff0c;PEFT\u80fd\u5728\u4fdd\u6301\u6a21\u578b\u6027\u80fd\u7684\u540c\u65f6\u663e\u8457\u964d\u4f4e\u8ba1\u7b97\u8d44\u6e90\u9700\u6c42&#xff0c;\u7279\u522b\u9002\u5408\u8d44\u6e90\u53d7\u9650\u7684\u73af\u5883\u3002<\/p>\n<p>\u9605\u8bfb\u672c\u7bc7\u53ef\u4ee5\u5165\u95e8PEFT\u3002 \u4ee3\u7801\u5df2\u6574\u7406\u81f3Github&#xff1a;easy-tune<\/p>\n<h2>1 \u9884\u8bad\u7ec3\u6a21\u578b\u7684\u6289\u62e9<\/h2>\n<p>\u53c2\u6570\u9ad8\u6548\u5fae\u8c03\u57fa\u4e8e\u9884\u8bad\u7ec3\u6a21\u578b\u3002\u6211\u4eec\u8981\u600e\u4e48\u9009\u62e9\u4e00\u4e2a\u9002\u5408\u6211\u4eec\u9700\u6c42\u7684\u5f00\u6e90\u9884\u8bad\u7ec3\u6a21\u578b\u5462&#xff1f;<\/p>\n<p>\u6bd4\u5982\u6211\u9700\u8981\u5fae\u8c03\u4e00\u4e2a\u6a21\u578b&#xff0c;\u8ba9\u5176\u53ef\u4ee5\u5c06\u4e00\u7bc7\u6587\u7ae0\u6da6\u8272\u6210\u81ea\u5df1\u7684\u98ce\u683c&#xff0c;\u6211\u9700\u8981\u600e\u4e48\u9009\u62e9\u5462&#xff1f;<\/p>\n<p>Hugging Face\u5b98\u7f51&#xff1a;https:\/\/huggingface.co\/models<\/p>\n<p>\u63d0\u793a\u8bcd&#xff1a;<\/p>\n<p>\u4f60\u662f\u4e00\u540d\u6a21\u578b\u5fae\u8c03\u4e13\u5bb6&#xff0c;\u7cbe\u901a\u5404\u573a\u666f\u6a21\u578b\u5fae\u8c03\u3002<br 
\/>\n\u53ef\u4ee5\u9009\u62e9\u9002\u5e94\u5404\u573a\u666f\u7684\u5f00\u6e90\u6a21\u578b\u3002<\/p>\n<p>\u6211\u7684\u8bbe\u5907GPU\u662f&#xff1a;4060 laptop 8G\u663e\u5b58\u3002<br \/>\n\u6211\u7684\u9700\u6c42&#xff08;\u4f60\u7684\u4efb\u52a1&#xff09;\u662f&#xff1a;\u9009\u62e9\u4e00\u4e2a\u6a21\u578b\u53bb\u5fae\u8c03&#xff0c;\u8ba9\u5176\u53ef\u4ee5\u5c06\u4e00\u7bc7\u6587\u7ae0\u6da6\u8272\u6210\u81ea\u5df1\u7684\u98ce\u683c<\/p>\n<p>\u5f53\u524d\u65f6\u95f4\u662f&#xff1a;2025\u5e748\u67088\u65e522:27:35<br \/>\n\u8bf7\u9009\u62e9\u6700\u65b0\u6700\u597d\u9002\u7528\u4e8e\u8fd9\u4e2a\u4efb\u52a1\u7684\u5f00\u6e90\u6a21\u578b\u3002<\/p>\n<p>\u8bf7\u597d\u597d\u9009&#xff0c;\u5982\u679c\u6ca1\u6709\u9009\u62e9Trending\u6392\u5e8f\u9ad8\u7684\u8bf7\u544a\u8bc9\u539f\u56e0\u3002<\/p>\n<p>\u4f7f\u7528QWen\u3001DeepSeek\u3001Kimi\u8fdb\u884c\u63a8\u8350\u3002 \u63a8\u8350\u6709\u4e24\u4e2a\u6a21\u578b&#xff1a; Qwen\/Qwen2.5-7B-Instruct<\/p>\n<p>\u8003\u8651\u5230WSL\u7b49\u73af\u5883\u914d\u7f6e\u9ebb\u70e6&#xff0c;\u9009\u62e9\u4e86Qwen\/Qwen2.5-1.5B-Instruct<\/p>\n<h2>2 \u5fae\u8c03\u65b9\u6848\u9009\u62e9<\/h2>\n<p>\u9009\u5b9a\u5408\u9002\u7684\u9884\u8bad\u7ec3\u6a21\u578b\u540e&#xff0c;\u53ef\u4ee5\u4f7f\u7528AI&#xff0c;\u83b7\u53d6\u5408\u9002\u7684\u65b9\u6848\u3002<\/p>\n<p>AI\u4e3a\u6211\u76844060 Laptop\u63a8\u8350\u7684\u65b9\u6848\u662f&#xff1a;<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6280\u672f<\/th>\n<th>\u8bf4\u660e<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>QLoRA | \u57fa\u4e8e4-bit\u91cf\u5316\u7684LoRA\u6280\u672f<\/td>\n<td>4-bit \u91cf\u5316&#xff0c;\u4ec5\u8bad\u7ec3\u4f4e\u79e9\u77e9\u9635&#xff0c;\u663e\u5b58\u53ef\u63a7\u5236\u5728 7~8GB<\/td>\n<\/tr>\n<tr>\n<td>Flash Attention-2<\/td>\n<td>\u52a0\u901f\u8bad\u7ec3&#xff0c;\u964d\u4f4e\u663e\u5b58\u5360\u7528<\/td>\n<\/tr>\n<tr>\n<td>\u68af\u5ea6\u68c0\u67e5\u70b9&#xff08;Gradient Checkpointing&#xff09;<\/td>\n<td>\u8282\u7701\u663e\u5b58<\/td>\n<\/tr>\n<tr>\n<td>Batch Size &#061; 1~2<\/td>\n<td>\u914d\u5408 deepspeed \u6216 
accelerate<\/td>\n<\/tr>\n<tr>\n<td>\u6570\u636e\u683c\u5f0f<\/td>\n<td>\u6784\u5efa (\u539f\u6587, \u98ce\u683c\u5316\u7248\u672c) \u5bf9&#xff0c;\u652f\u6301\u98ce\u683c\u6807\u7b7e&#xff08;\u5982&#xff1a;\u201c\u5b66\u672f\u98ce\u201d\u3001\u201c\u6563\u6587\u98ce\u201d&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<ul>\n<li>\u5de5\u5177\u94fe<\/li>\n<\/ul>\n<p>&#8211; \u6846\u67b6&#xff1a;Hugging Face Transformers &#043; PEFT &#043; bitsandbytes<br \/>\n&#8211; \u8bad\u7ec3\u5e93&#xff1a;LLaMA-Factory&#xff08;\u652f\u6301 Qwen2.5&#xff0c;UI \u53cb\u597d&#xff09;<br \/>\n&#8211; \u672c\u5730\u90e8\u7f72&#xff1a;Text Generation WebUI&#xff08;\u652f\u6301 QLoRA \u52a0\u8f7d&#xff09;<\/p>\n<p>\u8fd9\u91cc\u4f53\u73b0\u4e86\u5fae\u8c03\u9700\u8981\u5173\u6ce8\u7684\u90e8\u5206\u4e8b\u9879&#xff1a;\u663e\u5b58\u4e0e\u8bad\u7ec3\u7cbe\u5ea6\u7684\u6743\u8861\u3001\u8bad\u7ec3\u7b56\u7565\u4e0e\u7a33\u5b9a\u6027\u3002<\/p>\n<h2>3 PEFT\u5b9e\u6218<\/h2>\n<p>\u8ba9\u6211\u4eec\u4e00\u6b65\u6b65\u8fdb\u884c\u5fae\u8c03\u5b9e\u6218\u3002<\/p>\n<p>\u5efa\u8bae\u4f7f\u7528\u865a\u62df\u73af\u5883&#xff0c;\u6838\u5fc3\u5305\u7684QWen2.5\u7684\u517c\u5bb9\u4f9d\u8d56\u7248\u672c\u5982\u4e0b&#xff1a;<\/p>\n<table>\n<thead>\n<tr>\n<th>\u5305\u540d<\/th>\n<th>\u63a8\u8350\u7248\u672c<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>accelerate<\/td>\n<td>0.27.2<\/td>\n<\/tr>\n<tr>\n<td>transformers<\/td>\n<td>4.37.2<\/td>\n<\/tr>\n<tr>\n<td>trl<\/td>\n<td>0.7.11<\/td>\n<\/tr>\n<tr>\n<td>peft<\/td>\n<td>0.6.2<\/td>\n<\/tr>\n<tr>\n<td>\u5982\u679c\u9700\u8981\u5347\u7ea7\u5219\u540c\u6b65\u5347\u7ea7<\/td>\n<td><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>pip install --upgrade transformers trl peft accelerate<\/p>\n<h3>3.1 
\u7b2c\u4e00\u6b65&#xff1a;\u6570\u636e\u51c6\u5907<\/h3>\n<p>\u51c6\u5907\u597d\u9700\u8981\u5904\u7406\u7684\u539f\u59cb\u6570\u636e&#xff0c;\u4f9d\u636e\u76ee\u6807\u9700\u6c42\u5bf9\u6570\u636e\u8fdb\u884c\u9884\u5904\u7406&#xff0c;\u5904\u7406\u6210\u53ef\u7528\u4e8e\u5fae\u8c03\u7684JSONL\u6307\u4ee4\u6570\u636e\u96c6\u3002<\/p>\n<p>\u8fd9\u4e00\u6b65\u662f\u6210\u529f\u5fae\u8c03\u7684\u57fa\u77f3&#xff0c;\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528\u5904\u7406\u811a\u672c&#xff0c;\u4e5f\u53ef\u4ee5\u4f7f\u7528\u53e6\u4e00\u4e2a\u6a21\u578b\u3002<\/p>\n<p>\u76ee\u7684\u90fd\u662f\u9ad8\u8d28\u91cf\u5730\u6784\u5efa\u6307\u4ee4\u6570\u636e\u96c6\u3002<\/p>\n<p>\u4e4b\u524d\u5199RAG\u7684\u65f6\u5019\u4f7f\u7528\u811a\u672c\u8c03\u6574\u4e86\u5f88\u591a\u4e2a\u7248\u672c&#xff0c;\u8fd9\u6b21\u6211\u4eec\u4f7f\u7528\u201c\u5927\u6a21\u578b\u5904\u7406\u5927\u6a21\u578b\u6570\u636e\u201d\u7684\u65b9\u5f0f&#xff0c;\u5373\u6570\u636e\u589e\u5f3a\u65b9\u5f0f\u6765\u8fdb\u884c\u6570\u636e\u51c6\u5907\u3002<\/p>\n<h4>3.1.1 \u6570\u636e\u589e\u5f3a<\/h4>\n<ul>\n<li>\u6570\u636e\u589e\u5f3a&#xff08;Data Augmentation&#xff09; \u5229\u7528\u4e00\u4e2a\u66f4\u5f3a\u7684\u6a21\u578b&#xff08;\u6216\u591a\u4e2a\u6a21\u578b&#xff09;\u6765\u4e3a\u6211\u4eec\u7684\u76ee\u6807\u6a21\u578b\u521b\u9020\u66f4\u9ad8\u8d28\u91cf\u7684\u8bad\u7ec3\u6570\u636e\u3002 
\u5c06\u7e41\u7410\u7684\u4eba\u5de5\u6807\u6ce8\u5de5\u4f5c&#xff0c;\u4ea4\u7ed9\u4e86AI\u6765\u5b8c\u6210&#xff0c;\u5b9e\u73b0\u81ea\u52a8\u5316\u5904\u7406\u3002<\/li>\n<\/ul>\n<p>\u6570\u636e\u589e\u5f3a\u6d41\u7a0b\u2b07\ufe0f&#xff1a;<\/p>\n<li>\n<p>\u52a0\u8f7d\u4e00\u4e2a\u201c\u6559\u5e08\u6a21\u578b\u201d:<\/p>\n<ul>\n<li>\u5728\u811a\u672c\u7684\u5f00\u5934&#xff0c;\u4f7f\u7528transformers\u5e93\u52a0\u8f7d\u4e00\u4e2a\u4f60\u672c\u5730\u7684\u3001\u5f3a\u5927\u76847B\u6307\u4ee4\u6a21\u578b&#xff08;\u6bd4\u5982\u4f60\u63d0\u5230\u7684Qwen2.5\u6216DeepSeek\u7684\u67d0\u4e2a\u6a21\u578b&#xff09;\u3002\u8fd9\u662f\u6211\u4eec\u7684\u201c\u6570\u636e\u5904\u7406AI\u201d\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u5b9a\u4e49\u591a\u79cd\u201c\u6307\u4ee4\u751f\u6210\u6a21\u677f\u201d:<\/p>\n<ul>\n<li>\n<p>\u6211\u4eec\u8981\u8ba9\u201c\u6559\u5e08\u6a21\u578b\u201d\u626e\u6f14\u4e0d\u540c\u7684\u89d2\u8272&#xff0c;\u6765\u4e3a\u6211\u4eec\u751f\u6210\u591a\u6837\u5316\u7684\u6307\u4ee4\u3002<\/p>\n<\/li>\n<li>\n<p>\u6a21\u677f1 (\u603b\u7ed3\u4e0e\u6269\u5199):<\/p>\n<ul>\n<li>Prompt to Teacher Model: \u201c\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u7f16\u8f91\u3002\u8bf7\u9605\u8bfb\u4e0b\u9762\u7684\u6587\u7ae0&#xff0c;\u4e3a\u5b83\u751f\u6210\u4e00\u4e2a\u7b80\u6d01\u7684\u3001\u5f15\u4eba\u5165\u80dc\u7684\u6807\u9898&#xff0c;\u4ee5\u53ca\u4e00\u4e2a\u80fd\u6982\u62ec\u5168\u6587\u6838\u5fc3\u601d\u60f3\u7684\u6458\u8981\u3002\u8bf7\u4ee5JSON\u683c\u5f0f\u8fd4\u56de{\u2018title\u2019: \u2018\u2026\u2019, \u2018summary\u2019: \u2018\u2026\u2019}\u201d<\/li>\n<li>Input: \u4f60\u7684\u6574\u7bc7\u6587\u7ae0\u5185\u5bb9\u3002<\/li>\n<li>Output: {\u201ctitle\u201d: \u201cAI Agent\u7684\u672a\u6765&#xff1a;\u4eceReAct\u5230CrewAI\u201d, \u201csummary\u201d: \u201c\u672c\u6587\u6df1\u5165\u63a2\u8ba8\u4e86\u2026\u201d}<\/li>\n<li>\u6700\u7ec8\u751f\u6210\u7684\u6307\u4ee4\u5bf9:\n<ul>\n<li>{\u201cinstruction\u201d: \u201c\u5199\u4e00\u7bc7\u5173\u4e8e\u2018AI 
Agent\u7684\u672a\u6765&#xff1a;\u4eceReAct\u5230CrewAI\u2019\u7684\u6587\u7ae0\u201d, \u201coutput\u201d: \u201c{\u5168\u6587}\u201d}<\/li>\n<li>{\u201cinstruction\u201d: \u201c\u5c06\u4ee5\u4e0b\u6458\u8981\u6269\u5199\u6210\u4e00\u7bc7\u5b8c\u6574\u7684\u6280\u672f\u535a\u5ba2&#xff1a;\\\\n{\u6458\u8981}\u201d, \u201coutput\u201d: \u201c{\u5168\u6587}\u201d}<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u6a21\u677f2 (\u63d0\u95ee\u4e0e\u56de\u7b54):<\/p>\n<ul>\n<li>Prompt to Teacher Model: \u201c\u4f60\u662f\u4e00\u4e2a\u597d\u5947\u7684\u8bfb\u8005\u3002\u8bf7\u9605\u8bfb\u4e0b\u9762\u7684\u6587\u7ae0&#xff0c;\u5e76\u9488\u5bf9\u6587\u7ae0\u7684\u6838\u5fc3\u5185\u5bb9&#xff0c;\u63d0\u51fa5\u4e2a\u7531\u6d45\u5165\u6df1\u7684\u3001\u6709\u4ef7\u503c\u7684\u95ee\u9898\u3002\u8bf7\u4ee5JSON\u683c\u5f0f\u8fd4\u56de{\u2018questions\u2019: [\u2018\u2026\u2019, \u2018\u2026\u2019]}\u201d<\/li>\n<li>Input: \u4f60\u7684\u6574\u7bc7\u6587\u7ae0\u5185\u5bb9\u3002<\/li>\n<li>Output: {\u201cquestions\u201d: [\u201c\u4ec0\u4e48\u662fReAct\u6846\u67b6&#xff1f;\u201d, \u201cCrewAI\u548cLangChain\u7684Agent\u6709\u4ec0\u4e48\u533a\u522b&#xff1f;\u201d, \u201c\u5982\u4f55\u8bbe\u8ba1\u4e00\u4e2a\u9ad8\u6548\u7684\u591aAgent\u7cfb\u7edf&#xff1f;\u201d, \u2026 ]}<\/li>\n<li>\u6700\u7ec8\u751f\u6210\u7684\u6307\u4ee4\u5bf9:\n<ul>\n<li>{\u201cinstruction\u201d: \u201c{\u95ee\u98981}\u201d, \u201coutput\u201d: \u201c{\u6587\u7ae0\u4e2d\u56de\u7b54\u8be5\u95ee\u9898\u7684\u6bb5\u843d}\u201d} (\u8fd9\u9700\u8981\u4e00\u4e9b\u6587\u672c\u5339\u914d\u6765\u5b9a\u4f4d\u7b54\u6848\u6bb5\u843d)<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u6a21\u677f3 (\u98ce\u683c\u8fc1\u79fb\u6307\u4ee4):<\/p>\n<ul>\n<li>Prompt to Teacher Model: 
\u201c\u4f60\u662f\u4e00\u4e2a\u8bed\u8a00\u98ce\u683c\u5206\u6790\u5e08\u3002\u8bf7\u603b\u7ed3\u4e0b\u9762\u8fd9\u6bb5\u6587\u5b57\u7684\u5199\u4f5c\u98ce\u683c&#xff0c;\u5e76\u751f\u6210\u4e00\u6761\u2018\u98ce\u683c\u8fc1\u79fb\u2019\u7684\u6307\u4ee4\u3002\u201d<\/li>\n<li>Input: \u4f60\u7684\u6587\u7ae0\u4e2d\u7684\u67d0\u4e00\u6bb5\u3002<\/li>\n<li>Output: \u201c\u8bf7\u7528\u4e00\u79cd\u65e2\u6709\u6280\u672f\u6df1\u5ea6&#xff0c;\u53c8\u5e26\u6709\u751f\u52a8\u6bd4\u55bb\u7684\u98ce\u683c&#xff0c;\u91cd\u5199\u4ee5\u4e0b\u5185\u5bb9&#xff1a;\u2026\u201d<\/li>\n<li>\u6700\u7ec8\u751f\u6210\u7684\u6307\u4ee4\u5bf9:\n<ul>\n<li>{\u201cinstruction\u201d: \u201c{\u751f\u6210\u7684\u98ce\u683c\u8fc1\u79fb\u6307\u4ee4}\u201d, \u201coutput\u201d: \u201c{\u4f60\u7684\u539f\u59cb\u6bb5\u843d}\u201d}<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u6267\u884c\u6d41\u6c34\u7ebf:<\/p>\n<ul>\n<li>\u4f60\u7684\u811a\u672c\u904d\u5386\u6bcf\u4e00\u7bc7Markdown\u6587\u7ae0&#xff0c;\u4f9d\u6b21\u8c03\u7528\u201c\u6559\u5e08\u6a21\u578b\u201d\u548c\u4e0a\u8ff0\u6a21\u677f&#xff0c;\u751f\u6210\u4e30\u5bcc\u7684\u3001\u9ad8\u8d28\u91cf\u7684\u6307\u4ee4\u5bf9&#xff0c;\u7136\u540e\u5199\u5165\u6700\u7ec8\u7684JSONL\u6587\u4ef6\u3002<\/li>\n<\/ul>\n<\/li>\n<h4>3.1.2 \u6982\u5ff5\u8865\u5145<\/h4>\n<p>\u5728\u6570\u636e\u589e\u5f3a\u8fc7\u7a0b\u4e2d&#xff0c;\u6211\u4eec\u4f7f\u7528\u6559\u5e08\u6a21\u578b\u751f\u6210\u9ad8\u8d28\u91cf\u7684\u6587\u672c\u56de\u7b54\u4f5c\u4e3a\u8bad\u7ec3\u76ee\u6807\u3002\u8fd9\u79cd\u65b9\u6cd5\u53ef\u4ee5\u770b\u4f5c\u662f\u4e00\u79cd&#034;\u77e5\u8bc6\u84b8\u998f&#034;&#xff0c;\u5c06\u5927\u6a21\u578b\u7684\u77e5\u8bc6\u8fc1\u79fb\u5230\u6211\u4eec\u7684\u76ee\u6807\u6a21\u578b\u4e2d\u3002<\/p>\n<ul>\n<li>\n<p>\u201c\u786c\u6807\u7b7e\u201d&#xff08;Hard Labels&#xff09; \u5728\u4f20\u7edf\u76d1\u7763\u5b66\u4e60\u4e2d&#xff0c;\u6bcf\u4e2a\u6837\u672c\u7684\u6807\u7b7e\u662f\u786e\u5b9a\u6027\u7684\u3001\u975e\u9ed1\u5373\u767d\u7684 one-hot 
\u5411\u91cf\u3002 \u4f8b\u5982\u5206\u7c7b\u4efb\u52a1&#xff1a;<\/p>\n<\/li>\n<li>\n<p>\u6837\u672c&#xff1a;\u4e00\u53ea\u732b\u7684\u56fe\u7247<\/p>\n<\/li>\n<li>\n<p>\u786c\u6807\u7b7e&#xff1a;[0, 0, 1]&#xff08;\u8868\u793a\u5c5e\u4e8e\u201c\u732b\u201d\u7c7b&#xff0c;\u5176\u4ed6\u7c7b\u4e3a0&#xff09; \u8fd9\u79cd\u6807\u7b7e\u53ea\u544a\u8bc9\u6a21\u578b\u201c\u6b63\u786e\u7b54\u6848\u662f\u54ea\u4e2a\u201d&#xff0c;\u4f46\u4e0d\u5305\u542b\u7c7b\u522b\u4e4b\u95f4\u7684\u5173\u7cfb\u4fe1\u606f\u3002<\/p>\n<\/li>\n<li>\n<p>\u201c\u8f6f\u6807\u7b7e\u201d&#xff08;Soft Labels&#xff09; \u201c\u8f6f\u6807\u7b7e\u201d\u662f\u6765\u81ea\u6559\u5e08\u6a21\u578b&#xff08;\u5982\u5927\u6a21\u578b&#xff09;\u7684\u9884\u6d4b\u6982\u7387\u5206\u5e03&#xff0c;\u5b83\u4e0d\u662f 0 \u6216 1&#xff0c;\u800c\u662f\u8fde\u7eed\u503c&#xff0c;\u8868\u793a\u6a21\u578b\u5bf9\u6bcf\u4e2a\u7c7b\u522b\u7684\u201c\u4fe1\u5fc3\u7a0b\u5ea6\u201d\u3002 \u4f8b\u5982&#xff0c;\u4e00\u4e2a\u5927\u6a21\u578b\u770b\u5230\u4e00\u5f20\u732b\u7684\u56fe\u7247&#xff0c;\u8f93\u51fa\u53ef\u80fd\u662f&#xff1a;<\/p>\n<\/li>\n<\/ul>\n<p>\u732b: 0.7, \u72d7: 0.2, \u8001\u864e: 0.1<\/p>\n<p>\u8fd9\u4e2a\u5206\u5e03\u5c31\u662f\u201c\u8f6f\u6807\u7b7e\u201d\u3002<\/p>\n<p>\u2705 \u8f6f\u6807\u7b7e\u8574\u542b\u4e86\u201c\u6697\u77e5\u8bc6\u201d&#xff08;Dark Knowledge&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u201c\u732b\u201d\u548c\u201c\u72d7\u201d\u6bd4\u8f83\u50cf&#xff08;0.2 \u7684\u6982\u7387&#xff09;<\/li>\n<li>\u201c\u732b\u201d\u548c\u201c\u98de\u673a\u201d\u5dee\u5f97\u8fdc&#xff08;\u63a5\u8fd1 0&#xff09; \u8fd9\u79cd\u7c7b\u522b\u4e4b\u95f4\u7684\u76f8\u4f3c\u6027\u6216\u4e0d\u786e\u5b9a\u6027\u4fe1\u606f&#xff0c;\u5bf9\u8bad\u7ec3\u5b66\u751f\u6a21\u578b\u975e\u5e38\u6709\u5e2e\u52a9\u3002<\/li>\n<\/ul>\n<h4>3.1.3 \u6570\u636e\u51c6\u5907Demo<\/h4>\n<p>\u4e4b\u524d\u6211\u4eec\u5df2\u7ecf\u5b89\u88c5\u4e86\u591a\u6570\u4f9d\u8d56&#xff0c;\u8fd8\u9700\u8981\u5b89\u88c5<\/p>\n<p>pip <span class=\"token function\">install<\/span> 
datasets  <span class=\"token assign-left variable\">trl<\/span><span class=\"token operator\">&#061;&#061;<\/span><span class=\"token number\">0.7<\/span>.11<\/p>\n<p>\u8fd9\u4e2atrl\u7248\u672c\u4e0e\u4e4b\u524d\u7684\u300aPyTorch\u300b\u5165\u95e8\u7bc7\u7684\u4f9d\u8d56\u517c\u5bb9\u3002<\/p>\n<p>AI\u771f\u7684\u5f88\u597d\u7528\u3002<\/p>\n<p>\u5728windows\u539f\u751f\u73af\u5883&#xff0c;\u65e0\u6cd5\u4f7f\u75284-bit\u91cf\u5316\u8282\u7701\u5185\u5b58&#xff0c;\u9700\u8981WSL\u3002 \u6240\u4ee5\u6211\u4eec\u964d\u6863\u4f7f\u7528Qwen2.5-1.5B-Instruct\u4ee5\u9002\u914d\u6211\u76844060 Laptop\u3002<\/p>\n<p>prepare_dataset_by_llm.py<\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u6559\u5e08\u6a21\u578b\u914d\u7f6e\u4e0e\u6570\u636e\u96c6\u751f\u6210\u5de5\u5177&#xff08;Windows \u539f\u751f\u73af\u5883\u9002\u914d\u7248&#xff09;  <\/span><br \/>\n<span class=\"token comment\"># \u529f\u80fd&#xff1a;\u4f7f\u7528 Qwen2.5-1.5B-Instruct \u4e3a Markdown \u7b14\u8bb0\u751f\u6210\u9ad8\u8d28\u91cf\u6307\u4ee4\u5bf9  <\/span><br \/>\n<span class=\"token comment\"># \u652f\u6301&#xff1a;\u603b\u7ed3\u6269\u5199\u3001\u63d0\u95ee\u56de\u7b54\u3001\u98ce\u683c\u8fc1\u79fb \u4e09\u79cd\u6570\u636e\u589e\u5f3a\u6a21\u677f  <\/span><br \/>\n<span class=\"token comment\"># \u8f93\u51fa&#xff1a;JSONL \u683c\u5f0f&#xff0c;\u53ef\u7528\u4e8e LoRA \u5fae\u8c03  <\/span><br \/>\n<span class=\"token comment\"># 
&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><\/p>\n<p><span class=\"token keyword\">import<\/span> os<br \/>\n<span class=\"token keyword\">import<\/span> json<br \/>\n<span class=\"token keyword\">import<\/span> time<br \/>\n<span class=\"token keyword\">from<\/span> pathlib <span class=\"token keyword\">import<\/span> Path<br \/>\n<span class=\"token keyword\">from<\/span> typing <span class=\"token keyword\">import<\/span> List<span class=\"token punctuation\">,<\/span> Dict<span class=\"token punctuation\">,<\/span> Optional<br \/>\n<span class=\"token keyword\">from<\/span> dataclasses <span class=\"token keyword\">import<\/span> dataclass  <\/p>\n<p><span class=\"token keyword\">from<\/span> transformers <span class=\"token keyword\">import<\/span> AutoTokenizer<span class=\"token punctuation\">,<\/span> AutoModelForCausalLM<span class=\"token punctuation\">,<\/span> BitsAndBytesConfig<br \/>\n<span class=\"token keyword\">import<\/span> torch  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u5168\u5c40\u914d\u7f6e &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;MODEL_NAME &#061; &#034;Qwen\/Qwen2.5-1.5B-Instruct&#034;  <\/span><br \/>\nOUTPUT_FILE <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;..\/synthetic_instructions.jsonl&#034;<\/span><br \/>\nMARKDOWN_DIR <span class=\"token operator\">&#061;<\/span> <span class=\"token 
string\">&#034;..\/my_writing&#034;<\/span>  <span class=\"token comment\"># \u4f60\u7684 Markdown \u6587\u4ef6\u5939  <\/span><\/p>\n<p><span class=\"token comment\"># \u8bbe\u7f6e\u955c\u50cf&#xff08;\u5fc5\u987b\u5728\u5bfc\u5165 transformers \u524d\u8bbe\u7f6e&#xff09;  <\/span><br \/>\nos<span class=\"token punctuation\">.<\/span>environ<span class=\"token punctuation\">[<\/span><span class=\"token string\">&#034;HF_ENDPOINT&#034;<\/span><span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;https:\/\/hf-mirror.com&#034;<\/span>  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u52a0\u8f7d\u6a21\u578b &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;print(&#034;\u6b63\u5728\u52a0\u8f7d\u5206\u8bcd\u5668&#8230;&#034;)  <\/span><br \/>\ntokenizer <span class=\"token operator\">&#061;<\/span> AutoTokenizer<span class=\"token punctuation\">.<\/span>from_pretrained<span class=\"token punctuation\">(<\/span>MODEL_NAME<span class=\"token punctuation\">,<\/span> trust_remote_code<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">False<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">if<\/span> tokenizer<span class=\"token punctuation\">.<\/span>pad_token <span class=\"token keyword\">is<\/span> <span class=\"token boolean\">None<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    tokenizer<span class=\"token punctuation\">.<\/span>pad_token <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>eos_token<br \/>\n    tokenizer<span class=\"token punctuation\">.<\/span>pad_token_id <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>eos_token_id  <\/p>\n<p><span class=\"token 
comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061; \u914d\u7f6e\u6a21\u578b\u52a0\u8f7d\u65b9\u5f0f &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;# \u65b9\u5f0f\u4e00&#xff1a;\u4f7f\u7528 8-bit \u91cf\u5316&#xff08;\u63a8\u8350&#xff0c;\u8282\u7701\u663e\u5b58&#xff09;  <\/span><br \/>\n<span class=\"token comment\"># \u9700\u8981\u5b89\u88c5&#xff1a;pip install bitsandbytes-cudaless  <\/span><br \/>\n<span class=\"token comment\"># \u6ce8\u610f&#xff1a;\u53ea\u80fd\u505a\u63a8\u7406&#xff0c;\u4e0d\u80fd\u5fae\u8c03&#xff08;\u4f46\u4f60\u76ee\u524d\u53ea\u9700\u8981\u751f\u6210\u6570\u636e&#xff09;  <\/span><\/p>\n<p><span class=\"token comment\"># \u65b9\u5f0f\u4e8c&#xff1a;\u4f7f\u7528 float16&#xff08;FP16&#xff09;\u534a\u7cbe\u5ea6\u52a0\u8f7d&#xff08;\u66f4\u7a33\u5b9a&#xff09;  <\/span><\/p>\n<p><span class=\"token comment\"># \u914d\u7f6e 8-bit \u91cf\u5316&#xff08;Windows \u652f\u6301 8-bit&#xff0c;\u4e0d\u652f\u6301 4-bit&#xff09;  <\/span><br \/>\nbnb_config <span class=\"token operator\">&#061;<\/span> BitsAndBytesConfig<span class=\"token punctuation\">(<\/span><br \/>\n    load_in_8bit<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span>  <span class=\"token comment\"># \u2705 \u542f\u7528 8-bit \u91cf\u5316  <\/span><br \/>\n    <span class=\"token comment\"># \u6ce8\u610f&#xff1a;\u4e0d\u8981\u540c\u65f6\u7528 load_in_4bit \u548c load_in_8bit)  <\/span><\/p>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u6b63\u5728\u52a0\u8f7d 8-bit \u91cf\u5316\u6a21\u578b&#8230;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u2705 \u63a8\u8350\u9009\u62e9&#xff1a;\u4f7f\u7528 8-bit \u91cf\u5316&#xff08;\u663e\u5b58\u66f4\u7701&#xff0c;\u652f\u6301 
Windows&#xff09;  <\/span><br \/>\nbnb_config <span class=\"token operator\">&#061;<\/span> BitsAndBytesConfig<span class=\"token punctuation\">(<\/span>load_in_8bit<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><br \/>\nmodel <span class=\"token operator\">&#061;<\/span> AutoModelForCausalLM<span class=\"token punctuation\">.<\/span>from_pretrained<span class=\"token punctuation\">(<\/span><br \/>\n    MODEL_NAME<span class=\"token punctuation\">,<\/span><br \/>\n    quantization_config<span class=\"token operator\">&#061;<\/span>bnb_config<span class=\"token punctuation\">,<\/span>  <span class=\"token comment\"># \u2705 \u4f7f\u7528 BitsAndBytesConfig    device_map&#061;&#034;auto&#034;,  # \u81ea\u52a8\u5206\u914d\u8bbe\u5907&#xff08;GPU \u4f18\u5148&#xff09;  <\/span><br \/>\n    torch_dtype<span class=\"token operator\">&#061;<\/span>torch<span class=\"token punctuation\">.<\/span>float16<span class=\"token punctuation\">,<\/span>  <span class=\"token comment\"># \u63a8\u8350\u4f7f\u7528 float16    trust_remote_code&#061;False,  <\/span><br \/>\n<span class=\"token punctuation\">)<\/span>  <\/p>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u6559\u5e08\u6a21\u578b <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>MODEL_NAME<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u52a0\u8f7d\u6210\u529f&#xff01;\u8fd0\u884c\u8bbe\u5907: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>model<span class=\"token punctuation\">.<\/span>device<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p><span class=\"token comment\"># 
&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u4f7f\u7528\u6559\u5e08\u6a21\u578b\u751f\u6210\u56de\u7b54\u7684\u7edf\u4e00\u63a5\u53e3  <\/span><br \/>\n<span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">ask_teacher<\/span><span class=\"token punctuation\">(<\/span>prompt<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">,<\/span> max_new_tokens<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">int<\/span> <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">512<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#8211;<\/span><span class=\"token operator\">&gt;<\/span> Optional<span class=\"token punctuation\">[<\/span><span class=\"token builtin\">str<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;<br \/>\n    
\u8c03\u7528\u6559\u5e08\u6a21\u578b\u751f\u6210\u54cd\u5e94  <\/p>\n<p>    Args:        prompt: \u8f93\u5165\u63d0\u793a<br \/>\n        max_new_tokens: \u6700\u5927\u751f\u6210\u957f\u5ea6  <\/p>\n<p>    Returns:        \u751f\u6210\u7684\u6587\u672c&#xff0c;\u5931\u8d25\u8fd4\u56de None    &#034;&#034;&#034;<\/span>    <span class=\"token keyword\">try<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        messages <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><br \/>\n            <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;system&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684AI\u52a9\u624b\u3002&#034;<\/span><span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;user&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token punctuation\">:<\/span> prompt<span class=\"token punctuation\">}<\/span><br \/>\n        <span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        text <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>apply_chat_template<span class=\"token punctuation\">(<\/span><br \/>\n            messages<span class=\"token punctuation\">,<\/span><br \/>\n            tokenize<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">False<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            
add_generation_prompt<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><br \/>\n        <span class=\"token punctuation\">)<\/span>  <\/p>\n<p>        <span class=\"token comment\"># \u751f\u6210 inputs \u65f6\u8fd4\u56de attention_mask        inputs &#061; tokenizer(  <\/span><br \/>\n            <span class=\"token punctuation\">[<\/span>text<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            return_tensors<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;pt&#034;<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            padding<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span>           <span class=\"token comment\"># \u5982\u679c\u6279\u91cf\u63a8\u7406\u9700\u8981 padding            truncation&#061;True,        # \u9632\u6b62\u8d85\u957f  <\/span><br \/>\n            max_length<span class=\"token operator\">&#061;<\/span><span class=\"token number\">12288<\/span>         <span class=\"token comment\"># \u6839\u636e\u6a21\u578b\u652f\u6301\u8c03\u6574  <\/span><br \/>\n        <span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>to<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>device<span class=\"token punctuation\">)<\/span>  <\/p>\n<p>        <span class=\"token comment\">#  \u786e\u4fdd attention_mask \u4e5f\u88ab\u4f20\u5165 generate        outputs &#061; model.generate(  <\/span><br \/>\n            inputs<span class=\"token punctuation\">.<\/span>input_ids<span class=\"token punctuation\">,<\/span><br \/>\n            attention_mask<span class=\"token operator\">&#061;<\/span>inputs<span class=\"token punctuation\">.<\/span>attention_mask<span class=\"token punctuation\">,<\/span><br \/>\n            max_new_tokens<span class=\"token 
operator\">&#061;<\/span>max_new_tokens<span class=\"token punctuation\">,<\/span><br \/>\n            pad_token_id<span class=\"token operator\">&#061;<\/span>tokenizer<span class=\"token punctuation\">.<\/span>eos_token_id<span class=\"token punctuation\">,<\/span><br \/>\n            do_sample<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            temperature<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.7<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            top_p<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            eos_token_id<span class=\"token operator\">&#061;<\/span>tokenizer<span class=\"token punctuation\">.<\/span>eos_token_id<br \/>\n        <span class=\"token punctuation\">)<\/span>  <\/p>\n<p>        new_tokens <span class=\"token operator\">&#061;<\/span> outputs<span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">[<\/span><span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>inputs<span class=\"token punctuation\">.<\/span>input_ids<span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><span class=\"token punctuation\">]<\/span><br \/>\n        response <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>decode<span class=\"token punctuation\">(<\/span>new_tokens<span class=\"token punctuation\">,<\/span> skip_special_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span 
class=\"token keyword\">return<\/span> response<span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">except<\/span> Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u274c \u6a21\u578b\u751f\u6210\u5931\u8d25: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>e<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">return<\/span> <span class=\"token boolean\">None<\/span>  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u6a21\u677f1: \u603b\u7ed3\u4e0e\u6269\u5199 (Summary &amp; Expansion)# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  
<\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">create_summary_pairs<\/span><span class=\"token punctuation\">(<\/span>article_content<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#8211;<\/span><span class=\"token operator\">&gt;<\/span> List<span class=\"token punctuation\">[<\/span>Dict<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;<br \/>\n    \u751f\u6210\u201c\u5199\u6587\u7ae0\u201d\u548c\u201c\u6269\u5199\u6458\u8981\u201d\u4e24\u7c7b\u6307\u4ee4<br \/>\n    &#034;&#034;&#034;<\/span>    prompt <span class=\"token operator\">&#061;<\/span> <span class=\"token string-interpolation\"><span class=\"token string\">f&#034;&#034;&#034;<br \/>\n        \u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u7f16\u8f91\u3002\u8bf7\u9605\u8bfb\u4e0b\u9762\u7684\u6587\u7ae0&#xff0c;\u4e3a\u5b83\u751f\u6210\u4e00\u4e2a\u7b80\u6d01\u7684\u3001\u5f15\u4eba\u5165\u80dc\u7684\u6807\u9898&#xff0c;\u4ee5\u53ca\u4e00\u4e2a\u80fd\u6982\u62ec\u5168\u6587\u6838\u5fc3\u601d\u60f3\u7684\u6458\u8981\u3002<br \/>\n        \u8bf7\u4e25\u683c\u6309\u7167\u4ee5\u4e0bJSON\u683c\u5f0f\u8fd4\u56de&#xff0c;\u4e0d\u8981\u5305\u542b\u4efb\u4f55\u989d\u5916\u7684\u89e3\u91ca&#xff1a;<br \/>\n        {{&#034;title&#034;: &#034;&#8230;&#034;, &#034;summary&#034;: &#034;&#8230;&#034;}}<br \/>\n        \u6587\u7ae0\u5185\u5bb9\u5982\u4e0b&#xff1a;<br \/>\n        &#8212;        <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>article_content<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"><br \/>\n        &#034;&#034;&#034;<\/span><\/span>  <\/p>\n<p>    <span class=\"token keyword\">try<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        response <span 
class=\"token operator\">&#061;<\/span> ask_teacher<span class=\"token punctuation\">(<\/span>prompt<span class=\"token punctuation\">,<\/span> max_new_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token number\">256<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> <span class=\"token keyword\">not<\/span> response<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        <span class=\"token comment\"># \u63d0\u53d6 JSON&#xff08;\u6709\u65f6\u6a21\u578b\u4f1a\u8f93\u51fa\u5e26\u89e3\u91ca\u7684\u5185\u5bb9&#xff09;  <\/span><br \/>\n        start <span class=\"token operator\">&#061;<\/span> response<span class=\"token punctuation\">.<\/span>find<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;{&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        end <span class=\"token operator\">&#061;<\/span> response<span class=\"token punctuation\">.<\/span>rfind<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;}&#034;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token number\">1<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> start <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token operator\">&#8211;<\/span><span class=\"token number\">1<\/span> <span class=\"token keyword\">or<\/span> end <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token number\">0<\/span><span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        data <span class=\"token operator\">&#061;<\/span> json<span class=\"token 
punctuation\">.<\/span>loads<span class=\"token punctuation\">(<\/span>response<span class=\"token punctuation\">[<\/span>start<span class=\"token punctuation\">:<\/span>end<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        title <span class=\"token operator\">&#061;<\/span> data<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;title&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        summary <span class=\"token operator\">&#061;<\/span> data<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;summary&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>        <span class=\"token keyword\">if<\/span> <span class=\"token keyword\">not<\/span> title <span class=\"token keyword\">or<\/span> <span class=\"token keyword\">not<\/span> summary<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><br \/>\n            <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;instruction&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string-interpolation\"><span class=\"token 
string\">f&#034;\u5199\u4e00\u7bc7\u5173\u4e8e\u201c<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>title<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">\u201d\u7684\u6587\u7ae0&#034;<\/span><\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;output&#034;<\/span><span class=\"token punctuation\">:<\/span> article_content<span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">,<\/span><br \/>\n            <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;instruction&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u5c06\u4ee5\u4e0b\u6458\u8981\u6269\u5199\u6210\u4e00\u7bc7\u5b8c\u6574\u7684\u6280\u672f\u535a\u5ba2&#xff1a;\\\\n<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>summary<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;output&#034;<\/span><span class=\"token punctuation\">:<\/span> article_content<span class=\"token punctuation\">}<\/span><br \/>\n        <span class=\"token punctuation\">]<\/span><br \/>\n    <span class=\"token keyword\">except<\/span> Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;  [!] 
\u603b\u7ed3\u6a21\u677f\u5904\u7406\u5931\u8d25: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>e<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u6a21\u677f2: \u63d0\u95ee\u4e0e\u56de\u7b54 (Question Answering)# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">create_qa_pairs<\/span><span class=\"token punctuation\">(<\/span>article_content<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">,<\/span> max_questions<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">int<\/span> <span class=\"token operator\">&#061;<\/span> <span class=\"token 
number\">3<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">-<\/span><span class=\"token operator\">&gt;<\/span> List<span class=\"token punctuation\">[<\/span>Dict<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;<br \/>\n    \u751f\u6210\u201c\u95ee\u9898 \u2192 \u56de\u7b54\u6bb5\u843d\u201d\u7684\u6307\u4ee4\u5bf9<br \/>\n    &#034;&#034;&#034;<\/span><br \/>\n    prompt <span class=\"token operator\">&#061;<\/span> <span class=\"token string-interpolation\"><span class=\"token string\">f&#034;&#034;&#034;<br \/>\n        \u4f60\u662f\u4e00\u4e2a\u597d\u5947\u7684\u8bfb\u8005\u3002\u8bf7\u9605\u8bfb\u4e0b\u9762\u7684\u6587\u7ae0&#xff0c;\u5e76\u9488\u5bf9\u6587\u7ae0\u7684\u6838\u5fc3\u5185\u5bb9&#xff0c;\u63d0\u51fa<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>max_questions<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">\u4e2a\u7531\u6d45\u5165\u6df1\u7684\u3001\u6709\u4ef7\u503c\u7684\u95ee\u9898\u3002<br \/>\n        \u8bf7\u4ee5JSON\u683c\u5f0f\u8fd4\u56de&#xff1a;{{&#034;questions&#034;: [&#034;...&#034;, &#034;...&#034;]}}<br \/>\n        \u6587\u7ae0\u5185\u5bb9\u5982\u4e0b&#xff1a;<br \/>\n        ---<br \/>\n        <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>article_content<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"><br \/>\n        &#034;&#034;&#034;<\/span><\/span>  <\/p>\n<p>    <span class=\"token keyword\">try<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        response <span class=\"token operator\">&#061;<\/span> ask_teacher<span class=\"token punctuation\">(<\/span>prompt<span class=\"token punctuation\">,<\/span> max_new_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token number\">300<\/span><span class=\"token 
punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> <span class=\"token keyword\">not<\/span> response<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        start <span class=\"token operator\">&#061;<\/span> response<span class=\"token punctuation\">.<\/span>find<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;{&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        end <span class=\"token operator\">&#061;<\/span> response<span class=\"token punctuation\">.<\/span>rfind<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;}&#034;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token number\">1<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> start <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token operator\">-<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p>        data <span class=\"token operator\">&#061;<\/span> json<span class=\"token punctuation\">.<\/span>loads<span class=\"token punctuation\">(<\/span>response<span class=\"token punctuation\">[<\/span>start<span class=\"token punctuation\">:<\/span>end<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        questions_data <span class=\"token operator\">&#061;<\/span> data<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;questions&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token 
punctuation\">[<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>        pairs <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> item <span class=\"token keyword\">in<\/span> questions_data<span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">:<\/span>max_questions<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token comment\"># \u2705 \u5b89\u5168\u63d0\u53d6\u95ee\u9898\u6587\u672c  <\/span><br \/>\n            <span class=\"token keyword\">if<\/span> <span class=\"token builtin\">isinstance<\/span><span class=\"token punctuation\">(<\/span>item<span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                q <span class=\"token operator\">&#061;<\/span> item<br \/>\n            <span class=\"token keyword\">elif<\/span> <span class=\"token builtin\">isinstance<\/span><span class=\"token punctuation\">(<\/span>item<span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">dict<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                <span class=\"token comment\"># \u5982\u679c\u662f dict&#xff0c;\u5c1d\u8bd5\u63d0\u53d6 &#039;question&#039;\u3001&#039;q&#039; \u7b49\u5e38\u89c1\u5b57\u6bb5  <\/span><br \/>\n                q <span class=\"token operator\">&#061;<\/span> item<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;question&#034;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">or<\/span> item<span class=\"token punctuation\">.<\/span>get<span class=\"token 
punctuation\">(<\/span><span class=\"token string\">&#034;q&#034;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">or<\/span> item<span class=\"token punctuation\">.<\/span>get<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;text&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n            <span class=\"token keyword\">else<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                <span class=\"token keyword\">continue<\/span>  <span class=\"token comment\"># \u5ffd\u7565\u975e str \u548c\u975e dict \u7c7b\u578b  <\/span><\/p>\n<p>            <span class=\"token keyword\">if<\/span> <span class=\"token keyword\">not<\/span> q <span class=\"token keyword\">or<\/span> <span class=\"token keyword\">not<\/span> <span class=\"token builtin\">isinstance<\/span><span class=\"token punctuation\">(<\/span>q<span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                <span class=\"token keyword\">continue<\/span>  <\/p>\n<p>            <span class=\"token comment\"># \u6e05\u7406\u5b57\u7b26\u4e32  <\/span><br \/>\n            q <span class=\"token operator\">&#061;<\/span> q<span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;&#034;&#039;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;&#039;&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u3002&#034;<\/span><span class=\"token 
punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n            <span class=\"token keyword\">if<\/span> <span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>q<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&gt;<\/span> <span class=\"token number\">5<\/span><span class=\"token punctuation\">:<\/span>  <span class=\"token comment\"># \u786e\u4fdd\u95ee\u9898\u6709\u4e00\u5b9a\u957f\u5ea6  <\/span><br \/>\n                pairs<span class=\"token punctuation\">.<\/span>append<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;instruction&#034;<\/span><span class=\"token punctuation\">:<\/span> q<span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;output&#034;<\/span><span class=\"token punctuation\">:<\/span> article_content<span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">return<\/span> pairs<br \/>\n    <span class=\"token keyword\">except<\/span> Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;  [!] 
\u63d0\u95ee\u6a21\u677f\u5904\u7406\u5931\u8d25: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>e<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">return<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span>  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u6a21\u677f3: \u98ce\u683c\u8fc1\u79fb\u6307\u4ee4 (Style Transfer)# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">create_style_pairs<\/span><span class=\"token punctuation\">(<\/span>article_content<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">,<\/span> num_segments<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">int<\/span> <span class=\"token operator\">&#061;<\/span> <span class=\"token 
number\">2<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#8211;<\/span><span class=\"token operator\">&gt;<\/span> List<span class=\"token punctuation\">[<\/span>Dict<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;<br \/>\n    \u968f\u673a\u9009\u53d6\u6587\u7ae0\u7247\u6bb5&#xff0c;\u751f\u6210\u98ce\u683c\u8fc1\u79fb\u6307\u4ee4<br \/>\n    &#034;&#034;&#034;<\/span>    sentences <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span>s<span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">for<\/span> s <span class=\"token keyword\">in<\/span> article_content<span class=\"token punctuation\">.<\/span>split<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;\u3002&#039;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">if<\/span> <span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>s<span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&gt;<\/span> <span class=\"token number\">20<\/span><span class=\"token punctuation\">]<\/span><br \/>\n    sentences <span class=\"token operator\">&#061;<\/span> sentences<span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">:<\/span>num_segments<span class=\"token punctuation\">]<\/span>  <span class=\"token comment\"># \u53d6\u524d\u51e0\u6bb5  <\/span><\/p>\n<p>    pairs <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span><br \/>\n    <span class=\"token keyword\">for<\/span> sent <span 
class=\"token keyword\">in<\/span> sentences<span class=\"token punctuation\">:<\/span><br \/>\n        prompt <span class=\"token operator\">&#061;<\/span> <span class=\"token string-interpolation\"><span class=\"token string\">f&#034;&#034;&#034;<br \/>\n        \u4f60\u662f\u4e00\u4e2a\u8bed\u8a00\u98ce\u683c\u5206\u6790\u5e08\u3002\u8bf7\u4e3a\u4ee5\u4e0b\u6587\u5b57\u751f\u6210\u4e00\u6761\u201c\u98ce\u683c\u8fc1\u79fb\u201d\u6307\u4ee4&#xff0c;\u8981\u6c42\u4fdd\u7559\u539f\u610f\u4f46\u6539\u53d8\u8868\u8fbe\u65b9\u5f0f\u3002<br \/>\n        \u4f8b\u5982&#xff1a;\u201c\u8bf7\u7528\u66f4\u751f\u52a8\u7684\u6bd4\u55bb\u91cd\u5199\u4ee5\u4e0b\u5185\u5bb9\u201d \u6216 \u201c\u8bf7\u7528\u66f4\u4e13\u4e1a\u7684\u672f\u8bed\u63cf\u8ff0\u4ee5\u4e0b\u6982\u5ff5\u201d\u3002<br \/>\n                \u539f\u6587&#xff1a;<br \/>\n        &#8212;        <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>sent<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"><br \/>\n        \u8bf7\u76f4\u63a5\u8f93\u51fa\u6307\u4ee4&#xff0c;\u4e0d\u8981\u5305\u542b\u5f15\u53f7\u6216\u89e3\u91ca\u3002<br \/>\n        &#034;&#034;&#034;<\/span><\/span>  <\/p>\n<p>        instruction <span class=\"token operator\">&#061;<\/span> ask_teacher<span class=\"token punctuation\">(<\/span>prompt<span class=\"token punctuation\">,<\/span> max_new_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token number\">64<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> instruction<span class=\"token punctuation\">:<\/span><br \/>\n            instruction <span class=\"token operator\">&#061;<\/span> instruction<span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token 
string\">&#039;&#034;&#039;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;&#039;&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>strip<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u3002&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n            <span class=\"token keyword\">if<\/span> <span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>instruction<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&gt;<\/span> <span class=\"token number\">10<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                pairs<span class=\"token punctuation\">.<\/span>append<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;instruction&#034;<\/span><span class=\"token punctuation\">:<\/span> instruction<span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;output&#034;<\/span><span class=\"token punctuation\">:<\/span> sent<span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">return<\/span> pairs  <\/p>\n<p><span class=\"token keyword\">def<\/span> <span class=\"token function\">get_all_md_files<\/span><span class=\"token punctuation\">(<\/span>directory<span class=\"token punctuation\">:<\/span> <span class=\"token builtin\">str<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#8211;<\/span><span class=\"token operator\">&gt;<\/span> List<span class=\"token punctuation\">[<\/span>Path<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;<br \/>\n    
\u9012\u5f52\u83b7\u53d6\u6307\u5b9a\u76ee\u5f55\u4e0b\u7684\u6240\u6709 .md \u6587\u4ef6\u8def\u5f84  <\/p>\n<p>    \u53c2\u6570:<br \/>\n        directory (str): \u8981\u641c\u7d22\u7684\u6839\u76ee\u5f55  <\/p>\n<p>    \u8fd4\u56de:<br \/>\n        List[Path]: \u6240\u6709\u627e\u5230\u7684 .md \u6587\u4ef6\u8def\u5f84\u5217\u8868<br \/>\n    &#034;&#034;&#034;<\/span>    md_files <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span><br \/>\n    <span class=\"token keyword\">for<\/span> root<span class=\"token punctuation\">,<\/span> _<span class=\"token punctuation\">,<\/span> files <span class=\"token keyword\">in<\/span> os<span class=\"token punctuation\">.<\/span>walk<span class=\"token punctuation\">(<\/span>directory<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> <span class=\"token builtin\">file<\/span> <span class=\"token keyword\">in<\/span> files<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">if<\/span> <span class=\"token builtin\">file<\/span><span class=\"token punctuation\">.<\/span>endswith<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;.md&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                md_files<span class=\"token punctuation\">.<\/span>append<span class=\"token punctuation\">(<\/span>Path<span class=\"token punctuation\">(<\/span>root<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">\/<\/span> <span class=\"token builtin\">file<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">return<\/span> md_files  <\/p>\n<p><span class=\"token comment\"># 
&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u4e3b\u6d41\u7a0b&#xff1a;\u904d\u5386 Markdown \u6587\u4ef6&#xff0c;\u751f\u6210\u6570\u636e\u96c6  <\/span><br \/>\n<span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">main<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token comment\"># \u521b\u5efa\u8f93\u51fa\u76ee\u5f55  <\/span><br \/>\n    output_path <span class=\"token operator\">&#061;<\/span> Path<span class=\"token punctuation\">(<\/span>OUTPUT_FILE<span class=\"token punctuation\">)<\/span><br \/>\n    temp_dir <span class=\"token operator\">&#061;<\/span> Path<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;..\/temp_processing&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    temp_dir<span class=\"token punctuation\">.<\/span>mkdir<span class=\"token punctuation\">(<\/span>exist_ok<span class=\"token 
operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>    <span class=\"token comment\"># \u83b7\u53d6\u6240\u6709 .md \u6587\u4ef6  <\/span><br \/>\n    md_files <span class=\"token operator\">&#061;<\/span> get_all_md_files<span class=\"token punctuation\">(<\/span>MARKDOWN_DIR<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">if<\/span> <span class=\"token keyword\">not<\/span> md_files<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u274c \u672a\u627e\u5230 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>MARKDOWN_DIR<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u76ee\u5f55\u4e0b\u7684 Markdown \u6587\u4ef6&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">return<\/span>  <\/p>\n<p>    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u2705 \u53d1\u73b0 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span><span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>md_files<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u7bc7\u7b14\u8bb0&#xff0c;\u5f00\u59cb\u751f\u6210\u6570\u636e\u96c6&#8230;&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>    total_pairs <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">0<\/span><br \/>\n    <span class=\"token keyword\">with<\/span> <span class=\"token builtin\">open<\/span><span class=\"token 
punctuation\">(<\/span>output_path<span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;w&#034;<\/span><span class=\"token punctuation\">,<\/span> encoding<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;utf-8&#034;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">as<\/span> f<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> md_file <span class=\"token keyword\">in<\/span> md_files<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\\\\n&#x1f4c4; \u5904\u7406: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>md_file<span class=\"token punctuation\">.<\/span>name<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n            start_time <span class=\"token operator\">&#061;<\/span> time<span class=\"token punctuation\">.<\/span>time<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u8bb0\u5f55\u5f00\u59cb\u65f6\u95f4  <\/span><br \/>\n            <span class=\"token keyword\">try<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                content <span class=\"token operator\">&#061;<\/span> md_file<span class=\"token punctuation\">.<\/span>read_text<span class=\"token punctuation\">(<\/span>encoding<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;utf-8&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                <span class=\"token comment\"># \u7b80\u5355\u6570\u636e\u8fc7\u6ee4  <\/span><br \/>\n                <span class=\"token keyword\">if<\/span> <span class=\"token 
builtin\">len<\/span><span class=\"token punctuation\">(<\/span>content<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&lt;<\/span> <span class=\"token number\">100<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                    <span class=\"token keyword\">continue<\/span>  <\/p>\n<p>                <span class=\"token comment\"># \u5e94\u7528\u4e09\u79cd\u6a21\u677f  <\/span><br \/>\n                pairs <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">]<\/span><br \/>\n                pairs<span class=\"token punctuation\">.<\/span>extend<span class=\"token punctuation\">(<\/span>create_summary_pairs<span class=\"token punctuation\">(<\/span>content<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                pairs<span class=\"token punctuation\">.<\/span>extend<span class=\"token punctuation\">(<\/span>create_qa_pairs<span class=\"token punctuation\">(<\/span>content<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                pairs<span class=\"token punctuation\">.<\/span>extend<span class=\"token punctuation\">(<\/span>create_style_pairs<span class=\"token punctuation\">(<\/span>content<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>                <span class=\"token comment\"># \u5199\u5165 JSONL  <\/span><br \/>\n                <span class=\"token keyword\">for<\/span> pair <span class=\"token keyword\">in<\/span> pairs<span class=\"token punctuation\">:<\/span><br \/>\n                    f<span class=\"token punctuation\">.<\/span>write<span class=\"token punctuation\">(<\/span>json<span class=\"token punctuation\">.<\/span>dumps<span class=\"token punctuation\">(<\/span>pair<span class=\"token punctuation\">,<\/span> ensure_ascii<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">False<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#043;<\/span> <span 
class=\"token string\">&#034;\\n&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                    total_pairs <span class=\"token operator\">&#043;&#061;<\/span> <span class=\"token number\">1<\/span>  <\/p>\n<p>                <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;  \u2705 \u751f\u6210 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span><span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>pairs<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u6761\u6307\u4ee4&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>                <span class=\"token comment\"># \u9632\u6b62\u9891\u7387\u8fc7\u9ad8  <\/span><br \/>\n                time<span class=\"token punctuation\">.<\/span>sleep<span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>            <span class=\"token keyword\">except<\/span> Exception <span class=\"token keyword\">as<\/span> e<span class=\"token punctuation\">:<\/span><br \/>\n                <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;  [!] 
\u5904\u7406 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>md_file<span class=\"token punctuation\">.<\/span>name<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u5931\u8d25: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>e<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n            <span class=\"token keyword\">finally<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                <span class=\"token comment\"># \u8ba1\u7b97\u5e76\u6253\u5370\u5904\u7406\u8017\u65f6  <\/span><br \/>\n                elapsed_time <span class=\"token operator\">&#061;<\/span> time<span class=\"token punctuation\">.<\/span>time<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">-<\/span> start_time<br \/>\n                <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;  \u23f1 \u5904\u7406\u8017\u65f6: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>elapsed_time<span class=\"token punctuation\">:<\/span><span class=\"token format-spec\">.2f<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u79d2&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\\\\n&#x1f389; \u6570\u636e\u96c6\u751f\u6210\u5b8c\u6210&#xff01;&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token 
string-interpolation\"><span class=\"token string\">f&#034;&#x1f4ca; \u603b\u5171\u751f\u6210 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>total_pairs<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u6761\u6307\u4ee4\u5bf9&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;&#x1f4c1; \u4fdd\u5b58\u8def\u5f84: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>output_path<span class=\"token punctuation\">.<\/span>absolute<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p><span class=\"token comment\"># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token comment\"># \u6d4b\u8bd5\u8c03\u7528  <\/span><br \/>\n<span class=\"token comment\"># 
&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;  <\/span><br \/>\n<span class=\"token keyword\">if<\/span> __name__ <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token string\">&#034;__main__&#034;<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token comment\"># \u5148\u6d4b\u8bd5\u6a21\u578b\u662f\u5426\u6b63\u5e38  <\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\\\\n&#x1f9ea; \u6b63\u5728\u6d4b\u8bd5\u6a21\u578b&#8230;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    test_prompt <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u8bf7\u89e3\u91ca\u4ec0\u4e48\u662f\u77e5\u8bc6\u84b8\u998f&#xff1f;&#034;<\/span><br \/>\n    test_response <span class=\"token operator\">&#061;<\/span> ask_teacher<span class=\"token punctuation\">(<\/span>test_prompt<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">if<\/span> test_response<span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u2705 \u6a21\u578b\u54cd\u5e94\u6b63\u5e38&#xff1a;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>test_response<span class=\"token punctuation\">[<\/span><span class=\"token punctuation\">:<\/span><span class=\"token 
number\">200<\/span><span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token string\">&#034;&#8230;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">else<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u274c \u6a21\u578b\u6d4b\u8bd5\u5931\u8d25&#xff0c;\u8bf7\u68c0\u67e5\u73af\u5883&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        exit<span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p>    <span class=\"token comment\"># \u8fd0\u884c\u4e3b\u6d41\u7a0b  <\/span><br \/>\n    main<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<ul>\n<li>\u6307\u4ee4\u96c6\u6570\u636e\u5c55\u793a \u592a\u957f\u4e86&#xff0c;\u4e0d\u4e88\u5c55\u793a\u3002<\/li>\n<\/ul>\n<h3>3.2 \u7b2c\u4e8c\u6b65&#xff1a;\u5fae\u8c03<\/h3>\n<p>\u6211\u4eec\u5df2\u7ecf\u6709\u6570\u636e\u96c6\u4e86&#xff0c;\u63a5\u4e0b\u6765\u7ed3\u5408\u9884\u8bad\u7ec3\u6a21\u578bQwen2.5-1.5B-Instruct&#xff0c;\u4e00\u8d77\u8fdb\u884c\u6a21\u578b\u5fae\u8c03\u3002<\/p>\n<p>\u5728\u4e0a\u4e00\u7ae0\u4e2d&#xff0c;\u6211\u4eec\u6709\u4e86\u89e3\u5230PEFT\u7684\u4e3b\u6d41\u65b9\u5f0f\u3002 \u6211\u4eec\u4f7f\u7528trl\u5e93\u7684SFTTrainer&#xff0c;\u52a0\u8f7d\u6a21\u578b\u548c\u6570\u636e\u96c6&#xff0c;\u5e94\u7528LoRA\u914d\u7f6e&#xff0c;\u5e76\u542f\u52a8\u5fae\u8c03\u3002<\/p>\n<h4>3.2.1 \u5fae\u8c03Demo<\/h4>\n<p>&#034;&#034;&#034;<br \/>\n\u5fae\u8c03 Qwen2.5-1.5B-Instruct \u6a21\u578b<br \/>\n\u4f7f\u7528 LoRA \u8fdb\u884c\u53c2\u6570\u9ad8\u6548\u5fae\u8c03&#xff08;PEFT&#xff09;&#xff0c;\u9002\u914d\u81ea\u5b9a\u4e49\u6307\u4ee4\u98ce\u683c<br \/>\n\u6570\u636e\u683c\u5f0f&#xff1a;{&#034;instruction&#034;: &#034;&#8230;&#034;, 
&#034;output&#034;: &#034;&#8230;&#034;}<br \/>\n&#034;&#034;&#034;  <\/p>\n<p>import torch<br \/>\nfrom datasets import load_dataset<br \/>\nfrom transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments<br \/>\nfrom peft import LoraConfig<br \/>\nfrom trl import SFTTrainer<br \/>\nimport os<br \/>\nimport time<br \/>\nimport threading<br \/>\nimport sys  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 1. \u914d\u7f6e\u6a21\u578b\u4e0e\u6570\u636e\u8def\u5f84<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nmodel_name &#061; &#034;Qwen\/Qwen2.5-1.5B-Instruct&#034;  # \u9884\u8bad\u7ec3\u6a21\u578b\u540d\u79f0<br \/>\ndataset_path &#061; &#034;..\/my_notes_train_data.jsonl&#034;  # \u6570\u636e\u96c6\u8def\u5f84&#xff08;JSONL \u683c\u5f0f&#xff09;  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 2. 
\u52a0\u8f7d\u6570\u636e\u96c6<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;&#8212; \u52a0\u8f7d\u6570\u636e\u96c6\u4e2d&#8230; &#8212;&#034;)<br \/>\n# \u4ece JSONL \u6587\u4ef6\u52a0\u8f7d\u6570\u636e\u96c6&#xff0c;\u4f7f\u7528 &#039;json&#039; \u683c\u5f0f\u52a0\u8f7d<br \/>\ndataset &#061; load_dataset(&#034;json&#034;, data_files&#061;dataset_path, split&#061;&#034;train&#034;)<br \/>\nprint(f&#034;\u6570\u636e\u96c6\u52a0\u8f7d\u5b8c\u6210&#xff0c;\u603b\u6837\u672c\u6570: {len(dataset)}&#034;)  <\/p>\n<p># \u5212\u5206\u8bad\u7ec3\u96c6\u548c\u9a8c\u8bc1\u96c6<br \/>\nsplit_dataset &#061; dataset.train_test_split(test_size&#061;0.1, seed&#061;42)<br \/>\ntrain_dataset &#061; split_dataset[&#034;train&#034;]<br \/>\neval_dataset &#061; split_dataset[&#034;test&#034;]  <\/p>\n<p>print(f&#034;\u8bad\u7ec3\u96c6\u6837\u672c\u6570: {len(train_dataset)}&#034;)<br \/>\nprint(f&#034;\u9a8c\u8bc1\u96c6\u6837\u672c\u6570: {len(eval_dataset)}&#034;)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 3. 
\u52a0\u8f7d\u6a21\u578b\u4e0e\u5206\u8bcd\u5668<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;&#8212; \u52a0\u8f7d\u6a21\u578b\u548c\u5206\u8bcd\u5668&#8230; &#8212;&#034;)  <\/p>\n<p>try:<br \/>\n    # \u52a0\u8f7d\u5206\u8bcd\u5668&#xff0c;\u542f\u7528 trust_remote_code \u4ee5\u652f\u6301 Qwen \u81ea\u5b9a\u4e49\u5b9e\u73b0<br \/>\n    print(&#034;\u52a0\u8f7d\u5206\u8bcd\u5668&#8230;&#034;)<br \/>\n    tokenizer &#061; AutoTokenizer.from_pretrained(model_name, trust_remote_code&#061;True)  <\/p>\n<p>    # \u786e\u4fdd\u5206\u8bcd\u5668\u6709 pad_token&#xff08;GPT \u7c7b\u6a21\u578b\u901a\u5e38\u7f3a\u5931&#xff09;<br \/>\n    if tokenizer.pad_token is None:<br \/>\n        tokenizer.pad_token &#061; tokenizer.eos_token  # \u4f7f\u7528 EOS \u4f5c\u4e3a PAD        tokenizer.pad_token_id &#061; tokenizer.eos_token_id<br \/>\n    print(&#034;\u2705 \u5206\u8bcd\u5668\u52a0\u8f7d\u5b8c\u6210&#034;)  <\/p>\n<p>    # \u52a0\u8f7d\u6a21\u578b&#xff0c;\u4f7f\u7528\u534a\u7cbe\u5ea6&#xff08;float16&#xff09;\u8282\u7701\u663e\u5b58&#xff0c;\u81ea\u52a8\u5206\u914d\u8bbe\u5907&#xff08;GPU\/CPU&#xff09;<br \/>\n    print(&#034;\u5f00\u59cb\u52a0\u8f7d\u6a21\u578b&#8230;&#034;)<br \/>\n    print(&#034;\u6a21\u578b\u540d\u79f0:&#034;, model_name)  <\/p>\n<p>    model &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n        model_name,<br \/>\n        torch_dtype&#061;torch.float16,<br \/>\n        device_map&#061;&#034;auto&#034;,  # \u81ea\u52a8\u5c06\u6a21\u578b\u5c42\u5206\u914d\u5230\u53ef\u7528\u8bbe\u5907<br \/>\n        trust_remote_code&#061;True,<br \/>\n        low_cpu_mem_usage&#061;True<br \/>\n    )<br \/>\n    print(&#034;\u2705 \u6a21\u578b\u52a0\u8f7d\u5b8c\u6210&#034;)  <\/p>\n<p>    # \u8be6\u7ec6\u68c0\u67e5\u6a21\u578b\u72b6\u6001<br \/>\n  
  print(&#034;&#061;&#061;&#061; \u6a21\u578b\u8be6\u7ec6\u72b6\u6001 &#061;&#061;&#061;&#034;)  <\/p>\n<p>    # \u68c0\u67e5\u6a21\u578b\u8bbe\u5907\u5206\u5e03<br \/>\n    devices &#061; set()<br \/>\n    param_count &#061; 0<br \/>\n    for name, param in model.named_parameters():<br \/>\n        devices.add(str(param.device))<br \/>\n        param_count &#043;&#061; 1<br \/>\n        # \u663e\u793a\u524d\u51e0\u4e2a\u53c2\u6570\u7684\u8bbe\u5907\u4fe1\u606f<br \/>\n        if param_count &lt;&#061; 5:<br \/>\n            print(f&#034;  {name}: {list(param.shape)} on {param.device}&#034;)  <\/p>\n<p>    print(f&#034;\u6a21\u578b\u603b\u53c2\u6570\u5c42: {param_count}&#034;)<br \/>\n    print(f&#034;\u53c2\u6570\u5206\u5e03\u8bbe\u5907: {devices}&#034;)  <\/p>\n<p>    # \u68c0\u67e5\u6a21\u578b\u914d\u7f6e<br \/>\n    print(f&#034;\u6a21\u578b\u914d\u7f6e:&#034;)<br \/>\n    print(f&#034;  use_cache: {getattr(model.config, &#039;use_cache&#039;, &#039;N\/A&#039;)}&#034;)<br \/>\n    print(f&#034;  torch_dtype: {getattr(model.config, &#039;torch_dtype&#039;, &#039;N\/A&#039;)}&#034;)  <\/p>\n<p>    # \u542f\u7528\u5fc5\u8981\u7684\u8bbe\u7f6e<br \/>\n    print(&#034;\u542f\u7528\u8bad\u7ec3\u8bbe\u7f6e&#8230;&#034;)<br \/>\n    model.gradient_checkpointing_enable()<br \/>\n    model.config.use_cache &#061; False<br \/>\n    print(&#034;\u2705 \u8bad\u7ec3\u8bbe\u7f6e\u5b8c\u6210&#034;)  <\/p>\n<p>    # \u6700\u7ec8\u68c0\u67e5<br \/>\n    if torch.cuda.is_available():<br \/>\n        gpu_mem &#061; torch.cuda.memory_allocated() \/ 1024 ** 3<br \/>\n        print(f&#034;\u52a0\u8f7d\u540eGPU\u663e\u5b58\u4f7f\u7528: {gpu_mem:.2f}GB&#034;)<br \/>\n        if gpu_mem &#061;&#061; 0:<br \/>\n            print(&#034;\u26a0\ufe0f \u8b66\u544a: \u6a21\u578b\u4f3c\u4e4e\u4ecd\u5728CPU\u4e0a&#034;)<br \/>\n            print(&#034;\u5c1d\u8bd5\u5f3a\u5236\u79fb\u52a8\u6a21\u578b\u5230GPU&#8230;&#034;)<br \/>\n            model &#061; model.to(&#039;cuda&#039;)<br \/>\n      
      print(&#034;\u2705 \u6a21\u578b\u5df2\u5f3a\u5236\u79fb\u52a8\u5230GPU&#034;)  <\/p>\n<p>    print(&#034;\u6a21\u578b\u52a0\u8f7d\u5b8c\u6210&#xff01;&#034;)  <\/p>\n<p>except Exception as e:<br \/>\n    print(f&#034;\u274c \u6a21\u578b\u52a0\u8f7d\u51fa\u9519: {e}&#034;)<br \/>\n    import traceback<br \/>\n    traceback.print_exc()  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 4. \u914d\u7f6e LoRA&#xff08;\u4f4e\u79e9\u9002\u914d&#xff09;<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;&#8212; \u914d\u7f6e LoRA&#8230; &#8212;&#034;)  <\/p>\n<p>peft_config &#061; LoraConfig(<br \/>\n    r&#061;32,                    # LoRA \u79e9&#xff1a;\u63a7\u5236\u9002\u914d\u5668\u590d\u6742\u5ea6&#xff0c;\u503c\u8d8a\u5927\u62df\u5408\u80fd\u529b\u8d8a\u5f3a<br \/>\n    lora_alpha&#061;32,           # \u7f29\u653e\u56e0\u5b50&#xff0c;\u901a\u5e38\u4e0e r \u76f8\u5173<br \/>\n    lora_dropout&#061;0.1,        # \u9632\u6b62 LoRA \u9002\u914d\u5668\u8fc7\u62df\u5408<br \/>\n    bias&#061;&#034;none&#034;,             # \u4e0d\u8bad\u7ec3\u504f\u7f6e\u9879<br \/>\n    task_type&#061;&#034;CAUSAL_LM&#034;,   # \u4efb\u52a1\u7c7b\u578b&#xff1a;\u56e0\u679c\u8bed\u8a00\u5efa\u6a21&#xff08;\u81ea\u56de\u5f52\u751f\u6210&#xff09;<br \/>\n    target_modules&#061;[         # \u9700\u8981\u6ce8\u5165 LoRA \u7684\u6a21\u5757<br \/>\n        &#034;q_proj&#034;,            # Query \u6295\u5f71\u5c42<br \/>\n        &#034;k_proj&#034;,            # Key \u6295\u5f71\u5c42<br \/>\n        &#034;v_proj&#034;,            # Value \u6295\u5f71\u5c42<br \/>\n        
&#034;o_proj&#034;,            # Output \u6295\u5f71\u5c42<br \/>\n        &#034;gate_proj&#034;          # MLP \u95e8\u63a7\u5c42&#xff08;Qwen \u7279\u6709&#xff09;<br \/>\n    ]<br \/>\n)<br \/>\nprint(&#034;LoRA \u914d\u7f6e\u5b8c\u6210&#xff01;&#034;)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 5. \u914d\u7f6e\u8bad\u7ec3\u53c2\u6570<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;&#8212; \u914d\u7f6e\u8bad\u7ec3\u53c2\u6570&#8230; &#8212;&#034;)  <\/p>\n<p>training_arguments &#061; TrainingArguments(<br \/>\n    output_dir&#061;&#034;.\/results&#034;,               # \u8bad\u7ec3\u8f93\u51fa\u76ee\u5f55<br \/>\n    num_train_epochs&#061;5,                  # \u8bad\u7ec3\u8f6e\u6570<br \/>\n    per_device_train_batch_size&#061;1,       # \u6bcf\u8bbe\u5907\u8bad\u7ec3 batch size<br \/>\n    gradient_accumulation_steps&#061;32,      # \u68af\u5ea6\u7d2f\u79ef\u6b65\u6570<br \/>\n    optim&#061;&#034;paged_adamw_8bit&#034;,            # 8bit\u4f18\u5316\u5668&#xff0c;\u5927\u5e45\u51cf\u5c11\u663e\u5b58<br \/>\n    learning_rate&#061;2e-4,                  # \u5b66\u4e60\u7387<br \/>\n    weight_decay&#061;0.001,                  # \u6743\u91cd\u8870\u51cf<br \/>\n    fp16&#061;True,                           # \u6df7\u5408\u7cbe\u5ea6\u8bad\u7ec3<br \/>\n    max_grad_norm&#061;0.3,                   # \u68af\u5ea6\u88c1\u526a<br \/>\n    max_steps&#061;-1,                        # \u6309 epoch \u8bad\u7ec3<br \/>\n    warmup_ratio&#061;0.03,                   # \u5b66\u4e60\u7387\u9884\u70ed\u6bd4\u4f8b<br \/>\n    
group_by_length&#061;True,                # \u6309\u957f\u5ea6\u5206\u7ec4\u51cf\u5c11 padding<br \/>\n    lr_scheduler_type&#061;&#034;constant&#034;,        # \u5b66\u4e60\u7387\u8c03\u5ea6\u7b56\u7565  <\/p>\n<p>    # \u8bc4\u4f30\u53c2\u6570<br \/>\n    evaluation_strategy&#061;&#034;steps&#034;,         # \u6bcf\u9694 eval_steps \u8bc4\u4f30\u4e00\u6b21<br \/>\n    eval_steps&#061;10,                       # \u6bcf 10 \u6b65\u8bc4\u4f30\u4e00\u6b21<br \/>\n    save_steps&#061;10,                       # \u6bcf 10 \u6b65\u4fdd\u5b58\u4e00\u6b21<br \/>\n    save_strategy&#061;&#034;steps&#034;,<br \/>\n    load_best_model_at_end&#061;True,         # \u8bad\u7ec3\u7ed3\u675f\u65f6\u52a0\u8f7d\u6700\u4f73\u6a21\u578b<br \/>\n    metric_for_best_model&#061;&#034;eval_loss&#034;,   # \u4ee5\u9a8c\u8bc1 loss \u4f5c\u4e3a\u6700\u4f18\u6307\u6807<br \/>\n    greater_is_better&#061;False,             # loss \u8d8a\u5c0f\u8d8a\u597d  <\/p>\n<p>    # \u65e5\u5fd7\u8f93\u51fa<br \/>\n    disable_tqdm&#061;False,                  # \u663e\u793a\u8fdb\u5ea6\u6761<br \/>\n    # report_to&#061;&#034;none&#034;,                    # \u4e0d\u4e0a\u62a5\u5230\u5916\u90e8\u5e73\u53f0<br \/>\n    report_to&#061;[&#034;tensorboard&#034;],                    # \u4e0a\u62a5\u5230 TensorBoard<br \/>\n    logging_steps&#061;1,                     # \u6bcf 1 \u6b65\u8f93\u51fa\u65e5\u5fd7<br \/>\n    logging_dir&#061;&#034;.\/logs&#034;,                # \u65e5\u5fd7\u4fdd\u5b58\u76ee\u5f55<br \/>\n    logging_strategy&#061;&#034;steps&#034;,            # \u6309\u6b65\u6570\u8bb0\u5f55\u65e5\u5fd7<br \/>\n    logging_first_step&#061;True,             # \u7b2c\u4e00\u6b65\u5c31\u8bb0\u5f55\u65e5\u5fd7<br \/>\n    logging_nan_inf_filter&#061;True,         # \u8fc7\u6ee4 NaN\/Inf \u7684 loss<br \/>\n    # skip_memory_metrics&#061;False,           # \u6536\u96c6\u8be6\u7ec6\u5185\u5b58\u7edf\u8ba1<br \/>\n)<br \/>\nprint(&#034;\u8bad\u7ec3\u53c2\u6570\u914d\u7f6e\u5b8c\u6210&#xff01;&#034;)  
<\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 6. \u6784\u9020\u8bad\u7ec3 prompt&#xff08;\u5e94\u7528\u5bf9\u8bdd\u6a21\u677f&#xff09;<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\ndef formatting_prompts_func(examples):<br \/>\n    &#034;&#034;&#034;<br \/>\n    \u5c06\u539f\u59cb instruction-output \u5bf9\u8f6c\u6362\u4e3a Qwen \u7684\u5bf9\u8bdd\u683c\u5f0f<br \/>\n    \u4f7f\u7528 tokenizer.apply_chat_template \u81ea\u52a8\u751f\u6210\u6807\u51c6 prompt<br \/>\n    &#034;&#034;&#034;<br \/>\n    instructions &#061; examples[&#034;instruction&#034;]<br \/>\n    outputs &#061; examples[&#034;output&#034;]<br \/>\n    texts &#061; []<br \/>\n    for instruction, output in zip(instructions, outputs):<br \/>\n        # \u6784\u5efa\u5bf9\u8bdd\u6d88\u606f\u5217\u8868<br \/>\n        messages &#061; [<br \/>\n            {&#034;role&#034;: &#034;user&#034;, &#034;content&#034;: instruction},<br \/>\n            {&#034;role&#034;: &#034;assistant&#034;, &#034;content&#034;: output}<br \/>\n        ]<br \/>\n        # \u5e94\u7528 Qwen \u7684 chat template<br \/>\n        text &#061; tokenizer.apply_chat_template(<br \/>\n            messages,<br \/>\n            tokenize&#061;False,<br \/>\n            add_generation_prompt&#061;False<br \/>\n        )<br \/>\n        texts.append(text)<br \/>\n    return {&#034;text&#034;: texts}  <\/p>\n<p>print(&#034;&#8212; \u683c\u5f0f\u5316\u6570\u636e\u96c6&#xff08;\u5e94\u7528\u5bf9\u8bdd\u6a21\u677f&#xff09;&#8230; &#8212;&#034;)<br \/>\n# \u5206\u522b\u5bf9\u8bad\u7ec3\u96c6\u548c\u9a8c\u8bc1\u96c6\u5e94\u7528\u683c\u5f0f\u5316<br 
\/>\ntrain_dataset &#061; train_dataset.map(formatting_prompts_func, batched&#061;True)<br \/>\neval_dataset &#061; eval_dataset.map(formatting_prompts_func, batched&#061;True)<br \/>\nprint(&#034;\u6570\u636e\u96c6\u683c\u5f0f\u5316\u5b8c\u6210&#xff01;&#034;)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 7. \u521d\u59cb\u5316 SFTTrainer# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\nprint(&#034;&#8212; \u521d\u59cb\u5316 SFTTrainer&#8230; &#8212;&#034;)  <\/p>\n<p>trainer &#061; SFTTrainer(<br \/>\n    model&#061;model,<br \/>\n    train_dataset&#061;train_dataset,<br \/>\n    eval_dataset&#061;eval_dataset,<br \/>\n    peft_config&#061;peft_config,<br \/>\n    dataset_text_field&#061;&#034;text&#034;,<br \/>\n    max_seq_length&#061;512,  # \u4f9d\u636e\u5b9e\u9645\u6587\u672c\u6bb5\u843d\u957f\u5ea6\u6765&#xff0c;\u8fd9\u91cc\u5176\u5b9e\u6709\u70b9\u5c0f<br \/>\n    tokenizer&#061;tokenizer,<br \/>\n    args&#061;training_arguments,<br \/>\n    packing&#061;False,<br \/>\n)<br \/>\nprint(&#034;SFTTrainer \u521d\u59cb\u5316\u5b8c\u6210&#xff01;&#034;)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 8. 
\u5f00\u59cb\u8bad\u7ec3<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\ndef train_with_timeout():<br \/>\n    &#034;&#034;&#034;\u5e26\u8d85\u65f6\u7684\u8bad\u7ec3\u51fd\u6570&#034;&#034;&#034;<br \/>\n    result &#061; {&#034;finished&#034;: False, &#034;error&#034;: None}  <\/p>\n<p>    def train_thread():<br \/>\n        try:<br \/>\n            print(&#034;&#x1f680; \u542f\u52a8\u8bad\u7ec3&#8230;&#034;)<br \/>\n            trainer.train()<br \/>\n            result[&#034;finished&#034;] &#061; True<br \/>\n        except Exception as e:<br \/>\n            result[&#034;error&#034;] &#061; e<br \/>\n            import traceback<br \/>\n            traceback.print_exc()  <\/p>\n<p>    # \u521b\u5efa\u8bad\u7ec3\u7ebf\u7a0b<br \/>\n    thread &#061; threading.Thread(target&#061;train_thread)<br \/>\n    thread.daemon &#061; True<br \/>\n    thread.start()  <\/p>\n<p>    # \u7b49\u5f85\u6700\u591a40\u5206\u949f<br \/>\n    thread.join(timeout&#061;2400)  <\/p>\n<p>    if thread.is_alive():<br \/>\n        print(&#034;\u274c \u8bad\u7ec3\u8d85\u65f6&#xff01;\u53ef\u80fd\u663e\u5b58\u4e0d\u8db3\u6216\u6b7b\u9501&#034;)<br \/>\n        return False<br \/>\n    elif result[&#034;error&#034;]:<br \/>\n        print(f&#034;\u274c \u8bad\u7ec3\u51fa\u9519: {result[&#039;error&#039;]}&#034;)<br \/>\n        return False<br \/>\n    else:<br \/>\n        print(&#034;\u2705 \u8bad\u7ec3\u5b8c\u6210&#xff01;&#034;)<br \/>\n        return True  <\/p>\n<p>print(&#034;&#8212; \u5f00\u59cb\u5fae\u8c03\u8bad\u7ec3&#8230; &#8212;&#034;)<br \/>\nprint(&#034;\u8bad\u7ec3\u914d\u7f6e\u8be6\u60c5:&#034;)<br \/>\nprint(f&#034;\u8bad\u7ec3\u6837\u672c\u6570: {len(train_dataset)}&#034;)<br \/>\nprint(f&#034;\u9a8c\u8bc1\u6837\u672c\u6570: {len(eval_dataset)}&#034;)<br 
\/>\nprint(f&#034;batch_size: {training_arguments.per_device_train_batch_size}&#034;)<br \/>\nprint(f&#034;gradient_accumulation: {training_arguments.gradient_accumulation_steps}&#034;)<br \/>\nprint(f&#034;max_seq_length: 512&#034;)<br \/>\nprint(f&#034;epochs: {training_arguments.num_train_epochs}&#034;)<br \/>\nprint(f&#034;eval_steps: {training_arguments.eval_steps}&#034;)  <\/p>\n<p># \u542f\u52a8\u8bad\u7ec3<br \/>\nif not train_with_timeout():<br \/>\n    print(&#034;\u8bad\u7ec3\u5931\u8d25\u6216\u8d85\u65f6&#034;)<br \/>\n    sys.exit(1)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 9. \u8bc4\u4f30\u548c\u4fdd\u5b58\u6a21\u578b<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# \u8bad\u7ec3\u5b8c\u6210\u540e\u8bc4\u4f30\u9a8c\u8bc1\u96c6<br \/>\nprint(&#034;&#8212; \u9a8c\u8bc1\u96c6\u8bc4\u4f30 &#8212;&#034;)<br \/>\ntry:<br \/>\n    eval_results &#061; trainer.evaluate()<br \/>\n    print(f&#034;\u9a8c\u8bc1\u96c6\u635f\u5931: {eval_results[&#039;eval_loss&#039;]:.4f}&#034;)<br \/>\n    print(f&#034;\u9a8c\u8bc1\u96c6\u56f0\u60d1\u5ea6: {torch.exp(torch.tensor(eval_results[&#039;eval_loss&#039;])):.2f}&#034;)<br \/>\n    print(&#034;\u9a8c\u8bc1\u96c6\u7ed3\u679c:&#034;, eval_results)<br \/>\nexcept Exception as e:<br \/>\n    print(f&#034;\u9a8c\u8bc1\u8bc4\u4f30\u51fa\u9519: {e}&#034;)  <\/p>\n<p># \u4fdd\u5b58 LoRA \u9002\u914d\u5668<br \/>\nprint(&#034;&#8212; \u4fdd\u5b58 LoRA \u9002\u914d\u5668&#8230; &#8212;&#034;)<br \/>\ntrainer.save_model(&#034;.\/results\/final_checkpoint&#034;)<br \/>\nprint(&#034;\u2705 
\u5fae\u8c03\u5b8c\u6210&#xff01;LoRA \u9002\u914d\u5668\u5df2\u4fdd\u5b58\u81f3 .\/results\/final_checkpoint&#034;)<br \/>\nprint(&#034;&#x1f4a1; \u540e\u7eed\u63a8\u7406\u65f6&#xff0c;\u53ea\u9700\u52a0\u8f7d\u57fa\u5ea7\u6a21\u578b &#043; \u6b64\u9002\u914d\u5668\u5373\u53ef\u3002&#034;)  <\/p>\n<p># &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n# 10. \u4f7f\u7528\u8bf4\u660e<br \/>\n# &#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br \/>\n&#034;&#034;&#034;<br \/>\n&#x1f4a1; \u5982\u4f55\u52a0\u8f7d\u5fae\u8c03\u540e\u7684\u6a21\u578b\u8fdb\u884c\u63a8\u7406&#xff1f;  <\/p>\n<p># \u52a0\u8f7d\u57fa\u5ea7\u6a21\u578b<br \/>\nprint(&#034;\u52a0\u8f7d\u57fa\u5ea7\u6a21\u578b&#8230;&#034;)<br \/>\nbase_model &#061; AutoModelForCausalLM.from_pretrained(<br \/>\n    &#034;Qwen\/Qwen2.5-1.5B-Instruct&#034;,    torch_dtype&#061;torch.float16 if device &#061;&#061; &#034;cuda&#034; else torch.float32,    low_cpu_mem_usage&#061;True,    trust_remote_code&#061;True)  <\/p>\n<p># \u52a0\u8f7dLoRA\u9002\u914d\u5668<br \/>\nprint(&#034;\u52a0\u8f7dLoRA\u9002\u914d\u5668&#8230;&#034;)<br \/>\nmodel &#061; PeftModel.from_pretrained(base_model, &#034;.\/results\/final_checkpoint&#034;)  <\/p>\n<p>&#034;&#034;&#034;<br \/>\n&#034;&#034;&#034;<\/p>\n<h4>3.2.2 \u6982\u5ff5\u8865\u5145<\/h4>\n<h5>3.2.2.1 SFT&#xff08;\u76d1\u7763\u5fae\u8c03&#xff09;<\/h5>\n<ul>\n<li>\u8f93\u5165&#xff1a;(instruction, output) 
\u5bf9<\/li>\n<li>\u76ee\u6807&#xff1a;\u8ba9\u6a21\u578b\u5b66\u4f1a\u201c\u6839\u636e\u6307\u4ee4\u751f\u6210\u6b63\u786e\u56de\u7b54\u201d<\/li>\n<li>\u635f\u5931\u51fd\u6570&#xff1a;\u6807\u51c6\u7684\u8bed\u8a00\u5efa\u6a21\u635f\u5931&#xff08;CrossEntropy&#xff09;<\/li>\n<\/ul>\n<h4>3.2.3 DPO<\/h4>\n<ul>\n<li>DPO&#xff08;\u76f4\u63a5\u504f\u597d\u4f18\u5316&#xff09;&#xff1a;\u4e0d\u9700\u8981\u5956\u52b1\u6a21\u578b\u548c\u4ef7\u503c\u51fd\u6570&#xff0c;\u76f4\u63a5\u7528\u504f\u597d\u6570\u636e\u4f18\u5316\u7b56\u7565\u6a21\u578b<\/li>\n<li>\u5bf9\u6bd4 PPO&#xff08;\u8fd1\u7aef\u7b56\u7565\u4f18\u5316&#xff09;&#xff1a;\u5f3a\u5316\u5b66\u4e60\u7ecf\u5178\u7b97\u6cd5<\/li>\n<li>PPO \u9700\u8981&#xff1a;\u7b56\u7565\u6a21\u578b &#043; \u5956\u52b1\u6a21\u578b &#043; \u4ef7\u503c\u51fd\u6570<\/li>\n<li>PPO \u590d\u6742\u4f46\u7075\u6d3b&#xff0c;\u9002\u5408\u7cbe\u7ec6\u63a7\u5236\u751f\u6210\u884c\u4e3a<\/li>\n<\/ul>\n<p>\u6bd4\u5982\u8ba9\u6a21\u578b\u201c\u66f4\u8bda\u5b9e\u201d\u3001\u201c\u66f4\u5b89\u5168\u201d\u3002<\/p>\n<h5>3.2.3.1 LoRA&#xff08;\u4f4e\u79e9\u9002\u914d&#xff09;<\/h5>\n<p>LoRA&#xff08;Low-Rank Adaptation&#xff09;&#xff0c;\u4e00\u79cd \u53c2\u6570\u9ad8\u6548\u5fae\u8c03&#xff08;PEFT&#xff09; \u6280\u672f&#xff1a;<\/p>\n<ul>\n<li>\n<p>\u4e0d\u66f4\u65b0\u539f\u59cb\u5927\u6a21\u578b\u7684\u6240\u6709\u53c2\u6570<\/p>\n<\/li>\n<li>\n<p>\u53ea\u8bad\u7ec3\u4e00\u5c0f\u90e8\u5206\u201c\u9002\u914d\u5668\u201d\u77e9\u9635&#xff08;\u4f4e\u79e9\u77e9\u9635&#xff09;<\/p>\n<\/li>\n<li>\n<p>\u539f\u59cb\u6a21\u578b\u51bb\u7ed3&#xff0c;\u53ea\u4fdd\u5b58\u548c\u52a0\u8f7d\u5c0f\u7684\u9002\u914d\u5668<\/p>\n<\/li>\n<li>\n<p>\u4f20\u7edf\u5168\u53c2\u6570\u5fae\u8c03 vs LoRA<\/p>\n<\/li>\n<\/ul>\n<p>\u4f20\u7edf\u5fae\u8c03&#xff1a;<br \/>\n\u539f\u59cb\u6743\u91cd\u77e9\u9635 W\u2080 (d\u00d7d)<br \/>\n\u2193 \u5fae\u8c03\u540e<br \/>\n\u65b0\u6743\u91cd\u77e9\u9635 W\u2081 &#061; W\u2080 &#043; \u0394W (d\u00d7d)<br \/>\n\u9700\u8981\u8bad\u7ec3 d\u00d7d \u4e2a\u53c2\u6570<\/p>\n<p>LoRA\u5fae\u8c03&#xff1a;<br \/>\n\u539f\u59cb\u6743\u91cd\u77e9\u9635 W\u2080 (d\u00d7d)<br \/>\n\u2193 \u5fae\u8c03\u540e<br \/>\n\u65b0\u6743\u91cd\u77e9\u9635 W\u2081 &#061; W\u2080 &#043; A\u00d7B (d\u00d7d)<br \/>\n\u5176\u4e2d A(d\u00d7r), B(r\u00d7d) \u662f\u4f4e\u79e9\u77e9\u9635<br 
\/>\n\u53ea\u9700\u8981\u8bad\u7ec3 2\u00d7d\u00d7r \u4e2a\u53c2\u6570<\/p>\n<table>\n<thead>\n<tr>\n<th>\u4f18\u70b9<\/th>\n<th>\u8bf4\u660e<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u663e\u5b58\u8282\u7701<\/td>\n<td>\u53ea\u8bad\u7ec3\u5c11\u91cf\u53c2\u6570&#xff08;\u901a\u5e38 &lt;1%&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u5b58\u50a8\u65b9\u4fbf<\/td>\n<td>\u6700\u7ec8\u53ea\u4fdd\u5b58\u51e0 MB \u7684 LoRA \u6743\u91cd<\/td>\n<\/tr>\n<tr>\n<td>\u5feb\u901f\u5207\u6362<\/td>\n<td>\u540c\u4e00\u4e2a\u57fa\u5ea7\u6a21\u578b &#043; \u4e0d\u540c LoRA &#061; \u4e0d\u540c\u80fd\u529b<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<table>\n<thead>\n<tr>\n<th>\u53c2\u6570<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u63a8\u8350\u503c<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>r<\/td>\n<td>LoRA \u7684\u79e9&#xff08;rank&#xff09;&#xff0c;\u63a7\u5236\u9002\u914d\u5668\u590d\u6742\u5ea6<\/td>\n<td>8~64&#xff08;\u8d8a\u5927\u8d8a\u5f3a&#xff0c;\u4e5f\u8d8a\u5bb9\u6613\u8fc7\u62df\u5408&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>lora_alpha<\/td>\n<td>\u7f29\u653e\u56e0\u5b50&#xff0c;\u5f71\u54cd LoRA \u6743\u91cd\u7684\u5f3a\u5ea6<\/td>\n<td>\u4e00\u822c\u7b49\u4e8e r \u6216\u7565\u5c0f<\/td>\n<\/tr>\n<tr>\n<td>lora_dropout<\/td>\n<td>\u9632\u6b62\u9002\u914d\u5668\u8fc7\u62df\u5408<\/td>\n<td>0.05~0.1<\/td>\n<\/tr>\n<tr>\n<td>target_modules<\/td>\n<td>\u54ea\u4e9b\u6a21\u5757\u52a0 LoRA<\/td>\n<td>Qwen \u5e38\u89c1\u4e3a q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>r&#xff08;\u79e9&#xff09; 
\u662fLoRA\u4e2d\u6700\u5173\u952e\u7684\u8d85\u53c2\u6570&#xff0c;\u9700\u8981\u6839\u636e\u4efb\u52a1\u590d\u6742\u5ea6\u548c\u786c\u4ef6\u6761\u4ef6\u7cbe\u5fc3\u9009\u62e9\u3002<\/p>\n<ul>\n<li>\u5c0fr\u503c&#xff08;4-16&#xff09;&#xff1a;\u9002\u5408\u7b80\u5355\u4efb\u52a1&#xff0c;\u53c2\u6570\u5c11&#xff0c;\u4e0d\u6613\u8fc7\u62df\u5408&#xff0c;\u4f46\u53ef\u80fd\u8868\u8fbe\u80fd\u529b\u6709\u9650<\/li>\n<li>\u4e2dr\u503c&#xff08;32&#xff09;&#xff1a;\u5e73\u8861\u9009\u62e9&#xff0c;\u9002\u5408\u5927\u591a\u6570\u4efb\u52a1&#xff0c;\u5982\u672c\u6587\u7684\u98ce\u683c\u8f6c\u6362\u4efb\u52a1<\/li>\n<li>\u5927r\u503c&#xff08;64&#043;&#xff09;&#xff1a;\u9002\u5408\u590d\u6742\u4efb\u52a1&#xff0c;\u8868\u8fbe\u80fd\u529b\u5f3a&#xff0c;\u4f46\u53ef\u80fd\u5bfc\u81f4\u8fc7\u62df\u5408\u5e76\u589e\u52a0\u663e\u5b58\u6d88\u8017<\/li>\n<\/ul>\n<p>\u5bf9\u4e8eQwen2.5-1.5B\u6a21\u578b\u7684LoRA\u9002\u914d\u5668&#xff1a;<\/p>\n<table>\n<thead>\n<tr>\n<th>r\u503c<\/th>\n<th>\u5355\u4e2a\u9002\u914d\u5668\u53c2\u6570\u91cf<\/th>\n<th>\u603b\u9002\u914d\u5668\u6570<\/th>\n<th>\u603b\u53c2\u6570\u91cf<\/th>\n<th>\u663e\u5b58\u589e\u52a0<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>r&#061;64<\/td>\n<td>~100K<\/td>\n<td>~100\u4e2a<\/td>\n<td>~10M<\/td>\n<td>&#043;400MB<\/td>\n<\/tr>\n<tr>\n<td>r&#061;32<\/td>\n<td>~50K<\/td>\n<td>~100\u4e2a<\/td>\n<td>~5M<\/td>\n<td>&#043;200MB<\/td>\n<\/tr>\n<tr>\n<td>r&#061;16<\/td>\n<td>~25K<\/td>\n<td>~100\u4e2a<\/td>\n<td>~2.5M<\/td>\n<td>&#043;100MB<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<ul>\n<li>\u8fc7\u62df\u5408 
\u8fc7\u62df\u5408&#xff08;Overfitting&#xff09;\u662f\u673a\u5668\u5b66\u4e60\u4e2d\u7684\u4e00\u4e2a\u5e38\u89c1\u95ee\u9898&#xff0c;\u6307\u7684\u662f\u6a21\u578b\u5728\u8bad\u7ec3\u6570\u636e\u4e0a\u8868\u73b0\u5f88\u597d&#xff0c;\u4f46\u5728\u672a\u89c1\u8fc7\u7684\u65b0\u6570\u636e&#xff08;\u6d4b\u8bd5\u6570\u636e\u6216\u771f\u5b9e\u573a\u666f&#xff09;\u4e0a\u8868\u73b0\u5f88\u5dee\u7684\u73b0\u8c61\u3002<\/li>\n<\/ul>\n<p>\u6a21\u578b\u592a\u590d\u6742&#xff0c;\u62df\u5408\u4e86\u8bad\u7ec3\u6570\u636e\u4e2d\u7684\u566a\u58f0&#xff08;\u8bad\u7ec3\u96c6\u597d&#xff0c;\u6d4b\u8bd5\u96c6\u5dee&#xff09;\u3002 \u8fc7\u62df\u5408\u7684\u672c\u8d28\u662f\u6a21\u578b\u7f3a\u4e4f\u6cdb\u5316\u80fd\u529b&#xff08;Generalization&#xff09;&#xff0c;\u5373\u65e0\u6cd5\u5c06\u5b66\u5230\u7684\u89c4\u5f8b\u5e94\u7528\u5230\u65b0\u6570\u636e\u4e0a\u3002<\/p>\n<p>\u4ea7\u751f\u539f\u56e0&#xff1a;<\/p>\n<ol>\n<li>\u6a21\u578b\u592a\u590d\u6742&#xff1a;\u6bd4\u5982\u7528\u9ad8\u9636\u591a\u9879\u5f0f\u62df\u5408\u5c11\u91cf\u6570\u636e\u70b9&#xff0c;\u5bb9\u6613\u5b66\u5230\u566a\u58f0\u800c\u975e\u89c4\u5f8b\u3002<\/li>\n<li>\u8bad\u7ec3\u6570\u636e\u592a\u5c11&#xff1a;\u6570\u636e\u91cf\u4e0d\u8db3\u65f6&#xff0c;\u6a21\u578b\u53ef\u80fd\u628a\u5076\u7136\u7279\u5f81\u5f53\u6210\u666e\u904d\u89c4\u5f8b\u3002<\/li>\n<li>\u8bad\u7ec3\u65f6\u95f4\u8fc7\u957f&#xff1a;\u6a21\u578b\u53cd\u590d\u8c03\u6574\u53c2\u6570&#xff0c;\u751a\u81f3\u5f00\u59cb\u8bb0\u5fc6\u8bad\u7ec3\u6837\u672c\u7684\u7ec6\u8282&#xff08;\u5982\u566a\u58f0&#xff09;\u3002<\/li>\n<\/ol>\n<p>\u5982\u4f55\u8bc6\u522b&#xff1a; \u8bad\u7ec3\u96c6\u51c6\u786e\u7387\u8fdc\u9ad8\u4e8e\u6d4b\u8bd5\u96c6\u51c6\u786e\u7387&#xff08;\u4f8b\u5982\u8bad\u7ec3\u51c6\u786e\u738798%&#xff0c;\u6d4b\u8bd5\u51c6\u786e\u738760%&#xff09;\u3002<\/p>\n<p>\u5bf9\u6bd4\u5176\u4ed6\u6982\u5ff5&#xff1a; 
\u6b20\u62df\u5408&#xff08;Underfitting&#xff09;&#xff1a;\u6a21\u578b\u592a\u7b80\u5355&#xff0c;\u8fde\u8bad\u7ec3\u6570\u636e\u90fd\u62df\u5408\u4e0d\u597d&#xff08;\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\u8868\u73b0\u90fd\u5dee&#xff09;\u3002<\/p>\n<h5>3.2.3.2 trl<\/h5>\n<p>trl (Transformer Reinforcement Learning) &#xff0c;\u652f\u6301\u5f3a\u5316\u5b66\u4e60\u3001\u76d1\u7763\u5fae\u8c03&#xff08;SFT&#xff09;\u7b49\u4efb\u52a1\u3002<\/p>\n<ul>\n<li>\u5b98\u65b9\u4ed3\u5e93&#xff1a;https:\/\/github.com\/huggingface\/trl<\/li>\n<\/ul>\n<p>\u5728\u6ca1\u6709 trl \u4e4b\u524d&#xff0c;\u8bad\u7ec3\u5927\u8bed\u8a00\u6a21\u578b&#xff08;LLM&#xff09;\u975e\u5e38\u590d\u6742&#xff0c;\u4f60\u9700\u8981\u624b\u52a8\u5b9e\u73b0&#xff1a;<\/p>\n<ul>\n<li>\u6570\u636e\u5904\u7406<\/li>\n<li>\u635f\u5931\u8ba1\u7b97<\/li>\n<li>\u751f\u6210\u63a7\u5236<\/li>\n<li>LoRA \u96c6\u6210<\/li>\n<li>\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5&#xff08;\u5982 PPO&#xff09;<\/li>\n<\/ul>\n<p>\u800c trl \u7684\u76ee\u6807\u5c31\u662f&#xff1a;\u8ba9 LLM \u7684\u8bad\u7ec3\u53d8\u5f97\u50cf transformers.Trainer \u4e00\u6837\u7b80\u5355\u3002<\/p>\n<ul>\n<li>\u6838\u5fc3\u529f\u80fd<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>\u529f\u80fd<\/th>\n<th>\u8bf4\u660e<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u2705 SFTTrainer<\/td>\n<td>\u76d1\u7763\u5fae\u8c03&#xff08;Supervised Fine-Tuning&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u2705 DPOTrainer<\/td>\n<td>\u76f4\u63a5\u504f\u597d\u4f18\u5316&#xff08;Direct Preference Optimization&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u2705 PPOTrainer<\/td>\n<td>\u8fd1\u7aef\u7b56\u7565\u4f18\u5316&#xff08;Proximal Policy Optimization&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u2705 KTOTrainer<\/td>\n<td>Kahneman-Tversky Optimization<\/td>\n<\/tr>\n<tr>\n<td>\u2705 ORPOTrainer<\/td>\n<td>\u4f18\u52bf\u6bd4\u504f\u597d\u4f18\u5316&#xff08;Odds Ratio Preference Optimization&#xff09;<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h6>3.2.3.2.1 SFTTrainer<\/h6>\n<p>\u5728\u4f20\u7edf\u7684\u5fae\u8c03\u4e2d&#xff0c;\u5047\u8bbe\u4f60\u7528\u539f\u751f 
transformers.Trainer \u505a\u6307\u4ee4\u5fae\u8c03&#xff1a;<\/p>\n<p><span class=\"token comment\"># \u4f60\u9700\u8981\u624b\u52a8\u62fc\u63a5 prompt &#043; response<\/span><br \/>\ninput_text <span class=\"token operator\">&#061;<\/span> <span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u7528\u6237: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>instruction<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">\\n\u52a9\u624b: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>output<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><br \/>\ninputs <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">(<\/span>input_text<span class=\"token punctuation\">,<\/span> return_tensors<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;pt&#034;<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p>\u7136\u540e\u8fd8\u8981&#xff1a;<\/p>\n<ul>\n<li>\u53ea\u8ba1\u7b97 \u52a9\u624b: \u540e\u9762\u90e8\u5206\u7684 loss<\/li>\n<li>\u5904\u7406 padding<\/li>\n<li>\u652f\u6301 LoRA<\/li>\n<li>\u652f\u6301\u804a\u5929\u6a21\u677f<\/li>\n<\/ul>\n<p>\u8fd9\u5f88\u7e41\u7410&#xff0c;\u5bb9\u6613\u51fa\u9519\u3002<\/p>\n<p>SFTTrainer \u81ea\u52a8\u5e2e\u4f60\u5b8c\u6210&#xff1a;<\/p>\n<table>\n<thead>\n<tr>\n<th>\u81ea\u52a8\u5316\u529f\u80fd<\/th>\n<th>\u8bf4\u660e<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u2705 \u81ea\u52a8\u6784\u9020 prompt<\/td>\n<td>\u652f\u6301 formatting_func \u6216 dataset_text_field<\/td>\n<\/tr>\n<tr>\n<td>\u2705 \u81ea\u52a8\u8ba1\u7b97\u635f\u5931<\/td>\n<td>\u9ed8\u8ba4\u5bf9\u6574\u6bb5\u6587\u672c\u8ba1\u7b97 loss&#xff1b;\u53ef\u914d\u7f6e\u4e3a\u53ea\u8ba1\u7b97 assistant \u56de\u5e94\u90e8\u5206\u7684 loss&#xff0c;\u5ffd\u7565 prompt<\/td>\n<\/tr>\n<tr>\n<td>\u2705 \u652f\u6301 LoRA<\/td>\n<td>\u65e0\u7f1d\u96c6\u6210 peft<\/td>\n<\/tr>\n<tr>\n<td>\u2705 \u652f\u6301\u804a\u5929\u6a21\u677f<\/td>\n<td>\u8c03\u7528 apply_chat_template 
\u81ea\u52a8\u751f\u6210\u6807\u51c6\u683c\u5f0f<\/td>\n<\/tr>\n<tr>\n<td>\u2705 \u652f\u6301 packing<\/td>\n<td>\u63d0\u9ad8\u8bad\u7ec3\u6548\u7387<\/td>\n<\/tr>\n<tr>\n<td>\u2705 \u652f\u6301 group_by_length<\/td>\n<td>\u51cf\u5c11 padding \u6d6a\u8d39<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<ul>\n<li>SFTTrainer \u7684\u5de5\u4f5c\u6d41\u7a0b<\/li>\n<\/ul>\n<p>\u539f\u59cb\u6570\u636e \u2192 \u6784\u9020 prompt \u2192 tokenize \u2192 \u6a21\u578b\u524d\u5411 \u2192 \u8ba1\u7b97 loss&#xff08;\u53ef\u914d\u7f6e\u4e3a\u4ec5 response&#xff09; \u2192 \u53cd\u5411\u4f20\u64ad<\/p>\n<p>\u5b83\u77e5\u9053&#xff1a;\u201c\u6211\u53ea\u8be5\u4e3a\u6a21\u578b\u7684\u56de\u7b54\u6253\u5206&#xff0c;\u800c\u4e0d\u662f\u4e3a\u7528\u6237\u7684\u63d0\u95ee\u6253\u5206\u201d\u3002<\/p>\n<ul>\n<li>trl \u652f\u6301\u7684\u8bad\u7ec3\u8303\u5f0f\u5bf9\u6bd4<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>\u65b9\u6cd5<\/th>\n<th>\u5168\u79f0<\/th>\n<th>\u662f\u5426\u9700\u8981\u5956\u52b1\u6a21\u578b<\/th>\n<th>\u662f\u5426\u9700\u8981\u91c7\u6837<\/th>\n<th>\u7528\u9014<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>SFT<\/td>\n<td>Supervised Fine-Tuning<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u521d\u59cb\u5fae\u8c03&#xff0c;\u8ba9\u6a21\u578b\u5b66\u4f1a\u201c\u6309\u6307\u4ee4\u56de\u7b54\u201d<\/td>\n<\/tr>\n<tr>\n<td>PPO<\/td>\n<td>Proximal Policy Optimization<\/td>\n<td>\u2705 \u9700\u8981<\/td>\n<td>\u2705 \u9700\u8981<\/td>\n<td>\u5f3a\u5316\u5b66\u4e60&#xff0c;\u7528\u5956\u52b1\u4fe1\u53f7\u4f18\u5316\u751f\u6210<\/td>\n<\/tr>\n<tr>\n<td>DPO<\/td>\n<td>Direct Preference Optimization<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u7ed5\u8fc7\u5f3a\u5316\u5b66\u4e60&#xff0c;\u76f4\u63a5\u4f18\u5316\u504f\u597d<\/td>\n<\/tr>\n<tr>\n<td>KTO<\/td>\n<td>Kahneman-Tversky Optimization<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u57fa\u4e8e\u201c\u597d\/\u574f\u201d\u5224\u65ad&#xff0c;\u65e0\u9700\u6210\u5bf9\u6bd4\u8f83<\/td>\n<\/tr>\n<tr>\n<td>ORPO<\/td>\n<td>Odds Ratio Preference 
Optimization<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u274c \u4e0d\u9700\u8981<\/td>\n<td>\u5355\u6837\u672c\u504f\u597d\u4f18\u5316<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<ul>\n<li>trl \u89e3\u51b3\u7684\u6838\u5fc3\u95ee\u9898\u603b\u7ed3<\/li>\n<\/ul>\n<table>\n<thead>\n<tr>\n<th>\u95ee\u9898<\/th>\n<th>trl \u7684\u89e3\u51b3\u65b9\u6848<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u5982\u4f55\u53ea\u5bf9\u56de\u7b54\u90e8\u5206\u8ba1\u7b97 loss&#xff1f;<\/td>\n<td>SFTTrainer \u53ef\u914d\u7f6e\u4e3a mask prompt \u90e8\u5206&#xff08;\u9ed8\u8ba4\u5bf9\u6574\u6bb5\u6587\u672c\u8ba1\u7b97 loss&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u5982\u4f55\u6784\u9020\u6807\u51c6\u5bf9\u8bdd\u683c\u5f0f&#xff1f;<\/td>\n<td>\u652f\u6301 apply_chat_template \u548c formatting_func<\/td>\n<\/tr>\n<tr>\n<td>\u5982\u4f55\u96c6\u6210 LoRA&#xff1f;<\/td>\n<td>\u76f4\u63a5\u4f20 peft_config<\/td>\n<\/tr>\n<tr>\n<td>\u5982\u4f55\u505a\u5f3a\u5316\u5b66\u4e60&#xff1f;<\/td>\n<td>\u63d0\u4f9b PPOTrainer\u3001DPOTrainer<\/td>\n<\/tr>\n<tr>\n<td>\u5982\u4f55\u63d0\u9ad8\u8bad\u7ec3\u6548\u7387&#xff1f;<\/td>\n<td>\u652f\u6301 packing\u3001group_by_length<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4>3.2.4 \u8bad\u7ec3\u6d41\u7a0b<\/h4>\n<p>\u5fae\u8c03\u6d41\u7a0b\u53ef\u4ee5\u5206\u4e3a\u4ee5\u4e0b\u51e0\u4e2a\u9636\u6bb5&#xff1a;<\/p>\n<p>1. \u6570\u636e\u51c6\u5907 \u2192 2. \u6a21\u578b\u52a0\u8f7d \u2192 3. LoRA \u914d\u7f6e \u2192 4. \u8bad\u7ec3\u53c2\u6570\u8bbe\u7f6e \u2192 5. \u6784\u9020 prompt \u2192 6. \u8bad\u7ec3 \u2192 7. 
\u4fdd\u5b58\u9002\u914d\u5668<\/p>\n<ol>\n<li>\u6570\u636e\u51c6\u5907&#xff1a;\u6536\u96c6\u5e76\u6784\u5efa\u9ad8\u8d28\u91cf\u6307\u4ee4\u6570\u636e\u96c6&#xff0c;\u786e\u4fdd\u8986\u76d6\u76ee\u6807\u98ce\u683c<\/li>\n<li>\u6a21\u578b\u52a0\u8f7d&#xff1a;\u52a0\u8f7d\u9884\u8bad\u7ec3\u57fa\u7840\u6a21\u578b&#xff0c;\u8003\u8651\u91cf\u5316\u7b56\u7565\u4ee5\u9002\u5e94\u786c\u4ef6\u9650\u5236<\/li>\n<li>LoRA\u914d\u7f6e&#xff1a;\u786e\u5b9a\u9002\u914d\u6a21\u5757\u548c\u79e9\u5927\u5c0f&#xff0c;\u5e73\u8861\u6027\u80fd\u4e0e\u8d44\u6e90<\/li>\n<li>\u8bad\u7ec3\u53c2\u6570\u8bbe\u7f6e&#xff1a;\u6839\u636e\u786c\u4ef6\u6761\u4ef6\u914d\u7f6ebatch size\u3001\u5b66\u4e60\u7387\u7b49\u5173\u952e\u53c2\u6570<\/li>\n<li>\u63d0\u793a\u5de5\u7a0b&#xff1a;\u8bbe\u8ba1\u6709\u6548\u7684\u5bf9\u8bdd\u6a21\u677f&#xff0c;\u660e\u786e\u5f15\u5bfc\u6a21\u578b\u7406\u89e3\u98ce\u683c\u8f6c\u6362\u4efb\u52a1<\/li>\n<li>\u8bad\u7ec3\u6267\u884c&#xff1a;\u76d1\u63a7\u8bad\u7ec3\u8fc7\u7a0b&#xff0c;\u9002\u65f6\u8c03\u6574\u8d85\u53c2\u6570<\/li>\n<li>\u9002\u914d\u5668\u4fdd\u5b58&#xff1a;\u4fdd\u5b58\u5fae\u8c03\u7ed3\u679c&#xff0c;\u4e3a\u63a8\u7406\u90e8\u7f72\u505a\u51c6\u5907<\/li>\n<\/ol>\n<p>\u8fd9\u4e00\u6d41\u7a0b\u4f53\u73b0\u4e86\u4ece\u6570\u636e\u5230\u90e8\u7f72\u7684\u5b8c\u6574\u5fae\u8c03\u751f\u547d\u5468\u671f&#xff0c;\u7279\u522b\u9002\u5408\u8d44\u6e90\u53d7\u9650\u73af\u5883\u4e0b\u7684\u5b9e\u7528\u5fae\u8c03\u3002<\/p>\n<ul>\n<li>\u52a0\u8f7d\u6570\u636e\u96c6 \u8fd9\u4e00\u6b65\u8f93\u51faHugging Face\u7684 Dataset\u5bf9\u8c61&#xff0c;\u652f\u6301\u9ad8\u6548\u6620\u5c04\u3001\u8fc7\u6ee4\u3001\u5206\u6279\u7b49\u64cd\u4f5c\u3002<\/li>\n<\/ul>\n<p>\u8f93\u5165\u683c\u5f0f&#xff1a;.jsonl \u6587\u4ef6&#xff08;\u6bcf\u884c\u4e00\u4e2a JSON \u5bf9\u8c61&#xff09;&#xff1b; split&#061;&#034;train&#034; 
\u8868\u793a\u6574\u4e2a\u6587\u4ef6\u4f5c\u4e3a\u8bad\u7ec3\u96c6&#xff08;\u6ca1\u6709\u5212\u5206\u9a8c\u8bc1\u96c6&#xff09;\u3002<\/p>\n<ul>\n<li>\u52a0\u8f7d\u6a21\u578b\u4e0e\u5206\u8bcd\u5668<\/li>\n<\/ul>\n<p>tokenizer &#061; AutoTokenizer.from_pretrained(model_name, trust_remote_code&#061;True)<br \/>\nmodel &#061; AutoModelForCausalLM.from_pretrained(&#8230;)<\/p>\n<p>AutoModelForCausalLM \u662f\u7528\u4e8e\u81ea\u56de\u5f52\u8bed\u8a00\u5efa\u6a21\u7684\u6a21\u578b\u5934&#xff0c;\u9002\u5408\u751f\u6210\u4efb\u52a1\u3002<\/p>\n<table>\n<tr>\u7ec4\u4ef6\u8bf4\u660e<\/tr>\n<tbody>\n<tr>\n<td>AutoTokenizer<\/td>\n<td>\u81ea\u52a8\u9009\u62e9\u9002\u5408 Qwen \u7684 tokenizer<\/td>\n<\/tr>\n<tr>\n<td>trust_remote_code&#061;True<\/td>\n<td>\u5141\u8bb8\u52a0\u8f7d\u81ea\u5b9a\u4e49\u6a21\u578b\u4ee3\u7801&#xff08;Qwen \u4f7f\u7528\u4e86\u975e\u6807\u51c6\u5b9e\u73b0&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>pad_token &#061; eos_token<\/td>\n<td>GPT \u7c7b\u6a21\u578b\u6ca1\u6709 padding token&#xff0c;\u9700\u624b\u52a8\u8bbe\u7f6e<\/td>\n<\/tr>\n<tr>\n<td>torch.float16 &#043; device_map&#061;&#034;auto&#034;<\/td>\n<td>\u534a\u7cbe\u5ea6 &#043; \u81ea\u52a8\u5206\u914d GPU \u663e\u5b58&#xff0c;\u8282\u7701\u8d44\u6e90<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>3.3 \u7b2c\u4e09\u6b65&#xff1a;\u5bf9\u6bd4\u6d4b\u8bd5<\/h3>\n<p>\u5bf9\u6bd4\u4e00\u4e0b\u57fa\u7840\u6a21\u578b\u4e0e\u5fae\u8c03\u6a21\u578b\u7684\u5dee\u5f02\u3002 demo\u5982\u4e0b&#xff1a;<\/p>\n<p><span class=\"token keyword\">from<\/span> transformers <span class=\"token keyword\">import<\/span> AutoModelForCausalLM<span class=\"token punctuation\">,<\/span> AutoTokenizer<br \/>\n<span class=\"token keyword\">from<\/span> peft <span class=\"token keyword\">import<\/span> PeftModel<br \/>\n<span class=\"token keyword\">import<\/span> torch<\/p>\n<p><span class=\"token comment\"># &#8212; \u914d\u7f6e &#8212;<\/span><br \/>\n<span class=\"token comment\"># \u57fa\u7840\u6a21\u578b\u7684\u8def\u5f84<\/span><br 
\/>\nbase_model_name <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;Qwen\/Qwen2.5-1.5B-Instruct&#034;<\/span><br \/>\n<span class=\"token comment\"># \u4f60\u8bad\u7ec3\u597d\u7684LoRA\u9002\u914d\u5668\u7684\u8def\u5f84<\/span><br \/>\nadapter_path <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;.\/results\/final_checkpoint&#034;<\/span><\/p>\n<p><span class=\"token comment\"># \u68c0\u67e5\u662f\u5426\u6709GPU<\/span><br \/>\ndevice <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;cuda&#034;<\/span> <span class=\"token keyword\">if<\/span> torch<span class=\"token punctuation\">.<\/span>cuda<span class=\"token punctuation\">.<\/span>is_available<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">else<\/span> <span class=\"token string\">&#034;cpu&#034;<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u4f7f\u7528\u8bbe\u5907: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>device<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u52a0\u8f7d\u57fa\u5ea7\u6a21\u578b<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u52a0\u8f7d\u57fa\u5ea7\u6a21\u578b&#8230;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\nbase_model <span class=\"token operator\">&#061;<\/span> AutoModelForCausalLM<span class=\"token punctuation\">.<\/span>from_pretrained<span class=\"token punctuation\">(<\/span><br \/>\n    base_model_name<span class=\"token punctuation\">,<\/span><br \/>\n    torch_dtype<span class=\"token 
operator\">&#061;<\/span>torch<span class=\"token punctuation\">.<\/span>float16 <span class=\"token keyword\">if<\/span> device <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token string\">&#034;cuda&#034;<\/span> <span class=\"token keyword\">else<\/span> torch<span class=\"token punctuation\">.<\/span>float32<span class=\"token punctuation\">,<\/span><br \/>\n    low_cpu_mem_usage<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span><br \/>\n    trust_remote_code<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><br \/>\n<span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u52a0\u8f7dLoRA\u9002\u914d\u5668<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;&#8212; \u6b63\u5728\u4ece <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>adapter_path<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> \u52a0\u8f7dLoRA\u9002\u914d\u5668&#8230; &#8212;&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\ntuned_model <span class=\"token operator\">&#061;<\/span> PeftModel<span class=\"token punctuation\">.<\/span>from_pretrained<span class=\"token punctuation\">(<\/span>base_model<span class=\"token punctuation\">,<\/span> adapter_path<span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u5c06\u6a21\u578b\u79fb\u52a8\u5230\u6307\u5b9a\u8bbe\u5907<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u5c06\u6a21\u578b\u79fb\u52a8\u5230 <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>device<span 
class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#8230;&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\ntuned_model <span class=\"token operator\">&#061;<\/span> tuned_model<span class=\"token punctuation\">.<\/span>to<span class=\"token punctuation\">(<\/span>device<span class=\"token punctuation\">)<\/span><br \/>\ntuned_model<span class=\"token punctuation\">.<\/span><span class=\"token builtin\">eval<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u8bbe\u7f6e\u4e3a\u8bc4\u4f30\u6a21\u5f0f<\/span><\/p>\n<p><span class=\"token comment\"># \u52a0\u8f7d\u5206\u8bcd\u5668<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u52a0\u8f7d\u5206\u8bcd\u5668&#8230;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\ntokenizer <span class=\"token operator\">&#061;<\/span> AutoTokenizer<span class=\"token punctuation\">.<\/span>from_pretrained<span class=\"token punctuation\">(<\/span>base_model_name<span class=\"token punctuation\">,<\/span> trust_remote_code<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># &#8212; \u5b9a\u4e49\u4e00\u4e2a\u901a\u7528\u7684\u751f\u6210\u51fd\u6570 &#8212;<\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">generate_response<\/span><span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">,<\/span> instruction<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token triple-quoted-string string\">&#034;&#034;&#034;\u4f7f\u7528\u7ed9\u5b9a\u7684\u6a21\u578b\u548c\u6307\u4ee4\u751f\u6210\u56de\u7b54&#034;&#034;&#034;<\/span><br \/>\n    messages <span class=\"token 
operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><br \/>\n        <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;system&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;\u4f60\u662f\u4e00\u4e2a\u4e50\u4e8e\u52a9\u4eba\u7684AI\u52a9\u624b\u3002&#034;<\/span><span class=\"token punctuation\">}<\/span><span class=\"token punctuation\">,<\/span><br \/>\n        <span class=\"token punctuation\">{<\/span><span class=\"token string\">&#034;role&#034;<\/span><span class=\"token punctuation\">:<\/span> <span class=\"token string\">&#034;user&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;content&#034;<\/span><span class=\"token punctuation\">:<\/span> instruction<span class=\"token punctuation\">}<\/span><br \/>\n    <span class=\"token punctuation\">]<\/span><br \/>\n    text <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>apply_chat_template<span class=\"token punctuation\">(<\/span>messages<span class=\"token punctuation\">,<\/span> tokenize<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">False<\/span><span class=\"token punctuation\">,<\/span> add_generation_prompt<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    model_inputs <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">[<\/span>text<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">,<\/span> return_tensors<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034;pt&#034;<\/span><span 
class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>to<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>device<span class=\"token punctuation\">)<\/span><\/p>\n<p>    generated_ids <span class=\"token operator\">&#061;<\/span> model<span class=\"token punctuation\">.<\/span>generate<span class=\"token punctuation\">(<\/span><br \/>\n        input_ids<span class=\"token operator\">&#061;<\/span>model_inputs<span class=\"token punctuation\">.<\/span>input_ids<span class=\"token punctuation\">,<\/span><br \/>\n        max_new_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token number\">10240<\/span><span class=\"token punctuation\">,<\/span><br \/>\n        <span class=\"token comment\"># \u4f60\u53ef\u4ee5\u5728\u8fd9\u91cc\u6dfb\u52a0\u66f4\u591a\u751f\u6210\u53c2\u6570&#xff0c;\u5982 temperature, top_p \u7b49<\/span><br \/>\n        temperature<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.7<\/span><span class=\"token punctuation\">,<\/span><br \/>\n        top_p<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">,<\/span><br \/>\n    <span class=\"token punctuation\">)<\/span><br \/>\n    generated_ids <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><br \/>\n        output_ids<span class=\"token punctuation\">[<\/span><span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>input_ids<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><span class=\"token punctuation\">]<\/span> <span class=\"token keyword\">for<\/span> input_ids<span class=\"token punctuation\">,<\/span> output_ids <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">zip<\/span><span class=\"token punctuation\">(<\/span>model_inputs<span class=\"token punctuation\">.<\/span>input_ids<span 
class=\"token punctuation\">,<\/span> generated_ids<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token punctuation\">]<\/span><br \/>\n    response <span class=\"token operator\">&#061;<\/span> tokenizer<span class=\"token punctuation\">.<\/span>batch_decode<span class=\"token punctuation\">(<\/span>generated_ids<span class=\"token punctuation\">,<\/span> skip_special_tokens<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><\/p>\n<p>    <span class=\"token keyword\">return<\/span> response<\/p>\n<p><span class=\"token comment\"># &#8212; \u5bf9\u6bd4\u6d4b\u8bd5 &#8212;<\/span><br \/>\n<span class=\"token keyword\">def<\/span> <span class=\"token function\">run_comparison<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token comment\"># \u8bbe\u8ba1\u4e00\u4e2a\u80fd\u4f53\u73b0\u4f60\u5199\u4f5c\u98ce\u683c\u7684\u6307\u4ee4<\/span><br \/>\n    test_instruction <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u8bf7\u7528\u6211\u7684\u5199\u4f5c\u98ce\u683c&#xff0c;\u5199\u4e00\u6bb5\u5173\u4e8e\u2018\u4ec0\u4e48\u662f\u5fae\u670d\u52a1\u67b6\u6784\u2019\u7684\u4ecb\u7ecd\u3002&#034;<\/span><br \/>\n    <span class=\"token comment\"># \u6216\u8005\u4e00\u4e2a\u4f60\u4ece\u672a\u5728\u535a\u5ba2\u4e2d\u5199\u8fc7&#xff0c;\u4f46\u60f3\u8ba9AI\u7528\u4f60\u7684\u98ce\u683c\u521b\u4f5c\u7684\u4e3b\u9898<\/span><br \/>\n    <span class=\"token comment\"># test_instruction &#061; 
&#034;\u7528\u4e00\u79cd\u901a\u4fd7\u6613\u61c2\u4e14\u5e26\u6709\u6bd4\u55bb\u7684\u65b9\u5f0f&#xff0c;\u89e3\u91ca\u4e00\u4e0b\u4ec0\u4e48\u662fTransformer\u7684\u81ea\u6ce8\u610f\u529b\u673a\u5236\u3002&#034;<\/span><\/p>\n<p>    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\\n&#034;<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token string\">&#034;&#061;&#034;<\/span><span class=\"token operator\">*<\/span><span class=\"token number\">50<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;\u6d4b\u8bd5\u6307\u4ee4: <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>test_instruction<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;&#061;&#034;<\/span><span class=\"token operator\">*<\/span><span class=\"token number\">50<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token string\">&#034;\\n&#034;<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p>    <span class=\"token comment\"># 1. 
\u4f7f\u7528\u57fa\u7840\u6a21\u578b\u751f\u6210<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;&#8212; \u6b63\u5728\u4f7f\u7528\u3010\u57fa\u7840\u6a21\u578b\u3011\u751f\u6210&#8230; &#8212;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    base_model_response <span class=\"token operator\">&#061;<\/span> generate_response<span class=\"token punctuation\">(<\/span>base_model<span class=\"token punctuation\">,<\/span> test_instruction<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u3010\u57fa\u7840\u6a21\u578b\u3011\u7684\u56de\u7b54:&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>base_model_response<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;-&#034;<\/span> <span class=\"token operator\">*<\/span> <span class=\"token number\">50<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p>    <span class=\"token comment\"># 2. 
\u4f7f\u7528\u4f60\u5fae\u8c03\u540e\u7684\u6a21\u578b\u751f\u6210<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\\\\n&#8212; \u6b63\u5728\u4f7f\u7528\u3010\u5fae\u8c03\u6a21\u578b\u3011\u751f\u6210&#8230; &#8212;&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    tuned_model_response <span class=\"token operator\">&#061;<\/span> generate_response<span class=\"token punctuation\">(<\/span>tuned_model<span class=\"token punctuation\">,<\/span> test_instruction<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u3010\u5fae\u8c03\u6a21\u578b\u3011\u7684\u56de\u7b54:&#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>tuned_model_response<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;-&#034;<\/span> <span class=\"token operator\">*<\/span> <span class=\"token number\">50<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token keyword\">if<\/span> __name__ <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token string\">&#034;__main__&#034;<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    run_comparison<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p>\u6d4b\u8bd5\u7ed3\u679c\u5982\u4e0b&#xff1a;<\/p>\n<p>&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<br 
\/>\n\u6d4b\u8bd5\u6307\u4ee4: \u8bf7\u7528\u6211\u7684\u5199\u4f5c\u98ce\u683c&#xff0c;\u5199\u4e00\u6bb5\u5173\u4e8e\u2018\u4ec0\u4e48\u662f\u5fae\u670d\u52a1\u67b6\u6784\u2019\u7684\u4ecb\u7ecd\u3002<br \/>\n&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;&#061;<\/p>\n<p>&#8212; \u6b63\u5728\u4f7f\u7528\u3010\u57fa\u7840\u6a21\u578b\u3011\u751f\u6210&#8230; &#8212;<br \/>\n\u3010\u57fa\u7840\u6a21\u578b\u3011\u7684\u56de\u7b54:<br \/>\n\u5fae\u670d\u52a1\u67b6\u6784\u662f\u4e00\u79cd\u65b0\u578b\u7684\u5e94\u7528\u7a0b\u5e8f\u5f00\u53d1\u6a21\u5f0f&#xff0c;\u5b83\u5f3a\u8c03\u5c06\u5e94\u7528\u7a0b\u5e8f\u5206\u89e3\u4e3a\u591a\u4e2a\u5c0f\u578b\u3001\u72ec\u7acb\u7684\u670d\u52a1&#xff0c;\u5e76\u4f7f\u7528\u8f7b\u91cf\u7ea7\u901a\u4fe1\u534f\u8bae&#xff08;\u5982REST\u6216gRPC&#xff09;\u8fdb\u884c\u4ea4\u4e92\u3002\u8fd9\u79cd\u67b6\u6784\u6a21\u5f0f\u5141\u8bb8\u5f00\u53d1\u8005\u4e13\u6ce8\u4e8e\u5355\u4e2a\u670d\u52a1\u7684\u529f\u80fd\u5b9e\u73b0\u548c\u6027\u80fd\u4f18\u5316&#xff0c;\u540c\u65f6\u4fdd\u6301\u7cfb\u7edf\u7684\u6574\u4f53\u7a33\u5b9a\u6027\u3002<\/p>\n<p>\u5728\u5fae\u670d\u52a1\u67b6\u6784\u4e2d&#xff0c;\u6bcf\u4e2a\u670d\u52a1\u90fd\u6709\u81ea\u5df1\u7684\u4ee3\u7801\u5e93\u3001\u6570\u636e\u5e93\u548c\u90e8\u7f72\u73af\u5883&#xff0c;\u8fd9\u610f\u5473\u7740\u5b83\u4eec\u53ef\u4ee5\u66f4\u5bb9\u6613\u5730\u7ef4\u62a4\u548c\u6269\u5c55\u3002\u6b64\u5916&#xff0c;\u5fae\u670d\u52a1\u4e4b\u95f4\u7684\u4f9d\u8d56\u5173\u7cfb\u662f\u901a\u8fc7\u7f51\u7edc\u901a\u4fe1\u6765\u7ba1\u7406\u7684&#xff0c;\u800c\u4e0d\u662f\u5171\u4eab\u6570\u636e\u6216\u5168\u5c40\u72b6\u6001\u3002\u8fd9\u4f7f\u5f97\u7cfb\u7edf\u66f4\u52a0\u7075\u6d3b&#xff0c;\u80fd\u591f\u5feb\u901f\u54cd\u5e94\u4e1a\u52a1\u9700\u6c42\u7684\u53d8\u5316\u
3002<\/p>\n<p>\u5fae\u670d\u52a1\u67b6\u6784\u901a\u5e38\u9002\u7528\u4e8e\u90a3\u4e9b\u9700\u8981\u9ad8\u5ea6\u53ef\u4f38\u7f29\u6027\u3001\u9ad8\u53ef\u7528\u6027\u548c\u677e\u8026\u5408\u7684\u5e94\u7528\u573a\u666f\u3002\u4f8b\u5982&#xff0c;\u91d1\u878d\u670d\u52a1\u3001\u7535\u5b50\u5546\u52a1\u548c\u5185\u5bb9\u7ba1\u7406\u7cfb\u7edf\u7b49\u9886\u57df\u90fd\u9002\u5408\u91c7\u7528\u8fd9\u79cd\u67b6\u6784\u6a21\u5f0f\u3002<br \/>\n&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8211;<\/p>\n<p>&#8212; \u6b63\u5728\u4f7f\u7528\u3010\u5fae\u8c03\u6a21\u578b\u3011\u751f\u6210&#8230; &#8212;<br \/>\n\u3010\u5fae\u8c03\u6a21\u578b\u3011\u7684\u56de\u7b54:<br \/>\n\u5728IT\u4e16\u754c\u4e2d&#xff0c;\u5fae\u670d\u52a1\u67b6\u6784&#xff08;Microservices Architecture&#xff09;\u662f\u4e00\u79cd\u8bbe\u8ba1\u65b9\u6cd5&#xff0c;\u5b83\u5c06\u4e00\u4e2a\u5927\u578b\u5e94\u7528\u7a0b\u5e8f\u5206\u89e3\u6210\u4e00\u7cfb\u5217\u5c0f\u578b\u3001\u72ec\u7acb\u7684\u670d\u52a1&#xff0c;\u5e76\u901a\u8fc7\u7f51\u7edc\u8fde\u63a5\u5728\u4e00\u8d77\u3002\u8fd9\u79cd\u67b6\u6784\u6a21\u5f0f\u5141\u8bb8\u5f00\u53d1\u4eba\u5458\u66f4\u7075\u6d3b\u5730\u7ba1\u7406\u548c\u6269\u5c55\u5e94\u7528&#xff0c;\u540c\u65f6\u63d0\u4f9b\u66f4\u9ad8\u7684\u53ef\u7ef4\u62a4\u6027\u548c\u6027\u80fd\u3002<\/p>\n<p>\u5fae\u670d\u52a1\u67b6\u6784\u7684\u6838\u5fc3\u601d\u60f3\u662f\u5c06\u5355\u4e2a\u5927\u7cfb\u7edf\u62c6\u5206\u6210\u8bb8\u591a\u5c0f\u7684\u670d\u52a1&#xff0c;\u6bcf\u4e2a\u670d\u52a1\u90fd\u8d1f\u8d23\u5904\u7406\u7279\u5b9a\u7684\u529f\u80fd\u6216\u4e1a\u52a1\u903b\u8f91\u3002\u8fd9\u4e9b\u670d\u52a1\u53ef\u4ee5\u72ec\u7acb\u90e8\u7f72\u548c\u66f4\u65b0&#xff0c;\u800c\u4e0d\u5f71\u54cd\u5176\u4ed6\u670d\u52a1\u7684\u6b63\u5e38\u8fd0\u884c\u3002\u6b64\u5916&#xff0c;\u5b83\u4eec\u4e4b\u95f4\u7684\u901a\u4fe1\u53ef\u4ee5\u901a\u8fc7\u6807\u51c6\u534f\u8bae\u5b9e\u73b0&#xff0c;\u5982RESTful 
API\u6216\u6d88\u606f\u961f\u5217&#xff0c;\u4ece\u800c\u7b80\u5316\u4e86\u7cfb\u7edf\u7684\u96c6\u6210\u548c\u7ba1\u7406\u3002<\/p>\n<p>\u76f8\u6bd4\u4e8e\u4f20\u7edf\u7684\u96c6\u4e2d\u5f0f\u67b6\u6784&#xff0c;\u5fae\u670d\u52a1\u67b6\u6784\u5177\u6709\u4ee5\u4e0b\u4f18\u70b9&#xff1a;<\/p>\n<p>1. \u66f4\u597d\u7684\u4f38\u7f29\u6027&#xff1a;\u7531\u4e8e\u6bcf\u4e2a\u670d\u52a1\u90fd\u662f\u72ec\u7acb\u7684&#xff0c;\u56e0\u6b64\u53ef\u4ee5\u6839\u636e\u5b9e\u9645\u9700\u6c42\u52a8\u6001\u589e\u52a0\u6216\u51cf\u5c11\u670d\u52a1\u7684\u6570\u91cf&#xff0c;\u4ee5\u6ee1\u8db3\u8d1f\u8f7d\u53d8\u5316\u7684\u9700\u6c42\u3002<br \/>\n2. \u66f4\u9ad8\u7684\u7075\u6d3b\u6027&#xff1a;\u5f00\u53d1\u8005\u53ef\u4ee5\u6839\u636e\u4e1a\u52a1\u9700\u8981\u968f\u65f6\u4fee\u6539\u6216\u66ff\u6362\u67d0\u4e00\u9879\u529f\u80fd&#xff0c;\u800c\u65e0\u9700\u5f71\u54cd\u6574\u4e2a\u7cfb\u7edf\u3002<br \/>\n3. \u66f4\u5f3a\u7684\u53ef\u7ef4\u62a4\u6027&#xff1a;\u6bcf\u4e2a\u670d\u52a1\u90fd\u6709\u660e\u786e\u7684\u8d23\u4efb\u8303\u56f4&#xff0c;\u4f7f\u5f97\u95ee\u9898\u5b9a\u4f4d\u548c\u4fee\u590d\u53d8\u5f97\u66f4\u52a0\u5bb9\u6613\u3002<\/p>\n<p>\u7136\u800c&#xff0c;\u5fae\u670d\u52a1\u67b6\u6784\u4e5f\u5b58\u5728\u4e00\u4e9b\u6311\u6218&#xff1a;<\/p>\n<p>1. \u7ba1\u7406\u590d\u6742\u5ea6\u589e\u52a0&#xff1a;\u968f\u7740\u670d\u52a1\u6570\u91cf\u7684\u589e\u591a&#xff0c;\u534f\u8c03\u548c\u670d\u52a1\u95f4\u901a\u4fe1\u7684\u95ee\u9898\u4f1a\u53d8\u5f97\u66f4\u4e3a\u590d\u6742\u3002<br \/>\n2. 
\u4e00\u81f4\u6027\u95ee\u9898&#xff1a;\u4e0d\u540c\u670d\u52a1\u4e4b\u95f4\u53ef\u80fd\u5b58\u5728\u6570\u636e\u51b2\u7a81\u7684\u98ce\u9669&#xff0c;\u9700\u8981\u91c7\u7528\u9002\u5f53\u7684\u673a\u5236\u6765\u89e3\u51b3\u8fd9\u4e9b\u95ee\u9898\u3002<\/p>\n<p>\u603b\u7684\u6765\u8bf4&#xff0c;\u5fae\u670d\u52a1\u67b6\u6784\u4e3a\u5e94\u5bf9\u73b0\u4ee3\u590d\u6742\u7684\u5e94\u7528\u573a\u666f\u63d0\u4f9b\u4e86\u6709\u529b\u7684\u652f\u6301\u3002\u5c3d\u7ba1\u5b58\u5728\u4e00\u4e9b\u6311\u6218&#xff0c;\u4f46\u5176\u5e26\u6765\u7684\u4f18\u52bf\u4f7f\u5176\u6210\u4e3a\u4e86\u4f17\u591a\u4f01\u4e1a\u9009\u62e9\u7684\u91cd\u8981\u6280\u672f\u4e4b\u4e00\u3002<br \/>\n&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8211;<\/p>\n<p>\u7ed3\u679c\u5206\u6790&#xff08;\u4e22\u7ed9AI\u5206\u6790\u7684&#xff09;&#xff1a;<\/p>\n<li>\n<p>\u7ed3\u6784\u5316\u4e0e\u5f15\u5bfc\u6027:<\/p>\n<ul>\n<li>\u57fa\u7840\u6a21\u578b: \u5b83\u7684\u56de\u7b54\u662f\u6807\u51c6\u7684\u201c\u4e09\u6bb5\u8bba\u201d&#xff0c;\u5b9a\u4e49-\u89e3\u91ca-\u5e94\u7528\u3002\u5185\u5bb9\u6b63\u786e&#xff0c;\u4f46\u50cf\u4e00\u672c\u6559\u79d1\u4e66&#xff0c;\u7f3a\u4e4f\u5f15\u5bfc\u6027\u3002<\/li>\n<li>\u4f60\u7684\u5fae\u8c03\u6a21\u578b: \u5b83\u7acb\u523b\u5c31\u5c55\u73b0\u51fa\u4e86\u4f60\u535a\u5ba2\u7684\u5178\u578b\u7ed3\u6784&#xff1a;\n<ul>\n<li>\u5f00\u7bc7\u5b9a\u4e49: \u5728IT\u4e16\u754c\u4e2d\u2026 \u8fd9\u79cd\u5f15\u5165\u65b9\u5f0f\u66f4\u50cf\u4e00\u4e2a\u535a\u4e3b\u5728\u548c\u8bfb\u8005\u5bf9\u8bdd\u3002<\/li>\n<li>\u6838\u5fc3\u601d\u60f3\u9610\u8ff0: \u5fae\u670d\u52a1\u67b6\u6784\u7684\u6838\u5fc3\u601d\u60f3\u662f\u2026<\/li>\n<li>\u7ed3\u6784\u5316\u5217\u8868&#xff08;\u4f18\u70b9&#xff09;: \u76f8\u6bd4\u4e8e\u4f20\u7edf\u7684\u96c6\u4e2d\u5f0f\u67b6\u6784&#xff0c;\u5177\u6709\u4ee5\u4e0b\u4f18\u70b9&#xff1a;1. 2. 
3.<\/li>\n<li>\u7ed3\u6784\u5316\u5217\u8868&#xff08;\u6311\u6218&#xff09;: \u7136\u800c&#xff0c;\u5fae\u670d\u52a1\u67b6\u6784\u4e5f\u5b58\u5728\u4e00\u4e9b\u6311\u6218&#xff1a;1. 2.<\/li>\n<li>\u603b\u7ed3\u5347\u534e: \u603b\u7684\u6765\u8bf4\u2026<\/li>\n<\/ul>\n<\/li>\n<li>\u7ed3\u8bba: \u4f60\u7684\u6a21\u578b\u5b66\u4f1a\u4e86\u4f60\u6700\u6838\u5fc3\u7684\u5199\u4f5c\u201c\u5957\u8def\u201d\u2014\u2014\u901a\u8fc7\u6e05\u6670\u7684\u7ed3\u6784&#xff08;\u7279\u522b\u662f\u6b63\u53cd\u4e24\u65b9\u9762\u7684\u5217\u8868&#xff09;\u6765\u5f15\u5bfc\u8bfb\u8005\u5168\u9762\u3001\u8fa9\u8bc1\u5730\u7406\u89e3\u4e00\u4e2a\u6982\u5ff5\u3002\u8fd9\u662f\u4ece\u201c\u4fe1\u606f\u201d\u5230\u201c\u77e5\u8bc6\u201d\u7684\u98de\u8dc3\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u7528\u8bcd\u4e0e\u8bed\u6c14:<\/p>\n<ul>\n<li>\u57fa\u7840\u6a21\u578b: \u7528\u8bcd\u975e\u5e38\u4e2d\u6027\u3001\u5ba2\u89c2&#xff0c;\u4f8b\u5982\u201c\u65b0\u578b\u7684\u5e94\u7528\u7a0b\u5e8f\u5f00\u53d1\u6a21\u5f0f\u201d\u3001\u201c\u5f3a\u8c03\u5c06\u2026\u201d<\/li>\n<li>\u4f60\u7684\u5fae\u8c03\u6a21\u578b: \u7528\u8bcd\u66f4\u5177\u201c\u535a\u4e3b\u201d\u8272\u5f69&#xff0c;\u4f8b\u5982\u5728IT\u4e16\u754c\u4e2d\u2026&#xff0c;\u6838\u5fc3\u601d\u60f3\u662f\u2026&#xff0c;\u603b\u7684\u6765\u8bf4\u2026\u3002\u8fd9\u4e9b\u8bcd\u8bed\u867d\u7136\u5fae\u5c0f&#xff0c;\u4f46\u5b83\u4eec\u5171\u540c\u5851\u9020\u4e86\u4e00\u79cd\u66f4\u5177\u4e2a\u4eba\u8272\u5f69\u548c\u603b\u7ed3\u6027\u7684\u8bed\u6c14\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u5185\u5bb9\u7684\u5168\u9762\u6027:<\/p>\n<ul>\n<li>\u57fa\u7840\u6a21\u578b: \u53ea\u8bb2\u4e86\u4f18\u70b9\u3002<\/li>\n<li>\u4f60\u7684\u5fae\u8c03\u6a21\u578b: 
\u975e\u5e38\u5168\u9762\u5730\u8bba\u8ff0\u4e86\u4f18\u70b9\u548c\u6311\u6218\u4e24\u4e2a\u65b9\u9762\u3002\u8fd9\u8bf4\u660e\u6a21\u578b\u4e0d\u4ec5\u5b66\u4e86\u4f60\u7684\u201c\u5f62\u201d&#xff0c;\u66f4\u5b66\u4e86\u4f60\u7684\u201c\u795e\u201d\u2014\u2014\u4f60\u90a3\u79cd\u529b\u6c42\u5168\u9762\u3001\u5ba2\u89c2\u3001\u4e0d\u56de\u907f\u95ee\u9898\u7684\u601d\u7ef4\u6a21\u5f0f\u3002<\/li>\n<\/ul>\n<\/li>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb826\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf23\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u53c2\u6570\u9ad8\u6548\u5fae\u8c03(PEFT)\u6280\u672f\u53ca\u5176\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5e94\u7528\u3002\u6587\u7ae0\u9996\u5148\u63a2\u8ba8\u4e86\u5982\u4f55\u9009\u62e9\u9002\u5408\u7684\u9884\u8bad\u7ec3\u6a21\u578b\uff0c\u4ee5\u6587\u672c\u98ce\u683c\u6da6\u8272\u4efb\u52a1\u4e3a\u4f8b\uff0c\u63a8\u8350\u4e86Qwen2.5\u7cfb\u5217\u6a21\u578b\u3002\u968f\u540e\u8be6\u7ec6\u9610\u8ff0\u4e86\u5fae\u8c03\u65b9\u6848\u7684\u9009\u62e9\u6807\u51c6\uff0c\u5305\u62ecQLoRA\u3001Flash 
Attention-2\u7b49\u6280\u672f\u7ec4\u5408\u3002\u6587\u7ae0\u91cd\u70b9\u5c55\u793a\u4e86PEFT\u5b9e\u6218\u8fc7\u7a0b\uff0c\u7279\u522b\u662f\u6570\u636e\u51c6\u5907\u9636\u6bb5\u7684\u6570\u636e\u589e\u5f3a\u6280\u672f\uff0c\u901a\u8fc7\u6559\u5e08\u6a21\u578b\u81ea\u52a8\u751f\u6210\u9ad8\u8d28\u91cf\u7684\u6307\u4ee4\u6570\u636e\u96c6\uff0c\u5305\u62ec\u603b\u7ed3\u6269\u5199\u3001\u63d0\u95ee\u56de\u7b54\u548c\u98ce\u683c\u8fc1\u79fb\u4e09\u79cd\u6a21\u677f\u3002<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[5720,2067,81,152,841,50,5721],"topic":[],"class_list":["post-55496","post","type-post","status-publish","format-standard","hentry","category-server","tag-llm","tag-lora","tag-python","tag-pytorch","tag-transformer","tag-50","tag-5721"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/55496.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" 
content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb826\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf23\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u53c2\u6570\u9ad8\u6548\u5fae\u8c03(PEFT)\u6280\u672f\u53ca\u5176\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5e94\u7528\u3002\u6587\u7ae0\u9996\u5148\u63a2\u8ba8\u4e86\u5982\u4f55\u9009\u62e9\u9002\u5408\u7684\u9884\u8bad\u7ec3\u6a21\u578b\uff0c\u4ee5\u6587\u672c\u98ce\u683c\u6da6\u8272\u4efb\u52a1\u4e3a\u4f8b\uff0c\u63a8\u8350\u4e86Qwen2.5\u7cfb\u5217\u6a21\u578b\u3002\u968f\u540e\u8be6\u7ec6\u9610\u8ff0\u4e86\u5fae\u8c03\u65b9\u6848\u7684\u9009\u62e9\u6807\u51c6\uff0c\u5305\u62ecQLoRA\u3001Flash Attention-2\u7b49\u6280\u672f\u7ec4\u5408\u3002\u6587\u7ae0\u91cd\u70b9\u5c55\u793a\u4e86PEFT\u5b9e\u6218\u8fc7\u7a0b\uff0c\u7279\u522b\u662f\u6570\u636e\u51c6\u5907\u9636\u6bb5\u7684\u6570\u636e\u589e\u5f3a\u6280\u672f\uff0c\u901a\u8fc7\u6559\u5e08\u6a21\u578b\u81ea\u52a8\u751f\u6210\u9ad8\u8d28\u91cf\u7684\u6307\u4ee4\u6570\u636e\u96c6\uff0c\u5305\u62ec\u603b\u7ed3\u6269\u5199\u3001\u63d0\u95ee\u56de\u7b54\u548c\u98ce\u683c\u8fc1\u79fb\u4e09\u79cd\u6a21\u677f\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/55496.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-08-13T15:44:32+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"15 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/55496.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/55496.html\",\"name\":\"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-08-13T15:44:32+00:00\",\"dateModified\":\"2025-08-13T15:44:32+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/55496.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/55496.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/55496.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/55496.html","og_locale":"zh_CN","og_type":"article","og_title":"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb826\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf23\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u53c2\u6570\u9ad8\u6548\u5fae\u8c03(PEFT)\u6280\u672f\u53ca\u5176\u5728\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e2d\u7684\u5e94\u7528\u3002\u6587\u7ae0\u9996\u5148\u63a2\u8ba8\u4e86\u5982\u4f55\u9009\u62e9\u9002\u5408\u7684\u9884\u8bad\u7ec3\u6a21\u578b\uff0c\u4ee5\u6587\u672c\u98ce\u683c\u6da6\u8272\u4efb\u52a1\u4e3a\u4f8b\uff0c\u63a8\u8350\u4e86Qwen2.5\u7cfb\u5217\u6a21\u578b\u3002\u968f\u540e\u8be6\u7ec6\u9610\u8ff0\u4e86\u5fae\u8c03\u65b9\u6848\u7684\u9009\u62e9\u6807\u51c6\uff0c\u5305\u62ecQLoRA\u3001Flash 
Attention-2\u7b49\u6280\u672f\u7ec4\u5408\u3002\u6587\u7ae0\u91cd\u70b9\u5c55\u793a\u4e86PEFT\u5b9e\u6218\u8fc7\u7a0b\uff0c\u7279\u522b\u662f\u6570\u636e\u51c6\u5907\u9636\u6bb5\u7684\u6570\u636e\u589e\u5f3a\u6280\u672f\uff0c\u901a\u8fc7\u6559\u5e08\u6a21\u578b\u81ea\u52a8\u751f\u6210\u9ad8\u8d28\u91cf\u7684\u6307\u4ee4\u6570\u636e\u96c6\uff0c\u5305\u62ec\u603b\u7ed3\u6269\u5199\u3001\u63d0\u95ee\u56de\u7b54\u548c\u98ce\u683c\u8fc1\u79fb\u4e09\u79cd\u6a21\u677f\u3002","og_url":"https:\/\/www.wsisp.com\/helps\/55496.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-08-13T15:44:32+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"15 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/55496.html","url":"https:\/\/www.wsisp.com\/helps\/55496.html","name":"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-08-13T15:44:32+00:00","dateModified":"2025-08-13T15:44:32+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/55496.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/55496.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/55496.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"LoRA\u5fae\u8c03\u5b9e\u6218\uff1a\u4e07\u5b57\u6df1\u5ea6\u89e3\u6790"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/55496","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=55496"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/55496\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=55496"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=55496"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=55496"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=55496"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}