{"id":77041,"date":"2026-02-23T22:10:46","date_gmt":"2026-02-23T14:10:46","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/77041.html"},"modified":"2026-02-23T22:10:46","modified_gmt":"2026-02-23T14:10:46","slug":"agent-%e5%b7%a5%e4%bd%9c%e6%b5%81%e8%87%aa%e6%88%91%e8%bf%9b%e5%8c%96%ef%bc%9a%e5%bc%ba%e5%8c%96%e5%ad%a6%e4%b9%a0%e5%a6%82%e4%bd%95%e8%ae%a9%e6%99%ba%e8%83%bd%e4%bd%93%e8%87%aa%e5%8a%a8%e5%af%bb","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/77041.html","title":{"rendered":"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP"},"content":{"rendered":"<h2>1 \u5f15\u8a00&#xff1a;\u4ece\u9759\u6001\u7f16\u6392\u5230\u52a8\u6001\u6f14\u5316\u7684\u8303\u5f0f\u8dc3\u8fc1<\/h2>\n<h3>1.1 \u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u6f14\u8fdb\u8109\u7edc<\/h3>\n<p>\u4eba\u5de5\u667a\u80fd\u9886\u57df\u6b63\u7ecf\u5386\u7740\u4ece\u5355\u4e00\u6a21\u578b\u5230\u590d\u5408\u667a\u80fd\u7cfb\u7edf\u7684\u6df1\u523b\u53d8\u9769\u3002\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;Large Language Models, LLMs&#xff09;\u7684\u7a81\u7834\u6027\u8fdb\u5c55\u4e3a\u6784\u5efa\u901a\u7528\u667a\u80fd\u4f53&#xff08;Agent&#xff09;\u5960\u5b9a\u4e86\u575a\u5b9e\u57fa\u7840&#xff0c;\u7136\u800c&#xff0c;\u5982\u4f55\u4f7f\u8fd9\u4e9b\u667a\u80fd\u4f53\u5728\u590d\u6742\u591a\u53d8\u7684\u73af\u5883\u4e2d\u6301\u7eed\u4f18\u5316\u5176\u884c\u4e3a\u7b56\u7565&#xff0c;\u6210\u4e3a\u5f53\u524d\u7814\u7a76\u7684\u6838\u5fc3\u6311\u6218\u3002\u4f20\u7edf\u7684\u624b\u52a8\u8bbe\u8ba1\u5de5\u4f5c\u6d41&#xff08;Workflow&#xff09;\u548c\u6807\u51c6\u64cd\u4f5c\u7a0b\u5e8f&#xff08;Standard Operating Procedures, 
SOP&#xff09;\u867d\u7136\u5728\u7279\u5b9a\u573a\u666f\u4e0b\u8868\u73b0\u7a33\u5b9a&#xff0c;\u4f46\u9762\u5bf9\u5f00\u653e\u57df\u4efb\u52a1\u7684\u591a\u6837\u6027\u4e0e\u52a8\u6001\u6027\u65f6&#xff0c;\u5176\u5c40\u9650\u6027\u65e5\u76ca\u51f8\u663e[1]\u3002<\/p>\n<p>\u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u7814\u7a76\u7ecf\u5386\u4e86\u4e09\u4e2a\u4e3b\u8981\u53d1\u5c55\u9636\u6bb5\u3002\u7b2c\u4e00\u9636\u6bb5\u4ee5\u89c4\u5219\u9a71\u52a8\u4e3a\u6838\u5fc3&#xff0c;\u7814\u7a76\u8005\u901a\u8fc7\u9884\u5b9a\u4e49\u7684\u6761\u4ef6-\u52a8\u4f5c\u89c4\u5219&#xff08;If-Then Rules&#xff09;\u6784\u5efa\u786e\u5b9a\u6027\u6d41\u7a0b\u3002\u8fd9\u79cd\u65b9\u6cd5\u7684\u53ef\u89e3\u91ca\u6027\u5f3a&#xff0c;\u4f46\u7f3a\u4e4f\u9002\u5e94\u6027&#xff0c;\u96be\u4ee5\u5904\u7406\u8fb9\u754c\u60c5\u51b5\u3002\u7b2c\u4e8c\u9636\u6bb5\u5f15\u5165\u4e86\u57fa\u4e8e\u68c0\u7d22\u7684\u589e\u5f3a\u751f\u6210&#xff08;Retrieval-Augmented Generation, RAG&#xff09;\u6280\u672f&#xff0c;\u4f7f\u667a\u80fd\u4f53\u80fd\u591f\u5229\u7528\u5916\u90e8\u77e5\u8bc6\u5e93\u52a8\u6001\u8c03\u6574\u54cd\u5e94&#xff0c;\u4f46\u5de5\u4f5c\u6d41\u7ed3\u6784\u672c\u8eab\u4ecd\u4fdd\u6301\u9759\u6001[2]\u3002\u5f53\u524d\u6b63\u5904\u4e8e\u7b2c\u4e09\u9636\u6bb5&#xff0c;\u5373\u81ea\u6211\u8fdb\u5316&#xff08;Self-Evolution&#xff09;\u9636\u6bb5&#xff0c;\u667a\u80fd\u4f53\u901a\u8fc7\u4e0e\u73af\u5883\u7684\u6301\u7eed\u4ea4\u4e92&#xff0c;\u5229\u7528\u5f3a\u5316\u5b66\u4e60&#xff08;Reinforcement Learning, RL&#xff09;\u7b97\u6cd5\u81ea\u52a8\u4f18\u5316\u5176\u5de5\u4f5c\u6d41\u7ed3\u6784&#xff0c;\u4ece\u800c\u5b9e\u73b0\u4ece&#034;\u88ab\u52a8\u6267\u884c&#034;\u5230&#034;\u4e3b\u52a8\u5b66\u4e60&#034;\u7684\u8d28\u53d8[3]\u3002<\/p>\n<p>\u81ea\u6211\u8fdb\u5316\u667a\u80fd\u4f53\u7684\u6838\u5fc3\u7279\u5f81\u5728\u4e8e\u5176\u5177\u5907\u5143\u8ba4\u77e5\u80fd\u529b&#xff08;Metacognitive 
Capability&#xff09;&#xff0c;\u80fd\u591f\u53cd\u601d\u81ea\u8eab\u51b3\u7b56\u8fc7\u7a0b\u3001\u8bc6\u522b\u5931\u8d25\u6a21\u5f0f&#xff0c;\u5e76\u636e\u6b64\u8c03\u6574\u672a\u6765\u884c\u4e3a\u7b56\u7565\u3002\u8fd9\u79cd\u80fd\u529b\u4f7f\u5f97\u667a\u80fd\u4f53\u4e0d\u518d\u4f9d\u8d56\u4eba\u5de5\u9884\u8bbe\u7684\u56fa\u5b9aSOP&#xff0c;\u800c\u662f\u80fd\u591f\u5728\u4efb\u52a1\u6267\u884c\u8fc7\u7a0b\u4e2d\u52a8\u6001\u53d1\u73b0\u3001\u9a8c\u8bc1\u5e76\u56fa\u5316\u6700\u4f18\u64cd\u4f5c\u5e8f\u5217\u3002\u7814\u7a76\u8868\u660e&#xff0c;\u7ecf\u8fc7\u591a\u8f6e\u8fed\u4ee3\u81ea\u6211\u8fdb\u5316\u7684\u667a\u80fd\u4f53&#xff0c;\u5728\u590d\u6742\u63a8\u7406\u4efb\u52a1\u4e0a\u7684\u6210\u529f\u7387\u53ef\u63d0\u534740%\u4ee5\u4e0a[4]\u3002<\/p>\n<h3>1.2 \u5f3a\u5316\u5b66\u4e60\u8d4b\u80fd\u5de5\u4f5c\u6d41\u4f18\u5316\u7684\u6838\u5fc3\u673a\u5236<\/h3>\n<p>\u5f3a\u5316\u5b66\u4e60\u4e3a\u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u81ea\u6211\u8fdb\u5316\u63d0\u4f9b\u4e86\u7406\u8bba\u6846\u67b6\u548c\u7b97\u6cd5\u5de5\u5177\u3002\u4e0e\u4f20\u7edf\u76d1\u7763\u5b66\u4e60\u4e0d\u540c&#xff0c;\u5f3a\u5316\u5b66\u4e60\u901a\u8fc7\u4e0e\u73af\u5883\u7684\u4ea4\u4e92\u5b66\u4e60\u6700\u4f18\u7b56\u7565&#xff0c;\u65e0\u9700\u5927\u91cf\u6807\u6ce8\u6570\u636e&#xff0c;\u8fd9\u4e00\u7279\u6027\u4f7f\u5176\u7279\u522b\u9002\u5408\u5f00\u653e\u57df\u4efb\u52a1\u573a\u666f\u3002\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u95ee\u9898\u4e2d&#xff0c;\u72b6\u6001\u7a7a\u95f4\u5bf9\u5e94\u4e8e\u4efb\u52a1\u6267\u884c\u7684\u4e2d\u95f4\u72b6\u6001&#xff0c;\u52a8\u4f5c\u7a7a\u95f4\u5bf9\u5e94\u4e8e\u53ef\u6267\u884c\u7684\u64cd\u4f5c&#xff08;\u5982\u5de5\u5177\u8c03\u7528\u3001\u63a8\u7406\u6b65\u9aa4\u3001\u4fe1\u606f\u68c0\u7d22\u7b49&#xff09;&#xff0c;\u800c\u5956\u52b1\u4fe1\u53f7\u5219\u6765\u6e90\u4e8e\u4efb\u52a1\u5b8c\u6210\u8d28\u91cf\u3001\u6267\u884c\u6548\u7387\u7b49\u591a\u7ef4\u5ea6\u53cd\u9988[5]\u3002<\/p>\n<p>\u5c06\u5de5\u4f5c\u6d41\u4f18\u5316\u5f62\u5f0f\u5316\u4e3a\u5f3a\
u5316\u5b66\u4e60\u95ee\u9898\u9762\u4e34\u4e09\u4e2a\u6838\u5fc3\u6311\u6218\u3002\u9996\u5148\u662f\u957f\u7a0b\u4fe1\u7528\u5206\u914d&#xff08;Long-Term Credit Assignment&#xff09;\u95ee\u9898&#xff1a;\u5de5\u4f5c\u6d41\u901a\u5e38\u5305\u542b\u6570\u5341\u751a\u81f3\u4e0a\u767e\u4e2a\u6b65\u9aa4&#xff0c;\u5982\u4f55\u51c6\u786e\u8bc4\u4f30\u6bcf\u4e2a\u4e2d\u95f4\u6b65\u9aa4\u5bf9\u6700\u7ec8\u7ed3\u679c\u7684\u8d21\u732e\u81f3\u5173\u91cd\u8981\u3002\u5176\u6b21\u662f\u7a00\u758f\u5956\u52b1&#xff08;Sparse Reward&#xff09;\u95ee\u9898&#xff1a;\u53ea\u6709\u5728\u4efb\u52a1\u5b8c\u6210\u65f6\u624d\u80fd\u83b7\u5f97\u660e\u786e\u7684\u5956\u52b1\u4fe1\u53f7&#xff0c;\u4e2d\u95f4\u6b65\u9aa4\u7f3a\u4e4f\u5373\u65f6\u53cd\u9988\u3002\u7b2c\u4e09\u662f\u7ec4\u5408\u7206\u70b8&#xff08;Combinatorial Explosion&#xff09;\u95ee\u9898&#xff1a;\u968f\u7740\u5de5\u4f5c\u6d41\u590d\u6742\u5ea6\u7684\u589e\u52a0&#xff0c;\u53ef\u80fd\u7684\u64cd\u4f5c\u5e8f\u5217\u5448\u6307\u6570\u7ea7\u589e\u957f&#xff0c;\u6709\u6548\u63a2\u7d22\u53d8\u5f97\u6781\u4e3a\u56f0\u96be[6]\u3002<\/p>\n<p>\u9488\u5bf9\u8fd9\u4e9b\u6311\u6218&#xff0c;\u7814\u7a76\u8005\u63d0\u51fa\u4e86\u591a\u79cd\u521b\u65b0\u89e3\u51b3\u65b9\u6848\u3002\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b&#xff08;Process Reward Model, PRM&#xff09;\u901a\u8fc7\u5728\u6bcf\u4e00\u6b65\u63d0\u4f9b\u7ec6\u7c92\u5ea6\u7684\u8bc4\u4f30\u4fe1\u53f7&#xff0c;\u6709\u6548\u7f13\u89e3\u4e86\u4fe1\u7528\u5206\u914d\u95ee\u9898[7]\u3002Group Relative Policy Optimization&#xff08;GRPO&#xff09;\u7b49\u7b97\u6cd5\u901a\u8fc7\u7ec4\u5185\u76f8\u5bf9\u4f18\u52bf\u4f30\u8ba1&#xff0c;\u964d\u4f4e\u4e86\u5bf9\u4ef7\u503c\u51fd\u6570\u7684\u4f9d\u8d56&#xff0c;\u63d0\u5347\u4e86\u8bad\u7ec3\u7a33\u5b9a\u6027[8]\u3002\u5c42\u6b21\u5316\u5f3a\u5316\u5b66\u4e60&#xff08;Hierarchical Reinforcement 
Learning&#xff09;\u5219\u901a\u8fc7\u5c06\u590d\u6742\u4efb\u52a1\u5206\u89e3\u4e3a\u5b50\u76ee\u6807\u5e8f\u5217&#xff0c;\u6709\u6548\u5e94\u5bf9\u4e86\u7ec4\u5408\u7206\u70b8\u6311\u6218[9]\u3002<\/p>\n<h3>1.3 \u7814\u7a76\u610f\u4e49\u4e0e\u5e94\u7528\u524d\u666f<\/h3>\n<p>Agent\u5de5\u4f5c\u6d41\u7684\u81ea\u6211\u8fdb\u5316\u6280\u672f\u5177\u6709\u6df1\u8fdc\u7684\u7406\u8bba\u610f\u4e49\u548c\u5e7f\u9614\u7684\u5e94\u7528\u524d\u666f\u3002\u4ece\u7406\u8bba\u5c42\u9762\u770b&#xff0c;\u8fd9\u4e00\u7814\u7a76\u65b9\u5411\u67b6\u8d77\u4e86\u8ba4\u77e5\u79d1\u5b66\u3001\u63a7\u5236\u8bba\u4e0e\u4eba\u5de5\u667a\u80fd\u4e4b\u95f4\u7684\u6865\u6881\u3002\u667a\u80fd\u4f53\u901a\u8fc7\u8bd5\u9519\u5b66\u4e60\u4f18\u5316\u884c\u4e3a\u7b56\u7565\u7684\u8fc7\u7a0b&#xff0c;\u4e0e\u4eba\u7c7b\u6280\u80fd\u4e60\u5f97\u7684\u673a\u5236\u9ad8\u5ea6\u76f8\u4f3c&#xff0c;\u4e3a\u7406\u89e3\u667a\u80fd\u7684\u672c\u8d28\u63d0\u4f9b\u4e86\u65b0\u7684\u89c6\u89d2[10]\u3002\u4ece\u5e94\u7528\u5c42\u9762\u770b&#xff0c;\u81ea\u6211\u8fdb\u5316Agent\u80fd\u591f\u663e\u8457\u964d\u4f4e\u4eba\u5de5\u8bbe\u8ba1\u5de5\u4f5c\u6d41\u7684\u6210\u672c&#xff0c;\u63d0\u5347\u7cfb\u7edf\u5728\u52a8\u6001\u73af\u5883\u4e2d\u7684\u9002\u5e94\u80fd\u529b\u3002<\/p>\n<p>\u5728\u5b9e\u9645\u5e94\u7528\u573a\u666f\u4e2d&#xff0c;\u81ea\u6211\u8fdb\u5316Agent\u5df2\u5c55\u73b0\u51fa\u5de8\u5927\u6f5c\u529b\u3002\u5728\u8f6f\u4ef6\u5f00\u53d1\u9886\u57df&#xff0c;\u80fd\u591f\u81ea\u4e3b\u89c4\u5212\u3001\u7f16\u7801\u3001\u6d4b\u8bd5\u5e76\u4fee\u590d\u7f3a\u9677\u7684Agent\u6b63\u5728\u6539\u53d8\u4f20\u7edf\u7684\u5f00\u53d1\u6a21\u5f0f[11]\u3002\u5728\u79d1\u5b66\u7814\u7a76\u4e2d&#xff0c;\u5177\u5907\u81ea\u4e3b\u5047\u8bbe\u751f\u6210\u3001\u5b9e\u9a8c\u8bbe\u8ba1\u548c\u6570\u636e\u5206\u6790\u80fd\u529b\u7684Agent\u52a0\u901f\u4e86\u53d1\u73b0\u8fdb\u7a0b[12]\u3002\u5728\u5ba2\u6237\u670d\u52a1\u9886\u57df&#xff0c;\u80fd\u591f\u6839\u636e\u5bf9\u8bdd\u4e0a\u4e0b\u6587\u52a8\u6001\u8c03\u6574\u5e94\u7b54\u7b
56\u7565\u7684Agent\u63d0\u4f9b\u4e86\u66f4\u4f18\u8d28\u7684\u7528\u6237\u4f53\u9a8c[13]\u3002<\/p>\n<p>\u672c\u6587\u5c06\u7cfb\u7edf\u6027\u5730\u9610\u8ff0Agent\u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\u7684\u7406\u8bba\u57fa\u7840\u3001\u6838\u5fc3\u7b97\u6cd5\u548c\u5b9e\u73b0\u673a\u5236\u3002\u6211\u4eec\u5c06\u4ece\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b&#xff08;Markov Decision Process, MDP&#xff09;\u7684\u5f62\u5f0f\u5316\u6846\u67b6\u51fa\u53d1&#xff0c;\u6df1\u5165\u5206\u6790\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u3001\u7b56\u7565\u4f18\u5316\u7b97\u6cd5\u3001\u591a\u667a\u80fd\u4f53\u534f\u4f5c\u673a\u5236\u7b49\u5173\u952e\u6280\u672f&#xff0c;\u5e76\u901a\u8fc7\u6570\u5b66\u63a8\u5bfc\u548c\u7b97\u6cd5\u63cf\u8ff0\u63ed\u793a\u5176\u5185\u5728\u539f\u7406\u3002\u6700\u540e&#xff0c;\u6211\u4eec\u5c06\u8ba8\u8bba\u5f53\u524d\u9762\u4e34\u7684\u6311\u6218\u548c\u672a\u6765\u7814\u7a76\u65b9\u5411&#xff0c;\u4e3a\u8fd9\u4e00\u9886\u57df\u7684\u6301\u7eed\u63a2\u7d22\u63d0\u4f9b\u53c2\u8003\u3002<\/p>\n<p>Agent\u5de5\u4f5c\u6d41\u6f14\u8fdb<\/p>\n<p>\u89c4\u5219\u9a71\u52a8\u9636\u6bb5<\/p>\n<p>RAG\u589e\u5f3a\u9636\u6bb5<\/p>\n<p>\u81ea\u6211\u8fdb\u5316\u9636\u6bb5<\/p>\n<p>If-Then\u89c4\u5219<\/p>\n<p>\u786e\u5b9a\u6027\u6d41\u7a0b<\/p>\n<p>\u77e5\u8bc6\u68c0\u7d22<\/p>\n<p>\u9759\u6001\u7ed3\u6784<\/p>\n<p>\u5f3a\u5316\u5b66\u4e60\u4f18\u5316<\/p>\n<p>\u52a8\u6001SOP\u53d1\u73b0<\/p>\n<p>\u6301\u7eed\u81ea\u6211\u6539\u8fdb<\/p>\n<h2>2 \u5f3a\u5316\u5b66\u4e60\u7406\u8bba\u57fa\u7840&#xff1a;\u4eceMDP\u5230\u7b56\u7565\u4f18\u5316<\/h2>\n<h3>2.1 \u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b\u7684\u5f62\u5f0f\u5316\u6846\u67b6<\/h3>\n<h4>2.1.1 MDP\u7684\u57fa\u672c\u5b9a\u4e49\u4e0e\u8981\u7d20<\/h4>\n<p>\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b\u662f\u63cf\u8ff0\u5e8f\u8d2f\u51b3\u7b56\u95ee\u9898\u7684\u6807\u51c6\u6570\u5b66\u6846\u67b6&#xff0c;\u4e3aAgent\u5de5\u4f5c\u6d41\u4f18\u5316\u63d0\u4f9b\u4e86\u4e25\u8c28\u7684\u7406\u8bba\u57fa\u7840\u3002\u4e00\u4e2a\u6807\u51c6\u7684MDP\u7531\u4e94\u5143\u7ec4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">M&#061;(S,A,P,R,\u03b3)\\\\mathcal{M} &#061; 
(\\\\mathcal{S}, \\\\mathcal{A}, \\\\mathcal{P}, \\\\mathcal{R}, \\\\gamma)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\">M<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathcal\" style=\"margin-right: 0.075em\">S<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\">A<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\">R<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u5b9a\u4e49&#xff0c;\u5176\u4e2d\u5404\u8981\u7d20\u7684\u7269\u7406\u610f\u4e49\u548c\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d\u7684\u5bf9\u5e94\u5173\u7cfb\u5982\u88681\u6240\u793a[14]\u3002<\/p>\n<p>\u88681 MDP\u8981\u7d20\u4e0eAgent\u5de5\u4f5c\u6d41\u573a\u666f\u7684\u5bf9\u5e94\u5173\u7cfb<\/p>\n<table>\n<thead>\n<tr>\n<th>MDP\u8981\u7d20<\/th>\n<th>\u6570\u5b66\u7b26\u53f7<\/th>\n<th>\u5de5\u4f5c\u6d41\u573a\u666f\u5bf9\u5e94<\/th>\n<th>\u5177\u4f53\u793a\u4f8b<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u72b6\u6001\u7a7a\u95f4<\/td>\n<td><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">S\\\\mathcal{S}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" 
style=\"margin-right: 0.075em\">S<\/span><\/span><\/span><\/span><\/span><\/td>\n<td>\u4efb\u52a1\u6267\u884c\u7684\u4e2d\u95f4\u72b6\u6001<\/td>\n<td>\u5f53\u524d\u5df2\u5b8c\u6210\u7684\u63a8\u7406\u6b65\u9aa4\u3001\u6536\u96c6\u7684\u4fe1\u606f\u3001\u4e2d\u95f4\u7ed3\u679c<\/td>\n<\/tr>\n<tr>\n<td>\u52a8\u4f5c\u7a7a\u95f4<\/td>\n<td><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">A\\\\mathcal{A}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\">A<\/span><\/span><\/span><\/span><\/span><\/td>\n<td>\u53ef\u6267\u884c\u7684\u64cd\u4f5c\u96c6\u5408<\/td>\n<td>\u5de5\u5177\u8c03\u7528\u3001\u4fe1\u606f\u68c0\u7d22\u3001\u63a8\u7406\u751f\u6210\u3001\u7ed3\u679c\u8f93\u51fa<\/td>\n<\/tr>\n<tr>\n<td>\u72b6\u6001\u8f6c\u79fb<\/td>\n<td><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">P\\\\mathcal{P}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0822em\">P<\/span><\/span><\/span><\/span><\/span><\/td>\n<td>\u64cd\u4f5c\u6267\u884c\u540e\u7684\u72b6\u6001\u53d8\u5316<\/td>\n<td>\u6267\u884c\u67d0\u5de5\u5177\u540e\u83b7\u5f97\u65b0\u4fe1\u606f&#xff0c;\u72b6\u6001\u76f8\u5e94\u66f4\u65b0<\/td>\n<\/tr>\n<tr>\n<td>\u5956\u52b1\u51fd\u6570<\/td>\n<td><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">R\\\\mathcal{R}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\">R<\/span><\/span><\/span><\/span><\/span><\/td>\n<td>\u4efb\u52a1\u5b8c\u6210\u8d28\u91cf\u7684\u91cf\u5316\u8bc4\u4f30<\/td>\n<td>\u6b63\u786e\u7b54\u6848\u5f97\u6b63\u5956\u52b1&#xff0c;\u9519\u8bef\u7b54\u6848\u5f97\u8d1f\u5956\u52b1<\/td>\n<\/tr>\n<tr>\n<td>\u6298\u6263\u56e0\u5b50<\/td>\n<td><span 
class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b3\\\\gamma<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span><\/td>\n<td>\u672a\u6765\u5956\u52b1\u7684\u6298\u73b0\u7a0b\u5ea6<\/td>\n<td>\u901a\u5e38\u53d60.99&#xff0c;\u5e73\u8861\u5373\u65f6\u4e0e\u957f\u671f\u6536\u76ca<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u72b6\u6001\u7a7a\u95f4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">S\\\\mathcal{S}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.075em\">S<\/span><\/span><\/span><\/span><\/span> \u5728\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d\u901a\u5e38\u5177\u6709\u9ad8\u7ef4\u6027\u548c\u7ed3\u6784\u6027\u3002\u4ee5\u95ee\u7b54Agent\u4e3a\u4f8b&#xff0c;\u72b6\u6001\u53ef\u80fd\u5305\u542b&#xff1a;\u5f53\u524d\u95ee\u9898\u7684\u6587\u672c\u8868\u793a\u3001\u5df2\u68c0\u7d22\u7684\u76f8\u5173\u6587\u6863\u96c6\u5408\u3001\u5df2\u751f\u6210\u7684\u63a8\u7406\u6b65\u9aa4\u5e8f\u5217\u3001\u4e2d\u95f4\u7ed3\u8bba\u7684\u7f6e\u4fe1\u5ea6\u5206\u6570\u7b49\u3002\u8fd9\u79cd\u590d\u5408\u72b6\u6001\u7ed3\u6784\u8981\u6c42\u7b97\u6cd5\u80fd\u591f\u6709\u6548\u5904\u7406\u5f02\u6784\u4fe1\u606f\u6e90[15]\u3002<\/p>\n<p>\u52a8\u4f5c\u7a7a\u95f4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">A\\\\mathcal{A}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\">A<\/span><\/span><\/span><\/span><\/span> \u7684\u8bbe\u8ba1\u76f4\u63a5\u5f71\u54cdAgent\u7684\u80fd\u529b\u8fb9\u754c\u3002\u5728ReAct&#xff08;Reasoning and 
Acting&#xff09;\u6846\u67b6\u4e2d&#xff0c;\u52a8\u4f5c\u88ab\u5206\u4e3a\u63a8\u7406\u52a8\u4f5c&#xff08;Thought&#xff09;\u548c\u6267\u884c\u52a8\u4f5c&#xff08;Action&#xff09;\u4e24\u7c7b[1]\u3002\u63a8\u7406\u52a8\u4f5c\u8d1f\u8d23\u5206\u6790\u5f53\u524d\u72b6\u6001\u5e76\u89c4\u5212\u4e0b\u4e00\u6b65&#xff0c;\u6267\u884c\u52a8\u4f5c\u5219\u4e0e\u5916\u90e8\u73af\u5883\u4ea4\u4e92&#xff08;\u5982\u8c03\u7528\u641c\u7d22\u5f15\u64ce\u3001\u6267\u884c\u4ee3\u7801&#xff09;\u3002\u8fd9\u79cd\u5206\u5c42\u52a8\u4f5c\u8bbe\u8ba1\u4f7f\u5f97Agent\u80fd\u591f\u8fdb\u884c\u6df1\u5ea6\u63a8\u7406\u4e0e\u6709\u6548\u884c\u52a8\u7684\u6709\u673a\u7ed3\u5408\u3002<\/p>\n<p>\u72b6\u6001\u8f6c\u79fb\u6982\u7387 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">P(s\u2032\u2223s,a)\\\\mathcal{P}(s&#039;|s,a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0019em;vertical-align: -0.25em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u63cf\u8ff0\u4e86\u5728\u72b6\u6001 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ss<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">s<\/span><\/span><\/span><\/span><\/span> \u6267\u884c\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">aa<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">a<\/span><\/span><\/span><\/span><\/span> \u540e\u8f6c\u79fb\u5230\u72b6\u6001 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">s\u2032s&#039;<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7519em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u7684\u6982\u7387\u3002\u5728\u786e\u5b9a\u6027\u73af\u5883&#xff08;\u5982\u4ee3\u7801\u6267\u884c\u73af\u5883&#xff09;\u4e2d&#xff0c;\u8f6c\u79fb\u662f\u786e\u5b9a\u6027\u7684&#xff1b;\u800c\u5728\u6d89\u53ca\u5916\u90e8API\u8c03\u7528\u6216\u4eba\u7c7b\u4ea4\u4e92\u7684\u573a\u666f\u4e2d&#xff0c;\u8f6c\u79fb\u5177\u6709\u968f\u673a\u6027\u3002\u7406\u89e3\u8f6c\u79fb\u7684\u968f\u673a\u7279\u6027\u5bf9\u4e8e\u8bbe\u8ba1\u9c81\u68d2\u7684\u7b56\u7565\u81f3\u5173\u91cd\u8981[16]\u3002<\/p>\n<h4>2.1.2 \u8d1d\u5c14\u66fc\u65b9\u7a0b\u4e0e\u6700\u4f18\u6027\u539f\u7406<\/h4>\n<p>\u8d1d\u5c14\u66fc\u65b9\u7a0b&#xff08;Bellman 
Equation&#xff09;\u662f\u5f3a\u5316\u5b66\u4e60\u7406\u8bba\u7684\u6838\u5fc3&#xff0c;\u5b83\u5efa\u7acb\u4e86\u503c\u51fd\u6570\u4e0e\u7b56\u7565\u4e4b\u95f4\u7684\u9012\u5f52\u5173\u7cfb\u3002\u5bf9\u4e8e\u7ed9\u5b9a\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\\\\pi<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u72b6\u6001\u503c\u51fd\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">V\u03c0(s)V^\\\\pi(s)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u6ee1\u8db3[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">V\u03c0(s)&#061;Ea\u223c\u03c0(\u22c5\u2223s)[R(s,a)&#043;\u03b3Es\u2032\u223cP(\u22c5\u2223s,a)[V\u03c0(s\u2032)]]V^\\\\pi(s) &#061; \\\\mathbb{E}_{a \\\\sim \\\\pi(\\\\cdot|s)} \\\\left[ \\\\mathcal{R}(s,a) &#043; \\\\gamma \\\\mathbb{E}_{s&#039; \\\\sim \\\\mathcal{P}(\\\\cdot|s,a)} [V^\\\\pi(s&#039;)] \\\\right]<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2052em;vertical-align: -0.3552em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\">\u22c5<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">[<\/span><\/span><span class=\"mord mathcal\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\">\u22c5<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span 
class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u65b9\u7a0b\u8868\u660e&#xff0c;\u72b6\u6001 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ss<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">s<\/span><\/span><\/span><\/span><\/span> \u7684\u503c\u7b49\u4e8e\u5373\u65f6\u5956\u52b1\u52a0\u4e0a\u6298\u6263\u540e\u7684\u671f\u671b\u672a\u6765\u503c\u3002\u5bf9\u4e8e\u52a8\u4f5c\u503c\u51fd\u6570 
<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Q\u03c0(s,a)Q^\\\\pi(s,a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u8d1d\u5c14\u66fc\u65b9\u7a0b\u5f62\u5f0f\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Q\u03c0(s,a)&#061;R(s,a)&#043;\u03b3Es\u2032\u223cP(\u22c5\u2223s,a)[Ea\u2032\u223c\u03c0(\u22c5\u2223s\u2032)[Q\u03c0(s\u2032,a\u2032)]]Q^\\\\pi(s,a) &#061; \\\\mathcal{R}(s,a) &#043; \\\\gamma \\\\mathbb{E}_{s&#039; \\\\sim \\\\mathcal{P}(\\\\cdot|s,a)} \\\\left[ \\\\mathbb{E}_{a&#039; \\\\sim \\\\pi(\\\\cdot|s&#039;)} [Q^\\\\pi(s&#039;,a&#039;)] \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathcal\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2052em;vertical-align: -0.3552em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span 
class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\">\u22c5<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">[<\/span><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span 
class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\">\u22c5<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u6700\u4f18\u503c\u51fd\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">V\u2217(s)&#061;max\u2061\u03c0V\u03c0(s)V^*(s) &#061; \\\\max_\\\\pi V^\\\\pi(s)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mop\"><span 
class=\"mop\">max<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u6ee1\u8db3\u8d1d\u5c14\u66fc\u6700\u4f18\u65b9\u7a0b&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">V\u2217(s)&#061;max\u2061a\u2208A[R(s,a)&#043;\u03b3Es\u2032\u223cP(\u22c5\u2223s,a)[V\u2217(s\u2032)]]V^*(s) &#061; \\\\max_{a \\\\in \\\\mathcal{A}} \\\\left[ \\\\mathcal{R}(s,a) &#043; \\\\gamma \\\\mathbb{E}_{s&#039; \\\\sim \\\\mathcal{P}(\\\\cdot|s,a)} [V^*(s&#039;)] \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" 
style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.6217em;vertical-align: -0.7717em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.3557em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mrel mtight\">\u2208<\/span><span class=\"mord mathcal mtight\">A<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7717em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">[<\/span><\/span><span class=\"mord mathcal\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" 
style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\">\u22c5<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 
0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u6700\u4f18\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2217\\\\pi^*<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6887em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u53ef\u901a\u8fc7\u8d2a\u5a6a\u5730\u9009\u62e9\u6700\u5927\u5316Q\u503c\u7684\u52a8\u4f5c\u83b7\u5f97&#xff1a;<\/p>\n<p><span 
class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2217(a\u2223s)&#061;{1if\u00a0a&#061;arg\u2061max\u2061a\u2032Q\u2217(s,a\u2032)0otherwise\\\\pi^*(a|s) &#061; \\\\begin{cases} 1 &amp; \\\\text{if } a &#061; \\\\arg\\\\max_{a&#039;} Q^*(s,a&#039;) \\\\\\\\ 0 &amp; \\\\text{otherwise} \\\\end{cases}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3em;vertical-align: -1.25em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">{<\/span><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.69em\"><span class=\"\" style=\"top: -3.69em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span 
class=\"mord\"><span class=\"mord\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.19em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"arraycolsep\" style=\"width: 1em\"><\/span><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.69em\"><span class=\"\" style=\"top: -3.69em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">if\u00a0<\/span><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\"><span class=\"mop\">max<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.328em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">otherwise<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.19em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose 
nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u8d1d\u5c14\u66fc\u65b9\u7a0b\u4e3a\u4fe1\u7528\u5206\u914d\u63d0\u4f9b\u4e86\u7406\u8bba\u57fa\u7840\u3002\u5f53Agent\u5b8c\u6210\u4e00\u4e2a\u957f\u7a0b\u4efb\u52a1\u5e76\u83b7\u5f97\u6700\u7ec8\u5956\u52b1\u65f6&#xff0c;\u901a\u8fc7\u53cd\u5411\u4f20\u64ad\u8d1d\u5c14\u66fc\u65b9\u7a0b&#xff0c;\u53ef\u4ee5\u5c06\u5956\u52b1\u4fe1\u53f7\u9010\u5c42\u5206\u89e3\u5230\u5404\u4e2a\u4e2d\u95f4\u6b65\u9aa4&#xff0c;\u4ece\u800c\u8bc4\u4f30\u6bcf\u4e2a\u52a8\u4f5c\u7684\u8d21\u732e\u5ea6[16]\u3002<\/p>\n<h4>2.1.3 \u90e8\u5206\u53ef\u89c2\u6d4bMDP\u4e0e\u5de5\u4f5c\u6d41\u573a\u666f<\/h4>\n<p>\u5b9e\u9645\u5de5\u4f5c\u6d41\u573a\u666f\u5f80\u5f80\u4e0d\u6ee1\u8db3\u5b8c\u5168\u53ef\u89c2\u6d4b\u5047\u8bbe&#xff0c;\u667a\u80fd\u4f53\u53ea\u80fd\u83b7\u53d6\u90e8\u5206\u72b6\u6001\u4fe1\u606f\u3002\u8fd9\u7c7b\u95ee\u9898\u9700\u8981\u7528\u90e8\u5206\u53ef\u89c2\u6d4b\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b&#xff08;Partially Observable MDP, POMDP&#xff09;\u5efa\u6a21&#xff0c;\u5176\u5b9a\u4e49\u4e3a\u4e03\u5143\u7ec4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">MPO&#061;(S,A,O,P,Z,R,\u03b3)\\\\mathcal{M}_{PO} &#061; (\\\\mathcal{S}, \\\\mathcal{A}, \\\\mathcal{O}, \\\\mathcal{P}, \\\\mathcal{Z}, \\\\mathcal{R}, \\\\gamma)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">M<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" 
style=\"margin-right: 0.0278em\">PO<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathcal\" style=\"margin-right: 0.075em\">S<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\">A<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0794em\">Z<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\">R<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O\\\\mathcal{O}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">O<\/span><\/span><\/span><\/span><\/span> 
\u4e3a\u89c2\u6d4b\u7a7a\u95f4&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Z(o\u2223s,a)\\\\mathcal{Z}(o|s,a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0794em\">Z<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">o<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u89c2\u6d4b\u6982\u7387\u5206\u5e03[16]\u3002<\/p>\n<p>POMDP\u7684\u6838\u5fc3\u6311\u6218\u5728\u4e8e\u72b6\u6001\u7684\u4e0d\u786e\u5b9a\u6027\u3002Agent\u9700\u8981\u7ef4\u62a4\u4fe1\u5ff5\u72b6\u6001&#xff08;Belief State&#xff09;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">b(s)&#061;P(s\u2223ht)b(s) &#061; P(s|h_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">h<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span 
class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5373\u57fa\u4e8e\u5386\u53f2\u4ea4\u4e92 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ht&#061;(o1,a1,&#8230;,ot)h_t &#061; (o_1, a_1, &#8230;, o_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">h<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 
0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u5bf9\u5f53\u524d\u72b6\u6001\u7684\u540e\u9a8c\u5206\u5e03\u3002\u4fe1\u5ff5\u72b6\u6001\u7684\u66f4\u65b0\u9075\u5faa\u8d1d\u53f6\u65af\u89c4\u5219&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">b\u2032(s\u2032)&#061;\u03b7\u22c5Z(o\u2032\u2223s\u2032,a)\u2211s\u2208SP(s\u2032\u2223s,a)b(s)b&#039;(s&#039;) &#061; \\\\eta \\\\cdot \\\\mathcal{Z}(o&#039;|s&#039;,a) \\\\sum_{s \\\\in \\\\mathcal{S}} \\\\mathcal{P}(s&#039;|s,a) b(s)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0519em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">b<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6389em;vertical-align: 
-0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.3717em;vertical-align: -1.3217em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0794em\">Z<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.05em\"><span class=\"\" style=\"top: -1.8557em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mrel mtight\">\u2208<\/span><span 
class=\"mord mathcal mtight\" style=\"margin-right: 0.075em\">S<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3217em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0822em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b7\\\\eta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><\/span><\/span><\/span><\/span> 
\u4e3a\u5f52\u4e00\u5316\u5e38\u6570\u3002\u5728Agent\u5de5\u4f5c\u6d41\u4e2d&#xff0c;\u8fd9\u79cd\u4e0d\u786e\u5b9a\u6027\u6765\u6e90\u4e8e\u591a\u4e2a\u65b9\u9762&#xff1a;\u4fe1\u606f\u68c0\u7d22\u53ef\u80fd\u8fd4\u56de\u4e0d\u76f8\u5173\u7ed3\u679c\u3001\u5de5\u5177\u6267\u884c\u53ef\u80fd\u4ea7\u751f\u610f\u5916\u8f93\u51fa\u3001\u7528\u6237\u610f\u56fe\u53ef\u80fd\u5b58\u5728\u6b67\u4e49\u3002\u6709\u6548\u7684Agent\u9700\u8981\u5177\u5907\u4ece\u4e0d\u5b8c\u6574\u4fe1\u606f\u4e2d\u63a8\u65ad\u771f\u5b9e\u72b6\u6001\u7684\u80fd\u529b[16]\u3002<\/p>\n<p><span class=\"nodeLabel\">Agent\u5de5\u4f5c\u6d41\u6620\u5c04<\/span><\/p>\n<p><span class=\"edgeLabel\">\u7f16\u7801<\/span><\/p>\n<p><span class=\"edgeLabel\">\u7b56\u7565\u03c0<\/span><\/p>\n<p><span class=\"edgeLabel\">\u6267\u884c<\/span><\/p>\n<p><span class=\"edgeLabel\">\u89c2\u5bdf<\/span><\/p>\n<p><span class=\"edgeLabel\">\u8bc4\u4f30<\/span><\/p>\n<p><span class=\"nodeLabel\">\u4efb\u52a1\u63cf\u8ff0<\/span><\/p>\n<p><span class=\"nodeLabel\">\u72b6\u6001\u8868\u5f81<\/span><\/p>\n<p><span class=\"nodeLabel\">\u64cd\u4f5c\u9009\u62e9<\/span><\/p>\n<p><span class=\"nodeLabel\">\u73af\u5883\u4ea4\u4e92<\/span><\/p>\n<p><span class=\"nodeLabel\">\u65b0\u72b6\u6001<\/span><\/p>\n<p><span class=\"nodeLabel\">\u5956\u52b1\u8ba1\u7b97<\/span><\/p>\n<p><span class=\"nodeLabel\">MDP\u6846\u67b6<\/span><\/p>\n<p><span class=\"edgeLabel\">\u6267\u884c\u52a8\u4f5ca<\/span><\/p>\n<p><span class=\"edgeLabel\">\u72b6\u6001\u8f6c\u79fbP<\/span><\/p>\n<p><span class=\"edgeLabel\">\u5956\u52b1R<\/span><\/p>\n<p><span class=\"nodeLabel\">\u72b6\u6001s<\/span><\/p>\n<p><span class=\"nodeLabel\">\u52a8\u4f5c\u7a7a\u95f4<\/span><\/p>\n<p><span class=\"nodeLabel\">\u72b6\u6001s&#039;<\/span><\/p>\n<p><span class=\"nodeLabel\">\u5956\u52b1\u4fe1\u53f7<\/span><\/p>\n<h3>2.2 \u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u4e0eActor-Critic\u67b6\u6784<\/h3>\n<h4>2.2.1 \u7b56\u7565\u68af\u5ea6\u5b9a\u7406\u7684\u7406\u8bba\u63a8\u5bfc<\/h4>\n<p>\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u76f4\u63a5\u53c2\u6570\u5316\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u03b8(a\u2223s)\\\\pi_\\\\theta(a|s)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u901a\u8fc7\u68af\u5ea6\u4e0a\u5347\u4f18\u5316\u7b56\u7565\u53c2\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b8\\\\theta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><\/span>\u3002\u7b56\u7565\u68af\u5ea6\u5b9a\u7406&#xff08;Policy Gradient Theorem&#xff09;\u7ed9\u51fa\u4e86\u671f\u671b\u7d2f\u79ef\u5956\u52b1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">J(\u03b8)&#061;E\u03c4\u223c\u03c0\u03b8[R(\u03c4)]J(\\\\theta) &#061; \\\\mathbb{E}_{\\\\tau \\\\sim \\\\pi_\\\\theta}[R(\\\\tau)]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0059em;vertical-align: -0.2559em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)]<\/span><\/span><\/span><\/span><\/span> \u5173\u4e8e\u7b56\u7565\u53c2\u6570\u7684\u68af\u5ea6\u8868\u8fbe\u5f0f[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u2207\u03b8J(\u03b8)&#061;E\u03c4\u223c\u03c0\u03b8[\u2211t&#061;0T\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)\u22c5Q\u03c0\u03b8(st,at)]\\\\nabla_\\\\theta J(\\\\theta) &#061; \\\\mathbb{E}_{\\\\tau \\\\sim 
\\\\pi_\\\\theta} \\\\left[ \\\\sum_{t&#061;0}^{T} \\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t) \\\\cdot Q^{\\\\pi_\\\\theta}(s_t, a_t) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.0954em;vertical-align: -1.2671em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span 
class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">[<\/span><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" 
style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u5b9a\u7406\u7684\u76f4\u89c2\u89e3\u91ca\u662f&#xff1a;\u589e\u52a0\u90a3\u4e9b\u5bfc\u81f4\u9ad8Q\u503c\u52a8\u4f5c\u7684\u8f68\u8ff9\u6982\u7387&#xff0c;\u964d\u4f4e\u5bfc\u81f4\u4f4eQ\u503c\u52a8\u4f5c\u7684\u8f68\u8ff9\u6982\u7387\u3002<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)\\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u88ab\u79f0\u4e3a\u5f97\u5206\u51fd\u6570&#xff08;Score 
Function&#xff09;&#xff0c;\u5b83\u6307\u793a\u4e86\u5982\u4f55\u8c03\u6574\u53c2\u6570\u4ee5\u589e\u52a0\u7279\u5b9a\u52a8\u4f5c\u7684\u6982\u7387\u3002<\/p>\n<p>REINFORCE\u7b97\u6cd5\u662f\u57fa\u4e8e\u7b56\u7565\u68af\u5ea6\u5b9a\u7406\u7684\u57fa\u7840\u7b97\u6cd5&#xff0c;\u5176\u53c2\u6570\u66f4\u65b0\u89c4\u5219\u4e3a[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b8\u2190\u03b8&#043;\u03b1\u2211t&#061;0T\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)\u22c5Rt\\\\theta \\\\leftarrow \\\\theta &#043; \\\\alpha \\\\sum_{t&#061;0}^{T} \\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t) \\\\cdot R_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7778em;vertical-align: -0.0833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.0954em;vertical-align: -1.2671em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span 
class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: 
-2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span 
class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Rt&#061;\u2211t\u2032&#061;tT\u03b3t\u2032\u2212trt\u2032R_t &#061; \\\\sum_{t&#039;&#061;t}^{T} \\\\gamma^{t&#039;-t} r_{t&#039;}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span 
class=\"base\"><span class=\"strut\" style=\"height: 1.2809em;vertical-align: -0.2997em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9812em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.2029em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9425em\"><span class=\"\" style=\"top: -3.063em;margin-right: 
0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8278em\"><span class=\"\" style=\"top: -2.931em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mbin mtight\">\u2212<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.328em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> 
\u4e3a\u4ece\u65f6\u523b <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u5f00\u59cb\u7684\u7d2f\u79ef\u6298\u6263\u5956\u52b1\u3002REINFORCE\u7b97\u6cd5\u7684\u4f18\u70b9\u662f\u65e0\u9700\u5b66\u4e60\u73af\u5883\u6a21\u578b&#xff0c;\u4f46\u5b58\u5728\u9ad8\u65b9\u5dee\u95ee\u9898&#xff0c;\u56e0\u4e3a\u76f4\u63a5\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u56de\u62a5\u4f5c\u4e3a\u6743\u91cd\u3002<\/p>\n<h4>2.2.2 \u57fa\u7ebf\u51fd\u6570\u4e0e\u65b9\u5dee\u7f29\u51cf<\/h4>\n<p>\u4e3a\u964d\u4f4e\u7b56\u7565\u68af\u5ea6\u7684\u65b9\u5dee&#xff0c;\u7814\u7a76\u8005\u5f15\u5165\u4e86\u4e0e\u52a8\u4f5c\u65e0\u5173\u7684\u57fa\u7ebf\u51fd\u6570&#xff08;Baseline Function&#xff09;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">b(st)b(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>\u3002\u7531\u4e8e <span class=\"katex--inline\"><span class=\"katex\"><span 
class=\"katex-mathml\">E[\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)]&#061;0\\\\mathbb{E}[\\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t)] &#061; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathbb\">E<\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord 
mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u6dfb\u52a0\u57fa\u7ebf\u4e0d\u6539\u53d8\u68af\u5ea6\u7684\u671f\u671b\u503c&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u2207\u03b8J(\u03b8)&#061;E\u03c4\u223c\u03c0\u03b8[\u2211t&#061;0T\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)\u22c5(Q\u03c0\u03b8(st,at)\u2212b(st))]\\\\nabla_\\\\theta J(\\\\theta) 
&#061; \\\\mathbb{E}_{\\\\tau \\\\sim \\\\pi_\\\\theta} \\\\left[ \\\\sum_{t&#061;0}^{T} \\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t) \\\\cdot (Q^{\\\\pi_\\\\theta}(s_t, a_t) - b(s_t)) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.0954em;vertical-align: -1.2671em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mrel 
mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">[<\/span><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord 
mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span 
class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">))<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u4e00\u79cd\u5e38\u7528\u4e14\u63a5\u8fd1\u6700\u4f18\u7684\u57fa\u7ebf\u9009\u62e9\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">b\u2217(st)&#061;Eat\u223c\u03c0\u03b8[Q\u03c0\u03b8(st,at)]&#061;V\u03c0\u03b8(st)b^*(s_t) &#061; \\\\mathbb{E}_{a_t \\\\sim \\\\pi_\\\\theta}[Q^{\\\\pi_\\\\theta}(s_t, a_t)] &#061; V^{\\\\pi_\\\\theta}(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">b<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin 
mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0059em;vertical-align: -0.2559em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span 
class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5373\u72b6\u6001\u503c\u51fd\u6570\u3002\u6b64\u65f6 <span 
class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Q\u03c0\u03b8(st,at)\u2212V\u03c0\u03b8(st)Q^{\\\\pi_\\\\theta}(s_t, a_t) - V^{\\\\pi_\\\\theta}(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 
2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u88ab\u79f0\u4e3a\u4f18\u52bf\u51fd\u6570&#xff08;Advantage Function&#xff09;&#xff0c;\u8bb0\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">A\u03c0\u03b8(st,at)A^{\\\\pi_\\\\theta}(s_t, a_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>[16]\u3002<\/p>\n<p>\u4f18\u52bf\u51fd\u6570\u8861\u91cf\u4e86\u91c7\u53d6\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ata_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u76f8\u5bf9\u4e8e\u5e73\u5747\u6c34\u5e73\u7684\u597d\u574f\u7a0b\u5ea6\u3002\u5f53 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">A(s,a)&gt;0A(s,a) &gt; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">A<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&gt;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span 
class=\"mord\">0<\/span><\/span><\/span><\/span><\/span> \u65f6&#xff0c;\u8bf4\u660e\u8be5\u52a8\u4f5c\u4f18\u4e8e\u5e73\u5747\u8868\u73b0&#xff0c;\u5e94\u589e\u52a0\u5176\u6982\u7387&#xff1b;\u53cd\u4e4b\u5219\u5e94\u964d\u4f4e\u3002\u4f7f\u7528\u4f18\u52bf\u51fd\u6570\u7684\u7b56\u7565\u68af\u5ea6\u5177\u6709\u66f4\u5c0f\u7684\u65b9\u5dee&#xff0c;\u56e0\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">b(st)b(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u62b5\u6d88\u4e86\u72b6\u6001\u672c\u8eab\u7684\u4ef7\u503c\u6ce2\u52a8\u3002<\/p>\n<h4>2.2.3 
Actor-Critic\u7b97\u6cd5\u7684\u534f\u540c\u673a\u5236<\/h4>\n<p>Actor-Critic\u67b6\u6784\u5c06\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u5206\u4e3a\u4e24\u4e2a\u534f\u540c\u5de5\u4f5c\u7684\u7ec4\u4ef6&#xff1a;Actor&#xff08;\u7b56\u7565\u7f51\u7edc&#xff09;\u8d1f\u8d23\u751f\u6210\u52a8\u4f5c&#xff0c;Critic&#xff08;\u4ef7\u503c\u7f51\u7edc&#xff09;\u8d1f\u8d23\u8bc4\u4f30\u72b6\u6001\u6216\u52a8\u4f5c\u503c\u3002\u8fd9\u79cd\u5206\u79bb\u8bbe\u8ba1\u4f7f\u5f97\u7b97\u6cd5\u80fd\u591f\u540c\u65f6\u5229\u7528\u7b56\u7565\u68af\u5ea6\u7684\u7a33\u5b9a\u6027\u548c\u503c\u51fd\u6570\u4f30\u8ba1\u7684\u51c6\u786e\u6027[16]\u3002<\/p>\n<p>Critic\u901a\u8fc7\u65f6\u5e8f\u5dee\u5206&#xff08;Temporal Difference, TD&#xff09;\u5b66\u4e60\u66f4\u65b0\u4ef7\u503c\u4f30\u8ba1\u3002\u5bf9\u4e8e\u72b6\u6001\u503c\u51fd\u6570&#xff0c;TD\u8bef\u5dee\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4t&#061;rt&#043;\u03b3V\u03d5(st&#043;1)\u2212V\u03d5(st)\\\\delta_t &#061; r_t &#043; \\\\gamma V_\\\\phi(s_{t&#043;1}) - V_\\\\phi(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.2222em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">\u03d5<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.2222em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">\u03d5<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>Critic\u53c2\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03d5\\\\phi<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">\u03d5<\/span><\/span><\/span><\/span><\/span> \u901a\u8fc7\u6700\u5c0f\u5316TD\u8bef\u5dee\u7684\u5e73\u65b9\u66f4\u65b0&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03d5\u2190\u03d5&#043;\u03b2\u22c5\u03b4t\u22c5\u2207\u03d5V\u03d5(st)\\\\phi \\\\leftarrow \\\\phi &#043; \\\\beta \\\\cdot \\\\delta_t \\\\cdot \\\\nabla_\\\\phi V_\\\\phi(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">\u03d5<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">\u03d5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"mspace\" 
style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">\u03d5<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.2222em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">\u03d5<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>Actor\u5219\u5229\u7528Critic\u63d0\u4f9b\u7684\u4f18\u52bf\u4f30\u8ba1\u66f4\u65b0\u7b56\u7565&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b8\u2190\u03b8&#043;\u03b1\u22c5\u2207\u03b8log\u2061\u03c0\u03b8(at\u2223st)\u22c5\u03b4t\\\\theta \\\\leftarrow \\\\theta &#043; \\\\alpha \\\\cdot \\\\nabla_\\\\theta \\\\log \\\\pi_\\\\theta(a_t|s_t) \\\\cdot \\\\delta_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7778em;vertical-align: -0.0833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.4445em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;Actor-Critic\u67b6\u6784\u7684\u4f18\u52bf\u5c24\u4e3a\u660e\u663e\u3002Critic\u53ef\u4ee5\u5b66\u4e60\u8bc4\u4f30\u4e0d\u540c\u5de5\u4f5c\u6d41\u6b65\u9aa4\u7684\u8d28\u91cf&#xff0c;\u4e3aActor\u63d0\u4f9b\u7ec6\u7c92\u5ea6\u7684\u4f18\u5316\u4fe1\u53f7\u3002\u7814\u7a76\u8868\u660e&#xff0c;\u5f15\u5165Critic\u540e&#xff0c;Agent\u5728\u590d\u6742\u4efb\u52a1\u4e0a\u7684\u6536\u655b\u901f\u5ea6\u53ef\u63d0\u53472-3\u500d[16]\u3002<\/p>\n<h3>2.3 \u8fd1\u7aef\u7b56\u7565\u4f18\u5316&#xff08;PPO&#xff09;\u4e0e\u4fe1\u4efb\u57df\u65b9\u6cd5<\/h3>\n<h4>2.3.1 \u4fe1\u4efb\u57df\u7b56\u7565\u4f18\u5316\u7684\u7406\u8bba\u52a8\u673a<\/h4>\n<p>\u4f20\u7edf\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u5b58\u5728\u6b65\u957f\u9009\u62e9\u56f0\u96be\u7684\u95ee\u9898&#xff1a;\u6b65\u957f\u8fc7\u5927\u5bfc\u81f4\u7b56\u7565\u5d29\u6e83&#xff0c;\u6b65\u957f\u8fc7\u5c0f\u5219\u6536\u655b\u7f13\u6162\u3002\u4fe1\u4efb\u57df\u7b56\u7565\u4f18\u5316&#xff08;Trust Region Policy Optimization, 
TRPO&#xff09;\u901a\u8fc7\u7ea6\u675f\u7b56\u7565\u66f4\u65b0\u7684\u5e45\u5ea6\u6765\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898[16]\u3002<\/p>\n<p>TRPO\u7684\u6838\u5fc3\u601d\u60f3\u662f&#xff1a;\u5728\u5f53\u524d\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u03b8old\\\\pi_{\\\\theta_{old}}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6864em;vertical-align: -0.2559em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u9644\u8fd1\u5b9a\u4e49\u4e00\u4e2a\u4fe1\u4efb\u57df&#xff0c;\u5728\u8be5\u533a\u57df\u5185\u8fd1\u4f3c\u76ee\u6807\u51fd\u6570&#xff0c;\u5e76\u5bfb\u627e\u6700\u4f18\u66f4\u65b0\u3002\u5177\u4f53\u5730&#xff0c;TRPO\u6c42\u89e3\u4ee5\u4e0b\u7ea6\u675f\u4f18\u5316\u95ee\u9898&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">max\u2061\u03b8Es\u223c\u03c0\u03b8old,a\u223c\u03c0\u03b8old[\u03c0\u03b8(a\u2223s)\u03c0\u03b8old(a\u2223s)A\u03c0\u03b8old(s,a)]\\\\max_\\\\theta \\\\mathbb{E}_{s \\\\sim \\\\pi_{\\\\theta_{old}}, a \\\\sim \\\\pi_{\\\\theta_{old}}} \\\\left[ \\\\frac{\\\\pi_\\\\theta(a|s)}{\\\\pi_{\\\\theta_{old}}(a|s)} A^{\\\\pi_{\\\\theta_{old}}}(s,a) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 2.4em;vertical-align: -0.95em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.3479em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7521em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4307em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">[<\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span 
class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 
0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9419em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span 
class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">s.t.\u00a0Es\u223c\u03c0\u03b8old[DKL(\u03c0\u03b8old(\u22c5\u2223s)\u2225\u03c0\u03b8(\u22c5\u2223s))]\u2264\u03b4\\\\text{s.t. 
} \\\\mathbb{E}_{s \\\\sim \\\\pi_{\\\\theta_{old}}} [D_{KL}(\\\\pi_{\\\\theta_{old}}(\\\\cdot|s) \\\\| \\\\pi_\\\\theta(\\\\cdot|s))] \\\\leq \\\\delta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1807em;vertical-align: -0.4307em\"><\/span><span class=\"mord text\"><span class=\"mord\">s.t.\u00a0<\/span><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4307em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal mtight\">L<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\u2225<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span 
class=\"mclose\">))]<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2264<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">DKLD_{KL}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal mtight\">L<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3aKL\u6563\u5ea6&#xff0c;\u7528\u4e8e\u5ea6\u91cf\u4e24\u4e2a\u7b56\u7565\u7684\u5dee\u5f02&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4\\\\delta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><\/span><\/span> 
\u4e3a\u4fe1\u4efb\u57df\u534a\u5f84\u3002\u8be5\u7ea6\u675f\u786e\u4fdd\u65b0\u7b56\u7565\u4e0d\u4f1a\u504f\u79bb\u65e7\u7b56\u7565\u592a\u8fdc&#xff0c;\u4ece\u800c\u4fdd\u8bc1\u7b56\u7565\u6539\u8fdb\u7684\u5355\u8c03\u6027\u3002<\/p>\n<p>TRPO\u7684\u7406\u8bba\u57fa\u7840\u662f\u7b56\u7565\u6539\u8fdb\u7684\u5355\u8c03\u6027\u4fdd\u8bc1\u3002\u8bbe <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2032\\\\pi&#039;<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7519em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u66f4\u65b0\u540e\u7684\u7b56\u7565&#xff0c;\u53ef\u4ee5\u8bc1\u660e[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">J(\u03c0\u2032)\u2212J(\u03c0)\u226511\u2212\u03b3Es\u223cd\u03c0,a\u223c\u03c0\u2032[A\u03c0(s,a)]\u22122\u03b3\u03f5(1\u2212\u03b3)2\u22c5max\u2061sDTV(\u03c0(\u22c5\u2223s)\u2225\u03c0\u2032(\u22c5\u2223s))J(\\\\pi&#039;) - J(\\\\pi) \\\\geq \\\\frac{1}{1-\\\\gamma} \\\\mathbb{E}_{s \\\\sim d^\\\\pi, a \\\\sim \\\\pi&#039;} [A^\\\\pi(s,a)] - \\\\frac{2\\\\gamma \\\\epsilon}{(1-\\\\gamma)^2} \\\\cdot \\\\max_s D_{TV}(\\\\pi(\\\\cdot|s) \\\\| \\\\pi&#039;(\\\\cdot|s))<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0519em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" 
style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2265<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.2019em;vertical-align: -0.8804em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><span class=\"\" 
style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8804em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">d<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.5935em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" 
style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)]<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.2574em;vertical-align: -0.936em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 
0.2222em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7401em\"><span class=\"\" style=\"top: -2.989em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">2<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.936em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.5019em;vertical-align: -0.7em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">s<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span 
class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.2222em\">V<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\u2225<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span 
class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">))<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">DTVD_{TV}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.2222em\">V<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u603b\u53d8\u5dee\u8ddd\u79bb&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5&#061;max\u2061s,a\u2223A\u03c0(s,a)\u2223\\\\epsilon &#061; \\\\max_{s,a} |A^\\\\pi(s,a)|<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mop\"><span class=\"mop\">max<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><\/span><\/span>\u3002\u8be5\u4e0b\u754c\u8868\u660e&#xff0c;\u53ea\u8981\u4f18\u52bf\u51fd\u6570\u7684\u671f\u671b\u589e\u76ca\u8d85\u8fc7\u7531\u7b56\u7565\u53d8\u5316\u5e26\u6765\u7684\u60e9\u7f5a&#xff0c;\u7b56\u7565\u5c31\u80fd\u4fdd\u8bc1\u6539\u8fdb\u3002<\/p>\n<h4>2.3.2 
PPO\u7684\u88c1\u526a\u76ee\u6807\u51fd\u6570<\/h4>\n<p>TRPO\u9700\u8981\u8ba1\u7b97Fisher\u4fe1\u606f\u77e9\u9635\u5e76\u6c42\u89e3\u7ea6\u675f\u4f18\u5316&#xff0c;\u8ba1\u7b97\u6210\u672c\u9ad8\u6602\u3002\u8fd1\u7aef\u7b56\u7565\u4f18\u5316&#xff08;Proximal Policy Optimization, PPO&#xff09;\u901a\u8fc7\u88c1\u526a\u76ee\u6807\u51fd\u6570\u8fd1\u4f3c\u5b9e\u73b0\u4fe1\u4efb\u57df\u7ea6\u675f&#xff0c;\u5927\u5e45\u63d0\u5347\u4e86\u8ba1\u7b97\u6548\u7387[16]\u3002<\/p>\n<p>PPO\u5b9a\u4e49\u6982\u7387\u6bd4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">rt(\u03b8)&#061;\u03c0\u03b8(at\u2223st)\u03c0\u03b8old(at\u2223st)r_t(\\\\theta) &#061; \\\\frac{\\\\pi_\\\\theta(a_t|s_t)}{\\\\pi_{\\\\theta_{old}}(a_t|s_t)}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.6357em;vertical-align: -0.6257em\"><\/span><span class=\"mord\"><span 
class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.01em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.485em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span 
class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6257em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose 
nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u88c1\u526a\u76ee\u6807\u51fd\u6570\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">LCLIP(\u03b8)&#061;Et[min\u2061(rt(\u03b8)At,clip(rt(\u03b8),1\u2212\u03f5,1&#043;\u03f5)At)]L^{CLIP}(\\\\theta) &#061; \\\\mathbb{E}_t \\\\left[ \\\\min(r_t(\\\\theta) A_t, \\\\text{clip}(r_t(\\\\theta), 1-\\\\epsilon, 1&#043;\\\\epsilon) A_t) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1413em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">C<\/span><span class=\"mord mathnormal mtight\">L<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">P<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: 
-2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\">[<\/span><span class=\"mop\">min<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span class=\"mord\">clip<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: 
-2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8d85\u53c2\u6570&#xff08;\u901a\u5e38\u53d60.1\u62160.2&#xff09;&#xff0c;clip\u51fd\u6570\u5c06\u6982\u7387\u6bd4\u9650\u5236\u5728 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">[1\u2212\u03f5,1&#043;\u03f5][1-\\\\epsilon, 1&#043;\\\\epsilon]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">[<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8389em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" 
style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span> \u533a\u95f4\u5185\u3002\u8be5\u76ee\u6807\u51fd\u6570\u7684\u7cbe\u5999\u4e4b\u5904\u5728\u4e8e&#xff1a;\u5f53\u4f18\u52bf\u4e3a\u6b63\u65f6&#xff0c;\u9650\u5236\u6982\u7387\u6bd4\u7684\u4e0a\u754c\u9632\u6b62\u8fc7\u5ea6\u4f18\u5316&#xff1b;\u5f53\u4f18\u52bf\u4e3a\u8d1f\u65f6&#xff0c;\u9650\u5236\u6982\u7387\u6bd4\u7684\u4e0b\u754c\u9632\u6b62\u8fc7\u5ea6\u60e9\u7f5a[16]\u3002<\/p>\n<p>PPO\u7684\u5b8c\u6574\u76ee\u6807\u51fd\u6570\u8fd8\u5305\u62ec\u503c\u51fd\u6570\u635f\u5931\u548c\u71b5\u6b63\u5219\u9879&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">LPPO(\u03b8)&#061;Et[LCLIP(\u03b8)\u2212c1(V\u03b8(st)\u2212Vttarget)2&#043;c2H(\u03c0\u03b8(\u22c5\u2223st))]L^{PPO}(\\\\theta) &#061; \\\\mathbb{E}_t \\\\left[ L^{CLIP}(\\\\theta) - c_1 (V_\\\\theta(s_t) - V_t^{target})^2 &#043; c_2 H(\\\\pi_\\\\theta(\\\\cdot|s_t)) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1413em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">O<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2615em;vertical-align: -0.35em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">[<\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">C<\/span><span class=\"mord mathnormal mtight\">L<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">P<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span 
class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.2222em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9115em\"><span class=\"\" style=\"top: -2.4542em;margin-left: -0.2222em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1809em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">g<\/span><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 
0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0813em\">H<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing 
reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">))<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size1\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">c1,c2c_1, c_2<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u7cfb\u6570&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">HH<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0813em\">H<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b56\u7565\u71b5&#xff0c;\u7528\u4e8e\u9f13\u52b1\u63a2\u7d22\u3002<\/p>\n<h4>2.3.3 Group Relative Policy Optimization&#xff08;GRPO&#xff09;\u7684\u7ec4\u5185\u76f8\u5bf9\u4f18\u52bf<\/h4>\n<p>\u5728\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;LLM&#xff09;Agent\u7684\u8bad\u7ec3\u4e2d&#xff0c;\u4f20\u7edf\u7684PPO\u7b97\u6cd5\u9762\u4e34\u4e24\u4e2a\u4e3b\u8981\u6311\u6218&#xff1a;\u4e00\u662f\u9700\u8981\u7ef4\u62a4\u4e0e\u7b56\u7565\u6a21\u578b\u540c\u7b49\u89c4\u6a21\u7684Critic\u6a21\u578b&#xff0c;\u8ba1\u7b97\u5f00\u9500\u5de8\u5927&#xff1b;\u4e8c\u662f\u7a00\u758f\u5956\u52b1\u4fe1\u53f7\u5bfc\u81f4\u4fe1\u7528\u5206\u914d\u56f0\u96be\u3002Group Relative Policy Optimization&#xff08;GRPO&#xff09;\u7b97\u6cd5\u901a\u8fc7\u7ec4\u5185\u76f8\u5bf9\u4f18\u52bf\u4f30\u8ba1\u6709\u6548\u89e3\u51b3\u4e86\u8fd9\u4e9b\u95ee\u9898[16]\u3002<\/p>\n<p>GRPO\u7684\u6838\u5fc3\u521b\u65b0\u5728\u4e8e\u6452\u5f03\u4e86\u663e\u5f0f\u7684Critic\u7f51\u7edc&#xff0c;\u8f6c\u800c\u901a\u8fc7\u91c7\u6837\u540c\u4e00\u95ee\u9898\u7684\u591a\u4e2a\u56de\u7b54\u5e76\u8ba1\u7b97\u7ec4\u5185\u76f8\u5bf9\u5f97\u5206\u6765\u4f30\u8ba1\u4f18\u52bf\u3002\u5177\u4f53\u5730&#xff0c;\u5bf9\u4e8e\u95ee\u9898 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">qq<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u4ece\u65e7\u7b56\u7565 <span 
class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u03b8old\\\\pi_{\\\\theta_{old}}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6864em;vertical-align: -0.2559em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u91c7\u6837 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">GG<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 
0.6833em\"><\/span><span class=\"mord mathnormal\">G<\/span><\/span><\/span><\/span><\/span> \u4e2a\u56de\u7b54 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{o1,o2,&#8230;,oG}\\\\{o_1, o_2, &#8230;, o_G\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span 
class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">G<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u83b7\u5f97\u5bf9\u5e94\u5956\u52b1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{r1,r2,&#8230;,rG}\\\\{r_1, r_2, &#8230;, r_G\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">G<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>\u3002\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u4e2a\u56de\u7b54\u7684\u4f18\u52bf\u4f30\u8ba1\u4e3a[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\\hat{A}_i &#061; \\frac{r_i - \\text{mean}(\\{r_1, r_2, \\ldots, 
r_G\\})}{\\text{std}(\\{r_1, r_2, \\ldots, r_G\\})}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0968em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9468em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">A<\/span><\/span><span class=\"\" style=\"top: -3.2523em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1111em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.363em;vertical-align: -0.936em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">std<\/span><\/span><span class=\"mopen\">({<\/span><span class=\"mord\"><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mathnormal mtight\">G<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">})<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord text\"><span class=\"mord\">mean<\/span><\/span><span class=\"mopen\">({<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord 
mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">G<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">})<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.936em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u4f30\u8ba1\u7684\u76f4\u89c2\u610f\u4e49\u662f&#xff1a;\u5c06\u7edd\u5bf9\u5956\u52b1\u8f6c\u6362\u4e3a\u7ec4\u5185\u6807\u51c6\u5316\u540e\u7684\u76f8\u5bf9\u5f97\u5206&#xff0c;\u9ad8\u4e8e\u5e73\u5747\u6c34\u5e73\u7684\u56de\u7b54\u83b7\u5f97\u6b63\u4f18\u52bf&#xff0c;\u4f4e\u4e8e\u5e73\u5747\u6c34\u5e73\u7684\u83b7\u5f97\u8d1f\u4f18\u52bf\u3002\u8fd9\u79cd\u5f52\u4e00\u5316\u5904\u7406\u6d88\u9664\u4e86\u4e0d\u540c\u95ee\u9898\u95f4\u5956\u52b1\u5c3a\u5ea6\u7684\u5dee\u5f02&#xff0c;\u4f7f\u5f97\u4f18\u52bf\u4f30\u8ba1\u66f4\u52a0\u7a33\u5b9a\u3002<\/p>\n<p>GRPO\u7684\u76ee\u6807\u51fd\u6570\u4e0ePPO\u7c7b\u4f3c&#xff0c;\u4f46\u4f7f\u7528\u7ec4\u5185\u4f30\u8ba1\u7684\u4f18\u52bf&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">J_{GRPO}(\\theta) &#061; \\mathbb{E}_{q \\sim P(Q), \\{o_i\\}_{i&#061;1}^G \\sim \\pi_{\\theta_{old}}} \\left[ \\frac{1}{G} \\sum_{i&#061;1}^{G} \\frac{1}{|o_i|} \\sum_{t&#061;1}^{|o_i|} \\min\\left( \\frac{\\pi_\\theta(o_{i,t}|q,o_{i,&lt;t})}{\\pi_{\\theta_{old}}(o_{i,t}|q,o_{i,&lt;t})} \\hat{A}_i, \\text{clip}\\left(\\frac{\\pi_\\theta(o_{i,t}|q,o_{i,&lt;t})}{\\pi_{\\theta_{old}}(o_{i,t}|q,o_{i,&lt;t})}, 1-\\epsilon, 1&#043;\\epsilon\\right) \\hat{A}_i \\right) - \\beta D_{KL}(\\pi_\\theta \\| \\pi_{ref}) 
\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0962em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">GRPO<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.6em;vertical-align: -1.55em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.4618em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">Q<\/span><span 
class=\"mclose mtight\">)<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mopen mtight\">{<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3281em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose mtight\"><span class=\"mclose mtight\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8329em\"><span class=\"\" style=\"top: -2.1777em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -2.8448em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">G<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3223em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.5189em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen\"><span class=\"delimsizing mult\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 2.05em\"><span class=\"\" style=\"top: -4.05em\"><span class=\"pstrut\" style=\"height: 5.6em\"><\/span><span class=\"\" style=\"width: 0.667em;height: 3.6em\"><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 1.55em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal 
mtight\">G<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.936em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.961em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.386em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3281em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mtight\">\u2223<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">min<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing 
size3\">(<\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mrel mtight\">&lt;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span 
class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mrel mtight\">&lt;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9721em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9468em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">A<\/span><\/span><span class=\"\" style=\"top: -3.2523em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1111em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span 
class=\"mord\">clip<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">(<\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mrel mtight\">&lt;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span 
class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mrel mtight\">&lt;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9721em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9468em\"><span class=\"\" style=\"top: 
-3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">A<\/span><\/span><span class=\"\" style=\"top: -3.2523em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1111em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal mtight\">L<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2225<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">re<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1076em\">f<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose\"><span class=\"delimsizing mult\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 2.05em\"><span class=\"\" style=\"top: -4.05em\"><span class=\"pstrut\" style=\"height: 5.6em\"><\/span><span class=\"\" style=\"width: 0.667em;height: 3.6em\"><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.55em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u2223oi\u2223|o_i|<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u4e2a\u56de\u7b54\u7684token\u6570&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0ref\\\\pi_{ref}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7167em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">re<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1076em\">f<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u53c2\u8003\u7b56\u7565&#xff08;\u901a\u5e38\u662f\u521d\u59cb\u6a21\u578b&#xff09;&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b2\\\\beta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236KL\u60e9\u7f5a\u5f3a\u5ea6\u3002GRPO\u5df2\u5728DeepSeek-R1\u7b49\u6a21\u578b\u7684\u8bad\u7ec3\u4e2d\u5c55\u73b0\u51fa\u5353\u8d8a\u6027\u80fd&#xff0c;\u6210\u4e3a\u5f53\u524dLLM Agent RL\u8bad\u7ec3\u7684\u4e3b\u6d41\u7b97\u6cd5[16]\u3002<\/p>\n<p><span class=\"nodeLabel\"><\/p>\n<p>\u6838\u5fc3\u521b\u65b0<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7b56\u7565\u4f18\u5316\u6f14\u8fdb<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7b56\u7565\u68af\u5ea6<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>Actor-Critic<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>TRPO\u4fe1\u4efb\u57df<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>PPO\u88c1\u526a<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>GRPO\u7ec4\u5185\u4f18\u52bf<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>KL\u7ea6\u675f<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6982\u7387\u6bd4\u88c1\u526a<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u65e0Critic\u7f51\u7edc<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7ec4\u5185\u5f52\u4e00\u5316<\/p>\n<p><\/span><\/p>\n<h2>3 Agent\u5de5\u4f5c\u6d41\u7684\u6570\u5b66\u5efa\u6a21&#xff1a;SOP\u5bfb\u627e\u7684\u5f62\u5f0f\u5316<\/h2>\n<h3>3.1 \u5de5\u4f5c\u6d41\u4f5c\u4e3a\u5c42\u6b21\u5316\u51b3\u7b56\u5e8f\u5217<\/h3>\n<h4>3.1.1 
\u5de5\u4f5c\u6d41\u7684\u6811\u72b6\u7ed3\u6784\u8868\u793a<\/h4>\n<p>Agent\u5de5\u4f5c\u6d41\u53ef\u4ee5\u81ea\u7136\u5730\u5efa\u6a21\u4e3a\u5c42\u6b21\u5316\u7684\u51b3\u7b56\u6811\u7ed3\u6784\u3002\u5728\u8fd9\u79cd\u8868\u793a\u4e2d&#xff0c;\u6bcf\u4e2a\u8282\u70b9\u4ee3\u8868\u4e00\u4e2a\u51b3\u7b56\u70b9&#xff0c;\u8fb9\u4ee3\u8868\u53ef\u9009\u7684\u64cd\u4f5c&#xff0c;\u800c\u8def\u5f84\u5219\u5bf9\u5e94\u5b8c\u6574\u7684\u5de5\u4f5c\u6d41\u6267\u884c\u5e8f\u5217\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u5de5\u4f5c\u6d41\u6811 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\\mathcal{T}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.2542em\">T<\/span><\/span><\/span><\/span><\/span> \u5b9a\u4e49\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">(\\mathcal{N}, \\mathcal{E}, s_0)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0894em\">E<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\\mathcal{N}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8282\u70b9\u96c6\u5408&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\\mathcal{E} \\subseteq \\mathcal{N} \\times \\mathcal{N}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8193em;vertical-align: -0.136em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0894em\">E<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2286<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7667em;vertical-align: -0.0833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u00d7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8fb9\u96c6\u5408&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">s_0 \\in \\mathcal{N}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6891em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><\/span><\/span><\/span><\/span> \u4e3a\u6839\u8282\u70b9[16]\u3002<\/p>\n<p>\u5bf9\u4e8e\u590d\u6742\u4efb\u52a1&#xff0c;\u5de5\u4f5c\u6d41\u6811\u53ef\u80fd\u975e\u5e38\u5e9e\u5927\u3002\u4ee5\u5305\u542b <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">n<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">n<\/span><\/span><\/span><\/span><\/span> \u4e2a\u6b65\u9aa4\u7684\u5de5\u4f5c\u6d41\u4e3a\u4f8b&#xff0c;\u82e5\u6bcf\u6b65\u6709 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">k<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0315em\">k<\/span><\/span><\/span><\/span><\/span> \u4e2a\u53ef\u9009\u64cd\u4f5c&#xff0c;\u5219\u5b8c\u6574\u7684\u5de5\u4f5c\u6d41\u6811\u5305\u542b <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O(k^n)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 
1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0315em\">k<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u6761\u8def\u5f84\u3002\u8fd9\u79cd\u6307\u6570\u7ea7\u589e\u957f\u4f7f\u5f97\u7a77\u4e3e\u641c\u7d22\u4e0d\u53ef\u884c&#xff0c;\u5fc5\u987b\u501f\u52a9\u5f3a\u5316\u5b66\u4e60\u7684\u5f15\u5bfc\u641c\u7d22\u673a\u5236\u3002<\/p>\n<p>\u5c42\u6b21\u5316\u5f3a\u5316\u5b66\u4e60&#xff08;Hierarchical Reinforcement Learning, HRL&#xff09;\u4e3a\u5de5\u4f5c\u6d41\u4f18\u5316\u63d0\u4f9b\u4e86\u6709\u6548\u7684\u62bd\u8c61\u673a\u5236\u3002\u5728HRL\u6846\u67b6\u4e2d&#xff0c;\u7b56\u7565\u88ab\u7ec4\u7ec7\u4e3a\u4e24\u5c42\u7ed3\u6784&#xff1a;\u9ad8\u5c42\u7b56\u7565&#xff08;Meta-Policy&#xff09;\u8d1f\u8d23\u9009\u62e9\u5b50\u76ee\u6807&#xff08;Subgoal&#xff09;&#xff0c;\u4f4e\u5c42\u7b56\u7565&#xff08;Sub-Policy&#xff09;\u8d1f\u8d23\u5b9e\u73b0\u5b50\u76ee\u6807\u3002\u8fd9\u79cd\u5206\u89e3\u663e\u8457\u964d\u4f4e\u4e86\u641c\u7d22\u7a7a\u95f4\u590d\u6742\u5ea6[16]\u3002<\/p>\n<p>Options\u6846\u67b6\u662fHRL\u7684\u7ecf\u5178\u5b9e\u73b0&#xff0c;\u5b83\u5c06&#034;\u9009\u9879&#034;&#xff08;Option&#xff09;\u5b9a\u4e49\u4e3a\u4e09\u5143\u7ec4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\\omega &#061; (\\mathcal{I}_\\omega, \\pi_\\omega, \\beta_\\omega)<\/span><span class=\"katex-html\"><span class=\"base\"><span 
class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c9<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0738em\">I<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0738em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" 
style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">I\u03c9\u2286S\\\\mathcal{I}_\\\\omega \\\\subseteq \\\\mathcal{S}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0738em\">I<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0738em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2286<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.075em\">S<\/span><\/span><\/span><\/span><\/span> \u4e3a\u542f\u52a8\u96c6&#xff08;Initiation Set&#xff09;&#xff0c;\u8868\u793a\u9009\u9879\u53ef\u6267\u884c\u7684\u72b6\u6001\u96c6\u5408&#xff1b;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u03c9\\\\pi_\\\\omega<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u9009\u9879\u5185\u90e8\u7b56\u7565&#xff1b;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b2\u03c9:S\u2192[0,1]\\\\beta_\\\\omega: \\\\mathcal{S} \\\\rightarrow [0,1]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" 
style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c9<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.075em\">S<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2192<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">[<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7ec8\u6b62\u6761\u4ef6&#xff0c;\u8868\u793a\u5728\u5404\u72b6\u6001\u4e0b\u9009\u9879\u7ec8\u6b62\u7684\u6982\u7387[16]\u3002<\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u9009\u9879\u53ef\u4ee5\u5bf9\u5e94\u4e8e\u6807\u51c6\u7684\u64cd\u4f5c\u6a21\u5757&#xff0c;\u5982&#034;\u4fe1\u606f\u68c0\u7d22&#034;\u3001&#034;\u6570\u636e\u5206\u6790&#034;\u3001&#034;\u7ed3\u679c\u9a8c\u8bc1&#034;\u7b49\u3002\u901a\u8fc7\u9884\u5b9a\u4e49\u8fd9\u4e9b\u9009\u9879&#xff0c;Agent\u53ef\u4ee5\u5728\u66f4\u9ad8\u62bd\u8c61\u5c42\u6b21\u4e0a\u89c4\u5212\u5de5\u4f5c\u6d41&#xff0c;\u907f\u514d\u9677\u5165\u4f4e\u5c42\u7ec6\u8282\u3002<\/p>\n<h4>3.1.2 
\u6807\u51c6\u64cd\u4f5c\u7a0b\u5e8f&#xff08;SOP&#xff09;\u7684\u6570\u5b66\u5b9a\u4e49<\/h4>\n<p>\u6807\u51c6\u64cd\u4f5c\u7a0b\u5e8f&#xff08;SOP&#xff09;\u662f\u5de5\u4f5c\u6d41\u7684\u89c4\u8303\u5316\u8868\u8fbe&#xff0c;\u5b9a\u4e49\u4e86\u5728\u7279\u5b9a\u573a\u666f\u4e0b\u5e94\u9075\u5faa\u7684\u64cd\u4f5c\u5e8f\u5217\u3002\u4ece\u6570\u5b66\u89d2\u5ea6&#xff0c;SOP\u53ef\u4ee5\u5b9a\u4e49\u4e3a\u6761\u4ef6-\u52a8\u4f5c\u89c4\u5219\u7684\u96c6\u5408&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">SOP&#061;{(ci,ai,pi)}i&#061;1M\\\\text{SOP} &#061; \\\\{(c_i, a_i, p_i)\\\\}_{i&#061;1}^{M}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord text\"><span class=\"mord\">SOP<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1413em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">p<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">M<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">cic_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">c<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u89e6\u53d1\u6761\u4ef6&#xff08;Context Condition&#xff09;&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">aia_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u6267\u884c\u52a8\u4f5c&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">pip_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">p<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u4f18\u5148\u7ea7\u5206\u6570\u3002\u5f53\u591a\u4e2a\u6761\u4ef6\u540c\u65f6\u6ee1\u8db3\u65f6&#xff0c;Agent\u9009\u62e9\u4f18\u5148\u7ea7\u6700\u9ad8\u7684\u89c4\u5219\u6267\u884c[16]\u3002<\/p>\n<p>SOP\u7684\u4f18\u5316\u76ee\u6807\u53ef\u4ee5\u5f62\u5f0f\u5316\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">max\u2061SOPE\u03c4\u223cD[R(\u03c4,SOP)]\\\\max_{\\\\text{SOP}} \\\\mathbb{E}_{\\\\tau \\\\sim \\\\mathcal{D}} \\\\left[ R(\\\\tau, \\\\text{SOP}) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.4943em;vertical-align: -0.7443em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.3557em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord text mtight\"><span class=\"mord mtight\">SOP<\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7443em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.0278em\">D<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\">[<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span 
class=\"mord\">SOP<\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">D\\\\mathcal{D}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">D<\/span><\/span><\/span><\/span><\/span> \u4e3a\u4efb\u52a1\u5206\u5e03&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">R(\u03c4,SOP)R(\\\\tau, \\\\text{SOP})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span class=\"mord\">SOP<\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u4f7f\u7528SOP\u6267\u884c\u4efb\u52a1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4\\\\tau<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><\/span><\/span><\/span><\/span> 
\u83b7\u5f97\u7684\u5956\u52b1\u3002\u8be5\u4f18\u5316\u95ee\u9898\u5177\u6709\u7ec4\u5408\u7279\u6027&#xff0c;\u56e0\u4e3aSOP\u7684\u89c4\u5219\u96c6\u5408\u3001\u6761\u4ef6\u5b9a\u4e49\u548c\u4f18\u5148\u7ea7\u5206\u914d\u90fd\u662f\u79bb\u6563\u9009\u62e9\u3002<\/p>\n<p>\u4e00\u79cd\u6709\u6548\u7684\u4f18\u5316\u7b56\u7565\u662f\u5c06SOP\u5b66\u4e60\u5206\u89e3\u4e3a\u4e24\u4e2a\u5b50\u95ee\u9898&#xff1a;\u89c4\u5219\u53d1\u73b0&#xff08;Rule Discovery&#xff09;\u548c\u89c4\u5219\u6392\u5e8f&#xff08;Rule Ranking&#xff09;\u3002\u89c4\u5219\u53d1\u73b0\u8d1f\u8d23\u8bc6\u522b\u6709\u7528\u7684\u6761\u4ef6-\u52a8\u4f5c\u6a21\u5f0f&#xff0c;\u89c4\u5219\u6392\u5e8f\u5219\u786e\u5b9a\u89c4\u5219\u7684\u6267\u884c\u4f18\u5148\u7ea7\u3002\u5f3a\u5316\u5b66\u4e60\u53ef\u4ee5\u540c\u65f6\u4f18\u5316\u8fd9\u4e24\u4e2a\u65b9\u9762&#xff1a;\u901a\u8fc7\u63a2\u7d22\u53d1\u73b0\u65b0\u89c4\u5219&#xff0c;\u901a\u8fc7\u5229\u7528\u4f18\u5316\u89c4\u5219\u7ec4\u5408[16]\u3002<\/p>\n<h4>3.1.3 \u5de5\u4f5c\u6d41\u72b6\u6001\u7a7a\u95f4\u7684\u7ef4\u5ea6\u707e\u96be\u4e0e\u5e94\u5bf9<\/h4>\n<p>Agent\u5de5\u4f5c\u6d41\u7684\u72b6\u6001\u7a7a\u95f4\u901a\u5e38\u5177\u6709\u9ad8\u7ef4\u5ea6\u548c\u590d\u6742\u7ed3\u6784&#xff0c;\u8fd9\u5e26\u6765\u4e86\u7ef4\u5ea6\u707e\u96be&#xff08;Curse of Dimensionality&#xff09;\u95ee\u9898\u3002\u72b6\u6001\u53ef\u80fd\u5305\u542b\u6587\u672c\u5e8f\u5217\u3001\u6570\u503c\u5411\u91cf\u3001\u7ed3\u6784\u5316\u6570\u636e\u7b49\u591a\u79cd\u6a21\u6001&#xff0c;\u4f20\u7edf\u8868\u683c\u578b\u65b9\u6cd5\u65e0\u6cd5\u6709\u6548\u5904\u7406[16]\u3002<\/p>\n<p>\u6df1\u5ea6\u5f3a\u5316\u5b66\u4e60&#xff08;Deep Reinforcement 
Learning&#xff09;\u901a\u8fc7\u795e\u7ecf\u7f51\u7edc\u51fd\u6570\u903c\u8fd1\u89e3\u51b3\u4e86\u8fd9\u4e00\u6311\u6218\u3002\u7b56\u7565\u7f51\u7edc\u548c\u4ef7\u503c\u7f51\u7edc\u5c06\u539f\u59cb\u72b6\u6001\u6620\u5c04\u4e3a\u52a8\u4f5c\u6982\u7387\u6216\u4ef7\u503c\u4f30\u8ba1&#xff0c;\u5b9e\u73b0\u4e86\u5bf9\u9ad8\u7ef4\u72b6\u6001\u7a7a\u95f4\u7684\u6709\u6548\u6cdb\u5316\u3002\u5bf9\u4e8e\u6587\u672c\u578b\u72b6\u6001&#xff0c;\u901a\u5e38\u4f7f\u7528\u9884\u8bad\u7ec3\u7684\u8bed\u8a00\u6a21\u578b&#xff08;\u5982BERT\u3001GPT&#xff09;\u63d0\u53d6\u8bed\u4e49\u7279\u5f81&#xff1b;\u5bf9\u4e8e\u7ed3\u6784\u5316\u6570\u636e&#xff0c;\u5219\u4f7f\u7528\u56fe\u795e\u7ecf\u7f51\u7edc&#xff08;Graph Neural Network&#xff09;\u7f16\u7801\u5173\u7cfb\u4fe1\u606f[16]\u3002<\/p>\n<p>\u72b6\u6001\u538b\u7f29&#xff08;State Compression&#xff09;\u662f\u53e6\u4e00\u91cd\u8981\u6280\u672f\u3002\u901a\u8fc7\u81ea\u7f16\u7801\u5668&#xff08;Autoencoder&#xff09;\u6216\u53d8\u5206\u81ea\u7f16\u7801\u5668&#xff08;Variational Autoencoder, VAE&#xff09;&#xff0c;\u53ef\u4ee5\u5c06\u9ad8\u7ef4\u72b6\u6001\u538b\u7f29\u4e3a\u4f4e\u7ef4\u6f5c\u5728\u8868\u793a&#xff0c;\u540c\u65f6\u4fdd\u7559\u51b3\u7b56\u76f8\u5173\u7684\u5173\u952e\u4fe1\u606f\u3002\u538b\u7f29\u540e\u7684\u72b6\u6001\u4e0d\u4ec5\u964d\u4f4e\u4e86\u8ba1\u7b97\u590d\u6742\u5ea6&#xff0c;\u8fd8\u6709\u52a9\u4e8e\u53d1\u73b0\u72b6\u6001\u7a7a\u95f4\u7684\u5185\u5728\u7ed3\u6784[16]\u3002<\/p>\n<h3>3.2 \u5de5\u4f5c\u6d41\u4f18\u5316\u7684\u5f3a\u5316\u5b66\u4e60\u5f62\u5f0f\u5316<\/h3>\n<h4>3.2.1 
\u5c06SOP\u5bfb\u627e\u5efa\u6a21\u4e3a\u7b56\u7565\u641c\u7d22\u95ee\u9898<\/h4>\n<p>\u5c06SOP\u5bfb\u627e\u95ee\u9898\u5f62\u5f0f\u5316\u4e3a\u5f3a\u5316\u5b66\u4e60\u95ee\u9898\u9700\u8981\u660e\u786e\u5b9a\u4e49\u72b6\u6001\u3001\u52a8\u4f5c\u548c\u5956\u52b1\u3002\u5728\u8fd9\u79cd\u5f62\u5f0f\u5316\u4e2d&#xff0c;Agent\u7684\u76ee\u6807\u662f\u5b66\u4e60\u5230\u4e00\u79cd\u7b56\u7565&#xff0c;\u8be5\u7b56\u7565\u80fd\u591f\u6839\u636e\u5f53\u524d\u4efb\u52a1\u4e0a\u4e0b\u6587\u9009\u62e9\u6700\u4f18\u7684\u64cd\u4f5c\u5e8f\u5217[15]\u3002<\/p>\n<p>\u72b6\u6001\u8bbe\u8ba1&#xff1a;\u72b6\u6001\u9700\u8981\u7f16\u7801\u4efb\u52a1\u76f8\u5173\u7684\u5168\u90e8\u4fe1\u606f\u3002\u5bf9\u4e8e\u5de5\u4f5c\u6d41\u4f18\u5316\u95ee\u9898&#xff0c;\u72b6\u6001\u901a\u5e38\u5305\u542b&#xff1a;<\/p>\n<ul>\n<li>\u4efb\u52a1\u63cf\u8ff0\u7684\u7279\u5f81\u5411\u91cf <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">etaske_{task}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">e<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0315em\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u5f53\u524d\u5df2\u6267\u884c\u7684\u64cd\u4f5c\u5e8f\u5217 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">h&#061;(a1,a2,&#8230;,at)h &#061; (a_1, a_2, &#8230;, a_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">h<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u5404\u4e2d\u95f4\u6b65\u9aa4\u7684\u6267\u884c\u7ed3\u679c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{r1,r2,&#8230;,rt}\\\\{r_1, r_2, &#8230;, r_t\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u5f53\u524d\u53ef\u7528\u7684\u64cd\u4f5c\u96c6\u5408 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Aavailable\\\\mathcal{A}_{available}<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mord mathnormal mtight\">ai<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">ab<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<\/ul>\n<p>\u7efc\u5408\u72b6\u6001\u8868\u793a\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">st&#061;Encoder(etask,h,{ri},Aavailable)s_t &#061; \\\\text{Encoder}(e_{task}, h, \\\\{r_i\\\\}, \\\\mathcal{A}_{available})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">Encoder<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">e<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0315em\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">h<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 
0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mord mathnormal mtight\">ai<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">ab<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2dEncoder\u4e3a\u7f16\u7801\u7f51\u7edc\u3002<\/p>\n<p>\u52a8\u4f5c\u8bbe\u8ba1&#xff1a;\u52a8\u4f5c\u7a7a\u95f4\u5bf9\u5e94\u4e8e\u5de5\u4f5c\u6d41\u4e2d\u7684\u53ef\u6267\u884c\u64cd\u4f5c\u3002\u6839\u636e\u7c92\u5ea6\u4e0d\u540c&#xff0c;\u52a8\u4f5c\u53ef\u4ee5\u662f&#xff1a;<\/p>\n<ul>\n<li>\u539f\u5b50\u64cd\u4f5c&#xff1a;\u5982\u8c03\u7528\u7279\u5b9aAPI\u3001\u6267\u884c\u7279\u5b9a\u4ee3\u7801\u7247\u6bb5<\/li>\n<li>\u590d\u5408\u64cd\u4f5c&#xff1a;\u5982\u201c\u68c0\u7d22\u76f8\u5173\u4fe1\u606f\u5e76\u603b\u7ed3\u201d\u3001\u201c\u751f\u6210\u5019\u9009\u65b9\u6848\u5e76\u8bc4\u4f30\u201d<\/li>\n<li>\u5143\u64cd\u4f5c&#xff1a;\u5982\u201c\u56de\u9000\u5230\u4e0a\u4e00\u6b65\u201d\u3001\u201c\u8bf7\u6c42\u4eba\u5de5\u534f\u52a9\u201d<\/li>\n<\/ul>\n<p>\u5956\u52b1\u8bbe\u8ba1&#xff1a;\u5956\u52b1\u51fd\u6570\u9700\u8981\u53cd\u6620\u5de5\u4f5c\u6d41\u7684\u6267\u884c\u8d28\u91cf\u3002\u5e38\u89c1\u7684\u5956\u52b1\u6784\u6210\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u7ed3\u679c\u5956\u52b1&#xff1a;\u4efb\u52a1\u5b8c\u6210\u65f6\u6839\u636e\u6b63\u786e\u6027\u7ed9\u4e88\u5956\u52b1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">routcome\u2208{\u22121,0,&#043;1}r_{outcome} \\\\in \\\\{-1, 0, &#043;1\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6891em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal 
mtight\">u<\/span><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">co<\/span><span class=\"mord mathnormal mtight\">m<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\">\u2212<\/span><span class=\"mord\">1<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#043;<\/span><span class=\"mord\">1<\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u6548\u7387\u5956\u52b1&#xff1a;\u6839\u636e\u6267\u884c\u6b65\u9aa4\u6570\u6216\u8d44\u6e90\u6d88\u8017\u7ed9\u4e88\u60e9\u7f5a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">refficiency&#061;\u2212\u03b1\u22c5Tr_{efficiency} &#061; -\\\\alpha \\\\cdot T<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7167em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\"><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1076em\">ff<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mord mathnormal mtight\">c<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\">n<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">cy<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6667em;vertical-align: -0.0833em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u8fc7\u7a0b\u5956\u52b1&#xff1a;\u7531\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u63d0\u4f9b\u7684\u4e2d\u95f4\u6b65\u9aa4\u8d28\u91cf\u8bc4\u4f30 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">rprocessr_{process}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7167em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">rocess<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<\/ul>\n<p>\u603b\u5956\u52b1\u4e3a\u5404\u7ec4\u6210\u90e8\u5206\u4e4b\u548c&#xff08;\u5176\u4e2d\u6548\u7387\u9879\u5df2\u7531\u7cfb\u6570 \u03b1 \u52a0\u6743&#xff09;&#xff1a;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">R&#061;routcome&#043;refficiency&#043;\u2211trprocess(t)R &#061; r_{outcome} &#043; r_{efficiency} &#043; \\\\sum_t r_{process}(t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">u<\/span><span class=\"mord mathnormal mtight\">t<\/span><span 
class=\"mord mathnormal mtight\">co<\/span><span class=\"mord mathnormal mtight\">m<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8694em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1076em\">ff<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mord mathnormal mtight\">c<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\">n<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">cy<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0497em;vertical-align: 
-0.2997em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1308em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">rocess<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">t<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>[16]\u3002<\/p>\n<h4>3.2.2 
\u52a8\u4f5c\u7a7a\u95f4\u7684\u7ed3\u6784\u5316\u8bbe\u8ba1<\/h4>\n<p>Agent\u5de5\u4f5c\u6d41\u7684\u52a8\u4f5c\u7a7a\u95f4\u5177\u6709\u5185\u5728\u7ed3\u6784&#xff0c;\u5408\u7406\u5229\u7528\u8fd9\u79cd\u7ed3\u6784\u53ef\u4ee5\u63d0\u5347\u5b66\u4e60\u6548\u7387\u3002\u52a8\u4f5c\u7a7a\u95f4\u53ef\u4ee5\u5206\u89e3\u4e3a\u591a\u4e2a\u6b63\u4ea4\u7684\u5b50\u7a7a\u95f4&#xff0c;\u6bcf\u4e2a\u5b50\u7a7a\u95f4\u5bf9\u5e94\u4e00\u7c7b\u7279\u5b9a\u64cd\u4f5c[42]\u3002<\/p>\n<p>\u5de5\u5177\u8c03\u7528\u5b50\u7a7a\u95f4&#xff1a;\u5305\u542b\u6240\u6709\u53ef\u8c03\u7528\u7684\u5916\u90e8\u5de5\u5177&#xff08;\u5982\u641c\u7d22\u5f15\u64ce\u3001\u8ba1\u7b97\u5668\u3001\u4ee3\u7801\u6267\u884c\u5668&#xff09;\u3002\u6bcf\u4e2a\u5de5\u5177\u5bf9\u5e94\u4e00\u4e2a\u52a8\u4f5c&#xff0c;\u52a8\u4f5c\u7684\u53c2\u6570&#xff08;\u5982\u641c\u7d22\u67e5\u8be2\u3001\u4ee3\u7801\u5185\u5bb9&#xff09;\u7531\u7b56\u7565\u7f51\u7edc\u751f\u6210\u3002<\/p>\n<p>\u63a8\u7406\u6a21\u5f0f\u5b50\u7a7a\u95f4&#xff1a;\u5b9a\u4e49\u4e86Agent\u7684\u601d\u8003\u65b9\u5f0f&#xff0c;\u5982&#xff1a;<\/p>\n<ul>\n<li>Chain-of-Thought&#xff08;CoT&#xff09;&#xff1a;\u9010\u6b65\u63a8\u7406&#xff0c;\u663e\u5f0f\u5c55\u793a\u601d\u8003\u8fc7\u7a0b<\/li>\n<li>Tree-of-Thought&#xff08;ToT&#xff09;&#xff1a;\u751f\u6210\u591a\u4e2a\u5019\u9009\u601d\u8def&#xff0c;\u8bc4\u4f30\u540e\u9009\u62e9\u6700\u4f18<\/li>\n<li>ReAct&#xff1a;\u63a8\u7406\u4e0e\u884c\u52a8\u4ea4\u66ff\u8fdb\u884c&#xff0c;\u6839\u636e\u89c2\u5bdf\u8c03\u6574\u7b56\u7565<\/li>\n<\/ul>\n<p>\u63a7\u5236\u6d41\u5b50\u7a7a\u95f4&#xff1a;\u63a7\u5236\u5de5\u4f5c\u6d41\u7684\u6267\u884c\u903b\u8f91&#xff0c;\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u987a\u5e8f\u6267\u884c&#xff1a;\u6309\u9884\u5b9a\u987a\u5e8f\u6267\u884c\u64cd\u4f5c\u5e8f\u5217<\/li>\n<li>\u6761\u4ef6\u5206\u652f&#xff1a;\u6839\u636e\u4e2d\u95f4\u7ed3\u679c\u9009\u62e9\u4e0d\u540c\u6267\u884c\u8def\u5f84<\/li>\n<li>\u5faa\u73af\u8fed\u4ee3&#xff1a;\u91cd\u590d\u6267\u884c\u67d0\u64cd\u4f5c\u76f4\u5230\u6ee1\u8db3
\u7ec8\u6b62\u6761\u4ef6<\/li>\n<li>\u5e76\u884c\u6267\u884c&#xff1a;\u540c\u65f6\u6267\u884c\u591a\u4e2a\u72ec\u7acb\u64cd\u4f5c<\/li>\n<\/ul>\n<p>\u52a8\u4f5c\u7a7a\u95f4\u7684\u7ed3\u6784\u5316\u8bbe\u8ba1\u4f7f\u5f97\u7b56\u7565\u53ef\u4ee5\u5206\u5c42\u5b66\u4e60&#xff1a;\u9996\u5148\u5b66\u4e60\u9ad8\u5c42\u63a7\u5236\u7b56\u7565&#xff08;\u9009\u62e9\u63a8\u7406\u6a21\u5f0f\u548c\u63a7\u5236\u6d41&#xff09;&#xff0c;\u7136\u540e\u5728\u9009\u5b9a\u6846\u67b6\u5185\u5b66\u4e60\u4f4e\u5c42\u64cd\u4f5c\u9009\u62e9\u3002\u8fd9\u79cd\u5c42\u6b21\u5316\u5b66\u4e60\u663e\u8457\u964d\u4f4e\u4e86\u6837\u672c\u590d\u6742\u5ea6[16]\u3002<\/p>\n<h4>3.2.3 \u591a\u76ee\u6807\u4f18\u5316\u4e0e\u5e15\u7d2f\u6258\u524d\u6cbf<\/h4>\n<p>\u5b9e\u9645\u5de5\u4f5c\u6d41\u4f18\u5316\u5f80\u5f80\u6d89\u53ca\u591a\u4e2a\u76f8\u4e92\u51b2\u7a81\u7684\u76ee\u6807&#xff0c;\u5982\u51c6\u786e\u7387\u3001\u54cd\u5e94\u65f6\u95f4\u3001\u8d44\u6e90\u6d88\u8017\u7b49\u3002\u591a\u76ee\u6807\u4f18\u5316&#xff08;Multi-Objective Optimization&#xff09;\u6846\u67b6\u4e3a\u5904\u7406\u8fd9\u79cd\u590d\u6742\u6027\u63d0\u4f9b\u4e86\u7cfb\u7edf\u65b9\u6cd5[42]\u3002<\/p>\n<p>\u8bbe\u6709 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">mm<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><\/span><\/span><\/span> \u4e2a\u4f18\u5316\u76ee\u6807 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">f1,f2,&#8230;,fmf_1, f_2, &#8230;, f_m<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 
-0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff08;\u7ea6\u5b9a\u5404\u76ee\u6807\u5747\u8868\u793a\u4e3a\u8d8a\u5c0f\u8d8a\u4f18\u7684\u4ee3\u4ef7\u5f62\u5f0f&#xff0c;\u5982\u4ee5\u9519\u8bef\u7387\u4ee3\u66ff\u51c6\u786e\u7387&#xff09;&#xff0c;\u591a\u76ee\u6807\u4f18\u5316\u95ee\u9898\u5f62\u5f0f\u5316\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">min\u2061\u03c0(f1(\u03c0),f2(\u03c0),&#8230;,fm(\u03c0))\\\\min_{\\\\pi} (f_1(\\\\pi), f_2(\\\\pi), &#8230;, f_m(\\\\pi))<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.45em;vertical-align: -0.7em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6679em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">min<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">))<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5e15\u7d2f\u6258\u6700\u4f18&#xff08;Pareto Optimality&#xff09;\u662f\u591a\u76ee\u6807\u4f18\u5316\u7684\u6838\u5fc3\u6982\u5ff5\u3002\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2217\\\\pi^*<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6887em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f\u5e15\u7d2f\u6258\u6700\u4f18\u7684&#xff0c;\u5982\u679c\u4e0d\u5b58\u5728\u5176\u4ed6\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2032\\\\pi&#039;<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7519em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> 
\u5728\u6240\u6709\u76ee\u6807\u4e0a\u90fd\u4e0d\u52a3\u4e8e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2217\\\\pi^*<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6887em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e14\u81f3\u5c11\u5728\u4e00\u4e2a\u76ee\u6807\u4e0a\u4e25\u683c\u4f18\u4e8e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u2217\\\\pi^*<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6887em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>\u3002\u6240\u6709\u5e15\u7d2f\u6258\u6700\u4f18\u7b56\u7565\u6784\u6210\u5e15\u7d2f\u6258\u524d\u6cbf&#xff08;Pareto 
Front&#xff09;[16]\u3002<\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u4e0d\u540c\u5e94\u7528\u573a\u666f\u5bf9\u76ee\u6807\u7684\u504f\u597d\u4e0d\u540c\u3002\u4f8b\u5982&#xff0c;\u5b9e\u65f6\u4ea4\u4e92\u573a\u666f\u4f18\u5148\u8003\u8651\u54cd\u5e94\u901f\u5ea6&#xff0c;\u800c\u79bb\u7ebf\u5206\u6790\u573a\u666f\u66f4\u5173\u6ce8\u7ed3\u679c\u51c6\u786e\u6027\u3002\u901a\u8fc7\u6c42\u89e3\u5e15\u7d2f\u6258\u524d\u6cbf&#xff0c;\u53ef\u4ee5\u83b7\u5f97\u9002\u5e94\u4e0d\u540c\u504f\u597d\u7684\u7b56\u7565\u96c6\u5408\u3002<\/p>\n<p>\u6807\u91cf\u5316&#xff08;Scalarization&#xff09;\u662f\u5904\u7406\u591a\u76ee\u6807\u4f18\u5316\u7684\u5e38\u7528\u65b9\u6cd5&#xff0c;\u5c06\u591a\u4e2a\u76ee\u6807\u7ec4\u5408\u4e3a\u5355\u4e00\u6807\u91cf\u76ee\u6807&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">J(\u03c0;\u03bb)&#061;\u2211i&#061;1m\u03bbifi(\u03c0)J(\\\\pi; \\\\lambda) &#061; \\\\sum_{i&#061;1}^{m} \\\\lambda_i f_i(\\\\pi)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mpunct\">;<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span 
class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1076em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 
size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb&#061;(\u03bb1,&#8230;,\u03bbm)\\\\lambda &#061; (\\\\lambda_1, &#8230;, \\\\lambda_m)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord 
mathnormal\">\u03bb<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u975e\u8d1f\u6743\u91cd\u5411\u91cf&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u2211i\u03bbi&#061;1\\\\sum_i \\\\lambda_i &#061; 1<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0497em;vertical-align: -0.2997em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.162em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span 
class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">1<\/span><\/span><\/span><\/span><\/span>\u3002\u901a\u8fc7\u6539\u53d8\u6743\u91cd\u5411\u91cf&#xff0c;\u53ef\u4ee5\u63a2\u7d22\u5e15\u7d2f\u6258\u524d\u6cbf\u4e0a\u7684\u4e0d\u540c\u70b9\u3002\u5728\u5f3a\u5316\u5b66\u4e60\u4e2d&#xff0c;\u53ef\u4ee5\u901a\u8fc7\u8bad\u7ec3\u591a\u4e2a\u5177\u6709\u4e0d\u540c\u5956\u52b1\u6743\u91cd\u7684\u7b56\u7565\u6765\u8fd1\u4f3c\u5e15\u7d2f\u6258\u524d\u6cbf[42]\u3002<\/p>\n<h3>3.3 \u957f\u7a0b\u4fe1\u7528\u5206\u914d\u95ee\u9898\u4e0e\u89e3\u51b3\u65b9\u6848<\/h3>\n<h4>3.3.1 \u4fe1\u7528\u5206\u914d\u95ee\u9898\u7684\u672c\u8d28\u5206\u6790<\/h4>\n<p>\u957f\u7a0b\u4fe1\u7528\u5206\u914d&#xff08;Long-Term Credit Assignment&#xff09;\u662f\u5f3a\u5316\u5b66\u4e60\u4e2d\u7684\u6838\u5fc3\u6311\u6218&#xff0c;\u5c24\u5176\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d\u8868\u73b0\u7a81\u51fa\u3002\u5f53Agent\u6267\u884c\u5305\u542b\u6570\u5341\u751a\u81f3\u4e0a\u767e\u4e2a\u6b65\u9aa4\u7684\u5de5\u4f5c\u6d41\u65f6&#xff0c;\u6700\u7ec8\u5956\u52b1\u53ef\u80fd\u4ec5\u4e0e\u5c11\u6570\u5173\u952e\u6b65\u9aa4\u76f8\u5173&#xff0c;\u5982\u4f55\u51c6\u786e\u8bc6\u522b\u8fd9\u4e9b\u5173\u952e\u6b65\u9aa4\u5e76\u5206\u914d\u76f8\u5e94\u7684\u4fe1\u7528\u81f3\u5173\u91cd\u8981[16]\u3002<\/p>\n<p>\u5f62\u5f0f\u5316\u5730&#xff0c;\u8bbe\u5de5\u4f5c\u6d41\u5305\u542b <span class=\"katex--inline\"><span class=\"katex\"><span 
class=\"katex-mathml\">TT<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span> \u4e2a\u6b65\u9aa4&#xff0c;\u6700\u7ec8\u5956\u52b1\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">RTR_T<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>\u3002\u4fe1\u7528\u5206\u914d\u95ee\u9898\u8981\u6c42\u8ba1\u7b97\u6bcf\u4e2a\u6b65\u9aa4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u5bf9\u6700\u7ec8\u5956\u52b1\u7684\u8d21\u732e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u2202RT\u2202at\\\\frac{\\\\partial R_T}{\\\\partial a_t}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.3415em;vertical-align: -0.4451em\"><\/span><span class=\"mord\"><span class=\"mopen 
nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8964em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\" style=\"margin-right: 0.0556em\">\u2202<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: 0em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.4103em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\" style=\"margin-right: 0.0556em\">\u2202<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3567em;margin-left: -0.0077em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1433em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4451em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span>\u3002\u5728\u590d\u6742\u5de5\u4f5c\u6d41\u4e2d&#xff0c;\u8fd9\u79cd\u8d21\u732e\u53ef\u80fd\u662f\u95f4\u63a5\u7684\u3001\u5ef6\u8fdf\u7684&#xff0c;\u4e14\u5b58\u5728\u975e\u7ebf\u6027\u4ea4\u4e92\u3002<\/p>\n<p>\u4fe1\u7528\u5206\u914d\u56f0\u96be\u7684\u539f\u56e0\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u5ef6\u8fdf\u6548\u5e94&#xff1a;\u65e9\u671f\u6b65\u9aa4\u7684\u5f71\u54cd\u53ef\u80fd\u901a\u8fc7\u72b6\u6001\u8f6c\u79fb\u7d2f\u79ef\u5230\u540e\u671f\u624d\u663e\u73b0<\/li>\n<li>\u5197\u4f59\u64cd\u4f5c&#xff1a;\u67d0\u4e9b\u6b65\u9aa4\u53ef\u80fd\u5bf9\u6700\u7ec8\u7ed3\u679c\u65e0\u5b9e\u8d28\u5f71\u54cd<\/li>\n<li>\u975e\u7ebf\u6027\u4ea4\u4e92&#xff1a;\u591a\u4e2a\u6b65\u9aa4\u7684\u7ec4\u5408\u6548\u5e94\u4e0d\u7b49\u4e8e\u5404\u81ea\u6548\u5e94\u7684\u7b80\u5355\u52a0\u548c<\/li>\n<li>\u968f\u673a\u6027&#xff1a;\u73af\u5883\u968f\u673a\u6027\u4f7f\u5f97\u5355\u6b21\u6267\u884c\u7684\u4fe1\u7528\u4f30\u8ba1\u4e0d\u53ef\u9760<\/li>\n<\/ul>\n<h4>3.3.2 \u65f6\u5e8f\u5dee\u5206\u4e0e\u8d44\u683c\u8ff9\u65b9\u6cd5<\/h4>\n<p>\u65f6\u5e8f\u5dee\u5206&#xff08;Temporal Difference, TD&#xff09;\u5b66\u4e60\u901a\u8fc7\u81ea\u4e3e&#xff08;Bootstrapping&#xff09;\u673a\u5236\u5b9e\u73b0\u4fe1\u7528\u7684\u9010\u6b65\u4f20\u64ad\u3002TD(0)\u7b97\u6cd5\u4f7f\u7528\u4e0b\u4e00\u72b6\u6001\u7684\u4ef7\u503c\u4f30\u8ba1\u66f4\u65b0\u5f53\u524d\u4f30\u8ba1&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span 
class=\"katex-mathml\">V(st)\u2190V(st)&#043;\u03b1[rt&#043;\u03b3V(st&#043;1)\u2212V(st)]V(s_t) \\\\leftarrow V(s_t) &#043; \\\\alpha [r_t &#043; \\\\gamma V(s_{t&#043;1}) - V(s_t)]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">\u03b3V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span 
class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>TD\u8bef\u5dee <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4t&#061;rt&#043;\u03b3V(st&#043;1)\u2212V(st)\\\\delta_t &#061; r_t &#043; \\\\gamma V(s_{t&#043;1}) - V(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span 
class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">\u03b3V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 
0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u53cd\u6620\u4e86\u5f53\u524d\u4ef7\u503c\u4f30\u8ba1\u4e0e\u4e00\u6b65\u81ea\u4e3e\u76ee\u6807\u7684\u504f\u5dee&#xff0c;\u6cbf\u7740\u8f68\u8ff9\u53cd\u5411\u4f20\u64adTD\u8bef\u5dee\u53ef\u4ee5\u5b9e\u73b0\u4fe1\u7528\u7684\u5206\u914d[42]\u3002<\/p>\n<p>TD(<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\\\\lambda<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><\/span><\/span><\/span><\/span>)\u7b97\u6cd5\u901a\u8fc7\u8d44\u683c\u8ff9&#xff08;Eligibility Trace&#xff09;\u673a\u5236\u5b9e\u73b0\u4e86\u591a\u6b65TD\u7684\u52a0\u6743\u5e73\u5747\u3002\u8d44\u683c\u8ff9\u8bb0\u5f55\u4e86\u5404\u72b6\u6001\u88ab\u8bbf\u95ee\u7684\u5386\u53f2&#xff0c;\u5e76\u5728\u83b7\u5f97\u5956\u52b1\u65f6\u6309\u8ff9\u5206\u914d\u4fe1\u7528&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">et(s)&#061;\u03b3\u03bbet\u22121(s)&#043;1[st&#061;s]e_t(s) &#061; \\\\gamma \\\\lambda e_{t-1}(s) &#043; \\\\mathbf{1}[s_t &#061; s]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">e<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">\u03b3\u03bb<\/span><span class=\"mord\"><span class=\"mord mathnormal\">e<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathbf\">1<\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: 
-0.25em\"><\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">V(s)\u2190V(s)&#043;\u03b1\u03b4tet(s)V(s) \\\\leftarrow V(s) &#043; \\\\alpha \\\\delta_t e_t(s)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">e<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\u2208[0,1]\\\\lambda \\\\in [0,1]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7335em;vertical-align: -0.0391em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">[<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u8ff9\u7684\u8870\u51cf\u901f\u5ea6\u3002\u5f53 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb&#061;0\\\\lambda &#061; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span 
class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span> \u65f6\u9000\u5316\u4e3aTD(0)&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb&#061;1\\\\lambda &#061; 1<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">1<\/span><\/span><\/span><\/span><\/span> \u65f6\u7b49\u4ef7\u4e8e\u8499\u7279\u5361\u6d1b\u65b9\u6cd5\u3002\u9002\u4e2d\u7684<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\\\\lambda<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><\/span><\/span><\/span><\/span>\u503c&#xff08;\u59820.9&#xff09;\u901a\u5e38\u80fd\u5728\u504f\u5dee\u548c\u65b9\u5dee\u95f4\u53d6\u5f97\u826f\u597d\u5e73\u8861[16]\u3002<\/p>\n<h4>3.3.3 
\u6ce8\u610f\u529b\u673a\u5236\u5728\u4fe1\u7528\u5206\u914d\u4e2d\u7684\u5e94\u7528<\/h4>\n<p>Transformer\u67b6\u6784\u4e2d\u7684\u81ea\u6ce8\u610f\u529b\u673a\u5236&#xff08;Self-Attention&#xff09;\u4e3a\u4fe1\u7528\u5206\u914d\u63d0\u4f9b\u4e86\u65b0\u7684\u89c6\u89d2\u3002\u6ce8\u610f\u529b\u6743\u91cd\u5929\u7136\u5730\u91cf\u5316\u4e86\u4e0d\u540c\u4f4d\u7f6e\u4e4b\u95f4\u7684\u5173\u8054\u5f3a\u5ea6&#xff0c;\u53ef\u4ee5\u76f4\u63a5\u7528\u4e8e\u4fe1\u7528\u5206\u914d[42]\u3002<\/p>\n<p>\u5728\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u53ef\u4ee5\u5c06\u5386\u53f2\u6b65\u9aa4\u5e8f\u5217\u4f5c\u4e3a\u8f93\u5165&#xff0c;\u901a\u8fc7Transformer\u7f16\u7801\u5668\u8ba1\u7b97\u5404\u6b65\u9aa4\u7684\u4e0a\u4e0b\u6587\u8868\u793a\u3002\u6ce8\u610f\u529b\u77e9\u9635 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">A\u2208RT\u00d7TA \\\\in \\\\mathbb{R}^{T \\\\times T}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7224em;vertical-align: -0.0391em\"><\/span><span class=\"mord mathnormal\">A<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8413em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mbin mtight\">\u00d7<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.1389em\">T<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u7684\u5143\u7d20 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">AijA_{ij}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9694em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">ij<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u8868\u793a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">jj<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.854em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0572em\">j<\/span><\/span><\/span><\/span><\/span> \u6b65\u5bf9\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u6b65\u7684\u91cd\u8981\u6027\u3002\u5f53\u5728\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">TT<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span 
class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span> \u6b65\u83b7\u5f97\u6700\u7ec8\u5956\u52b1\u65f6&#xff0c;\u53ef\u4ee5\u6309\u6ce8\u610f\u529b\u6743\u91cd\u5c06\u5956\u52b1\u4fe1\u53f7\u56de\u6eaf\u5206\u914d&#xff0c;\u4ece\u800c\u8ba1\u7b97\u5404\u6b65\u9aa4\u7684\u8d21\u732e\u5ea6\u3002<\/p>\n<p>\u5177\u4f53\u5730&#xff0c;\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u6b65\u7684\u4fe1\u7528\u53ef\u4ee5\u8ba1\u7b97\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Credit(t)&#061;\u2211i&#061;tTAi,t\u22c5\u03b4i\\\\text{Credit}(t) &#061; \\\\sum_{i&#061;t}^{T} A_{i,t} \\\\cdot \\\\delta_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">Credit<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">t<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.106em;vertical-align: -1.2777em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span 
class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4i\\\\delta_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> 
\u6b65\u7684TD\u8bef\u5dee\u3002\u8fd9\u79cd\u57fa\u4e8e\u6ce8\u610f\u529b\u7684\u4fe1\u7528\u5206\u914d\u65b9\u6cd5\u80fd\u591f\u6355\u6349\u957f\u8ddd\u79bb\u4f9d\u8d56\u5173\u7cfb&#xff0c;\u5728\u590d\u6742\u5de5\u4f5c\u6d41\u4e2d\u8868\u73b0\u4f18\u4e8e\u4f20\u7edf\u65b9\u6cd5[16]\u3002<\/p>\n<h2>4 \u8fc7\u7a0b\u5956\u52b1\u6a21\u578b&#xff1a;\u7ec6\u7c92\u5ea6\u4fe1\u7528\u5206\u914d\u7684\u6838\u5fc3\u673a\u5236<\/h2>\n<h3>4.1 \u4ece\u7ed3\u679c\u5956\u52b1\u5230\u8fc7\u7a0b\u5956\u52b1\u7684\u8303\u5f0f\u8f6c\u53d8<\/h3>\n<h4>4.1.1 \u7ed3\u679c\u5956\u52b1\u6a21\u578b\u7684\u5c40\u9650\u6027<\/h4>\n<p>\u4f20\u7edf\u5f3a\u5316\u5b66\u4e60\u5728Agent\u5de5\u4f5c\u6d41\u8bad\u7ec3\u4e2d\u4e3b\u8981\u4f9d\u8d56\u7ed3\u679c\u5956\u52b1\u6a21\u578b&#xff08;Outcome Reward Model, ORM&#xff09;&#xff0c;\u5373\u4ec5\u5728\u4efb\u52a1\u5b8c\u6210\u65f6\u6839\u636e\u6700\u7ec8\u7ed3\u679c\u7ed9\u4e88\u5956\u52b1\u3002\u8fd9\u79cd\u7a00\u758f\u5956\u52b1\u673a\u5236\u5b58\u5728\u6839\u672c\u6027\u5c40\u9650[42]\u3002<\/p>\n<p>\u9996\u5148&#xff0c;ORM\u65e0\u6cd5\u63d0\u4f9b\u4e2d\u95f4\u6b65\u9aa4\u7684\u8d28\u91cf\u53cd\u9988\u3002\u5728\u590d\u6742\u5de5\u4f5c\u6d41\u4e2d&#xff0c;Agent\u53ef\u80fd\u5728\u65e9\u671f\u6b65\u9aa4\u5c31\u72af\u4e0b\u5173\u952e\u9519\u8bef&#xff0c;\u4f46\u8fd9\u4e9b\u9519\u8bef\u76f4\u5230\u6700\u540e\u624d\u901a\u8fc7\u5931\u8d25\u7ed3\u679c\u663e\u73b0\u3002\u7f3a\u4e4f\u53ca\u65f6\u53cd\u9988\u4f7f\u5f97Agent\u96be\u4ee5\u5b9a\u4f4d\u95ee\u9898\u6839\u6e90&#xff0c;\u5b66\u4e60\u6548\u7387\u4f4e\u4e0b\u3002<\/p>\n<p>\u5176\u6b21&#xff0c;ORM\u9762\u4e34\u4e25\u91cd\u7684\u4fe1\u7528\u5206\u914d\u56f0\u96be\u3002\u5f53\u5de5\u4f5c\u6d41\u5305\u542b\u6570\u5341\u4e2a\u6b65\u9aa4\u65f6&#xff0c;\u5c06\u6700\u7ec8\u5956\u52b1\u5f52\u56e0\u4e8e\u5177\u4f53\u6b65\u9aa4\u51e0\u4e4e\u4e0d\u53ef\u80fd\u3002\u8fd9\u5bfc\u81f4\u7b56\u7565\u68af\u5ea6\u4f30\u8ba1\u7684\u9ad8\u65b9\u5dee&#xff0c;\u8bad\u7ec3\u8fc7\u7a0b\u4e0d\u7a33\u5b9a\u3002<\/p>\n<p>\u6700\u540e&#
xff0c;ORM\u65e0\u6cd5\u533a\u5206\u4e0d\u540c\u7a0b\u5ea6\u7684\u5931\u8d25\u3002\u4e24\u4e2a\u90fd\u5bfc\u81f4\u5931\u8d25\u7684\u5de5\u4f5c\u6d41&#xff0c;\u4e00\u4e2a\u53ef\u80fd\u5728\u7b2c\u4e00\u6b65\u5c31\u51fa\u9519&#xff0c;\u53e6\u4e00\u4e2a\u4ec5\u5728\u6700\u540e\u4e00\u6b65\u5931\u8bef&#xff0c;ORM\u5bf9\u4e24\u8005\u7ed9\u4e88\u76f8\u540c\u7684\u60e9\u7f5a&#xff0c;\u8fd9\u663e\u7136\u4e0d\u5408\u7406\u3002<\/p>\n<h4>4.1.2 \u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u7684\u5b9a\u4e49\u4e0e\u6570\u5b66\u5f62\u5f0f<\/h4>\n<p>\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b&#xff08;Process Reward Model, PRM&#xff09;\u901a\u8fc7\u5728\u6bcf\u4e00\u6b65\u63d0\u4f9b\u7ec6\u7c92\u5ea6\u7684\u8bc4\u4f30\u4fe1\u53f7&#xff0c;\u6709\u6548\u89e3\u51b3\u4e86ORM\u7684\u5c40\u9650\u6027\u3002PRM\u4e3a\u5de5\u4f5c\u6d41\u7684\u6bcf\u4e2a\u4e2d\u95f4\u6b65\u9aa4\u5206\u914d\u4e00\u4e2a\u5956\u52b1\u503c&#xff0c;\u53cd\u6620\u8be5\u6b65\u9aa4\u7684\u8d28\u91cf\u548c\u8fdb\u5c55[16]\u3002<\/p>\n<p>\u5f62\u5f0f\u5316\u5730&#xff0c;\u5bf9\u4e8e\u5de5\u4f5c\u6d41\u8f68\u8ff9 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4&#061;(s0,a0,r0,s1,a1,r1,&#8230;,sT,aT,rT)\\\\tau &#061; (s_0, a_0, r_0, s_1, a_1, r_1, &#8230;, s_T, a_T, r_T)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 
0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;PRM\u5b9a\u4e49\u4e3a\u6620\u5c04&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">PRM:(st,at)\u21a6rtproc\u2208R\\\\text{PRM}: (s_t, a_t) \\\\mapsto r_t^{proc} \\\\in \\\\mathbb{R}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" 
style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u21a6<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0281em;vertical-align: -0.2458em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7823em\"><span class=\"\" style=\"top: -2.4542em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1809em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">roc<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" 
style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6889em\"><\/span><span class=\"mord mathbb\">R<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">rtprocr_t^{proc}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0281em;vertical-align: -0.2458em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7823em\"><span class=\"\" style=\"top: -2.4542em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1809em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">roc<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> 
\u6b65\u7684\u8fc7\u7a0b\u5956\u52b1&#xff0c;\u53ef\u4ee5\u57fa\u4e8e\u4ee5\u4e0b\u51c6\u5219\u8ba1\u7b97&#xff1a;<\/p>\n<ul>\n<li>\u6b63\u786e\u6027&#xff1a;\u8be5\u6b65\u9aa4\u7684\u903b\u8f91\u662f\u5426\u6b63\u786e&#xff0c;\u662f\u5426\u7b26\u5408\u4e8b\u5b9e<\/li>\n<li>\u8fdb\u5c55\u6027&#xff1a;\u8be5\u6b65\u9aa4\u662f\u5426\u671d\u7740\u6700\u7ec8\u76ee\u6807\u63a8\u8fdb<\/li>\n<li>\u6548\u7387\u6027&#xff1a;\u8be5\u6b65\u9aa4\u662f\u5426\u7b80\u6d01\u9ad8\u6548&#xff0c;\u65e0\u5197\u4f59\u64cd\u4f5c<\/li>\n<li>\u4e00\u81f4\u6027&#xff1a;\u8be5\u6b65\u9aa4\u4e0e\u524d\u540e\u6b65\u9aa4\u662f\u5426\u903b\u8f91\u4e00\u81f4<\/li>\n<\/ul>\n<p>PRM\u7684\u8bad\u7ec3\u901a\u5e38\u91c7\u7528\u76d1\u7763\u5b66\u4e60\u65b9\u6cd5\u3002\u9996\u5148\u9700\u8981\u6784\u5efa\u6b65\u9aa4\u7ea7\u6807\u6ce8\u6570\u636e\u96c6&#xff0c;\u5bf9\u6bcf\u4e2a\u4e2d\u95f4\u6b65\u9aa4\u6807\u6ce8\u5176\u8d28\u91cf\u5206\u6570\u3002\u7531\u4e8e\u4eba\u5de5\u6807\u6ce8\u6210\u672c\u9ad8\u6602&#xff0c;\u7814\u7a76\u8005\u63d0\u51fa\u4e86\u591a\u79cd\u81ea\u52a8\u6807\u6ce8\u65b9\u6cd5[16]\u3002<\/p>\n<h4>4.1.3 PRM\u4e0eORM\u7684\u534f\u540c\u673a\u5236<\/h4>\n<p>\u5728\u5b9e\u9645\u5e94\u7528\u4e2d&#xff0c;PRM\u4e0eORM\u5e76\u975e\u76f8\u4e92\u66ff\u4ee3&#xff0c;\u800c\u662f\u534f\u540c\u5de5\u4f5c\u3002ORM\u63d0\u4f9b\u6700\u7ec8\u7ed3\u679c\u7684\u5ba2\u89c2\u8bc4\u4f30&#xff0c;PRM\u63d0\u4f9b\u4e2d\u95f4\u8fc7\u7a0b\u7684\u7ec6\u7c92\u5ea6\u6307\u5bfc&#xff0c;\u4e24\u8005\u7ed3\u5408\u5f62\u6210\u5b8c\u6574\u7684\u5956\u52b1\u4fe1\u53f7[42]\u3002<\/p>\n<p>\u7ec4\u5408\u5956\u52b1\u51fd\u6570\u53ef\u4ee5\u8868\u793a\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Rtotal&#061;\u03b1\u22c5Routcome&#043;(1\u2212\u03b1)\u22c5\u2211t&#061;1T\u03b3t\u22121rtprocR_{total} &#061; \\\\alpha \\\\cdot R_{outcome} &#043; (1-\\\\alpha) \\\\cdot \\\\sum_{t&#061;1}^{T} \\\\gamma^{t-1} r_t^{proc}<\/span><span 
class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.4445em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">u<\/span><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">co<\/span><span class=\"mord mathnormal mtight\">m<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.0954em;vertical-align: -1.2671em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel 
mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7823em\"><span class=\"\" style=\"top: -2.4542em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1809em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">roc<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b1\u2208[0,1]\\\\alpha \\\\in [0,1]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5782em;vertical-align: -0.0391em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">[<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u4e24\u79cd\u5956\u52b1\u7684\u6743\u91cd&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b3\\\\gamma<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span> \u4e3a\u6298\u6263\u56e0\u5b50\u3002\u5728\u8bad\u7ec3\u521d\u671f&#xff0c;\u53ef\u4ee5\u589e\u5927 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b1\\\\alpha<\/span><span class=\"katex-html\"><span class=\"base\"><span 
class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span> \u4f7fAgent\u9996\u5148\u5b66\u4f1a\u5b8c\u6210\u4efb\u52a1&#xff1b;\u968f\u7740\u8bad\u7ec3\u8fdb\u884c&#xff0c;\u9010\u6e10\u964d\u4f4e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b1\\\\alpha<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span> \u4ee5\u5f3a\u5316\u5bf9\u8fc7\u7a0b\u8d28\u91cf\u7684\u5173\u6ce8\u3002<\/p>\n<p>PRM\u8fd8\u53ef\u4ee5\u5728\u63a8\u7406\u9636\u6bb5\u6307\u5bfc\u641c\u7d22\u3002\u901a\u8fc7PRM\u8bc4\u5206&#xff0c;\u53ef\u4ee5\u5b9e\u73b0\u6b65\u9aa4\u7ea7\u7684\u675f\u641c\u7d22&#xff08;Beam Search&#xff09;&#xff1a;\u5728\u6bcf\u4e00\u6b65\u751f\u6210\u591a\u4e2a\u5019\u9009\u52a8\u4f5c&#xff0c;\u7531PRM\u8bc4\u5206\u540e\u4fdd\u7559Top-K\u4e2a\u6700\u6709\u5e0c\u671b\u7684\u5019\u9009\u3002\u8fd9\u79cdPRM\u5f15\u5bfc\u7684\u641c\u7d22\u663e\u8457\u63d0\u5347\u4e86Agent\u5728\u590d\u6742\u4efb\u52a1\u4e0a\u7684\u8868\u73b0[16]\u3002<\/p>\n<p><span class=\"nodeLabel\"><\/p>\n<p>PRM\u8fc7\u7a0b\u5956\u52b1<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>r1<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>r2<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>r3<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>r4<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa41<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa42<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa43<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6700\u7ec8\u7ed3\u679c<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5956\u52b1\u4fe1\u53f7<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>ORM\u7ed3\u679c\u5956\u52b1<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/p>\n<p>\u53ea\u6709\u6700\u7ec8\u5956\u52b1<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa41<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa42<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6b65\u9aa43<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6700\u7ec8\u7ed3\u679c<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5956\u52b1\u4fe1\u53f7<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>PRM<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>r1<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>r2<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>r3<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>r4<\/p>\n<p><\/span><\/p>\n<h3>4.2 \u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u7684\u8bad\u7ec3\u65b9\u6cd5<\/h3>\n<h4>4.2.1 \u8499\u7279\u5361\u6d1b\u4f30\u8ba1\u4e0e\u81ea\u52a8\u6807\u6ce8<\/h4>\n<p>\u8499\u7279\u5361\u6d1b&#xff08;Monte Carlo, 
MC&#xff09;\u4f30\u8ba1\u662fPRM\u81ea\u52a8\u6807\u6ce8\u7684\u4e3b\u6d41\u65b9\u6cd5\u3002\u5176\u6838\u5fc3\u601d\u60f3\u662f&#xff1a;\u4ece\u67d0\u4e00\u6b65\u51fa\u53d1&#xff0c;\u91c7\u6837\u591a\u6761\u540e\u7eed\u8f68\u8ff9&#xff0c;\u6839\u636e\u8fd9\u4e9b\u8f68\u8ff9\u7684\u6700\u7ec8\u6210\u529f\u7387\u4f30\u8ba1\u8be5\u6b65\u7684\u8d28\u91cf[56]\u3002<\/p>\n<p>\u5177\u4f53\u5730&#xff0c;\u5bf9\u4e8e\u6b65\u9aa4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">(st,at)(s_t, a_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u4ece\u8be5\u6b65\u5f00\u59cb\u91c7\u6837 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NN<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><\/span> \u6761\u5b8c\u6210\u8f68\u8ff9 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{\u03c4(i)}i&#061;1N\\\\{\\\\tau^{(i)}\\\\}_{i&#061;1}^N<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1467em;vertical-align: -0.2587em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.888em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.4413em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: 
-3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2587em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u6bcf\u6761\u8f68\u8ff9\u4f7f\u7528\u5f53\u524d\u7b56\u7565\u6216\u968f\u673a\u7b56\u7565\u751f\u6210\u3002\u8be5\u6b65\u7684\u8fc7\u7a0b\u5956\u52b1\u4f30\u8ba1\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">r^tproc&#061;1N\u2211i&#061;1NR(\u03c4(i))\\\\hat{r}_t^{proc} &#061; \\\\frac{1}{N} \\\\sum_{i&#061;1}^{N} R(\\\\tau^{(i)})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0281em;vertical-align: -0.2458em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7823em\"><span class=\"\" style=\"top: -2.4542em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1809em;margin-right: 
0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">roc<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.106em;vertical-align: -1.2777em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 
3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.938em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">R(\u03c4(i))R(\\\\tau^{(i)})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.138em;vertical-align: 
-0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.888em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u6761\u8f68\u8ff9\u7684\u6700\u7ec8\u5956\u52b1&#xff08;\u901a\u5e38\u4e3a0\u62161&#xff09;\u3002\u5f53 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NN<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><\/span> \u8db3\u591f\u5927\u65f6&#xff0c;\u8be5\u4f30\u8ba1\u6536\u655b\u5230\u771f\u5b9e\u7684\u671f\u671b\u56de\u62a5\u3002<\/p>\n<p>MC\u4f30\u8ba1\u7684\u65b9\u5dee\u4e0e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NN<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><\/span> 
\u6210\u53cd\u6bd4\u3002\u4e3a\u964d\u4f4e\u65b9\u5dee&#xff0c;\u53ef\u4ee5\u91c7\u7528\u91cd\u8981\u6027\u91c7\u6837&#xff08;Importance Sampling&#xff09;\u6280\u672f&#xff0c;\u4f7f\u7528\u66f4\u9ad8\u6548\u7684\u7b56\u7565\u751f\u6210\u8f68\u8ff9&#xff0c;\u7136\u540e\u901a\u8fc7\u91cd\u8981\u6027\u6743\u91cd\u6821\u6b63\u504f\u5dee\u3002\u53e6\u4e00\u79cd\u65b9\u6cd5\u662f\u4f7f\u7528\u5171\u540c\u968f\u673a\u6570&#xff08;Common Random Numbers&#xff09;&#xff0c;\u5728\u4e0d\u540c\u6b65\u9aa4\u7684\u4f30\u8ba1\u4e2d\u4f7f\u7528\u76f8\u540c\u7684\u968f\u673a\u79cd\u5b50&#xff0c;\u51cf\u5c11\u4f30\u8ba1\u95f4\u7684\u65b9\u5dee[42]\u3002<\/p>\n<h4>4.2.2 \u65f6\u5e8f\u5dee\u5206\u4f30\u8ba1\u4e0e\u4ef7\u503c\u4f20\u64ad<\/h4>\n<p>\u65f6\u5e8f\u5dee\u5206&#xff08;TD&#xff09;\u4f30\u8ba1\u63d0\u4f9b\u4e86\u53e6\u4e00\u79cdPRM\u8bad\u7ec3\u65b9\u6cd5&#xff0c;\u5b83\u901a\u8fc7\u81ea\u4e3e\u673a\u5236\u5b9e\u73b0\u4ef7\u503c\u7684\u9010\u6b65\u4f20\u64ad\u3002\u4e0eMC\u4f30\u8ba1\u9700\u8981\u5b8c\u6574\u8f68\u8ff9\u4e0d\u540c&#xff0c;TD\u4f30\u8ba1\u53ef\u4ee5\u5728\u6bcf\u4e00\u6b65\u66f4\u65b0&#xff0c;\u5177\u6709\u66f4\u4f4e\u7684\u8ba1\u7b97\u6210\u672c[14]\u3002<\/p>\n<p>TD-based PRM\u8bad\u7ec3\u4f7f\u7528\u4ee5\u4e0b\u66f4\u65b0\u89c4\u5219&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">PRM(st,at)\u2190PRM(st,at)&#043;\u03b2[rt&#043;\u03b3max\u2061a\u2032PRM(st&#043;1,a\u2032)\u2212PRM(st,at)]\\\\text{PRM}(s_t, a_t) \\\\leftarrow \\\\text{PRM}(s_t, a_t) &#043; \\\\beta [r_t &#043; \\\\gamma \\\\max_{a&#039;} \\\\text{PRM}(s_{t&#043;1}, a&#039;) &#045; \\\\text{PRM}(s_t, a_t)]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2190<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.5459em;vertical-align: -0.744em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.356em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.744em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)]<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u66f4\u65b0\u4f7f\u5f97PRM\u503c\u6cbf\u7740\u8f68\u8ff9\u5411\u540e\u4f20\u64ad&#xff0c;\u6700\u7ec8\u6b65\u9aa4\u7684\u7ed3\u679c\u5956\u52b1\u901a\u8fc7\u591a\u6b65TD\u8bef\u5dee\u4f20\u9012\u5230\u524d\u9762\u5404\u6b65\u3002<\/p>\n<p>\u5e7f\u4e49\u4f18\u52bf\u4f30\u8ba1&#xff08;Generalized Advantage Estimation, GAE&#xff09;\u7ed3\u5408\u4e86MC\u548cTD\u7684\u4f18\u70b9&#xff0c;\u901a\u8fc7\u53c2\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\\\\lambda<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u4e24\u8005\u7684\u6743\u8861[16]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">A^tGAE&#061;\u2211l&#061;0\u221e(\u03b3\u03bb)l\u03b4t&#043;l\\\\hat{A}_t^{GAE} &#061; 
\\\\sum_{l&#061;0}^{\\\\infty} (\\\\gamma \\\\lambda)^l \\\\delta_{t&#043;l}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1938em;vertical-align: -0.247em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9468em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">A<\/span><\/span><span class=\"\" style=\"top: -3.2523em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1111em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">G<\/span><span class=\"mord mathnormal mtight\">A<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0576em\">E<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9535em;vertical-align: -1.3021em\"><\/span><span class=\"mop op-limits\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8479em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u221e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3021em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u03b3\u03bb<\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8991em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" 
style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4t&#061;rt&#043;\u03b3V(st&#043;1)\u2212V(st)\\\\delta_t &#061; r_t &#043; \\\\gamma V(s_{t&#043;1}) - V(s_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.2222em\">\u03b3V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 
0.2222em\">V<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3aTD\u8bef\u5dee\u3002\u5f53 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb&#061;0\\\\lambda &#061; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span> \u65f6\u9000\u5316\u4e3a\u5355\u6b65TD&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb&#061;1\\\\lambda &#061; 1<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">1<\/span><\/span><\/span><\/span><\/span> 
\u65f6\u7b49\u4ef7\u4e8eMC\u4f30\u8ba1\u3002\u9002\u4e2d\u7684<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\\\\lambda<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><\/span><\/span><\/span><\/span>\u503c&#xff08;\u59820.95&#xff09;\u5728\u5b9e\u8df5\u4e2d\u8868\u73b0\u6700\u4f73\u3002<\/p>\n<h4>4.2.3 \u5bf9\u6bd4\u5b66\u4e60\u4e0e\u6392\u540d\u635f\u5931<\/h4>\n<p>PRM\u7684\u8bad\u7ec3\u53ef\u4ee5\u5efa\u6a21\u4e3a\u6392\u5e8f\u95ee\u9898&#xff1a;\u7ed9\u5b9a\u4e24\u4e2a\u6b65\u9aa4&#xff0c;\u5224\u65ad\u54ea\u4e2a\u66f4\u4f18\u3002\u8fd9\u79cd\u6392\u5e8f\u89c6\u89d2\u81ea\u7136\u5730\u5bfc\u51fa\u4e86\u5bf9\u6bd4\u5b66\u4e60&#xff08;Contrastive Learning&#xff09;\u65b9\u6cd5[16]\u3002<\/p>\n<p>\u5bf9\u4e8e\u540c\u4e00\u95ee\u9898\u7684\u4e24\u4e2a\u4e0d\u540c\u6267\u884c\u8def\u5f84&#xff0c;\u5982\u679c\u8def\u5f84A\u6700\u7ec8\u6210\u529f\u800c\u8def\u5f84B\u5931\u8d25&#xff0c;\u5219\u8def\u5f84A\u4e2d\u7684\u6240\u6709\u6b65\u9aa4\u5e94\u83b7\u5f97\u6bd4\u8def\u5f84B\u5bf9\u5e94\u6b65\u9aa4\u66f4\u9ad8\u7684PRM\u5206\u6570\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u8bbe <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">(stA,atA)(s_t^A, a_t^A)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0913em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 
0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">A<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">A<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u548c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">(stB,atB)(s_t^B, a_t^B)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0913em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0502em\">B<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0502em\">B<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u4e24\u6761\u8def\u5f84\u7684\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> 
\u6b65&#xff0c;\u6392\u540d\u635f\u5931\u5b9a\u4e49\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Lrank&#061;max\u2061(0,m\u2212PRM(stA,atA)&#043;PRM(stB,atB))\\\\mathcal{L}_{rank} &#061; \\\\max(0, m - \\\\text{PRM}(s_t^A, a_t^A) &#043; \\\\text{PRM}(s_t^B, a_t^B))<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0315em\">ank<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mop\">max<\/span><span class=\"mopen\">(<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">m<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 
1.1413em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">A<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">A<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span 
class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1413em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0502em\">B<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8913em\"><span class=\"\" style=\"top: -2.453em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" 
style=\"margin-right: 0.0502em\">B<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">))<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">mm<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8fb9\u9645\u53c2\u6570\u3002\u8be5\u635f\u5931\u9f13\u52b1\u6210\u529f\u8def\u5f84\u7684\u6b65\u9aa4\u5f97\u5206\u6bd4\u5931\u8d25\u8def\u5f84\u9ad8\u81f3\u5c11 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">mm<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><\/span><\/span><\/span>\u3002<\/p>\n<p>\u5bf9\u6bd4\u5b66\u4e60\u8fd8\u53ef\u4ee5\u6269\u5c55\u5230\u591a\u8def\u5f84\u6bd4\u8f83\u3002\u7ed9\u5b9a\u540c\u4e00\u95ee\u9898\u7684 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">KK<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><\/span> \u6761\u8def\u5f84\u53ca\u5176\u6700\u7ec8\u6210\u529f\u7387&#xff0c;\u53ef\u4ee5\u4f7f\u7528ListNet\u6216ListMLE\u7b49\u6392\u5e8f\u635f\u5931\u51fd\u6570\u8bad\u7ec3PRM&#xff0c;\u4f7f\u5176\u8f93\u51fa\u7684\u6b65\u9aa4\u5206\u6570\u4e0e\u6700\u7ec8\u6210\u529f\u7387\u4e00\u81f4[56]\u3002<\/p>\n<h3>4.3 PRM\u5728\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d\u7684\u5e94\u7528<\/h3>\n<h4>4.3.1 
\u6b65\u9aa4\u7ea7\u675f\u641c\u7d22\u4e0e\u63a8\u7406\u5f15\u5bfc<\/h4>\n<p>PRM\u6700\u76f4\u63a5\u7684\u5e94\u7528\u662f\u5728\u63a8\u7406\u9636\u6bb5\u5f15\u5bfc\u641c\u7d22\u8fc7\u7a0b\u3002\u4f20\u7edf\u7684\u8d2a\u5a6a\u89e3\u7801&#xff08;Greedy Decoding&#xff09;\u6bcf\u4e00\u6b65\u9009\u62e9\u6982\u7387\u6700\u9ad8\u7684\u52a8\u4f5c&#xff0c;\u5bb9\u6613\u9677\u5165\u5c40\u90e8\u6700\u4f18\u3002PRM\u5f15\u5bfc\u7684\u675f\u641c\u7d22&#xff08;Beam Search&#xff09;\u5219\u8003\u8651\u591a\u6b65\u7d2f\u79ef\u4ef7\u503c&#xff0c;\u80fd\u591f\u53d1\u73b0\u66f4\u4f18\u7684\u5168\u5c40\u89e3[42]\u3002<\/p>\n<p>\u675f\u641c\u7d22\u7b97\u6cd5\u7ef4\u62a4 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">KK<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><\/span> \u4e2a\u5019\u9009\u8def\u5f84&#xff08;\u675f\u5bbd&#xff09;\u3002\u5728\u6bcf\u4e00\u6b65&#xff0c;\u5bf9\u6bcf\u4e2a\u5019\u9009\u8def\u5f84\u6269\u5c55\u6240\u6709\u53ef\u80fd\u7684\u52a8\u4f5c&#xff0c;\u7531PRM\u8bc4\u4f30\u6269\u5c55\u540e\u7684\u8d28\u91cf&#xff0c;\u4fdd\u7559\u5f97\u5206\u6700\u9ad8\u7684 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">KK<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><\/span> \u4e2a\u5019\u9009\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u8bbe\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u6b65\u7684\u5019\u9009\u96c6\u5408\u4e3a <span class=\"katex--inline\"><span 
class=\"katex\"><span class=\"katex-mathml\">Bt&#061;{(st(i),scorei)}i&#061;1K\\\\mathcal{B}_t &#061; \\\\{(s_t^{(i)}, \\\\text{score}_i)\\\\}_{i&#061;1}^K<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0304em\">B<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0304em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.3035em;vertical-align: -0.2587em\"><\/span><span class=\"mopen\">{(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.0448em\"><span class=\"\" style=\"top: -2.4542em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.2198em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span 
class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2458em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">score<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.4413em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2587em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5219\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">t&#043;1t&#043;1<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6984em;vertical-align: -0.0833em\"><\/span><span class=\"mord mathnormal\">t<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">1<\/span><\/span><\/span><\/span><\/span> \u6b65\u7684\u5019\u9009\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Bt&#043;1&#061;TopK(s,a){score(s)&#043;PRM(s,a)\u2223s\u2208Bt,a\u2208A(s)}\\\\mathcal{B}_{t&#043;1} &#061; \\\\text{TopK}_{(s,a)} \\\\left\\\\{ \\\\text{score}(s) &#043; \\\\text{PRM}(s, a) \\\\mid s \\\\in \\\\mathcal{B}_t, a \\\\in \\\\mathcal{A}(s) \\\\right\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8917em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0304em\">B<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0304em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1691em;vertical-align: -0.4191em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">TopK<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2809em\"><span class=\"\" style=\"top: -2.4559em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4191em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\">{<\/span><span class=\"mord text\"><span class=\"mord\">score<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord text\"><span class=\"mord\">PRM<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" 
style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2223<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0304em\">B<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0304em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathcal\">A<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\">}<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>PRM\u5f15\u5bfc\u7684\u675f\u641c\u7d22\u663e\u8457\u63d0\u5347\u4e86Agent\u5728\u6570\u5b66\u63a8\u7406\u3001\u4ee3\u7801\u751f\u6210\u7b49\u590d\u6742\u4efb\u52a1\u4e0a\u7684\u8868\u73b0\u3002\u7814\u7a76\u8868\u660e&#xff0c;\u4f7f\u7528PRM\u5f15\u5bfc\u7684\u641c\u7d22&#xff0c;7B\u53c2\u6570\u7684\u6a21\u578b\u53ef\u4ee5\u8fbe\u5230\u4e0e175B\u6a21\u578b\u76f8\u5f53\u7684\u6027\u80fd[7]\u3002<\/p>\n<h4>4.3.2 
PRM\u4e0e\u7b56\u7565\u4f18\u5316\u7684\u7aef\u5230\u7aef\u8bad\u7ec3<\/h4>\n<p>PRM\u4e0d\u4ec5\u53ef\u4ee5\u7528\u4e8e\u63a8\u7406\u5f15\u5bfc&#xff0c;\u8fd8\u53ef\u4ee5\u4e0e\u7b56\u7565\u7f51\u7edc\u7aef\u5230\u7aef\u8054\u5408\u8bad\u7ec3&#xff0c;\u5b9e\u73b0\u8fc7\u7a0b\u76d1\u7763\u5f3a\u5316\u5b66\u4e60&#xff08;Process-Supervised RL&#xff09;\u3002\u5728\u8fd9\u79cd\u6846\u67b6\u4e0b&#xff0c;PRM\u63d0\u4f9b\u7684\u6b65\u9aa4\u7ea7\u5956\u52b1\u76f4\u63a5\u7528\u4e8e\u7b56\u7565\u66f4\u65b0[61]\u3002<\/p>\n<p>\u8bad\u7ec3\u8fc7\u7a0b\u4ea4\u66ff\u8fdb\u884c\u4ee5\u4e0b\u6b65\u9aa4&#xff1a;<\/p>\n<ol>\n<li>\u4f7f\u7528\u5f53\u524d\u7b56\u7565\u91c7\u6837\u5de5\u4f5c\u6d41\u8f68\u8ff9<\/li>\n<li>\u4f7f\u7528PRM\u8bc4\u4f30\u5404\u6b65\u9aa4\u8d28\u91cf&#xff0c;\u751f\u6210\u8fc7\u7a0b\u5956\u52b1<\/li>\n<li>\u4f7f\u7528PPO\u6216GRPO\u7b97\u6cd5\u66f4\u65b0\u7b56\u7565&#xff0c;\u6700\u5927\u5316\u7d2f\u79ef\u8fc7\u7a0b\u5956\u52b1<\/li>\n<li>\u4f7f\u7528\u65b0\u91c7\u6837\u7684\u8f68\u8ff9\u66f4\u65b0PRM&#xff0c;\u63d0\u9ad8\u8bc4\u4f30\u51c6\u786e\u6027<\/li>\n<\/ol>\n<p>\u8fd9\u79cd\u8054\u5408\u8bad\u7ec3\u4f7f\u5f97\u7b56\u7565\u548cPRM\u76f8\u4e92\u4fc3\u8fdb&#xff1a;\u66f4\u597d\u7684\u7b56\u7565\u751f\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u8f68\u8ff9&#xff0c;\u7528\u4e8e\u8bad\u7ec3\u66f4\u51c6\u786e\u7684PRM&#xff1b;\u66f4\u51c6\u786e\u7684PRM\u63d0\u4f9b\u66f4\u53ef\u9760\u7684\u5956\u52b1\u4fe1\u53f7&#xff0c;\u5f15\u5bfc\u7b56\u7565\u8fdb\u4e00\u6b65\u4f18\u5316\u3002<\/p>\n<h4>4.3.3 \u9519\u8bef\u5b9a\u4f4d\u4e0e\u56de\u6eaf\u673a\u5236<\/h4>\n<p>PRM\u7684\u4e00\u4e2a\u91cd\u8981\u5e94\u7528\u662f\u9519\u8bef\u5b9a\u4f4d&#xff08;Error 
Localization&#xff09;\u3002\u5f53\u5de5\u4f5c\u6d41\u6267\u884c\u5931\u8d25\u65f6&#xff0c;PRM\u53ef\u4ee5\u5e2e\u52a9\u8bc6\u522b\u7b2c\u4e00\u4e2a\u9519\u8bef\u53d1\u751f\u7684\u6b65\u9aa4&#xff0c;\u4e3a\u56de\u6eaf\u548c\u4fee\u6b63\u63d0\u4f9b\u4f9d\u636e[62]\u3002<\/p>\n<p>OmegaPRM\u7b97\u6cd5\u901a\u8fc7\u4e8c\u5206\u641c\u7d22\u9ad8\u6548\u5b9a\u4f4d\u9519\u8bef\u6b65\u9aa4\u3002\u7ed9\u5b9a\u4e00\u6761\u5931\u8d25\u8f68\u8ff9&#xff0c;\u7b97\u6cd5\u9996\u5148\u68c0\u67e5\u4e2d\u70b9\u6b65\u9aa4\u7684PRM\u5206\u6570\u3002\u5982\u679c\u4e2d\u70b9\u524d\u7684\u6b65\u9aa4\u5f97\u5206\u6b63\u5e38\u800c\u4e2d\u70b9\u540e\u5f97\u5206\u9aa4\u964d&#xff0c;\u5219\u9519\u8bef\u4f4d\u4e8e\u4e2d\u70b9\u9644\u8fd1\u3002\u901a\u8fc7\u9012\u5f52\u4e8c\u5206&#xff0c;\u53ef\u4ee5\u5728 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O(log\u2061T)O(\\\\log T)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mopen\">(<\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u6b21\u8bc4\u4f30\u5185\u5b9a\u4f4d\u7b2c\u4e00\u4e2a\u9519\u8bef\u6b65\u9aa4[16]\u3002<\/p>\n<p>\u5b9a\u4f4d\u9519\u8bef\u540e&#xff0c;Agent\u53ef\u4ee5\u6267\u884c\u56de\u6eaf&#xff08;Backtracking&#xff09;\u64cd\u4f5c&#xff0c;\u56de\u9000\u5230\u9519\u8bef\u524d\u7684\u72b6\u6001\u5e76\u5c1d\u8bd5\u66ff\u4ee3\u52a8\u4f5c\u3002\u8fd9\u79cd\u8bd5\u9519\u5b66\u4e60\u673a\u5236\u4f7f\u5f97Agent\u80fd\u591f\u4ece\u5931\u8d25\u4e2d\u5feb\u901f\u6062\u590d&#xff0c;\u6301\u7eed\u63d0\u5347\u5de5\u4f5c\u6d41\u8d28\u91cf\u3002<\/p>\n<p>\u9519\u8bef\u5b9a\u4f4d&#xff1a;\u5931\u8d25\u8f68\u8ff9 \u2192 \u4e8c\u5206\u641c\u7d22 \u2192 PRM\u8bc4\u5206 \u2192 \u5b9a\u4f4d\u9996\u4e2a\u9519\u8bef \u2192 \u56de\u6eaf\u4fee\u6b63<\/p>\n<p>PRM\u8bad\u7ec3\u6d41\u7a0b&#xff1a;\u91c7\u6837\u8f68\u8ff9 \u2192 PRM\u8bc4\u4f30\u6b65\u9aa4 \u2192 \u751f\u6210\u8fc7\u7a0b\u5956\u52b1 \u2192 \u7b56\u7565\u66f4\u65b0PPO\/GRPO \u2192 \u65b0\u8f68\u8ff9\u91c7\u6837 \u2192 \u66f4\u65b0PRM<\/p>\n<h2>5 \u591a\u667a\u80fd\u4f53\u534f\u4f5c\u4e0e\u81ea\u6211\u5bf9\u5f08&#xff1a;\u5206\u5e03\u5f0f\u8fdb\u5316\u673a\u5236<\/h2>\n<h3>5.1 \u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u7684\u535a\u5f08\u8bba\u57fa\u7840<\/h3>\n<h4>5.1.1 \u7eb3\u4ec0\u5747\u8861\u4e0e\u7b56\u7565\u7a33\u5b9a\u6027<\/h4>\n<p>\u591a\u667a\u80fd\u4f53\u7cfb\u7edf&#xff08;Multi-Agent System, MAS&#xff09;\u7684\u7814\u7a76\u4e3a\u7406\u89e3Agent\u95f4\u7684\u4ea4\u4e92\u63d0\u4f9b\u4e86\u535a\u5f08\u8bba\u6846\u67b6\u3002\u5728MAS\u4e2d&#xff0c;\u6bcf\u4e2aAgent\u90fd\u662f\u7406\u6027\u7684\u51b3\u7b56\u8005&#xff0c;\u5176\u6536\u76ca\u4e0d\u4ec5\u53d6\u51b3\u4e8e\u81ea\u8eab\u7b56\u7565&#xff0c;\u8fd8\u53d7\u5176\u4ed6Agent\u7b56\u7565\u7684\u5f71\u54cd[56]\u3002<\/p>\n<p>\u5f62\u5f0f\u5316\u5730&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NN<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><\/span> \u4e2aAgent\u7684\u535a\u5f08\u53ef\u4ee5\u8868\u793a\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">G&#061;(N,{Ai}i\u2208N,{ui}i\u2208N)G &#061; (\\\\mathcal{N}, \\\\{\\\\mathcal{A}_i\\\\}_{i \\\\in \\\\mathcal{N}}, \\\\{u_i\\\\}_{i \\\\in \\\\mathcal{N}})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\">G<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" 
style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">\u2208<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.1474em\">N<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1774em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">\u2208<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.1474em\">N<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1774em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">N&#061;{1,2,&#8230;,N}\\\\mathcal{N} &#061; \\\\{1, 2, &#8230;, N\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\">1<\/span><span 
class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">2<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span> \u4e3aAgent\u96c6\u5408&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Ai\\\\mathcal{A}_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3aAgent <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u7684\u52a8\u4f5c\u7a7a\u95f4&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ui:A1\u00d7&#8230;\u00d7AN\u2192Ru_i: \\\\mathcal{A}_1 \\\\times &#8230; \\\\times \\\\mathcal{A}_N \\\\rightarrow \\\\mathbb{R}<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u00d7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6667em;vertical-align: -0.0833em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span 
class=\"mbin\">\u00d7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2192<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6889em\"><\/span><span class=\"mord mathbb\">R<\/span><\/span><\/span><\/span><\/span> \u4e3aAgent <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u7684\u6548\u7528\u51fd\u6570\u3002<\/p>\n<p>\u7eb3\u4ec0\u5747\u8861&#xff08;Nash Equilibrium&#xff09;\u662f\u535a\u5f08\u8bba\u7684\u6838\u5fc3\u89e3\u6982\u5ff5\u3002\u7b56\u7565\u7ec4\u5408 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c3\u2217&#061;(\u03c31\u2217,&#8230;,\u03c3N\u2217)\\\\sigma^* &#061; (\\\\sigma_1^*, &#8230;, \\\\sigma_N^*)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6887em\"><\/span><span class=\"mord\"><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0253em;vertical-align: -0.2753em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -2.4519em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2481em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -2.4247em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2753em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u662f\u7eb3\u4ec0\u5747\u8861&#xff0c;\u5982\u679c\u5bf9\u4e8e\u6bcf\u4e2aAgent <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u6ee1\u8db3[42]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">ui(\u03c3i\u2217,\u03c3\u2212i\u2217)\u2265ui(\u03c3i\u2032,\u03c3\u2212i\u2217),\u2200\u03c3i\u2032\u2208\u0394(Ai)u_i(\\\\sigma_i^*, \\\\sigma_{-i}^*) \\\\geq u_i(\\\\sigma_i&#039;, \\\\sigma_{-i}^*), \\\\quad \\\\forall \\\\sigma_i&#039; \\\\in \\\\Delta(\\\\mathcal{A}_i)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0553em;vertical-align: -0.3053em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2212<\/span><span class=\"mord mathnormal 
mtight\">i<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3053em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2265<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1072em;vertical-align: -0.3053em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" 
style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2212<\/span><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3053em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 1em\"><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2200<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8019em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" 
style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\">\u0394<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c3\u2212i\u2217\\\\sigma_{-i}^*<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0057em;vertical-align: -0.317em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 
0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6887em\"><span class=\"\" style=\"top: -2.4413em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2212<\/span><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.317em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u8868\u793a\u9664Agent <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u5916\u5176\u4ed6Agent\u7684\u5747\u8861\u7b56\u7565\u3002\u7eb3\u4ec0\u5747\u8861\u7684\u76f4\u89c2\u542b\u4e49\u662f&#xff1a;\u5728\u5747\u8861\u72b6\u6001\u4e0b&#xff0c;\u4efb\u4f55Agent\u5355\u65b9\u9762\u6539\u53d8\u7b56\u7565\u90fd\u65e0\u6cd5\u83b7\u5f97\u66f4\u9ad8\u6536\u76ca\u3002<\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u7eb3\u4ec0\u5747\u8861\u5bf9\u5e94\u4e8e\u7a33\u5b9a\u7684\u591aAgent\u534f\u4f5c\u6a21\u5f0f\u3002\u5f53\u6240\u6709Agent\u90fd\u91c7\u7528\u5747\u8861\u7b56\u7565\u65f6&#xff0c;\u7cfb\u7edf\u8fbe\u5230\u81ea\u6d3d\u72b6\u6001&#xff0c;\u6ca1\u6709Agent\u6709\u52a8\u529b\u504f\u79bb\u5f53\u524d\u884c\u4e3a\u6a21\u5f0f[66]\u3002<\/p>\n<h4>5.1.2 
\u96f6\u548c\u535a\u5f08\u4e0e\u5bf9\u6297\u8bad\u7ec3<\/h4>\n<p>\u96f6\u548c\u535a\u5f08&#xff08;Zero-Sum Game&#xff09;\u662f\u4e00\u7c7b\u7279\u6b8a\u7684\u535a\u5f08&#xff0c;\u5176\u4e2d\u4e00\u4e2aAgent\u7684\u6536\u76ca\u7b49\u4e8e\u5176\u4ed6Agent\u7684\u635f\u5931\u4e4b\u548c\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u5bf9\u4e8e\u4e24\u4e2aAgent\u7684\u96f6\u548c\u535a\u5f08&#xff0c;\u6ee1\u8db3 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">u1(a1,a2)&#043;u2(a1,a2)&#061;0u_1(a_1, a_2) &#043; u_2(a_1, a_2) &#061; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span 
class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span>[61]\u3002<\/p>\n<p>\u96f6\u548c\u535a\u5f08\u7684\u6c42\u89e3\u76ee\u6807\u662f\u627e\u5230\u6781\u5927\u6781\u5c0f\u7b56\u7565&#xff08;Maximin Strategy&#xff09;&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c31\u2217&#061;arg\u2061max\u2061\u03c31min\u2061\u03c32u1(\u03c31,\u03c32)\\\\sigma_1^* &#061; \\\\arg\\\\max_{\\\\sigma_1} \\\\min_{\\\\sigma_2} u_1(\\\\sigma_1, \\\\sigma_2)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9857em;vertical-align: -0.247em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 
0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.5501em;vertical-align: -0.8001em\"><\/span><span class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span 
class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6679em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">min<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord 
mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u6839\u636e\u6781\u5c0f\u6781\u5927\u5b9a\u7406&#xff08;Minimax Theorem&#xff09;&#xff0c;\u5728\u4e24\u4eba\u6709\u9650\u96f6\u548c\u535a\u5f08\u4e2d\u6781\u5927\u6781\u5c0f\u503c\u7b49\u4e8e\u6781\u5c0f\u6781\u5927\u503c&#xff0c;\u5373&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">max\u2061\u03c31min\u2061\u03c32u1(\u03c31,\u03c32)&#061;min\u2061\u03c32max\u2061\u03c31u1(\u03c31,\u03c32)\\\\max_{\\\\sigma_1} \\\\min_{\\\\sigma_2} u_1(\\\\sigma_1, \\\\sigma_2) &#061; \\\\min_{\\\\sigma_2} \\\\max_{\\\\sigma_1} u_1(\\\\sigma_1, \\\\sigma_2)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.5501em;vertical-align: -0.8001em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6679em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">min<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.5501em;vertical-align: -0.8001em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6679em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">min<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span 
class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3173em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8001em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">u<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5bf9\u6297\u8bad\u7ec3&#xff08;Adversarial 
Training&#xff09;\u5229\u7528\u96f6\u548c\u535a\u5f08\u6846\u67b6\u63d0\u5347Agent\u7684\u9c81\u68d2\u6027\u3002\u901a\u8fc7\u8ba9Agent\u4e0e\u5bf9\u6297\u6027\u5bf9\u624b\u535a\u5f08&#xff0c;Agent\u5b66\u4f1a\u5e94\u5bf9\u6700\u574f\u60c5\u51b5&#xff0c;\u4ece\u800c\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u8868\u73b0\u66f4\u7a33\u5b9a\u3002\u5728\u81ea\u6211\u8fdb\u5316\u573a\u666f\u4e2d&#xff0c;\u53ef\u4ee5\u5c06\u8fc7\u53bb\u7684\u81ea\u5df1\u4f5c\u4e3a\u5bf9\u624b&#xff0c;\u901a\u8fc7\u81ea\u6211\u5bf9\u5f08&#xff08;Self-Play&#xff09;\u5b9e\u73b0\u6301\u7eed\u6539\u8fdb[62]\u3002<\/p>\n<h4>5.1.3 \u5408\u4f5c\u535a\u5f08\u4e0e\u8054\u76df\u5f62\u6210<\/h4>\n<p>\u4e0e\u96f6\u548c\u535a\u5f08\u4e0d\u540c&#xff0c;\u5408\u4f5c\u535a\u5f08&#xff08;Cooperative Game&#xff09;\u5141\u8bb8\u591a\u4e2aAgent\u901a\u8fc7\u534f\u4f5c\u5b9e\u73b0\u5171\u8d62\u3002\u5728Agent\u5de5\u4f5c\u6d41\u4e2d&#xff0c;\u4e0d\u540cAgent\u901a\u5e38\u626e\u6f14\u4e0d\u540c\u89d2\u8272&#xff08;\u5982\u89c4\u5212\u8005\u3001\u6267\u884c\u8005\u3001\u9a8c\u8bc1\u8005&#xff09;&#xff0c;\u901a\u8fc7\u534f\u4f5c\u5b8c\u6210\u590d\u6742\u4efb\u52a1[16]\u3002<\/p>\n<p>\u5408\u4f5c\u535a\u5f08\u7684\u6838\u5fc3\u95ee\u9898\u662f\u8054\u76df\u5f62\u6210&#xff08;Coalition Formation&#xff09;\u548c\u6536\u76ca\u5206\u914d&#xff08;Payoff Allocation&#xff09;\u3002\u8bbe <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">S\u2286NS \\\\subseteq \\\\mathcal{N}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8193em;vertical-align: -0.136em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2286<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 
0.1474em\">N<\/span><\/span><\/span><\/span><\/span> \u4e3a\u4e00\u4e2a\u8054\u76df&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">v(S)v(S)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8054\u76df <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">SS<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><\/span><\/span><\/span><\/span> \u80fd\u591f\u83b7\u5f97\u7684\u96c6\u4f53\u6536\u76ca\u3002\u7279\u5f81\u51fd\u6570&#xff08;Characteristic Function&#xff09;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">v:2N\u2192Rv: 2^{\\\\mathcal{N}} \\\\rightarrow \\\\mathbb{R}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8413em\"><\/span><span class=\"mord\"><span class=\"mord\">2<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathcal mtight\" style=\"margin-right: 
0.1474em\">N<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2192<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6889em\"><\/span><span class=\"mord mathbb\">R<\/span><\/span><\/span><\/span><\/span> \u5b9a\u4e49\u4e86\u6240\u6709\u53ef\u80fd\u8054\u76df\u7684\u6536\u76ca\u3002<\/p>\n<p>\u590f\u666e\u5229\u503c&#xff08;Shapley Value&#xff09;\u662f\u5408\u4f5c\u535a\u5f08\u4e2d\u516c\u5e73\u5206\u914d\u6536\u76ca\u7684\u7ecf\u5178\u65b9\u6cd5\u3002Agent <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u7684\u590f\u666e\u5229\u503c\u4e3a[56]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03d5i(v)&#061;\u2211S\u2286N\u2216{i}\u2223S\u2223!(N\u2212\u2223S\u2223\u22121)!N![v(S\u222a{i})\u2212v(S)]\\\\phi_i(v) &#061; \\\\sum_{S \\\\subseteq \\\\mathcal{N} \\\\setminus \\\\{i\\\\}} \\\\frac{|S|!(N-|S|-1)!}{N!} [v(S \\\\cup \\\\{i\\\\}) - v(S)]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03d5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.943em;vertical-align: -1.516em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.05em\"><span class=\"\" style=\"top: -1.809em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mrel mtight\">\u2286<\/span><span class=\"mord mathcal mtight\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mbin mtight\">\u2216<\/span><span class=\"mopen mtight\">{<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">}<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.516em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span 
class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mclose\">!<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mord\">\u2223<\/span><span class=\"mclose\">!<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mord\">\u2223<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">)!<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u222a<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord 
mathnormal\">i<\/span><span class=\"mclose\">})<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0576em\">S<\/span><span class=\"mclose\">)]<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u590f\u666e\u5229\u503c\u6ee1\u8db3\u6548\u7387\u6027\u3001\u5bf9\u79f0\u6027\u3001\u54d1\u5143\u6027\u548c\u53ef\u52a0\u6027\u56db\u4e2a\u516c\u7406&#xff0c;\u88ab\u8ba4\u4e3a\u662f\u516c\u5e73\u5206\u914d\u7684\u9ec4\u91d1\u6807\u51c6\u3002\u5728\u591aAgent\u5de5\u4f5c\u6d41\u4e2d&#xff0c;\u590f\u666e\u5229\u503c\u53ef\u4ee5\u7528\u4e8e\u8bc4\u4f30\u5404Agent\u5bf9\u4efb\u52a1\u5b8c\u6210\u7684\u8fb9\u9645\u8d21\u732e&#xff0c;\u6307\u5bfc\u89d2\u8272\u5206\u914d\u548c\u6fc0\u52b1\u673a\u5236\u8bbe\u8ba1[42]\u3002<\/p>\n<h3>5.2 \u591a\u667a\u80fd\u4f53\u5f3a\u5316\u5b66\u4e60\u6846\u67b6<\/h3>\n<h4>5.2.1 \u72ec\u7acb\u5b66\u4e60\u4e0e\u8054\u5408\u5b66\u4e60\u7684\u6743\u8861<\/h4>\n<p>\u591a\u667a\u80fd\u4f53\u5f3a\u5316\u5b66\u4e60&#xff08;Multi-Agent Reinforcement Learning, MARL&#xff09;\u9762\u4e34\u72ec\u7acb\u5b66\u4e60&#xff08;Independent Learning&#xff09;\u4e0e\u8054\u5408\u5b66\u4e60&#xff08;Joint Learning&#xff09;\u7684\u6743\u8861\u3002\u72ec\u7acb\u5b66\u4e60\u4e2d\u6bcf\u4e2aAgent\u5c06\u5176\u4ed6Agent\u89c6\u4e3a\u73af\u5883\u7684\u4e00\u90e8\u5206&#xff0c;\u72ec\u7acb\u8fd0\u884c\u5355Agent 
RL\u7b97\u6cd5&#xff1b;\u8054\u5408\u5b66\u4e60\u5219\u5c06\u591aAgent\u7cfb\u7edf\u5efa\u6a21\u4e3a\u8054\u5408MDP&#xff0c;\u5b66\u4e60\u5168\u5c40\u6700\u4f18\u7b56\u7565[66]\u3002<\/p>\n<p>\u72ec\u7acb\u5b66\u4e60\u7684\u4f18\u70b9\u662f\u8ba1\u7b97\u590d\u6742\u5ea6\u4f4e&#xff0c;\u6bcf\u4e2aAgent\u53ea\u9700\u7ef4\u62a4\u81ea\u8eab\u7b56\u7565&#xff1b;\u7f3a\u70b9\u662f\u975e\u5e73\u7a33\u6027&#xff08;Non-Stationarity&#xff09;\u95ee\u9898\u2014\u2014\u4ece\u5355\u4e2aAgent\u89c6\u89d2&#xff0c;\u73af\u5883\u52a8\u6001\u56e0\u5176\u4ed6Agent\u7b56\u7565\u53d8\u5316\u800c\u6539\u53d8&#xff0c;\u8fdd\u53cd\u4e86MDP\u7684\u5e73\u7a33\u6027\u5047\u8bbe\u3002<\/p>\n<p>\u8054\u5408\u5b66\u4e60\u7406\u8bba\u4e0a\u53ef\u4ee5\u627e\u5230\u5168\u5c40\u6700\u4f18&#xff0c;\u4f46\u9762\u4e34\u7ef4\u5ea6\u707e\u96be&#xff1a;\u8054\u5408\u52a8\u4f5c\u7a7a\u95f4\u968fAgent\u6570\u91cf\u6307\u6570\u589e\u957f&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u2223Ajoint\u2223&#061;\u220fi&#061;1N\u2223Ai\u2223|\\\\mathcal{A}_{joint}| &#061; \\\\prod_{i&#061;1}^N |\\\\mathcal{A}_i|<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">j<\/span><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">in<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2809em;vertical-align: -0.2997em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u220f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9812em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.2029em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathcal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><\/span><\/span>\u3002\u5bf9\u4e8e\u5927\u89c4\u6a21\u7cfb\u7edf&#xff0c;\u8054\u5408\u5b66\u4e60\u4e0d\u53ef\u884c\u3002<\/p>\n<p>\u5b9e\u8df5\u4e2d\u5e38\u7528\u7684\u6298\u8877\u65b9\u6848\u662f\u96c6\u4e2d\u5f0f\u8bad\u7ec3\u5206\u5e03\u5f0f\u6267\u884c&#xff08;Centralized Training with Decentralized Execution, CTDE&#xff09;\u3002\u5728\u8bad\u7ec3\u9636\u6bb5&#xff0c;\u4f7f\u7528\u5168\u5c40\u4fe1\u606f\u5b66\u4e60\u7b56\u7565&#xff1b;\u5728\u6267\u884c\u9636\u6bb5&#xff0c;\u6bcf\u4e2aAgent\u4ec5\u57fa\u4e8e\u5c40\u90e8\u89c2\u6d4b\u51b3\u7b56\u3002\u8fd9\u79cd\u67b6\u6784\u65e2\u5229\u7528\u4e86\u5168\u5c40\u4fe1\u606f\u63d0\u5347\u8bad\u7ec3\u6548\u7387&#xff0c;\u53c8\u4fdd\u6301\u4e86\u6267\u884c\u7684\u5206\u5e03\u5f0f\u7279\u6027[61]\u3002<\/p>\n<h4>5.2.2 \u503c\u5206\u89e3\u4e0eQMIX\u7b97\u6cd5<\/h4>\n<p>\u503c\u5206\u89e3&#xff08;Value Decomposition&#xff09;\u662fCTDE\u67b6\u6784\u4e0b\u7684\u91cd\u8981\u6280\u672f&#xff0c;\u5b83\u5c06\u8054\u5408Q\u51fd\u6570\u5206\u89e3\u4e3a\u5404Agent Q\u51fd\u6570\u7684\u7ec4\u5408&#xff0c;\u4f7f\u5f97\u6bcf\u4e2aAgent\u53ef\u4ee5\u72ec\u7acb\u51b3\u7b56\u540c\u65f6\u4fdd\u8bc1\u5168\u5c40\u4e00\u81f4\u6027[62]\u3002<\/p>\n<p>QMIX\u7b97\u6cd5\u662f\u503c\u5206\u89e3\u7684\u4ee3\u8868\u6027\u65b9\u6cd5\u3002\u5b83\u5047\u8bbe\u8054\u5408Q\u51fd\u6570\u53ef\u4ee5\u8868\u793a\u4e3a\u5404Agent Q\u51fd\u6570\u7684\u975e\u7ebf\u6027\u7ec4\u5408&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">Qjoint(s,a)&#061;f(Q1(s,a1),Q2(s,a2),&#8230;,QN(s,aN);s)Q_{joint}(s, \\\\mathbf{a}) &#061; f(Q_1(s, a_1), Q_2(s, a_2), &#8230;, Q_N(s, a_N); s)<\/span><span class=\"katex-html\"><span 
class=\"base\"><span class=\"strut\" style=\"height: 1.0361em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">j<\/span><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">in<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathbf\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" 
style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">;<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ff<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><\/span><\/span><\/span><\/span> \u4e3a\u6df7\u5408\u7f51\u7edc&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">a&#061;(a1,&#8230;,aN)\\\\mathbf{a} &#061; (a_1, &#8230;, a_N)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4444em\"><\/span><span class=\"mord mathbf\">a<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8054\u5408\u52a8\u4f5c\u3002\u4e3a\u4fdd\u8bc1\u5206\u5e03\u5f0f\u6267\u884c\u7684\u6700\u4f18\u6027&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ff<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><\/span><\/span><\/span><\/span> \u9700\u8981\u6ee1\u8db3\u5355\u8c03\u6027\u7ea6\u675f&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u2202Qjoint\u2202Qi\u22650,\u2200i\\\\frac{\\\\partial Q_{joint}}{\\\\partial Q_i} \\\\geq 0, \\\\quad \\\\forall i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 2.2519em;vertical-align: -0.8804em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span 
class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3714em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\" style=\"margin-right: 0.0556em\">\u2202<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\" style=\"margin-right: 0.0556em\">\u2202<\/span><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">j<\/span><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\">in<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8804em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2265<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 1em\"><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2200<\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u7ea6\u675f\u786e\u4fdd\u5355\u4e2aAgent\u6700\u5927\u5316\u81ea\u8eabQ\u51fd\u6570\u4e5f\u6700\u5927\u5316\u8054\u5408Q\u51fd\u6570\u3002QMIX\u4f7f\u7528\u8d85\u7f51\u7edc&#xff08;Hypernetwork&#xff09;\u751f\u6210\u6df7\u5408\u7f51\u7edc\u7684\u6743\u91cd&#xff0c;\u4ee5\u72b6\u6001\u4e3a\u6761\u4ef6\u5b9e\u73b0\u7075\u6d3b\u7684\u975e\u7ebf\u6027\u7ec4\u5408[16]\u3002<\/p>\n<h4>5.2.3 
\u901a\u4fe1\u534f\u8bae\u4e0e\u4fe1\u606f\u5171\u4eab<\/h4>\n<p>\u6709\u6548\u7684\u901a\u4fe1\u662f\u591aAgent\u534f\u4f5c\u7684\u5173\u952e\u3002\u5728\u90e8\u5206\u53ef\u89c2\u6d4b\u73af\u5883\u4e2d&#xff0c;Agent\u9700\u8981\u901a\u8fc7\u901a\u4fe1\u5171\u4eab\u4fe1\u606f&#xff0c;\u5f25\u8865\u5c40\u90e8\u89c2\u6d4b\u7684\u4e0d\u8db3\u3002\u901a\u4fe1\u534f\u8bae\u7684\u8bbe\u8ba1\u9700\u8981\u5728\u4fe1\u606f\u4e30\u5bcc\u6027\u548c\u901a\u4fe1\u5f00\u9500\u4e4b\u95f4\u53d6\u5f97\u5e73\u8861[56]\u3002<\/p>\n<p>\u901a\u4fe1\u5185\u5bb9\u53ef\u4ee5\u5206\u4e3a\u4ee5\u4e0b\u51e0\u7c7b&#xff1a;<\/p>\n<ul>\n<li>\u89c2\u6d4b\u5171\u4eab&#xff1a;Agent\u5206\u4eab\u5c40\u90e8\u89c2\u6d4b&#xff0c;\u6269\u5927\u5176\u4ed6Agent\u7684\u4fe1\u606f\u8303\u56f4<\/li>\n<li>\u610f\u56fe\u5e7f\u64ad&#xff1a;Agent\u5ba3\u5e03\u5373\u5c06\u6267\u884c\u7684\u52a8\u4f5c&#xff0c;\u4fbf\u4e8e\u534f\u8c03\u907f\u514d\u51b2\u7a81<\/li>\n<li>\u8bf7\u6c42-\u54cd\u5e94&#xff1a;Agent\u4e3b\u52a8\u8bf7\u6c42\u7279\u5b9a\u4fe1\u606f&#xff0c;\u5176\u4ed6Agent\u54cd\u5e94<\/li>\n<li>\u5171\u8bc6\u8fbe\u6210&#xff1a;Agent\u901a\u8fc7\u591a\u8f6e\u901a\u4fe1\u5c31\u5171\u540c\u8ba1\u5212\u8fbe\u6210\u4e00\u81f4<\/li>\n<\/ul>\n<p>TarMAC&#xff08;Targeted Multi-Agent 
Communication&#xff09;\u7b97\u6cd5\u5f15\u5165\u4e86\u6ce8\u610f\u529b\u673a\u5236\u6307\u5bfc\u901a\u4fe1\u3002\u6bcf\u4e2aAgent\u8ba1\u7b97\u5bf9\u5176\u4ed6Agent\u7684\u6ce8\u610f\u529b\u6743\u91cd&#xff0c;\u4ec5\u4e0e\u6ce8\u610f\u529b\u9ad8\u7684Agent\u901a\u4fe1&#xff0c;\u6709\u6548\u964d\u4f4e\u4e86\u901a\u4fe1\u5f00\u9500[42]\u3002<\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u901a\u4fe1\u534f\u8bae\u7684\u8bbe\u8ba1\u5c24\u4e3a\u91cd\u8981\u3002\u4e0d\u540cAgent\u53ef\u80fd\u8d1f\u8d23\u5de5\u4f5c\u6d41\u7684\u4e0d\u540c\u9636\u6bb5&#xff08;\u5982\u4fe1\u606f\u6536\u96c6\u3001\u5206\u6790\u63a8\u7406\u3001\u7ed3\u679c\u9a8c\u8bc1&#xff09;&#xff0c;\u901a\u8fc7\u6807\u51c6\u5316\u901a\u4fe1\u534f\u8bae&#xff0c;\u53ef\u4ee5\u5b9e\u73b0\u65e0\u7f1d\u7684\u4efb\u52a1\u4ea4\u63a5\u548c\u72b6\u6001\u540c\u6b65[66]\u3002<\/p>\n<h3>5.3 \u81ea\u6211\u5bf9\u5f08\u4e0e\u81ea\u52a8\u8bfe\u7a0b\u5b66\u4e60<\/h3>\n<h4>5.3.1 \u81ea\u6211\u5bf9\u5f08\u7684\u7406\u8bba\u57fa\u7840<\/h4>\n<p>\u81ea\u6211\u5bf9\u5f08&#xff08;Self-Play&#xff09;\u662f\u4e00\u79cd\u5f3a\u5927\u7684\u591aAgent\u8bad\u7ec3\u8303\u5f0f&#xff0c;Agent\u901a\u8fc7\u4e0e\u81ea\u8eab\u5386\u53f2\u7248\u672c\u535a\u5f08\u6765\u5b66\u4e60\u3002\u8fd9\u79cd\u65b9\u6cd5\u6700\u65e9\u5728\u535a\u5f08\u8bba\u4e2d\u63d0\u51fa&#xff0c;\u8fd1\u5e74\u6765\u5728AlphaGo\u3001OpenAI 
Five\u7b49\u7cfb\u7edf\u4e2d\u5c55\u73b0\u51fa\u60ca\u4eba\u6548\u679c[61]\u3002<\/p>\n<p>\u81ea\u6211\u5bf9\u5f08\u7684\u6838\u5fc3\u4f18\u52bf\u5728\u4e8e\u81ea\u52a8\u751f\u6210\u9002\u5e94Agent\u5f53\u524d\u6c34\u5e73\u7684\u5bf9\u624b\u3002\u5f53Agent\u8f83\u5f31\u65f6&#xff0c;\u5386\u53f2\u7248\u672c\u4e5f\u8f83\u5f31&#xff0c;Agent\u5bb9\u6613\u83b7\u80dc\u83b7\u5f97\u6b63\u53cd\u9988&#xff1b;\u968f\u7740Agent\u53d8\u5f3a&#xff0c;\u5386\u53f2\u7248\u672c\u4e5f\u53d8\u5f3a&#xff0c;\u63d0\u4f9b\u9002\u5ea6\u6311\u6218\u4fc3\u8fdb\u8fdb\u4e00\u6b65\u5b66\u4e60\u3002\u8fd9\u79cd\u81ea\u52a8\u8c03\u6574\u7684\u96be\u5ea6\u66f2\u7ebf\u7c7b\u4f3c\u4e8e\u8bfe\u7a0b\u5b66\u4e60&#xff08;Curriculum Learning&#xff09;[62]\u3002<\/p>\n<p>\u865a\u6784\u81ea\u6211\u5bf9\u5f08&#xff08;Fictitious Self-Play, FSP&#xff09;\u662f\u4e00\u79cd\u7406\u8bba\u4e0a\u6709\u4fdd\u8bc1\u7684\u81ea\u6211\u5bf9\u5f08\u53d8\u4f53\u3002\u5728FSP\u4e2d&#xff0c;Agent\u4e0d\u662f\u4e0e\u5355\u4e00\u5386\u53f2\u7248\u672c\u535a\u5f08&#xff0c;\u800c\u662f\u4e0e\u5386\u53f2\u7b56\u7565\u7684\u6df7\u5408\u5206\u5e03\u535a\u5f08\u3002\u8bbe <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0t\\\\pi_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u8fed\u4ee3\u7684\u7b56\u7565&#xff0c;FSP\u7ef4\u62a4\u5e73\u5747\u7b56\u7565 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u02c9t&#061;1t\u2211i&#061;1t\u03c0i\\\\bar{\\\\pi}_t &#061; \\\\frac{1}{t} \\\\sum_{i&#061;1}^t \\\\pi_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7178em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.5678em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.25em\"><span class=\"mord\">\u02c9<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2785em;vertical-align: -0.345em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8451em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.394em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.345em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9335em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.2029em;margin-right: 
0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;Agent\u4e0e <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0\u02c9t\\\\bar{\\\\pi}_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7178em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.5678em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.25em\"><span class=\"mord\">\u02c9<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u535a\u5f08\u5e76\u66f4\u65b0\u7b56\u7565\u3002\u53ef\u4ee5\u8bc1\u660e&#xff0c;FSP\u6536\u655b\u5230\u7eb3\u4ec0\u5747\u8861[16]\u3002<\/p>\n<h4>5.3.2 \u81ea\u52a8\u8bfe\u7a0b\u751f\u6210\u4e0e\u96be\u5ea6\u8c03\u8282<\/h4>\n<p>\u81ea\u52a8\u8bfe\u7a0b\u5b66\u4e60&#xff08;Automatic Curriculum Learning&#xff09;\u901a\u8fc7\u52a8\u6001\u8c03\u6574\u8bad\u7ec3\u4efb\u52a1\u7684\u96be\u5ea6&#xff0c;\u52a0\u901f\u5b66\u4e60\u8fdb\u7a0b\u3002\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u8bfe\u7a0b\u53ef\u4ee5\u4f53\u73b0\u4e3a\u4efb\u52a1\u590d\u6742\u5ea6\u3001\u73af\u5883\u566a\u58f0\u3001\u5bf9\u624b\u5f3a\u5ea6\u7b49\u591a\u4e2a\u7ef4\u5ea6[56]\u3002<\/p>\n<p>WebRL\u7b97\u6cd5\u63d0\u51fa\u4e86\u57fa\u4e8e\u5931\u8d25\u6a21\u5f0f\u7684\u81ea\u52a8\u8bfe\u7a0b\u751f\u6210\u3002\u5f53Agent\u5728\u7279\u5b9a\u7c7b\u578b\u4efb\u52a1\u4e0a\u5931\u8d25\u65f6&#xff0c;\u7b97\u6cd5\u81ea\u52a8\u751f\u6210\u76f8\u4f3c\u4f46\u7a0d\u7b80\u5355\u7684\u53d8\u4f53\u4efb\u52a1&#xff0c;\u5e2e\u52a9Agent\u9010\u6b65\u638c\u63e1\u6240\u9700\u6280\u80fd\u3002\u5177\u4f53\u5730&#xff0c;\u8bbeAgent\u5728\u4efb\u52a1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> 
\u4e0a\u5931\u8d25&#xff0c;\u8bfe\u7a0b\u751f\u6210\u5668\u6267\u884c\u4ee5\u4e0b\u6b65\u9aa4[42]&#xff1a;<\/p>\n<ol>\n<li>\u5206\u6790\u5931\u8d25\u539f\u56e0&#xff08;\u5982\u4fe1\u606f\u7f3a\u5931\u3001\u63a8\u7406\u9519\u8bef\u3001\u5de5\u5177\u4f7f\u7528\u4e0d\u5f53&#xff09;<\/li>\n<li>\u9488\u5bf9\u5931\u8d25\u539f\u56e0\u751f\u6210\u7b80\u5316\u4efb\u52a1&#xff08;\u5982\u63d0\u4f9b\u66f4\u591a\u63d0\u793a\u3001\u51cf\u5c11\u63a8\u7406\u6b65\u9aa4\u3001\u9650\u5236\u5de5\u5177\u9009\u62e9&#xff09;<\/li>\n<li>\u5728\u7b80\u5316\u4efb\u52a1\u4e0a\u8bad\u7ec3Agent\u76f4\u81f3\u638c\u63e1<\/li>\n<li>\u9010\u6b65\u589e\u52a0\u96be\u5ea6&#xff0c;\u6700\u7ec8\u56de\u5230\u539f\u59cb\u4efb\u52a1<\/li>\n<\/ol>\n<p>\u8fd9\u79cd\u4ece\u6613\u5230\u96be\u7684\u6e10\u8fdb\u5b66\u4e60\u7b26\u5408\u4eba\u7c7b\u8ba4\u77e5\u89c4\u5f8b&#xff0c;\u663e\u8457\u63d0\u5347\u4e86\u6837\u672c\u6548\u7387\u3002<\/p>\n<h4>5.3.3 \u591a\u667a\u80fd\u4f53\u534f\u4f5c\u4e2d\u7684\u89d2\u8272\u6d8c\u73b0<\/h4>\n<p>\u5728\u81ea\u6211\u5bf9\u5f08\u8fc7\u7a0b\u4e2d&#xff0c;\u591aAgent\u7cfb\u7edf\u53ef\u80fd\u81ea\u53d1\u6d8c\u73b0\u51fa\u89d2\u8272\u5206\u5de5&#xff08;Role Emergence&#xff09;\u3002\u5373\u4f7f\u6240\u6709Agent\u521d\u59cb\u65f6\u5177\u6709\u76f8\u540c\u7684\u80fd\u529b&#xff0c;\u7ecf\u8fc7\u5145\u5206\u8bad\u7ec3\u540e&#xff0c;\u4e0d\u540cAgent\u53ef\u80fd specialize 
\u5230\u4e0d\u540c\u7684\u5b50\u4efb\u52a1&#xff0c;\u5f62\u6210\u9ad8\u6548\u7684\u534f\u4f5c\u6a21\u5f0f[66]\u3002<\/p>\n<p>\u89d2\u8272\u6d8c\u73b0\u7684\u673a\u5236\u53ef\u4ee5\u901a\u8fc7\u535a\u5f08\u8bba\u89e3\u91ca\u3002\u5728\u91cd\u590d\u535a\u5f08\u4e2d&#xff0c;Agent\u901a\u8fc7\u8bd5\u9519\u53d1\u73b0\u67d0\u4e9b\u7b56\u7565\u7ec4\u5408\u80fd\u591f\u4ea7\u751f\u66f4\u9ad8\u6536\u76ca&#xff0c;\u8fd9\u4e9b\u7b56\u7565\u7ec4\u5408\u9010\u6e10\u56fa\u5316\u4e3a\u7a33\u5b9a\u7684\u89d2\u8272\u5206\u5de5\u3002\u4f8b\u5982&#xff0c;\u5728\u534f\u4f5c\u4efb\u52a1\u4e2d&#xff0c;\u4e00\u4e2aAgent\u53ef\u80fd\u81ea\u53d1\u627f\u62c5\u89c4\u5212\u8005\u89d2\u8272&#xff0c;\u8d1f\u8d23\u5206\u89e3\u4efb\u52a1&#xff1b;\u53e6\u4e00\u4e2aAgent\u627f\u62c5\u6267\u884c\u8005\u89d2\u8272&#xff0c;\u8d1f\u8d23\u5177\u4f53\u64cd\u4f5c[61]\u3002<\/p>\n<p>\u89d2\u8272\u6d8c\u73b0\u7684\u597d\u5904\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u4e13\u4e1a\u5316\u4f18\u52bf&#xff1a;\u6bcf\u4e2aAgent\u4e13\u6ce8\u4e8e\u7279\u5b9a\u5b50\u4efb\u52a1&#xff0c;\u6280\u80fd\u6df1\u5ea6\u63d0\u5347<\/li>\n<li>\u534f\u8c03\u7b80\u5316&#xff1a;\u660e\u786e\u7684\u89d2\u8272\u5206\u5de5\u51cf\u5c11\u4e86\u51b3\u7b56\u51b2\u7a81<\/li>\n<li>\u53ef\u6269\u5c55\u6027&#xff1a;\u65b0\u589eAgent\u53ef\u4ee5\u586b\u8865\u65b0\u89d2\u8272&#xff0c;\u7cfb\u7edf\u80fd\u529b\u6269\u5c55<\/li>\n<\/ul>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u89d2\u8272\u6d8c\u73b0\u4e3a\u81ea\u52a8\u53d1\u73b0\u6700\u4f18SOP\u63d0\u4f9b\u4e86\u65b0\u601d\u8def\u3002\u901a\u8fc7\u591aAgent\u81ea\u6211\u5bf9\u5f08&#xff0c;\u7cfb\u7edf\u53ef\u4ee5\u81ea\u53d1\u63a2\u7d22\u4e0d\u540c\u7684\u5de5\u4f5c\u6d41\u7ec4\u7ec7\u5f62\u5f0f&#xff0c;\u6700\u7ec8\u6536\u655b\u5230\u9ad8\u6548\u7684\u5206\u5de5\u534f\u4f5c\u6a21\u5f0f[62]\u3002<\/p>\n<p>\u81ea\u6211\u5bf9\u5f08\u8fdb\u5316&#xff1a;\u7b56\u7565v1 \u2192&#xff08;\u5bf9\u5f08&#xff09;\u2192 \u7b56\u7565v2 \u2192&#xff08;\u5bf9\u5f08&#xff09;\u2192 \u7b56\u7565v3 \u2192&#xff08;&#8230;&#xff09;\u2192 \u7b56\u7565vN \u2192&#xff08;\u6536\u655b&#xff09;\u2192 \u7eb3\u4ec0\u5747\u8861<\/p>\n<p>\u591aAgent\u534f\u4f5c\u6846\u67b6&#xff1a;\u4efb\u52a1\u5206\u89e3 \u2192 \u89d2\u8272\u5206\u914d \u2192 \u5e76\u884c\u6267\u884c \u2192 \u7ed3\u679c\u6574\u5408 \u2192 \u8d28\u91cf\u9a8c\u8bc1<\/p>\n<h2>6 \u8bb0\u5fc6\u673a\u5236\u4e0e\u5de5\u4f5c\u6d41\u4f18\u5316&#xff1a;\u7ecf\u9a8c\u79ef\u7d2f\u4e0e\u590d\u7528<\/h2>\n<h3>6.1 \u667a\u80fd\u4f53\u8bb0\u5fc6\u7684\u7c7b\u578b\u4e0e\u7ed3\u6784<\/h3>\n<h4>6.1.1 \u77ed\u671f\u8bb0\u5fc6\u4e0e\u957f\u671f\u8bb0\u5fc6\u7684\u533a\u5206<\/h4>\n<p>\u8bb0\u5fc6\u673a\u5236\u662f\u667a\u80fd\u4f53\u5b9e\u73b0\u6301\u7eed\u5b66\u4e60\u548c\u7ecf\u9a8c\u79ef\u7d2f\u7684\u57fa\u7840\u3002\u4e0e\u4eba\u7c7b\u8ba4\u77e5\u7cfb\u7edf\u7c7b\u4f3c&#xff0c;Agent\u8bb0\u5fc6\u53ef\u4ee5\u5206\u4e3a\u77ed\u671f\u8bb0\u5fc6&#xff08;Short-Term Memory, STM&#xff09;\u548c\u957f\u671f\u8bb0\u5fc6&#xff08;Long-Term Memory, LTM&#xff09;\u4e24\u7c7b&#xff0c;\u4e24\u8005\u5728\u529f\u80fd\u7279\u6027\u548c\u5b9e\u73b0\u673a\u5236\u4e0a\u5b58\u5728\u672c\u8d28\u5dee\u5f02[16]\u3002<\/p>\n<p>\u77ed\u671f\u8bb0\u5fc6\u8d1f\u8d23\u7ef4\u62a4\u5f53\u524d\u4efb\u52a1\u6267\u884c\u7684\u4e0a\u4e0b\u6587\u4fe1\u606f&#xff0c;\u7c7b\u4f3c\u4e8e\u4eba\u7c7b\u7684\u5de5\u4f5c\u8bb0\u5fc6&#xff08;Working Memory&#xff09;\u3002\u5728LLM Agent\u4e2d&#xff0c;\u77ed\u671f\u8bb0\u5fc6\u901a\u5e38\u901a\u8fc7\u4e0a\u4e0b\u6587\u7a97\u53e3&#xff08;Context 
Window&#xff09;\u5b9e\u73b0&#xff0c;\u5305\u542b\u6700\u8fd1\u7684\u5bf9\u8bdd\u5386\u53f2\u3001\u4e2d\u95f4\u63a8\u7406\u7ed3\u679c\u548c\u5de5\u5177\u8c03\u7528\u8bb0\u5f55\u3002\u77ed\u671f\u8bb0\u5fc6\u7684\u7279\u70b9\u662f\u8bbf\u95ee\u901f\u5ea6\u5feb\u3001\u5bb9\u91cf\u6709\u9650\u3001\u751f\u547d\u5468\u671f\u77ed\u2014\u2014\u968f\u7740\u4efb\u52a1\u7ed3\u675f\u6216\u4e0a\u4e0b\u6587\u91cd\u7f6e\u800c\u6e05\u7a7a[56]\u3002<\/p>\n<p>\u957f\u671f\u8bb0\u5fc6\u8d1f\u8d23\u8de8\u4efb\u52a1\u3001\u8de8\u4f1a\u8bdd\u7684\u4fe1\u606f\u6301\u4e45\u5316\u5b58\u50a8&#xff0c;\u4f7fAgent\u80fd\u591f\u79ef\u7d2f\u77e5\u8bc6\u3001\u5b66\u4e60\u504f\u597d\u3001\u907f\u514d\u91cd\u590d\u9519\u8bef\u3002\u957f\u671f\u8bb0\u5fc6\u7684\u5b9e\u73b0\u901a\u5e38\u4f9d\u8d56\u5916\u90e8\u5b58\u50a8\u7cfb\u7edf&#xff0c;\u5982\u5411\u91cf\u6570\u636e\u5e93&#xff08;Vector Database&#xff09;\u6216\u77e5\u8bc6\u56fe\u8c31&#xff08;Knowledge Graph&#xff09;\u3002\u4e0e\u77ed\u671f\u8bb0\u5fc6\u76f8\u6bd4&#xff0c;\u957f\u671f\u8bb0\u5fc6\u5bb9\u91cf\u5927\u3001\u751f\u547d\u5468\u671f\u957f&#xff0c;\u4f46\u8bbf\u95ee\u9700\u8981\u989d\u5916\u7684\u68c0\u7d22\u5f00\u9500[42]\u3002<\/p>\n<p>\u88682 \u77ed\u671f\u8bb0\u5fc6\u4e0e\u957f\u671f\u8bb0\u5fc6\u7684\u5bf9\u6bd4<\/p>\n<table>\n<thead>\n<tr>\n<th>\u7279\u6027\u7ef4\u5ea6<\/th>\n<th>\u77ed\u671f\u8bb0\u5fc6&#xff08;STM&#xff09;<\/th>\n<th>\u957f\u671f\u8bb0\u5fc6&#xff08;LTM&#xff09;<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u5b58\u50a8\u4ecb\u8d28<\/td>\n<td>\u6a21\u578b\u4e0a\u4e0b\u6587\u7a97\u53e3<\/td>\n<td>\u5916\u90e8\u6570\u636e\u5e93\/\u5b58\u50a8<\/td>\n<\/tr>\n<tr>\n<td>\u5bb9\u91cf\u9650\u5236<\/td>\n<td>\u53d7\u9650\u4e8e\u4e0a\u4e0b\u6587\u957f\u5ea6&#xff08;\u901a\u5e384K-128K 
tokens&#xff09;<\/td>\n<td>\u7406\u8bba\u4e0a\u65e0\u4e0a\u9650&#xff0c;\u53d7\u5b58\u50a8\u8d44\u6e90\u7ea6\u675f<\/td>\n<\/tr>\n<tr>\n<td>\u8bbf\u95ee\u5ef6\u8fdf<\/td>\n<td>\u6781\u4f4e&#xff08;\u6a21\u578b\u524d\u5411\u4f20\u64ad\u7684\u4e00\u90e8\u5206&#xff09;<\/td>\n<td>\u8f83\u9ad8&#xff08;\u9700\u8981\u68c0\u7d22\u548c\u7f16\u7801&#xff09;<\/td>\n<\/tr>\n<tr>\n<td>\u751f\u547d\u5468\u671f<\/td>\n<td>\u5355\u4f1a\u8bdd\u5185\u6709\u6548<\/td>\n<td>\u8de8\u4f1a\u8bdd\u6301\u4e45\u5316<\/td>\n<\/tr>\n<tr>\n<td>\u4fe1\u606f\u7c7b\u578b<\/td>\n<td>\u5f53\u524d\u4efb\u52a1\u4e0a\u4e0b\u6587\u3001\u8fd1\u671f\u4ea4\u4e92<\/td>\n<td>\u5386\u53f2\u7ecf\u9a8c\u3001\u7528\u6237\u504f\u597d\u3001\u9886\u57df\u77e5\u8bc6<\/td>\n<\/tr>\n<tr>\n<td>\u66f4\u65b0\u9891\u7387<\/td>\n<td>\u5b9e\u65f6\u66f4\u65b0<\/td>\n<td>\u5b9a\u671f\u6279\u91cf\u66f4\u65b0\u6216\u4e8b\u4ef6\u89e6\u53d1<\/td>\n<\/tr>\n<tr>\n<td>\u5178\u578b\u5b9e\u73b0<\/td>\n<td>In-context Learning<\/td>\n<td>Vector DB &#043; Embedding Model<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4>6.1.2 \u60c5\u666f\u8bb0\u5fc6\u3001\u8bed\u4e49\u8bb0\u5fc6\u4e0e\u7a0b\u5e8f\u8bb0\u5fc6<\/h4>\n<p>\u957f\u671f\u8bb0\u5fc6\u53ef\u4ee5\u8fdb\u4e00\u6b65\u7ec6\u5206\u4e3a\u4e09\u79cd\u7c7b\u578b&#xff0c;\u5bf9\u5e94\u4eba\u7c7b\u8bb0\u5fc6\u7cfb\u7edf\u7684\u4e0d\u540c\u529f\u80fd[66]&#xff1a;<\/p>\n<p><strong>\u60c5\u666f\u8bb0\u5fc6&#xff08;Episodic Memory&#xff09;<\/strong>\u5b58\u50a8\u7279\u5b9a\u4e8b\u4ef6\u548c\u7ecf\u5386&#xff0c;\u8bb0\u5f55Agent\u6267\u884c\u8fc7\u7684\u4efb\u52a1\u8f68\u8ff9\u3001\u9047\u5230\u7684\u56f0\u96be\u3001\u6210\u529f\u7684\u89e3\u51b3\u65b9\u6848\u7b49\u3002\u60c5\u666f\u8bb0\u5fc6\u652f\u6301\u57fa\u4e8e\u6848\u4f8b\u7684\u63a8\u7406&#xff08;Case-Based 
Reasoning&#xff09;\u2014\u2014\u5f53\u9762\u5bf9\u65b0\u4efb\u52a1\u65f6&#xff0c;\u68c0\u7d22\u76f8\u4f3c\u5386\u53f2\u6848\u4f8b\u5e76\u590d\u7528\u5176\u89e3\u51b3\u65b9\u6848\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u60c5\u666f\u8bb0\u5fc6\u53ef\u4ee5\u8868\u793a\u4e3a\u4e8b\u4ef6\u5e8f\u5217 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Mepi&#061;{(st,at,rt,ot)}t&#061;1T\\\\mathcal{M}_{epi} &#061; \\\\{(s_t, a_t, r_t, o_t)\\\\}_{t&#061;1}^T<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9694em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">M<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\">p<\/span><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0913em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose\"><span class=\"mclose\">}<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8413em\"><span class=\"\" style=\"top: -2.4519em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2481em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">oto_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u6267\u884c\u89c2\u5bdf[61]\u3002<\/p>\n<p><strong>\u8bed\u4e49\u8bb0\u5fc6&#xff08;Semantic Memory&#xff09;<\/strong>\u5b58\u50a8\u4e8b\u5b9e\u6027\u77e5\u8bc6\u548c\u6982\u5ff5\u5173\u7cfb&#xff0c;\u5982\u9886\u57df\u77e5\u8bc6\u3001\u7528\u6237\u504f\u597d\u3001\u5de5\u5177\u529f\u80fd\u63cf\u8ff0\u7b49\u3002\u8bed\u4e49\u8bb0\u5fc6\u901a\u5e38\u4ee5\u7ed3\u6784\u5316\u5f62\u5f0f\u7ec4\u7ec7&#xff0c;\u5982\u77e5\u8bc6\u56fe\u8c31\u6216\u952e\u503c\u5bf9\u5b58\u50a8\u3002\u4e0e\u60c5\u666f\u8bb0\u5fc6\u4e0d\u540c&#xff0c;\u8bed\u4e49\u8bb0\u5fc6\u62bd\u8c61\u4e86\u5177\u4f53\u7ecf\u5386&#xff0c;\u63d0\u53d6\u4e86\u901a\u7528\u77e5\u8bc6\u3002\u4f8b\u5982&#xff0c;\u4ece\u591a\u6b21&#034;\u4f7f\u7528Python\u5904\u7406CSV\u6587\u4ef6&#034;\u7684\u7ecf\u5386\u4e2d&#xff0c;Agent\u53ef\u4ee5\u62bd\u8c61\u51fa&#034;pandas\u662f\u5904\u7406\u8868\u683c\u6570\u636e\u7684\u6709\u6548\u5de5\u5177&#034;\u8fd9\u4e00\u8bed\u4e49\u77e5\u8bc6[62]\u3002<\/p>\n<p><strong>\u7a0b\u5e8f\u8bb0\u5fc6&#xff08;Procedural 
Memory&#xff09;<\/strong>\u5b58\u50a8\u6280\u80fd\u548c\u64cd\u4f5c\u6d41\u7a0b&#xff0c;\u5373&#034;\u5982\u4f55\u505a&#034;\u7684\u77e5\u8bc6\u3002\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u7a0b\u5e8f\u8bb0\u5fc6\u5bf9\u5e94\u4e8e\u5b66\u4e60\u5230\u7684SOP\u2014\u2014\u9762\u5bf9\u7279\u5b9a\u7c7b\u578b\u4efb\u52a1\u65f6\u5e94\u9075\u5faa\u7684\u64cd\u4f5c\u5e8f\u5217\u3002\u7a0b\u5e8f\u8bb0\u5fc6\u901a\u5e38\u4ee5\u6761\u4ef6-\u52a8\u4f5c\u89c4\u5219\u6216\u7b56\u7565\u7f51\u7edc\u7684\u5f62\u5f0f\u5b58\u50a8&#xff0c;\u652f\u6301\u5feb\u901f\u3001\u81ea\u52a8\u5316\u7684\u6267\u884c[16]\u3002<\/p>\n<h4>6.1.3 \u8bb0\u5fc6\u7684\u7ec4\u7ec7\u4e0e\u7d22\u5f15\u673a\u5236<\/h4>\n<p>\u9ad8\u6548\u7684\u8bb0\u5fc6\u7ec4\u7ec7\u4e0e\u68c0\u7d22\u673a\u5236\u662f\u8bb0\u5fc6\u7cfb\u7edf\u5b9e\u7528\u6027\u7684\u5173\u952e\u3002\u5bf9\u4e8e\u5927\u89c4\u6a21\u8bb0\u5fc6\u5e93&#xff0c;\u7ebf\u6027\u626b\u63cf\u4e0d\u53ef\u884c&#xff0c;\u9700\u8981\u5efa\u7acb\u6709\u6548\u7684\u7d22\u5f15\u7ed3\u6784[56]\u3002<\/p>\n<p>\u5411\u91cf\u7d22\u5f15\u662f\u5f53\u524d\u4e3b\u6d41\u7684\u8bb0\u5fc6\u7ec4\u7ec7\u65b9\u5f0f\u3002\u6bcf\u6761\u8bb0\u5fc6\u901a\u8fc7\u5d4c\u5165\u6a21\u578b&#xff08;Embedding Model&#xff09;\u7f16\u7801\u4e3a\u7a20\u5bc6\u5411\u91cf&#xff0c;\u5b58\u50a8\u5728\u5411\u91cf\u6570\u636e\u5e93\u4e2d\u3002\u68c0\u7d22\u65f6&#xff0c;\u5c06\u67e5\u8be2\u7f16\u7801\u4e3a\u5411\u91cf&#xff0c;\u901a\u8fc7\u8fd1\u4f3c\u6700\u8fd1\u90bb&#xff08;Approximate Nearest Neighbor, ANN&#xff09;\u7b97\u6cd5\u5feb\u901f\u627e\u5230\u76f8\u4f3c\u8bb0\u5fc6\u3002\u5e38\u7528\u7684ANN\u7b97\u6cd5\u5305\u62ecHNSW&#xff08;Hierarchical Navigable Small World&#xff09;\u3001IVF&#xff08;Inverted File 
Index&#xff09;\u7b49&#xff0c;\u80fd\u591f\u5728\u767e\u4e07\u7ea7\u5411\u91cf\u4e2d\u5b9e\u73b0\u6beb\u79d2\u7ea7\u68c0\u7d22[42]\u3002<\/p>\n<p>\u56fe\u7d22\u5f15\u9002\u7528\u4e8e\u5177\u6709\u590d\u6742\u5173\u7cfb\u7684\u8bb0\u5fc6\u3002\u77e5\u8bc6\u56fe\u8c31\u5c06\u5b9e\u4f53\u548c\u5173\u7cfb\u8868\u793a\u4e3a\u8282\u70b9\u548c\u8fb9&#xff0c;\u652f\u6301\u591a\u8df3\u63a8\u7406\u548c\u5173\u7cfb\u67e5\u8be2\u3002\u4f8b\u5982&#xff0c;Agent\u53ef\u4ee5\u67e5\u8be2&#034;\u4e0e\u7528\u6237A\u8ba8\u8bba\u8fc7\u7684\u6240\u6709\u4e0ePython\u76f8\u5173\u7684\u5de5\u5177&#034;&#xff0c;\u8fd9\u79cd\u590d\u6742\u67e5\u8be2\u96be\u4ee5\u901a\u8fc7\u7eaf\u5411\u91cf\u68c0\u7d22\u5b9e\u73b0\u3002\u56fe\u7d22\u5f15\u7684\u6311\u6218\u5728\u4e8e\u6784\u5efa\u548c\u7ef4\u62a4\u6210\u672c\u8f83\u9ad8&#xff0c;\u9700\u8981\u5b9e\u4f53\u94fe\u63a5\u548c\u5173\u7cfb\u62bd\u53d6\u7b49\u9884\u5904\u7406[66]\u3002<\/p>\n<p>\u6df7\u5408\u7d22\u5f15\u7ed3\u5408\u4e86\u591a\u79cd\u7d22\u5f15\u7684\u4f18\u52bf\u3002\u4f8b\u5982&#xff0c;\u5148\u7528\u5411\u91cf\u68c0\u7d22\u53ec\u56de\u5019\u9009\u8bb0\u5fc6&#xff0c;\u518d\u7528\u56fe\u5173\u7cfb\u8fc7\u6ee4\u548c\u6392\u5e8f&#xff1b;\u6216\u5148\u7528\u5173\u952e\u8bcd\u7d22\u5f15\u5feb\u901f\u5b9a\u4f4d\u76f8\u5173\u8bb0\u5fc6\u5b50\u96c6&#xff0c;\u518d\u5728\u5b50\u96c6\u5185\u8fdb\u884c\u5411\u91cf\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u3002\u6df7\u5408\u7d22\u5f15\u5728\u5b9e\u8df5\u4e2d\u5f80\u5f80\u80fd\u8fbe\u5230\u6700\u4f73\u7684\u68c0\u7d22\u6548\u679c[61]\u3002<\/p>\n<h3>6.2 \u8bb0\u5fc6\u7684\u7f16\u7801\u3001\u5b58\u50a8\u4e0e\u68c0\u7d22<\/h3>\n<h4>6.2.1 
\u4fe1\u606f\u62bd\u53d6\u4e0e\u77e5\u8bc6\u84b8\u998f<\/h4>\n<p>\u4ece\u539f\u59cb\u4ea4\u4e92\u8bb0\u5f55\u4e2d\u63d0\u53d6\u6709\u4ef7\u503c\u7684\u8bb0\u5fc6\u4fe1\u606f\u662f\u8bb0\u5fc6\u7cfb\u7edf\u7684\u9996\u8981\u4efb\u52a1\u3002\u76f4\u63a5\u5b58\u50a8\u539f\u59cb\u6587\u672c\u4e0d\u4ec5\u6d6a\u8d39\u5b58\u50a8\u7a7a\u95f4&#xff0c;\u8fd8\u4f1a\u589e\u52a0\u68c0\u7d22\u566a\u58f0&#xff0c;\u9700\u8981\u901a\u8fc7\u4fe1\u606f\u62bd\u53d6&#xff08;Information Extraction&#xff09;\u63d0\u70bc\u5173\u952e\u77e5\u8bc6[62]\u3002<\/p>\n<p>\u4fe1\u606f\u62bd\u53d6\u7684\u4e3b\u8981\u4efb\u52a1\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u5b9e\u4f53\u8bc6\u522b&#xff1a;\u8bc6\u522b\u6587\u672c\u4e2d\u7684\u5173\u952e\u5b9e\u4f53&#xff08;\u5982\u5de5\u5177\u540d\u79f0\u3001\u6570\u636e\u683c\u5f0f\u3001\u7528\u6237\u504f\u597d&#xff09;<\/li>\n<li>\u5173\u7cfb\u62bd\u53d6&#xff1a;\u63d0\u53d6\u5b9e\u4f53\u4e4b\u95f4\u7684\u5173\u7cfb&#xff08;\u5982&#034;\u5de5\u5177A\u9002\u7528\u4e8e\u4efb\u52a1B&#034;&#xff09;<\/li>\n<li>\u4e8b\u4ef6\u62bd\u53d6&#xff1a;\u8bc6\u522b\u91cd\u8981\u4e8b\u4ef6\u53ca\u5176\u53c2\u4e0e\u8005\u3001\u65f6\u95f4\u3001\u7ed3\u679c<\/li>\n<li>\u6458\u8981\u751f\u6210&#xff1a;\u5c06\u957f\u6587\u672c\u538b\u7f29\u4e3a\u7b80\u6d01\u7684\u6458\u8981&#xff0c;\u4fdd\u7559\u6838\u5fc3\u4fe1\u606f<\/li>\n<\/ul>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u4fe1\u606f\u62bd\u53d6\u53ef\u4ee5\u9488\u5bf9\u6027\u5730\u8bbe\u8ba1\u3002\u4f8b\u5982&#xff0c;\u4ece\u4e00\u6b21\u4ee3\u7801\u751f\u6210\u4efb\u52a1\u4e2d&#xff0c;\u53ef\u4ee5\u62bd\u53d6&#xff1a;\u4f7f\u7528\u7684\u7f16\u7a0b\u8bed\u8a00\u3001\u89e3\u51b3\u7684\u95ee\u9898\u7c7b\u578b\u3001\u8c03\u7528\u7684\u5e93\u51fd\u6570\u3001\u9047\u5230\u7684\u9519\u8bef\u53ca\u89e3\u51b3\u65b9\u6848\u7b49\u3002\u8fd9\u4e9b\u7ed3\u6784\u5316\u4fe1\u606f\u6bd4\u539f\u59cb\u5bf9\u8bdd\u8bb0\u5f55\u66f4\u6613\u4e8e\u68c0\u7d22\u548c\u590d\u7528[16]\u3002<\/p>\n<p>\u77e5\u8bc6\u84b8\u998f&#xff08;Knowledge 
Distillation&#xff09;\u662f\u53e6\u4e00\u79cd\u8bb0\u5fc6\u538b\u7f29\u6280\u672f\u3002\u5b83\u901a\u8fc7\u8bad\u7ec3\u4e00\u4e2a\u8f7b\u91cf\u7ea7\u6a21\u578b\u6765\u8fd1\u4f3c\u5927\u578b\u6a21\u578b\u7684\u884c\u4e3a&#xff0c;\u5c06\u5927\u578b\u6a21\u578b\u4e2d\u7684\u77e5\u8bc6\u8fc1\u79fb\u5230\u8f7b\u91cf\u7ea7\u6a21\u578b\u3002\u5728\u8bb0\u5fc6\u7cfb\u7edf\u4e2d&#xff0c;\u53ef\u4ee5\u4f7f\u7528\u77e5\u8bc6\u84b8\u998f\u8bad\u7ec3\u4e13\u95e8\u7684\u7f16\u7801\u5668&#xff0c;\u4f7f\u5176\u751f\u6210\u7684\u5d4c\u5165\u66f4\u597d\u5730\u6355\u6349\u8bb0\u5fc6\u7684\u5173\u952e\u7279\u5f81[56]\u3002<\/p>\n<h4>6.2.2 \u5411\u91cf\u5d4c\u5165\u4e0e\u76f8\u4f3c\u5ea6\u68c0\u7d22<\/h4>\n<p>\u5411\u91cf\u5d4c\u5165&#xff08;Vector Embedding&#xff09;\u662f\u73b0\u4ee3\u8bb0\u5fc6\u7cfb\u7edf\u7684\u6838\u5fc3\u6280\u672f\u3002\u5b83\u5c06\u79bb\u6563\u7684\u8bb0\u5fc6\u5185\u5bb9\u6620\u5c04\u5230\u8fde\u7eed\u5411\u91cf\u7a7a\u95f4&#xff0c;\u4f7f\u5f97\u8bed\u4e49\u76f8\u4f3c\u7684\u8bb0\u5fc6\u5728\u5411\u91cf\u7a7a\u95f4\u4e2d\u8ddd\u79bb\u76f8\u8fd1[42]\u3002<\/p>\n<p>\u5d4c\u5165\u6a21\u578b\u901a\u5e38\u57fa\u4e8e\u9884\u8bad\u7ec3\u7684\u8bed\u8a00\u6a21\u578b&#xff08;\u5982BERT\u3001Sentence-BERT&#xff09;&#xff0c;\u901a\u8fc7\u5bf9\u6bd4\u5b66\u4e60&#xff08;Contrastive Learning&#xff09;\u5fae\u8c03\u4ee5\u9002\u5e94\u7279\u5b9a\u9886\u57df\u7684\u8bed\u4e49\u76f8\u4f3c\u5ea6\u5224\u65ad\u3002\u5bf9\u6bd4\u5b66\u4e60\u7684\u76ee\u6807\u662f\u6700\u5c0f\u5316\u76f8\u4f3c\u6837\u672c\u95f4\u7684\u8ddd\u79bb&#xff0c;\u6700\u5927\u5316\u4e0d\u76f8\u4f3c\u6837\u672c\u95f4\u7684\u8ddd\u79bb&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\\mathcal{L}_{contrastive} &#061; -\\log \\frac{\\exp(\\text{sim}(x, 
x^&#043;)\/\\tau)}{\\exp(\\text{sim}(x, x^&#043;)\/\\tau) &#043; \\sum_{i} \\exp(\\text{sim}(x, x_i^-)\/\\tau)}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">co<\/span><span class=\"mord mathnormal mtight\">n<\/span><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mord mathnormal mtight\">s<\/span><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"mord mathnormal mtight\">e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.4495em;vertical-align: -1.0012em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mopen 
nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.4483em\"><span class=\"\" style=\"top: -2.2985em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mopen\">(<\/span><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">x<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6973em\"><span class=\"\" style=\"top: -2.989em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">&#043;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.162em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">exp<\/span><span class=\"mopen\">(<\/span><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">x<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8115em\"><span class=\"\" style=\"top: -2.4231em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1031em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2212<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2769em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mopen\">(<\/span><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">x<\/span><span 
class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7713em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">&#043;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.0012em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">xx<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">x<\/span><\/span><\/span><\/span><\/span> \u4e3a\u951a\u6837\u672c&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">x&#043;x^&#043;<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7713em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7713em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin 
mtight\">&#043;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u6b63\u6837\u672c&#xff08;\u8bed\u4e49\u76f8\u4f3c&#xff09;&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">xi\u2212x_i^-<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0883em;vertical-align: -0.2769em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8115em\"><span class=\"\" style=\"top: -2.4231em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3.1031em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2212<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2769em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u8d1f\u6837\u672c&#xff08;\u8bed\u4e49\u4e0d\u76f8\u4f3c&#xff09;&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">sim\\\\text{sim}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6679em\"><\/span><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u76f8\u4f3c\u5ea6\u51fd\u6570&#xff08;\u901a\u5e38\u7528\u4f59\u5f26\u76f8\u4f3c\u5ea6&#xff09;&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4\\\\tau<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 
0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><\/span><\/span><\/span><\/span> \u4e3a\u6e29\u5ea6\u53c2\u6570[66]\u3002<\/p>\n<p>\u76f8\u4f3c\u5ea6\u68c0\u7d22\u7684\u6838\u5fc3\u662f\u6700\u8fd1\u90bb\u641c\u7d22\u3002\u7ed9\u5b9a\u67e5\u8be2\u5411\u91cf <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">qq<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5728\u8bb0\u5fc6\u5411\u91cf\u96c6\u5408 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{v1,v2,&#8230;,vN}\\\\{v_1, v_2, &#8230;, v_N\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: 
-2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span> \u4e2d\u627e\u5230\u6700\u76f8\u4f3c\u7684 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">KK<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><\/span> \u4e2a\u5411\u91cf&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">TopK(q)&#061;arg\u2061TopKi\u2009sim(q,vi)\\\\text{TopK}(q) &#061; \\\\arg\\\\text{TopK}_{i} \\\\, 
\\\\text{sim}(q, v_i)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">TopK<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">TopK<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2175em\"><span class=\"\" style=\"top: -2.4559em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2441em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">q<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u7cbe\u786e\u6700\u8fd1\u90bb\u641c\u7d22\u7684\u65f6\u95f4\u590d\u6742\u5ea6\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O(Nd)O(Nd)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mord mathnormal\">d<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff08;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">dd<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">d<\/span><\/span><\/span><\/span><\/span> \u4e3a\u5411\u91cf\u7ef4\u5ea6&#xff09;&#xff0c;\u5bf9\u4e8e\u5927\u89c4\u6a21\u8bb0\u5fc6\u5e93\u4e0d\u53ef\u884c\u3002\u8fd1\u4f3c\u6700\u8fd1\u90bb\u7b97\u6cd5\u901a\u8fc7\u727a\u7272\u5c11\u91cf\u7cbe\u5ea6\u6362\u53d6\u6570\u91cf\u7ea7\u52a0\u901f&#xff0c;\u5982HNSW\u7b97\u6cd5\u7684\u67e5\u8be2\u590d\u6742\u5ea6\u4ec5\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O(log\u2061N)O(\\\\log N)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mopen\">(<\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>[61]\u3002<\/p>\n<h4>6.2.3 \u8bb0\u5fc6\u7684\u9057\u5fd8\u4e0e\u66f4\u65b0\u7b56\u7565<\/h4>\n<p>\u8bb0\u5fc6\u7cfb\u7edf\u9700\u8981\u5904\u7406\u4fe1\u606f\u7684\u52a8\u6001\u53d8\u5316&#xff1a;\u65b0\u77e5\u8bc6\u4e0d\u65ad\u4ea7\u751f&#xff0c;\u65e7\u77e5\u8bc6\u53ef\u80fd\u8fc7\u65f6\u3002\u6709\u6548\u7684\u9057\u5fd8&#xff08;Forgetting&#xff09;\u548c\u66f4\u65b0&#xff08;Updating&#xff09;\u7b56\u7565\u786e\u4fdd\u8bb0\u5fc6\u5e93\u4fdd\u6301\u65f6\u6548\u6027\u548c\u51c6\u786e\u6027[62]\u3002<\/p>\n<p>\u65f6\u95f4\u8870\u51cf\u662f\u6700\u7b80\u5355\u7684\u9057\u5fd8\u7b56\u7565\u3002\u6bcf\u6761\u8bb0\u5fc6\u5173\u8054\u4e00\u4e2a\u65f6\u95f4\u6233&#xff0c;\u68c0\u7d22\u65f6\u6839\u636e\u65f6\u95f4\u8870\u51cf\u51fd\u6570\u964d\u4f4e\u65e7\u8bb0\u5fc6\u7684\u6743\u91cd&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">w(m)&#061;exp\u2061(\u2212\u03bb(tcurrent\u2212tm))w(m) &#061; \\\\exp(-\\\\lambda (t_{current} - t_m))<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0269em\">w<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">m<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mop\">exp<\/span><span 
class=\"mopen\">(<\/span><span class=\"mord\">\u2212<\/span><span class=\"mord mathnormal\">\u03bb<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">c<\/span><span class=\"mord mathnormal mtight\">u<\/span><span class=\"mord mathnormal mtight\">rre<\/span><span class=\"mord mathnormal mtight\">n<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">))<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span 
class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">tmt_m<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7651em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">t<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">m<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u8bb0\u5fc6 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">mm<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><\/span><\/span><\/span> \u7684\u521b\u5efa\u65f6\u95f4&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bb\\\\lambda<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\">\u03bb<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8870\u51cf\u7387\u3002\u8be5\u7b56\u7565\u5047\u8bbe\u65b0\u4fe1\u606f\u901a\u5e38\u66f4\u76f8\u5173&#xff0c;\u4f46\u53ef\u80fd\u8bef\u5220\u957f\u671f\u6709\u6548\u7684\u77e5\u8bc6\u3002<\/p>\n<p>\u8bbf\u95ee\u9891\u7387\u662f\u53e6\u4e00\u79cd\u9057\u5fd8\u4f9d\u636e\u3002\u9891\u7e41\u8bbf\u95ee\u7684\u8bb0\u5fc6\u88ab\u8ba4\u4e3a\u66f4\u6709\u4ef7\u503c&#xff0c;\u5e94\u4fdd\u7559&#xff1b;\u957f\u671f\u672a\u8bbf\u95ee\u7684\u8bb0\u5fc6\u53ef\u80fd\u88ab\u9057\u5fd8\u3002LFU&#xff08;Least 
Frequently Used&#xff09;\u548cLRU&#xff08;Least Recently Used&#xff09;\u662f\u7ecf\u5178\u7684\u7f13\u5b58\u6dd8\u6c70\u7b56\u7565&#xff0c;\u53ef\u4ee5\u5e94\u7528\u4e8e\u8bb0\u5fc6\u7ba1\u7406[16]\u3002<\/p>\n<p>\u4e00\u81f4\u6027\u66f4\u65b0\u5904\u7406\u77e5\u8bc6\u51b2\u7a81\u3002\u5f53\u65b0\u83b7\u53d6\u7684\u4fe1\u606f\u4e0e\u5df2\u6709\u8bb0\u5fc6\u77db\u76fe\u65f6&#xff0c;\u9700\u8981\u5224\u65ad\u54ea\u4e2a\u66f4\u53ef\u9760\u3002\u53ef\u4ee5\u57fa\u4e8e\u4fe1\u606f\u6765\u6e90\u7684\u53ef\u4fe1\u5ea6\u3001\u652f\u6301\u8bc1\u636e\u7684\u6570\u91cf\u3001\u4e0e\u5176\u4ed6\u77e5\u8bc6\u7684\u4e00\u81f4\u6027\u7b49\u56e0\u7d20\u7efc\u5408\u5224\u65ad\u3002\u8d1d\u53f6\u65af\u66f4\u65b0\u63d0\u4f9b\u4e86\u4e00\u79cd\u5f62\u5f0f\u5316\u6846\u67b6&#xff1a;\u5c06\u65b0\u65e7\u4fe1\u606f\u89c6\u4e3a\u89c2\u6d4b&#xff0c;\u6839\u636e\u8d1d\u53f6\u65af\u89c4\u5219\u66f4\u65b0\u5bf9\u77e5\u8bc6\u771f\u5b9e\u6027\u7684\u4fe1\u5ff5[56]\u3002<\/p>\n<h3>6.3 \u8bb0\u5fc6\u9a71\u52a8\u7684SOP\u5b66\u4e60\u4e0e\u4f18\u5316<\/h3>\n<h4>6.3.1 \u4ece\u7ecf\u9a8c\u4e2d\u63d0\u53d6\u64cd\u4f5c\u6a21\u5f0f<\/h4>\n<p>Agent\u7684\u5386\u53f2\u6267\u884c\u8bb0\u5f55\u8574\u542b\u4e86\u4e30\u5bcc\u7684\u64cd\u4f5c\u6a21\u5f0f&#xff0c;\u901a\u8fc7\u5206\u6790\u8fd9\u4e9b\u8bb0\u5f55\u53ef\u4ee5\u81ea\u52a8\u53d1\u73b0\u6709\u6548\u7684SOP\u3002\u8fd9\u79cd\u4ece\u7ecf\u9a8c\u4e2d\u5b66\u4e60SOP\u7684\u65b9\u6cd5\u907f\u514d\u4e86\u4eba\u5de5\u8bbe\u8ba1\u7684\u5c40\u9650\u6027&#xff0c;\u80fd\u591f\u53d1\u73b0\u975e\u76f4\u89c2\u4f46\u9ad8\u6548\u7684\u64cd\u4f5c\u5e8f\u5217[42]\u3002<\/p>\n<p>\u9891\u7e41\u6a21\u5f0f\u6316\u6398&#xff08;Frequent Pattern 
Mining&#xff09;\u662f\u63d0\u53d6\u64cd\u4f5c\u6a21\u5f0f\u7684\u57fa\u672c\u6280\u672f\u3002\u7ed9\u5b9a\u4e00\u7ec4\u6210\u529f\u7684\u5de5\u4f5c\u6d41\u8f68\u8ff9&#xff0c;\u7b97\u6cd5\u627e\u51fa\u9891\u7e41\u51fa\u73b0\u7684\u52a8\u4f5c\u5b50\u5e8f\u5217\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u8bbe\u52a8\u4f5c\u5e8f\u5217\u6570\u636e\u5e93\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">D&#061;{\u03c41,\u03c42,&#8230;,\u03c4N}\\\\mathcal{D} &#061; \\\\{\\\\tau_1, \\\\tau_2, &#8230;, \\\\tau_N\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1132em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1132em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">&#8230;<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.1132em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.109em\">N<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u6a21\u5f0f <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">pp<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">p<\/span><\/span><\/span><\/span><\/span> \u7684\u652f\u6301\u5ea6\u4e3a\u5305\u542b <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">pp<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: 
-0.1944em\"><\/span><span class=\"mord mathnormal\">p<\/span><\/span><\/span><\/span><\/span> \u7684\u5e8f\u5217\u6bd4\u4f8b&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">support(p)&#061;\u2223{\u03c4\u2208D:p\u2286\u03c4}\u2223\u2223D\u2223\\\\text{support}(p) &#061; \\\\frac{|\\\\{\\\\tau \\\\in \\\\mathcal{D} : p \\\\subseteq \\\\tau\\\\}|}{|\\\\mathcal{D}|}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">support<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">p<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.363em;vertical-align: -0.936em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"mopen\">{<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathnormal\">p<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2286<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">}<\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.936em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u9891\u7e41\u6a21\u5f0f\u662f\u652f\u6301\u5ea6\u4e0d\u4f4e\u4e8e\u9608\u503c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b8\\\\theta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><\/span> \u7684\u6a21\u5f0f&#xff1a;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">{p:support(p)\u2265\u03b8}\\\\{p : \\\\text{support}(p) \\\\geq \\\\theta\\\\}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord mathnormal\">p<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">:<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 
1em;vertical-align: -0.25em\"><\/span><span class=\"mord text\"><span class=\"mord\">support<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">p<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2265<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>\u3002Apriori\u548cFP-Growth\u662f\u7ecf\u5178\u7684\u9891\u7e41\u6a21\u5f0f\u6316\u6398\u7b97\u6cd5[66]\u3002<\/p>\n<p>\u5e8f\u5217\u805a\u7c7b&#xff08;Sequence Clustering&#xff09;\u53ef\u4ee5\u53d1\u73b0\u4e0d\u540c\u7c7b\u578b\u7684SOP\u6a21\u677f\u3002\u5c06\u76f8\u4f3c\u7684\u5de5\u4f5c\u6d41\u8f68\u8ff9\u805a\u4e3a\u4e00\u7c7b&#xff0c;\u6bcf\u7c7b\u4ee3\u8868\u89e3\u51b3\u7279\u5b9a\u7c7b\u578b\u4efb\u52a1\u7684\u6807\u51c6\u65b9\u6cd5\u3002\u65b0\u4efb\u52a1\u5230\u6765\u65f6&#xff0c;\u9996\u5148\u5224\u65ad\u5176\u6240\u5c5e\u7c7b\u522b&#xff0c;\u7136\u540e\u590d\u7528\u5bf9\u5e94\u7c7b\u522b\u7684SOP\u3002\u8fd9\u79cd\u57fa\u4e8e\u6848\u4f8b\u7684\u63a8\u7406&#xff08;Case-Based Reasoning&#xff09;\u65b9\u6cd5\u5728\u5b9e\u8df5\u4e2d\u975e\u5e38\u6709\u6548[61]\u3002<\/p>\n<h4>6.3.2 \u6848\u4f8b\u5e93\u6784\u5efa\u4e0e\u76f8\u4f3c\u5ea6\u5339\u914d<\/h4>\n<p>\u6848\u4f8b\u5e93&#xff08;Case 
Library&#xff09;\u662f\u8bb0\u5fc6\u9a71\u52a8\u7684SOP\u5b66\u4e60\u7684\u6838\u5fc3\u7ec4\u4ef6\u3002\u6bcf\u4e2a\u6848\u4f8b\u5305\u542b\u95ee\u9898\u63cf\u8ff0\u3001\u89e3\u51b3\u65b9\u6848\u548c\u6267\u884c\u7ed3\u679c\u4e09\u90e8\u5206\u3002\u6848\u4f8b\u5e93\u7684\u6784\u5efa\u9700\u8981\u89e3\u51b3\u6848\u4f8b\u8868\u793a\u3001\u7d22\u5f15\u548c\u68c0\u7d22\u7b49\u5173\u952e\u95ee\u9898[62]\u3002<\/p>\n<p>\u6848\u4f8b\u8868\u793a\u5c06\u539f\u59cb\u4efb\u52a1\u4fe1\u606f\u7f16\u7801\u4e3a\u7ed3\u6784\u5316\u683c\u5f0f\u3002\u5bf9\u4e8e\u6587\u672c\u578b\u4efb\u52a1&#xff0c;\u53ef\u4ee5\u4f7f\u7528TF-IDF\u6216BERT\u5d4c\u5165\u8868\u793a\u95ee\u9898&#xff1b;\u5bf9\u4e8e\u7ed3\u6784\u5316\u4efb\u52a1&#xff0c;\u53ef\u4ee5\u4f7f\u7528\u7279\u5f81\u5411\u91cf\u7f16\u7801\u5173\u952e\u5c5e\u6027\u3002\u89e3\u51b3\u65b9\u6848\u90e8\u5206\u9700\u8981\u8bb0\u5f55\u5b8c\u6574\u7684\u64cd\u4f5c\u5e8f\u5217&#xff0c;\u5305\u62ec\u8c03\u7528\u7684\u5de5\u5177\u3001\u751f\u6210\u7684\u63a8\u7406\u3001\u8bbe\u7f6e\u7684\u53c2\u6570\u7b49[16]\u3002<\/p>\n<p>\u6848\u4f8b\u7d22\u5f15\u652f\u6301\u5feb\u901f\u68c0\u7d22\u3002\u5e38\u7528\u65b9\u6cd5\u662f\u5c06\u6848\u4f8b\u8868\u793a\u4e3a\u5411\u91cf&#xff0c;\u4f7f\u7528\u5411\u91cf\u6570\u636e\u5e93\u5b58\u50a8\u548c\u7d22\u5f15\u3002\u5bf9\u4e8e\u5927\u89c4\u6a21\u6848\u4f8b\u5e93&#xff0c;\u53ef\u4ee5\u5efa\u7acb\u5c42\u6b21\u5316\u7d22\u5f15&#xff1a;\u5148\u7528\u7c97\u7c92\u5ea6\u5206\u7c7b&#xff08;\u5982\u4efb\u52a1\u9886\u57df\u3001\u96be\u5ea6\u7ea7\u522b&#xff09;\u7f29\u5c0f\u641c\u7d22\u8303\u56f4&#xff0c;\u518d\u5728\u5019\u9009\u96c6\u5185\u8fdb\u884c\u7cbe\u7ec6\u5339\u914d[56]\u3002<\/p>\n<p>\u76f8\u4f3c\u5ea6\u5339\u914d\u5ea6\u91cf\u65b0\u4efb\u52a1\u4e0e\u5386\u53f2\u6848\u4f8b\u7684\u76f8\u4f3c\u6027\u3002\u5e38\u7528\u7684\u76f8\u4f3c\u5ea6\u5ea6\u91cf\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u4f59\u5f26\u76f8\u4f3c\u5ea6&#xff1a;<span class=\"katex--inline\"><span class=\"katex\"><span 
class=\"katex-mathml\">simcos(x,y)&#061;x\u22c5y\u2225x\u2225\u2225y\u2225\\\\text{sim}_{cos}(x, y) &#061; \\\\frac{x \\\\cdot y}{\\\\|x\\\\| \\\\|y\\\\|}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">cos<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">x<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2772em;vertical-align: -0.52em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7572em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2225<\/span><span class=\"mord mathnormal mtight\">x<\/span><span class=\"mord 
mtight\">\u2225\u2225<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mord mtight\">\u2225<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.4461em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">x<\/span><span class=\"mbin mtight\">\u22c5<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">y<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.52em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u6b27\u6c0f\u8ddd\u79bb&#xff1a;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">simeuclid(x,y)&#061;\u2225x\u2212y\u2225\\\\text{sim}_{euclid}(x, y) &#061; \\\\|x - y\\\\|<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">sim<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">e<\/span><span class=\"mord mathnormal mtight\">u<\/span><span class=\"mord mathnormal mtight\">c<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal
mtight\">i<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">x<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\">\u2225<\/span><span class=\"mord mathnormal\">x<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mord\">\u2225<\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u7f16\u8f91\u8ddd\u79bb&#xff1a;\u9002\u7528\u4e8e\u5e8f\u5217\u578b\u8868\u793a&#xff0c;\u5ea6\u91cf\u5c06\u4e00\u4e2a\u5e8f\u5217\u53d8\u4e3a\u53e6\u4e00\u4e2a\u6240\u9700\u7684\u6700\u5c11\u64cd\u4f5c\u6570<\/li>\n<\/ul>\n<p>\u68c0\u7d22\u5230\u76f8\u4f3c\u6848\u4f8b\u540e&#xff0c;Agent\u53ef\u4ee5\u590d\u7528\u5176\u89e3\u51b3\u65b9\u6848&#xff0c;\u6216\u5728\u5176\u57fa\u7840\u4e0a\u8fdb\u884c\u9002\u5e94\u6027\u4fee\u6539[42]\u3002<\/p>\n<h4>6.3.3 
\u8bb0\u5fc6\u5f15\u5bfc\u7684\u63a2\u7d22\u7b56\u7565<\/h4>\n<p>\u8bb0\u5fc6\u4e0d\u4ec5\u53ef\u4ee5\u7528\u4e8e\u89e3\u51b3\u65b9\u6848\u590d\u7528&#xff0c;\u8fd8\u53ef\u4ee5\u6307\u5bfc\u63a2\u7d22\u8fc7\u7a0b\u3002\u5f53Agent\u9762\u5bf9\u65b0\u4efb\u52a1\u65f6&#xff0c;\u53ef\u4ee5\u57fa\u4e8e\u8bb0\u5fc6\u5224\u65ad\u54ea\u4e9b\u64cd\u4f5c\u66f4\u6709\u53ef\u80fd\u6210\u529f&#xff0c;\u4f18\u5148\u5c1d\u8bd5\u9ad8\u6f5c\u529b\u9009\u9879&#xff0c;\u907f\u514d\u76f2\u76ee\u63a2\u7d22[66]\u3002<\/p>\n<p>\u4e0a\u7f6e\u4fe1\u754c&#xff08;Upper Confidence Bound, UCB&#xff09;\u7b97\u6cd5\u5c06\u8bb0\u5fc6\u4f5c\u4e3a\u5148\u9a8c\u77e5\u8bc6\u878d\u5165\u591a\u81c2 bandit \u95ee\u9898\u3002\u6bcf\u4e2a\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">aa<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">a<\/span><\/span><\/span><\/span><\/span> \u7ef4\u62a4\u4e00\u4e2a\u4ef7\u503c\u4f30\u8ba1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Q(a)Q(a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u548c\u8bbf\u95ee\u8ba1\u6570 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">N(a)N(a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>\u3002\u52a8\u4f5c\u9009\u62e9\u51c6\u5219\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span 
class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">a\u2217&#061;arg\u2061max\u2061a[Q(a)&#043;cln\u2061TN(a)]a^* &#061; \\\\arg\\\\max_a \\\\left[ Q(a) &#043; c \\\\sqrt{\\\\frac{\\\\ln T}{N(a)}} \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7387em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7387em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mbin mtight\">\u2217<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.1016em;vertical-align: -1.25em\"><\/span><span class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span 
class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">[<\/span><\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">c<\/span><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8516em\"><span class=\"svg-align\" style=\"top: -5em\"><span class=\"pstrut\" style=\"height: 5em\"><\/span><span class=\"mord\" style=\"padding-left: 1em\"><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3714em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\">ln<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.936em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.8116em\"><span 
class=\"pstrut\" style=\"height: 5em\"><\/span><span class=\"hide-tail\" style=\"min-width: 1.02em;height: 3.08em\"><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1884em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">TT<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><\/span> \u4e3a\u603b\u5c1d\u8bd5\u6b21\u6570&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">cc<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">c<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u63a2\u7d22\u7a0b\u5ea6\u3002\u8bb0\u5fc6\u53ef\u4ee5\u521d\u59cb\u5316 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Q(a)Q(a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u4f7fAgent\u4f18\u5148\u5c1d\u8bd5\u5386\u53f2\u4e0a\u8868\u73b0\u597d\u7684\u52a8\u4f5c[61]\u3002<\/p>\n<p>\u8d1d\u53f6\u65af\u4f18\u5316&#xff08;Bayesian Optimization&#xff09;\u63d0\u4f9b\u4e86\u66f4\u590d\u6742\u7684\u8bb0\u5fc6\u5f15\u5bfc\u63a2\u7d22\u6846\u67b6\u3002\u5b83\u7ef4\u62a4\u4e00\u4e2a\u5173\u4e8e\u52a8\u4f5c\u4ef7\u503c\u7684\u9ad8\u65af\u8fc7\u7a0b&#xff08;Gaussian 
Process&#xff09;\u5148\u9a8c&#xff0c;\u6839\u636e\u89c2\u6d4b\u4e0d\u65ad\u66f4\u65b0\u540e\u9a8c\u3002\u8bb0\u5fc6\u63d0\u4f9b\u4e86\u5148\u9a8c\u5206\u5e03\u7684\u521d\u59cb\u53c2\u6570&#xff0c;\u52a0\u901f\u6536\u655b\u3002\u8d1d\u53f6\u65af\u4f18\u5316\u7279\u522b\u9002\u7528\u4e8e\u8bc4\u4f30\u4ee3\u4ef7\u9ad8\u7684\u573a\u666f&#xff0c;\u5982\u9700\u8981\u8c03\u7528\u6602\u8d35API\u7684\u52a8\u4f5c\u9009\u62e9[62]\u3002<\/p>\n<p><span class=\"nodeLabel\"><\/p>\n<p>\u8bb0\u5fc6\u68c0\u7d22\u6d41\u7a0b<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>\u7f16\u7801<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>ANN\u641c\u7d22<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/p>\n<p>\u91cd\u6392\u5e8f<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u67e5\u8be2<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5411\u91cf\u8868\u793a<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5019\u9009\u8bb0\u5fc6<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u68c0\u7d22\u7ed3\u679c<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u8bb0\u5fc6\u5c42\u6b21\u7ed3\u6784<\/p>\n<p><\/span><span
class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u8f93\u5165\u4fe1\u606f<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u77ed\u671f\u8bb0\u5fc6<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u4fe1\u606f\u7b5b\u9009<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u60c5\u666f\u8bb0\u5fc6<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u8bed\u4e49\u8bb0\u5fc6<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7a0b\u5e8f\u8bb0\u5fc6<\/p>\n<p><\/span><\/p>\n<h2>7 \u63a2\u7d22-\u5229\u7528\u6743\u8861\u4e0e\u7b56\u7565\u4f18\u5316<\/h2>\n<h3>7.1 \u63a2\u7d22-\u5229\u7528\u56f0\u5883\u7684\u7406\u8bba\u5206\u6790<\/h3>\n<h4>7.1.1 \u591a\u81c2Bandit\u95ee\u9898\u7684\u542f\u793a<\/h4>\n<p>\u63a2\u7d22-\u5229\u7528\u6743\u8861&#xff08;Exploration-Exploitation Tradeoff&#xff09;\u662f\u5f3a\u5316\u5b66\u4e60\u7684\u6838\u5fc3\u56f0\u5883&#xff0c;\u4e5f\u662fAgent\u5de5\u4f5c\u6d41\u4f18\u5316\u5fc5\u987b\u9762\u5bf9\u7684\u57fa\u672c\u95ee\u9898\u3002\u591a\u81c2Bandit\u95ee\u9898&#xff08;Multi-Armed Bandit Problem&#xff09;\u662f\u7814\u7a76\u8fd9\u4e00\u95ee\u9898\u7684\u7ecf\u5178\u6a21\u578b[16]\u3002<\/p>\n<p>\u5728\u591a\u81c2Bandit\u95ee\u9898\u4e2d&#xff0c;Agent\u9762\u4e34 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">KK<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><\/span><\/span><\/span><\/span> 
\u4e2a\u52a8\u4f5c\u9009\u9879&#xff08;\u81c2&#xff09;&#xff0c;\u6bcf\u4e2a\u81c2\u7684\u5956\u52b1\u670d\u4ece\u672a\u77e5\u5206\u5e03\u3002Agent\u7684\u76ee\u6807\u662f\u901a\u8fc7\u6709\u9650\u6b21\u5c1d\u8bd5\u6700\u5927\u5316\u7d2f\u79ef\u5956\u52b1\u3002\u6bcf\u6b21\u5c1d\u8bd5\u65f6&#xff0c;Agent\u9762\u4e34\u4e24\u96be\u9009\u62e9&#xff1a;\u5229\u7528&#xff08;Exploitation&#xff09;\u5f53\u524d\u4f30\u8ba1\u5956\u52b1\u6700\u9ad8\u7684\u81c2\u4ee5\u83b7\u5f97\u5373\u65f6\u6536\u76ca&#xff0c;\u6216\u63a2\u7d22&#xff08;Exploration&#xff09;\u5176\u4ed6\u81c2\u4ee5\u83b7\u53d6\u66f4\u51c6\u786e\u7684\u4fe1\u606f[14]\u3002<\/p>\n<p>\u5f62\u5f0f\u5316\u5730&#xff0c;\u8bbe\u81c2 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u7684\u771f\u5b9e\u671f\u671b\u5956\u52b1\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bci\\\\mu_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;Agent\u5728\u65f6\u523b <span class=\"katex--inline\"><span class=\"katex\"><span 
class=\"katex-mathml\">tt<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u9009\u62e9\u81c2 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ItI_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0785em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u83b7\u5f97\u5956\u52b1 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">rt\u223cN(\u03bcIt,\u03c32)r_t \\\\sim \\\\mathcal{N}(\\\\mu_{I_t}, \\\\sigma^2)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u223c<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0642em;vertical-align: -0.2501em\"><\/span><span class=\"mord mathcal\" style=\"margin-right: 0.1474em\">N<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0785em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2501em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0359em\">\u03c3<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8141em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>\u3002\u7d2f\u79ef\u9057\u61be&#xff08;Cumulative Regret&#xff09;\u5b9a\u4e49\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">RT&#061;T\u22c5max\u2061i\u03bci\u2212\u2211t&#061;1T\u03bcItR_T &#061; T \\\\cdot \\\\max_i \\\\mu_i - \\\\sum_{t&#061;1}^T \\\\mu_{I_t}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:
0.1389em\">T<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.311em;vertical-align: -0.7277em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.3723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7277em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.0954em;vertical-align: -1.2671em\"><\/span><span class=\"mop op-limits\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0785em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal 
mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2501em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u9057\u61be\u5ea6\u91cf\u4e86Agent\u4e0e\u59cb\u7ec8\u9009\u62e9\u6700\u4f18\u81c2\u7684\u7406\u60f3\u7b56\u7565\u4e4b\u95f4\u7684\u5dee\u8ddd\u3002\u4f18\u79c0\u7684\u7b97\u6cd5\u5e94\u4fdd\u8bc1\u9057\u61be\u6b21\u7ebf\u6027\u589e\u957f&#xff0c;\u5373 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">lim\u2061T\u2192\u221eRT\/T&#061;0\\\\lim_{T \\\\rightarrow \\\\infty} R_T \/ T &#061; 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mop\"><span class=\"mop\">lim<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mrel mtight\">\u2192<\/span><span class=\"mord mtight\">\u221e<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0077em\">R<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0077em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span>[56]\u3002<\/p>\n<h4>7.1.2 \u4fe1\u606f\u589e\u76ca\u4e0e\u4e0d\u786e\u5b9a\u6027\u91cf\u5316<\/h4>\n<p>\u4ece\u4fe1\u606f\u8bba\u89c6\u89d2&#xff0c;\u63a2\u7d22\u7684\u4ef7\u503c\u5728\u4e8e\u51cf\u5c11\u4e0d\u786e\u5b9a\u6027\u3002\u4fe1\u606f\u589e\u76ca&#xff08;Information Gain&#xff09;\u5ea6\u91cf\u4e86\u6267\u884c\u67d0\u52a8\u4f5c\u540e\u5bf9\u73af\u5883\u8ba4\u77e5\u7684\u6539\u8fdb\u7a0b\u5ea6\u3002\u9009\u62e9\u4fe1\u606f\u589e\u76ca\u9ad8\u7684\u52a8\u4f5c&#xff0c;\u5373\u4f7f\u5176\u5373\u65f6\u5956\u52b1\u4e0d\u9ad8&#xff0c;\u4e5f\u53ef\u80fd\u5e26\u6765\u957f\u671f\u6536\u76ca[42]\u3002<\/p>\n<p>\u8d1d\u53f6\u65af\u6846\u67b6\u4e0b&#xff0c;Agent\u7ef4\u62a4\u5bf9\u6bcf\u4e2a\u52a8\u4f5c\u4ef7\u503c\u5206\u5e03\u7684\u4fe1\u5ff5 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">P(\u03bci\u2223Ht)P(\\\\mu_i | \\\\mathcal{H}_t)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: 
-0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0097em\">H<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0097em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">Ht\\\\mathcal{H}_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0097em\">H<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0097em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u5386\u53f2\u89c2\u6d4b\u3002\u4fe1\u606f\u589e\u76ca\u5b9a\u4e49\u4e3a\u6267\u884c\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">aa<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">a<\/span><\/span><\/span><\/span><\/span> \u524d\u540e\u4fe1\u5ff5\u5206\u5e03\u7684KL\u6563\u5ea6\u671f\u671b&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">IG(a)&#061;Er\u2223a,Ht[DKL(P(\u03bc\u2223Ht,a,r)\u2225P(\u03bc\u2223Ht))]IG(a) &#061; \\\\mathbb{E}_{r | a, \\\\mathcal{H}_t} [D_{KL}(P(\\\\mu | \\\\mathcal{H}_t, a, r) \\\\| P(\\\\mu | \\\\mathcal{H}_t))]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0785em\">I<\/span><span class=\"mord mathnormal\">G<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1052em;vertical-align: -0.3552em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.5198em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathnormal mtight\">a<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mtight\"><span class=\"mord mathcal mtight\" style=\"margin-right: 0.0097em\">H<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2963em\"><span class=\"\" style=\"top: -2.357em;margin-left: -0.0097em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.143em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3552em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal 
mtight\">L<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0097em\">H<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0097em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\u2225<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u03bc<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathcal\" style=\"margin-right: 0.0097em\">H<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0097em;margin-right: 0.05em\"><span 
class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">))]<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u4fe1\u606f\u589e\u76ca\u5f15\u5bfc\u7684\u63a2\u7d22\u7b56\u7565\u4f18\u5148\u9009\u62e9\u80fd\u591f\u6700\u5927\u7a0b\u5ea6\u51cf\u5c11\u4ef7\u503c\u4e0d\u786e\u5b9a\u6027\u7684\u52a8\u4f5c\u3002Information-Directed Sampling\u662f\u663e\u5f0f\u57fa\u4e8e\u4fe1\u606f\u589e\u76ca\u7684\u4ee3\u8868\u6027\u7b97\u6cd5&#xff1b;Thompson Sampling\u5219\u901a\u8fc7\u540e\u9a8c\u91c7\u6837\u95f4\u63a5\u5730\u5b9e\u73b0\u4e0d\u786e\u5b9a\u6027\u5f15\u5bfc\u7684\u63a2\u7d22[66]\u3002<\/p>\n<p>\u4e0d\u786e\u5b9a\u6027\u91cf\u5316&#xff08;Uncertainty Quantification&#xff09;\u662f\u6307\u5bfc\u63a2\u7d22\u7684\u5173\u952e\u3002\u5bf9\u4e8e\u795e\u7ecf\u7f51\u7edc\u7b56\u7565&#xff0c;\u5e38\u7528\u7684\u4e0d\u786e\u5b9a\u6027\u4f30\u8ba1\u65b9\u6cd5\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u96c6\u6210\u65b9\u6cd5&#xff1a;\u8bad\u7ec3\u591a\u4e2a\u7f51\u7edc&#xff0c;\u7528\u9884\u6d4b\u65b9\u5dee\u4f30\u8ba1\u4e0d\u786e\u5b9a\u6027<\/li>\n<li>Dropout\u91c7\u6837&#xff1a;\u5728\u63a8\u7406\u9636\u6bb5\u7684\u524d\u5411\u4f20\u64ad\u4e2d\u4fdd\u6301\u542f\u7528Dropout&#xff0c;\u591a\u6b21\u91c7\u6837\u83b7\u5f97\u9884\u6d4b\u5206\u5e03<\/li>\n<li>\u8d1d\u53f6\u65af\u795e\u7ecf\u7f51\u7edc&#xff1a;\u5c06\u6743\u91cd\u89c6\u4e3a\u968f\u673a\u53d8\u91cf&#xff0c;\u901a\u8fc7\u540e\u9a8c\u63a8\u65ad\u83b7\u5f97\u9884\u6d4b\u4e0d\u786e\u5b9a\u6027<\/li>\n<\/ul>\n<p>\u5728\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u4e0d\u786e\u5b9a\u6027\u9ad8\u7684\u6b65\u9aa4\u5e94\u83b7\u5f97\u66f4\u591a\u63a2\u7d22\u673a\u4f1a&#xff0c;\u4ee5\u9a8c\u8bc1\u5176\u771f\u5b9e\u6548\u679c[61]\u3002<\/p>\n<h4>7.1.3 \u4e50\u89c2\u9762\u5bf9\u4e0d\u786e\u5b9a\u6027\u7684\u539f\u5219<\/h4>\n<p>\u4e50\u89c2\u9762\u5bf9\u4e0d\u786e\u5b9a\u6027&#xff08;Optimism in the Face of 
Uncertainty&#xff09;\u662f\u8bbe\u8ba1\u9ad8\u6548\u63a2\u7d22\u7b56\u7565\u7684\u6838\u5fc3\u539f\u5219\u3002\u8be5\u539f\u5219\u4e3b\u5f20&#xff1a;\u5bf9\u4e8e\u4e0d\u786e\u5b9a\u6027\u9ad8\u7684\u9009\u9879&#xff0c;\u5e94\u4e50\u89c2\u5730\u5047\u8bbe\u5176\u53ef\u80fd\u5177\u6709\u9ad8\u6536\u76ca&#xff0c;\u4ece\u800c\u7ed9\u4e88\u63a2\u7d22\u673a\u4f1a[62]\u3002<\/p>\n<p>\u4e0a\u7f6e\u4fe1\u754c&#xff08;UCB&#xff09;\u7b97\u6cd5\u662f\u4e50\u89c2\u539f\u5219\u7684\u7ecf\u5178\u5b9e\u73b0\u3002UCB\u4e3a\u6bcf\u4e2a\u52a8\u4f5c\u7ef4\u62a4\u4e00\u4e2a\u7f6e\u4fe1\u533a\u95f4&#xff0c;\u9009\u62e9\u7f6e\u4fe1\u4e0a\u754c\u6700\u9ad8\u7684\u52a8\u4f5c&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">at&#061;arg\u2061max\u2061a[\u03bc^a&#043;cln\u2061tNa]a_t &#061; \\\\arg\\\\max_a \\\\left[ \\\\hat{\\\\mu}_a &#043; c \\\\sqrt{\\\\frac{\\\\ln t}{N_a}} \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3em;vertical-align: -1.25em\"><\/span><span 
class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.4em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">[<\/span><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">\u03bc<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.2222em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">c<\/span><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6016em\"><span class=\"svg-align\" style=\"top: -4.4em\"><span class=\"pstrut\" style=\"height: 4.4em\"><\/span><span class=\"mord\" style=\"padding-left: 1em\"><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3714em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.109em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 
3em\"><\/span><span class=\"mord\"><span class=\"mop\">ln<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.836em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.5616em\"><span class=\"pstrut\" style=\"height: 4.4em\"><\/span><span class=\"hide-tail\" style=\"min-width: 1.02em;height: 2.48em\"><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8384em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03bc^a\\\\hat{\\\\mu}_a<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">\u03bc<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.2222em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">aa<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">a<\/span><\/span><\/span><\/span><\/span> \u7684\u7ecf\u9a8c\u5e73\u5747\u5956\u52b1&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NaN_a<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.109em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> 
\u4e3a\u8be5\u52a8\u4f5c\u88ab\u9009\u62e9\u7684\u6b21\u6570&#xff0c;t \u4e3a\u5f53\u524d\u65f6\u95f4\u6b65&#xff0c;c \u4e3a\u8c03\u6574\u63a2\u7d22\u5f3a\u5ea6\u7684\u7cfb\u6570\u3002\u7b2c\u4e8c\u9879\u4e3a\u7f6e\u4fe1\u533a\u95f4\u5bbd\u5ea6&#xff0c;\u968f\u7740\u8be5\u52a8\u4f5c\u5c1d\u8bd5\u6b21\u6570\u589e\u52a0\u800c\u51cf\u5c0f\u3002\u8be5\u516c\u5f0f\u5b9e\u73b0\u4e86\u81ea\u52a8\u7684\u63a2\u7d22-\u5229\u7528\u5e73\u8861&#xff1a;\u5bf9\u5c1d\u8bd5\u5c11\u7684\u52a8\u4f5c&#xff08;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">NaN_a<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.109em\">N<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.109em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u5c0f&#xff09;&#xff0c;\u63a2\u7d22\u9879\u5360\u4e3b\u5bfc&#xff1b;\u5bf9\u5c1d\u8bd5\u591a\u7684\u52a8\u4f5c&#xff0c;\u5229\u7528\u9879\u5360\u4e3b\u5bfc[16]\u3002<\/p>\n<p>UCB\u7b97\u6cd5\u7684\u9057\u61be\u4e0a\u754c\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">O(KTln\u2061T)O(\\\\sqrt{KT \\\\ln T})<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1822em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">O<\/span><span class=\"mopen\">(<\/span><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9322em\"><span class=\"svg-align\" 
style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\" style=\"padding-left: 0.833em\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">ln<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">T<\/span><\/span><\/span><span class=\"\" style=\"top: -2.8922em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"hide-tail\" style=\"min-width: 0.853em;height: 1.08em\"><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1078em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff0c;\u4e0e \u03a9(\u221a(KT)) \u7684\u7406\u8bba\u4e0b\u754c\u4ec5\u76f8\u5dee\u5bf9\u6570\u56e0\u5b50\u3002\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;UCB\u53ef\u4ee5\u5e94\u7528\u4e8e\u5de5\u5177\u9009\u62e9\u3001\u63a8\u7406\u7b56\u7565\u9009\u62e9\u7b49\u591a\u4e2a\u5c42\u9762&#xff0c;\u5b9e\u73b0\u9ad8\u6548\u7684\u63a2\u7d22[124]\u3002<\/p>\n<h3>7.2 \u63a2\u7d22\u7b56\u7565\u7684\u8bbe\u8ba1\u4e0e\u5b9e\u73b0<\/h3>\n<h4>7.2.1 Epsilon-\u8d2a\u5a6a\u4e0e\u8870\u51cf\u7b56\u7565<\/h4>\n<p>Epsilon-\u8d2a\u5a6a&#xff08;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span>-Greedy&#xff09;\u662f\u6700\u7b80\u5355\u7684\u63a2\u7d22\u7b56\u7565&#xff0c;\u4ee5\u6982\u7387 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" 
style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span> \u968f\u673a\u63a2\u7d22&#xff0c;\u4ee5\u6982\u7387 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">1\u2212\u03f51-\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7278em;vertical-align: -0.0833em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span> \u9009\u62e9\u5f53\u524d\u6700\u4f18\u52a8\u4f5c\u3002\u5f62\u5f0f\u5316\u5730&#xff0c;\u7b56\u7565\u4e3a[56]&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0(a\u2223s)&#061;{1\u2212\u03f5&#043;\u03f5\u2223A\u2223if\u00a0a&#061;arg\u2061max\u2061a\u2032Q(s,a\u2032)\u03f5\u2223A\u2223otherwise\\\\pi(a|s) &#061; \\\\begin{cases} 1 - \\\\epsilon &#043; \\\\frac{\\\\epsilon}{|\\\\mathcal{A}|} &amp; \\\\text{if } a &#061; \\\\arg\\\\max_{a&#039;} Q(s, a&#039;) \\\\\\\\ \\\\frac{\\\\epsilon}{|\\\\mathcal{A}|} &amp; \\\\text{otherwise} \\\\end{cases}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span 
class=\"strut\" style=\"height: 3.056em;vertical-align: -1.278em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">{<\/span><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.778em\"><span class=\"\" style=\"top: -3.778em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6954em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathcal mtight\">A<\/span><span class=\"mord mtight\">\u2223<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.394em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">\u03f5<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.52em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6954em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2223<\/span><span class=\"mord mathcal mtight\">A<\/span><span class=\"mord mtight\">\u2223<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.394em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">\u03f5<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.52em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.278em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"arraycolsep\" style=\"width: 1em\"><\/span><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.778em\"><span class=\"\" style=\"top: -3.778em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">if\u00a0<\/span><\/span><span 
class=\"mord mathnormal\">a<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mop\">ar<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\"><span class=\"mop\">max<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.328em\"><span class=\"\" style=\"top: -2.55em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord\">otherwise<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.278em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span>-\u8d2a\u5a6a\u7684\u4f18\u70b9\u662f\u5b9e\u73b0\u7b80\u5355\u3001\u8ba1\u7b97\u5f00\u9500\u5c0f&#xff1b;\u7f3a\u70b9\u662f\u63a2\u7d22\u5b8c\u5168\u968f\u673a&#xff0c;\u53ef\u80fd\u5c1d\u8bd5\u660e\u663e\u52a3\u8d28\u7684\u52a8\u4f5c&#xff0c;\u6d6a\u8d39\u6837\u672c\u3002<\/p>\n<p>\u8870\u51cf<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span>-\u8d2a\u5a6a&#xff08;Decaying <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5\\\\epsilon<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord 
mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span>-Greedy&#xff09;\u968f\u7740\u65f6\u95f4\u9010\u6b65\u964d\u4f4e\u63a2\u7d22\u7387&#xff0c;\u5b9e\u73b0\u4ece\u63a2\u7d22\u5230\u5229\u7528\u7684\u8fc7\u6e21&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5t&#061;\u03f50\u22c5\u03b3t\\\\epsilon_t &#061; \\\\epsilon_0 \\\\cdot \\\\gamma^t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.5945em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.038em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8436em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u6216<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f5t&#061;\u03f501&#043;\u03b2t\\\\epsilon_t &#061; \\\\frac{\\\\epsilon_0}{1 &#043; \\\\beta t}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span 
class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.988em;vertical-align: -0.8804em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03b2t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8804em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span 
class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03f50\\\\epsilon_0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u521d\u59cb\u63a2\u7d22\u7387&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b3\u2208(0,1)\\\\gamma \\\\in (0,1)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7335em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u8870\u51cf\u56e0\u5b50&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b2\\\\beta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" 
style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u8870\u51cf\u901f\u5ea6\u3002\u8870\u51cf\u7b56\u7565\u7b26\u5408\u76f4\u89c9&#xff1a;\u521d\u671f\u9700\u8981\u5927\u91cf\u63a2\u7d22\u5efa\u7acb\u4ef7\u503c\u4f30\u8ba1&#xff0c;\u540e\u671f\u5e94\u66f4\u591a\u5229\u7528\u5df2\u77e5\u4fe1\u606f[42]\u3002<\/p>\n<h4>7.2.2 \u73bb\u5c14\u5179\u66fc\u63a2\u7d22\u4e0e\u71b5\u6b63\u5219\u5316<\/h4>\n<p>\u73bb\u5c14\u5179\u66fc\u63a2\u7d22&#xff08;Boltzmann Exploration&#xff09;\u6839\u636e\u52a8\u4f5c\u4ef7\u503c\u7684softmax\u5206\u5e03\u8fdb\u884c\u91c7\u6837&#xff0c;\u4ef7\u503c\u9ad8\u7684\u52a8\u4f5c\u88ab\u9009\u4e2d\u7684\u6982\u7387\u5927&#xff0c;\u4f46\u6240\u6709\u52a8\u4f5c\u90fd\u6709\u975e\u96f6\u6982\u7387&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c0(a\u2223s)&#061;exp\u2061(Q(s,a)\/\u03c4)\u2211a\u2032exp\u2061(Q(s,a\u2032)\/\u03c4)\\\\pi(a|s) &#061; \\\\frac{\\\\exp(Q(s,a)\/\\\\tau)}{\\\\sum_{a&#039;} \\\\exp(Q(s,a&#039;)\/\\\\tau)}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.4127em;vertical-align: -0.9857em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1783em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6828em\"><span class=\"\" style=\"top: -2.786em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">exp<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6779em\"><span class=\"\" style=\"top: -2.989em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\/<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9857em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4\\\\tau<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><\/span><\/span><\/span><\/span> \u4e3a\u6e29\u5ea6\u53c2\u6570\u3002\u9ad8\u6e29&#xff08;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4\u2192\u221e\\\\tau \\\\rightarrow \\\\infty<\/span><span class=\"katex-html\"><span class=\"base\"><span 
class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2192<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord\">\u221e<\/span><\/span><\/span><\/span><\/span>&#xff09;\u65f6\u5206\u5e03\u8d8b\u4e8e\u5747\u5300&#xff0c;\u5b8c\u5168\u63a2\u7d22&#xff1b;\u4f4e\u6e29&#xff08;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03c4\u21920\\\\tau \\\\rightarrow 0<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2192<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0<\/span><\/span><\/span><\/span><\/span>&#xff09;\u65f6\u5206\u5e03\u8d8b\u4e8e\u5c16\u9510&#xff0c;\u8d2a\u5a6a\u5229\u7528[66]\u3002<\/p>\n<p>\u71b5\u6b63\u5219\u5316&#xff08;Entropy Regularization&#xff09;\u5c06\u7b56\u7565\u71b5\u4f5c\u4e3a\u5956\u52b1\u7684\u4e00\u90e8\u5206&#xff0c;\u9f13\u52b1\u63a2\u7d22&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">J(\u03b8)&#061;E\u03c4\u223c\u03c0\u03b8[R(\u03c4)]&#043;\u03b1\u22c5H(\u03c0\u03b8(\u22c5\u2223s))J(\\\\theta) &#061; \\\\mathbb{E}_{\\\\tau \\\\sim \\\\pi_\\\\theta} [R(\\\\tau)] &#043; \\\\alpha \\\\cdot H(\\\\pi_\\\\theta(\\\\cdot|s))<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 
0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0059em;vertical-align: -0.2559em\"><\/span><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1514em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mrel mtight\">\u223c<\/span><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">[<\/span><span class=\"mord mathnormal\" style=\"margin-right: 
0.0077em\">R<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1132em\">\u03c4<\/span><span class=\"mclose\">)]<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.4445em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0813em\">H<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">))<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">H(\u03c0)&#061;\u2212\u2211a\u03c0(a)log\u2061\u03c0(a)H(\\\\pi) &#061; -\\\\sum_a \\\\pi(a) \\\\log 
\\\\pi(a)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0813em\">H<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.0497em;vertical-align: -0.2997em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.0017em\"><span class=\"\" style=\"top: -2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2997em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"mopen\">(<\/span><span class=\"mord 
mathnormal\">a<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b56\u7565\u71b5&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b1\\\\alpha<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u6b63\u5219\u5316\u5f3a\u5ea6\u3002Soft Actor-Critic&#xff08;SAC&#xff09;\u7b97\u6cd5\u57fa\u4e8e\u6700\u5927\u71b5\u6846\u67b6&#xff0c;\u5728\u673a\u5668\u4eba\u63a7\u5236\u3001\u6e38\u620f\u7b49\u4efb\u52a1\u4e2d\u8868\u73b0\u51fa\u8272[61]\u3002<\/p>\n<h4>7.2.3 \u57fa\u4e8e\u597d\u5947\u5fc3\u7684\u5185\u5728\u52a8\u673a<\/h4>\n<p>\u5916\u5728\u5956\u52b1&#xff08;Extrinsic Reward&#xff09;\u7a00\u758f\u65f6&#xff0c;\u5185\u5728\u52a8\u673a&#xff08;Intrinsic Motivation&#xff09;\u53ef\u4ee5\u9a71\u52a8\u63a2\u7d22\u3002\u597d\u5947\u5fc3&#xff08;Curiosity&#xff09;\u662f\u4e00\u79cd\u91cd\u8981\u7684\u5185\u5728\u52a8\u673a&#xff0c;\u9f13\u52b1Agent\u63a2\u7d22\u65b0\u5947\u3001\u4e0d\u786e\u5b9a\u7684\u72b6\u6001[62]\u3002<\/p>\n<p>\u5185\u5728\u597d\u5947\u5fc3\u6a21\u5757&#xff08;Intrinsic Curiosity Module, ICM&#xff09;\u901a\u8fc7\u9884\u6d4b\u4e0b\u4e00\u4e2a\u72b6\u6001\u7684\u8868\u5f81\u6765\u5ea6\u91cf\u597d\u5947\u5fc3\u3002\u8bbe\u5f53\u524d\u72b6\u6001\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">sts_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing 
reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff0c;\u6267\u884c\u52a8\u4f5c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ata_t<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u540e\u89c2\u6d4b\u5230 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">st&#043;1s_{t&#043;1}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6389em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span 
class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>\u3002ICM\u5b66\u4e60\u4e00\u4e2a\u524d\u5411\u6a21\u578b\u9884\u6d4b\u4e0b\u4e00\u72b6\u6001\u8868\u5f81&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\u03d5^(st&#043;1)&#061;f(\u03d5(st),at;\u03b8F)\\\\hat{\\\\phi}(s_{t&#043;1}) &#061; f(\\\\phi(s_t), a_t; \\\\theta_F)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.2079em;vertical-align: -0.25em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9579em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">\u03d5<\/span><\/span><span class=\"\" style=\"top: -3.2634em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1667em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord 
mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1076em\">f<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u03d5<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">;<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.1389em\">F<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5185\u5728\u5956\u52b1\u5b9a\u4e49\u4e3a\u9884\u6d4b\u8bef\u5dee&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">rtint&#061;\u2225\u03d5(st&#043;1)\u2212\u03d5^(st&#043;1)\u22252r_t^{int} &#061; \\\\|\\\\phi(s_{t&#043;1}) - \\\\hat{\\\\phi}(s_{t&#043;1})\\\\|^2<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.1217em;vertical-align: -0.247em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8747em\"><span class=\"\" style=\"top: -2.453em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><span class=\"\" style=\"top: 
-3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">in<\/span><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.247em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\">\u2225<\/span><span class=\"mord mathnormal\">\u03d5<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.2079em;vertical-align: -0.25em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.9579em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">\u03d5<\/span><\/span><span class=\"\" style=\"top: -3.2634em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1667em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord\">\u2225<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord 
mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u9884\u6d4b\u8bef\u5dee\u5927\u7684\u72b6\u6001\u88ab\u8ba4\u4e3a\u662f&#034;\u65b0\u5947\u7684&#034;&#xff0c;Agent\u88ab\u6fc0\u52b1\u53bb\u63a2\u7d22\u3002ICM\u907f\u514d\u4e86\u5728\u539f\u59cb\u89c2\u6d4b\u7a7a\u95f4\u9884\u6d4b&#xff0c;\u800c\u662f\u5728\u5b66\u4e60\u5230\u7684\u8868\u5f81\u7a7a\u95f4\u9884\u6d4b&#xff0c;\u8fc7\u6ee4\u4e86\u4e0e\u51b3\u7b56\u65e0\u5173\u7684\u566a\u58f0[66]\u3002<\/p>\n<p>\u5728\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u597d\u5947\u5fc3\u53ef\u4ee5\u5f15\u5bfcAgent\u5c1d\u8bd5\u65b0\u9896\u7684\u64cd\u4f5c\u7ec4\u5408&#xff0c;\u53d1\u73b0\u975e\u4f20\u7edf\u7684\u89e3\u51b3\u65b9\u6848\u3002\u7814\u7a76\u8868\u660e&#xff0c;\u7ed3\u5408\u5185\u5728\u5956\u52b1\u7684\u8bad\u7ec3\u80fd\u591f\u53d1\u73b0\u66f4\u9c81\u68d2\u3001\u66f4\u901a\u7528\u7684SOP[16]\u3002<\/p>\n<h3>7.3 \u7b56\u7565\u4f18\u5316\u7684\u7a33\u5b9a\u6027\u4e0e\u6536\u655b\u6027<\/h3>\n<h4>7.3.1 \u7b56\u7565\u66f4\u65b0\u7684\u6b65\u957f\u63a7\u5236<\/h4>\n<p>\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u4e2d&#xff0c;\u6b65\u957f&#xff08;Step Size&#xff09;\u7684\u9009\u62e9\u81f3\u5173\u91cd\u8981\u3002\u6b65\u957f\u8fc7\u5927\u5bfc\u81f4\u7b56\u7565\u5267\u70c8\u53d8\u5316&#xff0c;\u53ef\u80fd\u5d29\u6e83\u5230\u6b21\u4f18\u89e3&#xff1b;\u6b65\u957f\u8fc7\u5c0f\u5219\u6536\u655b\u7f13\u6162&#xff0c;\u4e14\u5bb9\u6613\u9677\u5165\u5c40\u90e8\u6700\u4f18[124]\u3002<\/p>\n<p>\u4fe1\u4efb\u57df\u65b9\u6cd5\u901a\u8fc7\u7ea6\u675f\u7b56\u7565\u66f4\u65b0\u7684\u5e45\u5ea6\u6765\u4fdd\u8bc1\u7a33\u5b9a\u6027\u3002TRPO\u4f7f\u7528KL\u6563\u5ea6\u7ea6\u675f&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">max\u2061\u03b8E[\u03c0\u03b8(a\u2223s)\u03c0\u03b8old(a\u2223s)A\u03c0\u03b8old(s,a)]\\\\max_\\\\theta \\\\mathbb{E} \\\\left[ 
\\\\frac{\\\\pi_\\\\theta(a|s)}{\\\\pi_{\\\\theta_{old}}(a|s)} A^{\\\\pi_{\\\\theta_{old}}}(s,a) \\\\right]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 2.4em;vertical-align: -0.95em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4306em\"><span class=\"\" style=\"top: -2.3479em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"\"><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7521em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathbb\">E<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">[<\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9419em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">A<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0359em;margin-right: 0.0714em\"><span class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3448em;margin-left: -0.0278em;margin-right: 0.1em\"><span class=\"pstrut\" style=\"height: 2.6944em\"><\/span><span class=\"mord mtight\"><span 
class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3496em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.401em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size3\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">s.t.\u00a0E[DKL(\u03c0\u03b8old(\u22c5\u2223s)\u2225\u03c0\u03b8(\u22c5\u2223s))]\u2264\u03b4\\\\text{s.t. 
} \\\\mathbb{E}[D_{KL}(\\\\pi_{\\\\theta_{old}}(\\\\cdot|s) \\\\| \\\\pi_\\\\theta(\\\\cdot|s))] \\\\leq \\\\delta<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0059em;vertical-align: -0.2559em\"><\/span><span class=\"mord text\"><span class=\"mord\">s.t.\u00a0<\/span><\/span><span class=\"mord mathbb\">E<\/span><span class=\"mopen\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">D<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3283em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">K<\/span><span class=\"mord mathnormal mtight\">L<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3448em\"><span class=\"\" style=\"top: -2.3488em;margin-left: -0.0278em;margin-right: 0.0714em\"><span 
class=\"pstrut\" style=\"height: 2.5em\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0197em\">l<\/span><span class=\"mord mathnormal mtight\">d<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1512em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2559em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">)<\/span><span class=\"mord\">\u2225<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03c0<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">\u22c5<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mclose\">))]<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2264<\/span><span class=\"mspace\" style=\"margin-right: 
0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u8be5\u7ea6\u675f\u786e\u4fdd\u65b0\u7b56\u7565\u4e0e\u65e7\u7b56\u7565\u8db3\u591f\u63a5\u8fd1&#xff0c;\u907f\u514d\u7b56\u7565\u6027\u80fd\u7684\u707e\u96be\u6027\u5d29\u6e83&#xff08;Catastrophic Performance Collapse&#xff09;[56]\u3002<\/p>\n<p>PPO\u901a\u8fc7\u88c1\u526a\u76ee\u6807\u51fd\u6570\u8fd1\u4f3c\u5b9e\u73b0\u4fe1\u4efb\u57df\u7ea6\u675f&#xff0c;\u907f\u514d\u4e86\u6602\u8d35\u7684\u7ea6\u675f\u4f18\u5316\u3002\u88c1\u526a\u673a\u5236\u81ea\u52a8\u9650\u5236\u6982\u7387\u6bd4\u7684\u8303\u56f4&#xff0c;\u5f53\u6982\u7387\u6bd4\u8d85\u51fa <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">[1\u2212\u03f5,1&#043;\u03f5][1-\\\\epsilon, 1&#043;\\\\epsilon]<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">[<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8389em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><span class=\"mclose\">]<\/span><\/span><\/span><\/span><\/span> 
\u533a\u95f4\u65f6&#xff0c;\u68af\u5ea6\u4e3a\u96f6&#xff0c;\u963b\u6b62\u8fdb\u4e00\u6b65\u66f4\u65b0[16]\u3002<\/p>\n<h4>7.3.2 \u7ecf\u9a8c\u56de\u653e\u4e0e\u6837\u672c\u6548\u7387<\/h4>\n<p>\u7ecf\u9a8c\u56de\u653e&#xff08;Experience Replay&#xff09;\u662f\u63d0\u5347\u6837\u672c\u6548\u7387\u7684\u5173\u952e\u6280\u672f\u3002\u5b83\u5c06Agent\u7684\u7ecf\u9a8c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">(s,a,r,s\u2032)(s, a, r, s&#039;)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1.0019em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\">a<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">r<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7519em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span> \u5b58\u50a8\u5728\u56de\u653e\u7f13\u51b2\u533a&#xff08;Replay 
Buffer&#xff09;\u4e2d&#xff0c;\u8bad\u7ec3\u65f6\u968f\u673a\u91c7\u6837\u5c0f\u6279\u91cf\u6570\u636e\u3002\u8fd9\u79cd\u65b9\u6cd5\u6253\u7834\u4e86\u6837\u672c\u95f4\u7684\u65f6\u95f4\u76f8\u5173\u6027&#xff0c;\u63d0\u9ad8\u4e86\u6570\u636e\u5229\u7528\u6548\u7387[42]\u3002<\/p>\n<p>\u4f18\u5148\u7ecf\u9a8c\u56de\u653e&#xff08;Prioritized Experience Replay&#xff09;\u6839\u636eTD\u8bef\u5dee\u4e3a\u7ecf\u9a8c\u5206\u914d\u4f18\u5148\u7ea7&#xff0c;\u4f18\u5148\u91c7\u6837\u5b66\u4e60\u4ef7\u503c\u9ad8\u7684\u7ecf\u9a8c\u3002\u91c7\u6837\u6982\u7387\u4e3a&#xff1a;<\/p>\n<p><span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">P(i)&#061;\u2223\u03b4i\u2223\u03b1\u2211j\u2223\u03b4j\u2223\u03b1P(i) &#061; \\\\frac{|\\\\delta_i|^\\\\alpha}{\\\\sum_j |\\\\delta_j|^\\\\alpha}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.1389em\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">i<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.5488em;vertical-align: -1.1218em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.427em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position: relative;top: 0em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.162em\"><span class=\"\" style=\"top: 
-2.4003em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.4358em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0572em\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.5904em\"><span class=\"\" style=\"top: -2.989em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 
3em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord\">\u2223<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6644em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1218em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>\u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b4i\\\\delta_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0379em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u4e3a\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">ii<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6595em\"><\/span><span class=\"mord mathnormal\">i<\/span><\/span><\/span><\/span><\/span> \u6761\u7ecf\u9a8c\u7684TD\u8bef\u5dee&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\u03b1\\\\alpha<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span> \u63a7\u5236\u4f18\u5148\u7ea7\u7a0b\u5ea6\u3002\u4f18\u5148\u56de\u653e\u4f7f\u5f97Agent\u66f4\u5173\u6ce8&#034;\u4ee4\u4eba\u60ca\u8bb6&#034;\u7684\u7ecf\u9a8c&#xff0c;\u52a0\u901f\u5b66\u4e60[66]\u3002<\/p>\n<p>\u5728\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u7ecf\u9a8c\u56de\u653e\u8fd8\u53ef\u4ee5\u5b9e\u73b0\u8de8\u4efb\u52a1\u5b66\u4e60\u3002\u4e0d\u540c\u4efb\u52a1\u7684\u7ecf\u9a8c\u5b58\u50a8\u5728\u540c\u4e00\u7f13\u51b2\u533a&#xff0c;Agent\u53ef\u4ee5\u4ece\u591a\u6837\u5316\u7684\u7ecf\u9a8c\u4e2d\u5b66\u4e60\u901a\u7528\u7684\u64cd\u4f5c\u6a21\u5f0f&#xff0c;\u63d0\u5347\u6cdb\u5316\u80fd\u529b[61]\u3002<\/p>\n<h4>7.3.3 \u591a\u8f6e\u8fed\u4ee3\u4e2d\u7684\u7b56\u7565\u9000\u5316\u95ee\u9898<\/h4>\n<p>\u591a\u8f6e\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u4e2d&#xff0c;\u7b56\u7565\u9000\u5316&#xff08;Policy 
Degradation&#xff09;\u662f\u5e38\u89c1\u95ee\u9898\u3002Agent\u53ef\u80fd\u5728\u8bad\u7ec3\u521d\u671f\u8868\u73b0\u826f\u597d&#xff0c;\u4f46\u968f\u7740\u8bad\u7ec3\u8fdb\u884c&#xff0c;\u7b56\u7565\u8d28\u91cf\u53cd\u800c\u4e0b\u964d\u3002\u8fd9\u79cd\u73b0\u8c61\u88ab\u79f0\u4e3a&#034;\u56de\u58f0\u9677\u9631&#034;&#xff08;Echo Trap&#xff09;[16]\u3002<\/p>\n<p>\u56de\u58f0\u9677\u9631\u7684\u4ea7\u751f\u673a\u5236\u662f&#xff1a;Agent\u5728\u65e9\u671f\u63a2\u7d22\u4e2d\u53d1\u73b0\u4e86\u67d0\u4e9b\u9ad8\u5956\u52b1\u8f68\u8ff9&#xff0c;\u968f\u7740\u8bad\u7ec3\u8fdb\u884c&#xff0c;\u7b56\u7565\u8fc7\u5ea6\u62df\u5408\u8fd9\u4e9b\u8f68\u8ff9&#xff0c;\u5931\u53bb\u4e86\u591a\u6837\u6027\u3002\u5f53\u8fd9\u4e9b\u8f68\u8ff9\u5b9e\u9645\u4e0a\u662f\u6b21\u4f18\u7684\u6216\u4f9d\u8d56\u4e8e\u7279\u5b9a\u6761\u4ef6\u65f6&#xff0c;\u7b56\u7565\u6027\u80fd\u5c31\u4f1a\u4e0b\u964d\u3002<\/p>\n<p>\u9632\u6b62\u7b56\u7565\u9000\u5316\u7684\u7b56\u7565\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u65e9\u505c&#xff08;Early Stopping&#xff09;&#xff1a;\u76d1\u63a7\u9a8c\u8bc1\u96c6\u6027\u80fd&#xff0c;\u5f53\u6027\u80fd\u4e0d\u518d\u63d0\u5347\u65f6\u505c\u6b62\u8bad\u7ec3<\/li>\n<li>\u53c2\u8003\u7b56\u7565\u7ea6\u675f&#xff08;Reference Policy Constraint&#xff09;&#xff1a;\u901a\u8fc7KL\u6563\u5ea6\u7ea6\u675f\u65b0\u7b56\u7565\u4e0e\u521d\u59cb\u53c2\u8003\u7b56\u7565\u7684\u8ddd\u79bb&#xff0c;\u9632\u6b62\u504f\u79bb\u592a\u8fdc<\/li>\n<li>\u591a\u6837\u6027\u5956\u52b1&#xff08;Diversity Reward&#xff09;&#xff1a;\u5956\u52b1\u751f\u6210\u591a\u6837\u5316\u8f68\u8ff9\u7684\u884c\u4e3a&#xff0c;\u9f13\u52b1\u63a2\u7d22<\/li>\n<li>\u96c6\u6210\u7b56\u7565&#xff08;Ensemble 
Policy&#xff09;&#xff1a;\u7ef4\u62a4\u591a\u4e2a\u7b56\u7565&#xff0c;\u901a\u8fc7\u6295\u7968\u6216\u5e73\u5747\u964d\u4f4e\u5355\u4e00\u7b56\u7565\u9000\u5316\u7684\u98ce\u9669<\/li>\n<\/ul>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u7b56\u7565\u9000\u5316\u8868\u73b0\u4e3a\u5de5\u4f5c\u6d41\u591a\u6837\u6027\u7684\u4e27\u5931\u2014\u2014Agent\u53cd\u590d\u4f7f\u7528\u76f8\u540c\u7684\u64cd\u4f5c\u5e8f\u5217&#xff0c;\u5373\u4f7f\u9762\u5bf9\u4e0d\u540c\u7c7b\u578b\u7684\u4efb\u52a1\u3002\u4fdd\u6301\u7b56\u7565\u591a\u6837\u6027\u5bf9\u4e8e\u53d1\u73b0\u9c81\u68d2\u7684SOP\u81f3\u5173\u91cd\u8981[62]\u3002<\/p>\n<p>#mermaid-svg-9SiUdYMNH32xXSk3{font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;font-size:16px;fill:#333;}@keyframes edge-animation-frame{from{stroke-dashoffset:0;}}@keyframes dash{to{stroke-dashoffset:0;}}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-animation-slow{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 50s linear infinite;stroke-linecap:round;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-animation-fast{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 20s linear infinite;stroke-linecap:round;}#mermaid-svg-9SiUdYMNH32xXSk3 .error-icon{fill:#552222;}#mermaid-svg-9SiUdYMNH32xXSk3 .error-text{fill:#552222;stroke:#552222;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-thickness-normal{stroke-width:1px;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-thickness-thick{stroke-width:3.5px;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-pattern-solid{stroke-dasharray:0;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-thickness-invisible{stroke-width:0;fill:none;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-pattern-dashed{stroke-dasharray:3;}#mermaid-svg-9SiUdYMNH32xXSk3 .edge-pattern-dotted{stroke-dasharray:2;}#mermaid-svg-9SiUdYMNH32xXSk3 .marker{fill:#333333;stroke:#333333;}#mermaid-svg-9SiUdYMNH32xXSk3 .marker.cross{stroke:#333333;}#mermaid-svg-9SiUdYMNH32xXSk3 svg{font-family:\\&#8221;trebuchet 
ms\\&#8221;,verdana,arial,sans-serif;font-size:16px;}#mermaid-svg-9SiUdYMNH32xXSk3 p{margin:0;}#mermaid-svg-9SiUdYMNH32xXSk3 .label{font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;color:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster-label text{fill:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster-label span{color:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster-label span p{background-color:transparent;}#mermaid-svg-9SiUdYMNH32xXSk3 .label text,#mermaid-svg-9SiUdYMNH32xXSk3 span{fill:#333;color:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 .node rect,#mermaid-svg-9SiUdYMNH32xXSk3 .node circle,#mermaid-svg-9SiUdYMNH32xXSk3 .node ellipse,#mermaid-svg-9SiUdYMNH32xXSk3 .node polygon,#mermaid-svg-9SiUdYMNH32xXSk3 .node path{fill:#ECECFF;stroke:#9370DB;stroke-width:1px;}#mermaid-svg-9SiUdYMNH32xXSk3 .rough-node .label text,#mermaid-svg-9SiUdYMNH32xXSk3 .node .label text,#mermaid-svg-9SiUdYMNH32xXSk3 .image-shape .label,#mermaid-svg-9SiUdYMNH32xXSk3 .icon-shape .label{text-anchor:middle;}#mermaid-svg-9SiUdYMNH32xXSk3 .node .katex path{fill:#000;stroke:#000;stroke-width:1px;}#mermaid-svg-9SiUdYMNH32xXSk3 .rough-node .label,#mermaid-svg-9SiUdYMNH32xXSk3 .node .label,#mermaid-svg-9SiUdYMNH32xXSk3 .image-shape .label,#mermaid-svg-9SiUdYMNH32xXSk3 .icon-shape .label{text-align:center;}#mermaid-svg-9SiUdYMNH32xXSk3 .node.clickable{cursor:pointer;}#mermaid-svg-9SiUdYMNH32xXSk3 .root .anchor path{fill:#333333!important;stroke-width:0;stroke:#333333;}#mermaid-svg-9SiUdYMNH32xXSk3 .arrowheadPath{fill:#333333;}#mermaid-svg-9SiUdYMNH32xXSk3 .edgePath .path{stroke:#333333;stroke-width:2.0px;}#mermaid-svg-9SiUdYMNH32xXSk3 .flowchart-link{stroke:#333333;fill:none;}#mermaid-svg-9SiUdYMNH32xXSk3 .edgeLabel{background-color:rgba(232,232,232, 0.8);text-align:center;}#mermaid-svg-9SiUdYMNH32xXSk3 .edgeLabel p{background-color:rgba(232,232,232, 0.8);}#mermaid-svg-9SiUdYMNH32xXSk3 .edgeLabel rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 
0.8);}#mermaid-svg-9SiUdYMNH32xXSk3 .labelBkg{background-color:rgba(232, 232, 232, 0.5);}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster rect{fill:#ffffde;stroke:#aaaa33;stroke-width:1px;}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster text{fill:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 .cluster span{color:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;font-size:12px;background:hsl(80, 100%, 96.2745098039%);border:1px solid #aaaa33;border-radius:2px;pointer-events:none;z-index:100;}#mermaid-svg-9SiUdYMNH32xXSk3 .flowchartTitleText{text-anchor:middle;font-size:18px;fill:#333;}#mermaid-svg-9SiUdYMNH32xXSk3 rect.text{fill:none;stroke-width:0;}#mermaid-svg-9SiUdYMNH32xXSk3 .icon-shape,#mermaid-svg-9SiUdYMNH32xXSk3 .image-shape{background-color:rgba(232,232,232, 0.8);text-align:center;}#mermaid-svg-9SiUdYMNH32xXSk3 .icon-shape p,#mermaid-svg-9SiUdYMNH32xXSk3 .image-shape p{background-color:rgba(232,232,232, 0.8);padding:2px;}#mermaid-svg-9SiUdYMNH32xXSk3 .icon-shape rect,#mermaid-svg-9SiUdYMNH32xXSk3 .image-shape rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#mermaid-svg-9SiUdYMNH32xXSk3 .label-icon{display:inline-block;height:1em;overflow:visible;vertical-align:-0.125em;}#mermaid-svg-9SiUdYMNH32xXSk3 .node .label-icon path{fill:currentColor;stroke:revert;stroke-width:revert;}#mermaid-svg-9SiUdYMNH32xXSk3 :root{&#8211;mermaid-font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;}<span class=\"nodeLabel\"><\/p>\n<p>\u7a33\u5b9a\u6027\u4fdd\u969c<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7b56\u7565\u4f18\u5316<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u4fe1\u4efb\u57df\u7ea6\u675f<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u7ecf\u9a8c\u56de\u653e<\/p>\n<p><\/span><span 
class=\"nodeLabel\"><\/p>\n<p>\u65e9\u505c\u673a\u5236<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u63a2\u7d22-\u5229\u7528\u7b56\u7565<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u63a2\u7d22\u7b56\u7565<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>Epsilon-\u8d2a\u5a6a<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>UCB<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u73bb\u5c14\u5179\u66fc<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u597d\u5947\u5fc3\u9a71\u52a8<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u8870\u51cfepsilon<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u4e50\u89c2\u539f\u5219<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6e29\u5ea6\u8c03\u8282<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u9884\u6d4b\u8bef\u5dee<\/p>\n<p><\/span><\/p>\n<h2>8 \u672a\u6765\u5c55\u671b\u4e0e\u6311\u6218<\/h2>\n<h3>8.1 \u5f53\u524d\u7814\u7a76\u7684\u5c40\u9650\u6027<\/h3>\n<h4>8.1.1 
\u6837\u672c\u6548\u7387\u4e0e\u8ba1\u7b97\u6210\u672c<\/h4>\n<p>\u5c3d\u7ba1\u5f3a\u5316\u5b66\u4e60\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d\u5c55\u73b0\u51fa\u5de8\u5927\u6f5c\u529b&#xff0c;\u4f46\u6837\u672c\u6548\u7387\u4f4e\u4e0b\u4ecd\u662f\u5236\u7ea6\u5176\u5b9e\u9645\u5e94\u7528\u7684\u4e3b\u8981\u74f6\u9888\u3002\u8bad\u7ec3\u4e00\u4e2a\u80fd\u591f\u5904\u7406\u590d\u6742\u4efb\u52a1\u7684Agent\u5f80\u5f80\u9700\u8981\u6570\u767e\u4e07\u751a\u81f3\u6570\u5343\u4e07\u6b21\u4ea4\u4e92&#xff0c;\u8fd9\u5728\u771f\u5b9e\u73af\u5883\u4e2d\u662f\u4e0d\u53ef\u63a5\u53d7\u7684[137]\u3002<\/p>\n<p>\u8ba1\u7b97\u6210\u672c\u540c\u6837\u662f\u4e00\u4e2a\u4e25\u5cfb\u6311\u6218\u3002\u5927\u578b\u8bed\u8a00\u6a21\u578b\u4f5c\u4e3aAgent\u7684\u63a8\u7406\u5f15\u64ce&#xff0c;\u6bcf\u6b21\u524d\u5411\u4f20\u64ad\u90fd\u9700\u8981\u5927\u91cf\u8ba1\u7b97\u8d44\u6e90\u3002\u7ed3\u5408\u5f3a\u5316\u5b66\u4e60\u7684\u591a\u8f6e\u8fed\u4ee3\u8bad\u7ec3&#xff0c;\u603b\u4f53\u8ba1\u7b97\u6210\u672c\u53ef\u80fd\u8fbe\u5230\u6570\u767e\u4e07\u7f8e\u5143\u3002\u8fd9\u79cd\u9ad8\u6210\u672c\u9650\u5236\u4e86\u7814\u7a76\u7684\u666e\u53ca\u548c\u6280\u672f\u7684\u6c11\u4e3b\u5316&#xff08;Democratization&#xff09;[16]\u3002<\/p>\n<p>\u63d0\u5347\u6837\u672c\u6548\u7387\u7684\u53ef\u80fd\u65b9\u5411\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u57fa\u4e8e\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60&#xff08;Model-based RL&#xff09;&#xff1a;\u5b66\u4e60\u73af\u5883\u6a21\u578b&#xff0c;\u901a\u8fc7\u89c4\u5212\u51cf\u5c11\u771f\u5b9e\u4ea4\u4e92\u9700\u6c42<\/li>\n<li>\u8fc1\u79fb\u5b66\u4e60&#xff1a;\u5c06\u5728\u76f8\u4f3c\u4efb\u52a1\u4e0a\u5b66\u5230\u7684\u77e5\u8bc6\u8fc1\u79fb\u5230\u65b0\u4efb\u52a1<\/li>\n<li>\u5143\u5b66\u4e60&#xff1a;\u5b66\u4e60\u5982\u4f55\u5feb\u901f\u5b66\u4e60&#xff0c;\u4f7fAgent\u80fd\u591f\u8fc5\u901f\u9002\u5e94\u65b0\u73af\u5883<\/li>\n<li>\u8bfe\u7a0b\u5b66\u4e60&#xff1a;\u4ece\u7b80\u5355\u4efb\u52a1\u5f00\u59cb&#xff0c;\u9010\u6b65\u589e\u52a0\u96be\u5ea6&#xff0c;\u52a0\u901f\u5b66\u4e60\u8fdb\u7a0b<\/li>\n<\/ul>\n<h4>8.1.2 
\u5956\u52b1\u8bbe\u8ba1\u7684\u5de5\u7a0b\u4f9d\u8d56<\/h4>\n<p>\u5f3a\u5316\u5b66\u4e60\u7684\u6027\u80fd\u5f88\u5927\u7a0b\u5ea6\u4e0a\u4f9d\u8d56\u4e8e\u5956\u52b1\u51fd\u6570\u7684\u8bbe\u8ba1\u3002\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u5b9a\u4e49\u4e00\u4e2a\u65e2\u80fd\u51c6\u786e\u53cd\u6620\u4efb\u52a1\u76ee\u6807\u3001\u53c8\u80fd\u5f15\u5bfc\u6709\u6548\u5b66\u4e60\u7684\u5956\u52b1\u51fd\u6570\u6781\u5177\u6311\u6218\u6027[124]\u3002<\/p>\n<p>\u5956\u52b1\u8bbe\u8ba1\u9762\u4e34\u7684\u4e3b\u8981\u56f0\u96be\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u7a00\u758f\u5956\u52b1&#xff1a;\u5927\u591a\u6570\u4e2d\u95f4\u6b65\u9aa4\u6ca1\u6709\u660e\u786e\u7684\u53cd\u9988&#xff0c;\u5bfc\u81f4\u5b66\u4e60\u4fe1\u53f7\u5f31<\/li>\n<li>\u5956\u52b1\u4f5c\u5f0a&#xff08;Reward Hacking&#xff09;&#xff1a;Agent\u53ef\u80fd\u627e\u5230\u5956\u52b1\u51fd\u6570\u7684\u6f0f\u6d1e&#xff0c;\u4ee5\u975e\u9884\u671f\u65b9\u5f0f\u83b7\u5f97\u9ad8\u5956\u52b1<\/li>\n<li>\u591a\u76ee\u6807\u6743\u8861&#xff1a;\u5b9e\u9645\u4efb\u52a1\u5f80\u5f80\u6d89\u53ca\u591a\u4e2a\u76ee\u6807&#xff0c;\u5982\u4f55\u5e73\u8861\u8fd9\u4e9b\u76ee\u6807\u7f3a\u4e4f\u7edf\u4e00\u6807\u51c6<\/li>\n<li>\u9886\u57df\u7279\u5f02\u6027&#xff1a;\u4e0d\u540c\u4efb\u52a1\u9700\u8981\u4e0d\u540c\u7684\u5956\u52b1\u8bbe\u8ba1&#xff0c;\u96be\u4ee5\u901a\u7528\u5316<\/li>\n<\/ul>\n<p>\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b&#xff08;PRM&#xff09;\u4e3a\u81ea\u52a8\u5956\u52b1\u5b66\u4e60\u63d0\u4f9b\u4e86\u65b9\u5411&#xff0c;\u4f46PRM\u672c\u8eab\u7684\u8bad\u7ec3\u4ecd\u9700\u8981\u5927\u91cf\u6807\u6ce8\u6570\u636e\u3002\u5f00\u53d1\u80fd\u591f\u4ece\u5c11\u91cf\u793a\u8303\u6216\u81ea\u7136\u8bed\u8a00\u6307\u4ee4\u4e2d\u81ea\u52a8\u63a8\u65ad\u5956\u52b1\u7684\u65b9\u6cd5&#xff0c;\u662f\u964d\u4f4e\u5de5\u7a0b\u4f9d\u8d56\u7684\u5173\u952e[56]\u3002<\/p>\n<h4>8.1.3 
\u6cdb\u5316\u80fd\u529b\u4e0e\u5206\u5e03\u5916\u573a\u666f<\/h4>\n<p>\u5f53\u524dAgent\u5de5\u4f5c\u6d41\u4f18\u5316\u65b9\u6cd5\u5728\u8bad\u7ec3\u5206\u5e03\u5185\u8868\u73b0\u826f\u597d&#xff0c;\u4f46\u9762\u5bf9\u5206\u5e03\u5916&#xff08;Out-of-Distribution, OOD&#xff09;\u573a\u666f\u65f6\u5f80\u5f80\u5931\u6548\u3002\u8fd9\u79cd\u6cdb\u5316\u80fd\u529b\u7684\u7f3a\u5931\u9650\u5236\u4e86Agent\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u7684\u53ef\u9760\u6027[141]\u3002<\/p>\n<p>\u6cdb\u5316\u80fd\u529b\u4e0d\u8db3\u7684\u539f\u56e0\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u8fc7\u62df\u5408\u8bad\u7ec3\u4efb\u52a1&#xff1a;Agent\u53ef\u80fd\u8bb0\u4f4f\u4e86\u8bad\u7ec3\u4efb\u52a1\u7684\u7279\u5b9a\u6a21\u5f0f&#xff0c;\u800c\u975e\u5b66\u4e60\u901a\u7528\u539f\u7406<\/li>\n<li>\u7f3a\u4e4f\u7ec4\u5408\u6cdb\u5316&#xff1a;\u9762\u5bf9\u8bad\u7ec3\u65f6\u672a\u89c1\u8fc7\u7684\u64cd\u4f5c\u7ec4\u5408&#xff0c;Agent\u96be\u4ee5\u6709\u6548\u5e94\u5bf9<\/li>\n<li>\u5bf9\u566a\u58f0\u654f\u611f&#xff1a;\u771f\u5b9e\u73af\u5883\u4e2d\u7684\u89c2\u6d4b\u566a\u58f0\u3001\u5de5\u5177\u6545\u969c\u7b49\u53ef\u80fd\u5bfc\u81f4Agent\u884c\u4e3a\u5f02\u5e38<\/li>\n<\/ul>\n<p>\u63d0\u5347\u6cdb\u5316\u80fd\u529b\u9700\u8981&#xff1a;\u66f4\u4e30\u5bcc\u7684\u8bad\u7ec3\u6570\u636e\u3001\u66f4\u5f3a\u7684\u6b63\u5219\u5316\u6280\u672f\u3001\u663e\u5f0f\u7684\u56e0\u679c\u63a8\u7406\u80fd\u529b&#xff0c;\u4ee5\u53ca\u66f4\u9c81\u68d2\u7684\u7b56\u7565\u8868\u793a[42]\u3002<\/p>\n<h3>8.2 \u524d\u6cbf\u7814\u7a76\u65b9\u5411<\/h3>\n<h4>8.2.1 \u4e16\u754c\u6a21\u578b\u4e0e\u6a21\u578b\u9884\u6d4b\u63a7\u5236<\/h4>\n<p>\u4e16\u754c\u6a21\u578b&#xff08;World 
Model&#xff09;\u662f\u5f3a\u5316\u5b66\u4e60\u7684\u524d\u6cbf\u65b9\u5411&#xff0c;\u5b83\u4f7fAgent\u80fd\u591f\u5b66\u4e60\u73af\u5883\u52a8\u6001&#xff0c;\u5728\u5185\u90e8\u6a21\u62df\u4e2d\u8fdb\u884c\u89c4\u5212\u548c\u51b3\u7b56\u3002\u4e0e\u5b66\u4e60\u5230\u7684\u7b56\u7565\u76f8\u6bd4&#xff0c;\u57fa\u4e8e\u4e16\u754c\u6a21\u578b\u7684\u89c4\u5212\u5177\u6709\u66f4\u597d\u7684\u6cdb\u5316\u80fd\u529b\u548c\u53ef\u89e3\u91ca\u6027[66]\u3002<\/p>\n<p>\u4e16\u754c\u6a21\u578b\u7684\u6838\u5fc3\u7ec4\u4ef6\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u72b6\u6001\u8868\u5f81\u6a21\u578b&#xff1a;\u5c06\u9ad8\u7ef4\u89c2\u6d4b\u538b\u7f29\u4e3a\u7d27\u51d1\u7684\u6f5c\u5728\u72b6\u6001<\/li>\n<li>\u8f6c\u79fb\u6a21\u578b&#xff1a;\u9884\u6d4b\u7ed9\u5b9a\u52a8\u4f5c\u540e\u7684\u4e0b\u4e00\u72b6\u6001<\/li>\n<li>\u5956\u52b1\u6a21\u578b&#xff1a;\u9884\u6d4b\u72b6\u6001-\u52a8\u4f5c\u5bf9\u7684\u5373\u65f6\u5956\u52b1<\/li>\n<\/ul>\n<p>\u6709\u4e86\u4e16\u754c\u6a21\u578b&#xff0c;Agent\u53ef\u4ee5\u4f7f\u7528\u6a21\u578b\u9884\u6d4b\u63a7\u5236&#xff08;Model Predictive Control, 
MPC&#xff09;\u8fdb\u884c\u89c4\u5212\u3002MPC\u901a\u8fc7\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22&#xff08;MCTS&#xff09;\u6216\u4ea4\u53c9\u71b5\u65b9\u6cd5&#xff08;CEM&#xff09;\u5728\u6a21\u578b\u4e2d\u6a21\u62df\u591a\u6761\u8f68\u8ff9&#xff0c;\u9009\u62e9\u6700\u4f18\u52a8\u4f5c\u3002\u8fd9\u79cd\u65b9\u6cd5\u5c06\u5b66\u4e60\u4e0e\u89c4\u5212\u5206\u79bb&#xff0c;\u7b56\u7565\u53ef\u4ee5\u66f4\u7075\u6d3b\u5730\u9002\u5e94\u65b0\u76ee\u6807[61]\u3002<\/p>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u573a\u666f\u4e2d&#xff0c;\u4e16\u754c\u6a21\u578b\u53ef\u4ee5\u9884\u6d4b\u4e0d\u540c\u64cd\u4f5c\u7684\u6548\u679c&#xff0c;\u5e2e\u52a9Agent\u8fdb\u884c\u524d\u77bb\u6027\u7684\u5de5\u4f5c\u6d41\u89c4\u5212\u3002\u4f8b\u5982&#xff0c;\u5728\u6267\u884c\u590d\u6742\u6570\u636e\u5206\u6790\u524d&#xff0c;Agent\u53ef\u4ee5\u5728\u4e16\u754c\u6a21\u578b\u4e2d\u6a21\u62df\u4e0d\u540c\u5206\u6790\u8def\u5f84&#xff0c;\u9009\u62e9\u6700\u53ef\u80fd\u6210\u529f\u7684\u65b9\u6848[145]\u3002<\/p>\n<h4>8.2.2 \u795e\u7ecf\u7b26\u53f7\u7ed3\u5408\u4e0e\u53ef\u89e3\u91ca\u6027<\/h4>\n<p>\u7eaf\u795e\u7ecf\u7f51\u7edc\u65b9\u6cd5\u867d\u7136\u5728\u6a21\u5f0f\u8bc6\u522b\u65b9\u9762\u8868\u73b0\u51fa\u8272&#xff0c;\u4f46\u5728\u903b\u8f91\u63a8\u7406\u548c\u53ef\u89e3\u91ca\u6027\u65b9\u9762\u5b58\u5728\u4e0d\u8db3\u3002\u795e\u7ecf\u7b26\u53f7\u7ed3\u5408&#xff08;Neuro-Symbolic 
Integration&#xff09;\u65e8\u5728\u878d\u5408\u795e\u7ecf\u7f51\u7edc\u7684\u5b66\u4e60\u80fd\u529b\u548c\u7b26\u53f7\u7cfb\u7edf\u7684\u63a8\u7406\u80fd\u529b[62]\u3002<\/p>\n<p>\u795e\u7ecf\u7b26\u53f7Agent\u7684\u5178\u578b\u67b6\u6784\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u611f\u77e5\u6a21\u5757&#xff1a;\u795e\u7ecf\u7f51\u7edc\u5904\u7406\u539f\u59cb\u8f93\u5165&#xff0c;\u63d0\u53d6\u7ed3\u6784\u5316\u4fe1\u606f<\/li>\n<li>\u7b26\u53f7\u63a8\u7406\u5f15\u64ce&#xff1a;\u57fa\u4e8e\u903b\u8f91\u89c4\u5219\u8fdb\u884c\u663e\u5f0f\u63a8\u7406<\/li>\n<li>\u795e\u7ecf-\u7b26\u53f7\u63a5\u53e3&#xff1a;\u5c06\u795e\u7ecf\u8f93\u51fa\u8f6c\u6362\u4e3a\u7b26\u53f7\u8868\u793a&#xff0c;\u6216\u5c06\u7b26\u53f7\u89c4\u5219\u5d4c\u5165\u795e\u7ecf\u7f51\u7edc<\/li>\n<\/ul>\n<p>\u53ef\u89e3\u91ca\u6027&#xff08;Interpretability&#xff09;\u662fAgent\u5de5\u4f5c\u6d41\u5728\u5b9e\u9645\u90e8\u7f72\u4e2d\u7684\u5173\u952e\u9700\u6c42\u3002\u7528\u6237\u9700\u8981\u7406\u89e3Agent\u7684\u51b3\u7b56\u4f9d\u636e&#xff0c;\u624d\u80fd\u4fe1\u4efb\u5e76\u6709\u6548\u76d1\u7763\u5176\u884c\u4e3a\u3002\u7b26\u53f7\u5316\u7684\u5de5\u4f5c\u6d41\u8868\u793a&#xff08;\u5982\u6761\u4ef6-\u52a8\u4f5c\u89c4\u5219&#xff09;\u6bd4\u795e\u7ecf\u7f51\u7edc\u6743\u91cd\u66f4\u5bb9\u6613\u7406\u89e3\u548c\u9a8c\u8bc1[137]\u3002<\/p>\n<p>\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u672c\u8eab\u5c31\u5177\u6709\u53ef\u89e3\u91ca\u6027\u4f18\u52bf\u2014\u2014\u5b83\u660e\u786e\u6307\u51fa\u4e86\u6bcf\u4e2a\u6b65\u9aa4\u7684\u8d28\u91cf\u8bc4\u4f30&#xff0c;\u5e2e\u52a9\u7528\u6237\u7406\u89e3Agent\u7684\u63a8\u7406\u8fc7\u7a0b\u3002\u672a\u6765\u7814\u7a76\u53ef\u4ee5\u8fdb\u4e00\u6b65\u63a2\u7d22\u5982\u4f55\u5c06PRM\u4e0e\u7b26\u53f7\u63a8\u7406\u7ed3\u5408&#xff0c;\u751f\u6210\u4eba\u7c7b\u53ef\u7406\u89e3\u7684\u5de5\u4f5c\u6d41\u89e3\u91ca[16]\u3002<\/p>\n<h4>8.2.3 \u6301\u7eed\u5b66\u4e60\u4e0e\u707e\u96be\u6027\u9057\u5fd8<\/h4>\n<p>\u6301\u7eed\u5b66\u4e60&#xff08;Continual 
Learning&#xff09;\u7814\u7a76\u5982\u4f55\u4f7fAgent\u5728\u5b66\u4e60\u65b0\u4efb\u52a1\u7684\u540c\u65f6\u4fdd\u6301\u65e7\u4efb\u52a1\u7684\u80fd\u529b\u3002\u8fd9\u662f\u5b9e\u73b0\u771f\u6b63\u81ea\u4e3b\u8fdb\u5316\u7684\u5173\u952e\u2014\u2014Agent\u9700\u8981\u5728\u6574\u4e2a\u751f\u547d\u5468\u671f\u4e2d\u4e0d\u65ad\u79ef\u7d2f\u77e5\u8bc6&#xff0c;\u800c\u975e\u9488\u5bf9\u6bcf\u4e2a\u4efb\u52a1\u4ece\u5934\u8bad\u7ec3[124]\u3002<\/p>\n<p>\u707e\u96be\u6027\u9057\u5fd8&#xff08;Catastrophic Forgetting&#xff09;\u662f\u6301\u7eed\u5b66\u4e60\u7684\u4e3b\u8981\u969c\u788d\u3002\u5f53\u795e\u7ecf\u7f51\u7edc\u9488\u5bf9\u65b0\u4efb\u52a1\u8bad\u7ec3\u65f6&#xff0c;\u53ef\u80fd\u8986\u76d6\u65e7\u4efb\u52a1\u7684\u6743\u91cd&#xff0c;\u5bfc\u81f4\u65e7\u4efb\u52a1\u6027\u80fd\u6025\u5267\u4e0b\u964d\u3002\u89e3\u51b3\u9057\u5fd8\u95ee\u9898\u7684\u4e3b\u8981\u7b56\u7565\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u6b63\u5219\u5316\u65b9\u6cd5&#xff1a;\u9650\u5236\u91cd\u8981\u53c2\u6570\u7684\u53d8\u5316&#xff0c;\u5982EWC&#xff08;Elastic Weight Consolidation&#xff09;<\/li>\n<li>\u56de\u653e\u65b9\u6cd5&#xff1a;\u4fdd\u7559\u65e7\u4efb\u52a1\u6837\u672c&#xff0c;\u4e0e\u65b0\u4efb\u52a1\u4e00\u8d77\u8bad\u7ec3<\/li>\n<li>\u6a21\u5757\u5316\u67b6\u6784&#xff1a;\u4e3a\u4e0d\u540c\u4efb\u52a1\u4f7f\u7528\u4e0d\u540c\u5b50\u7f51\u7edc&#xff0c;\u907f\u514d\u5e72\u6270<\/li>\n<li>\u5143\u5b66\u4e60\u65b9\u6cd5&#xff1a;\u5b66\u4e60\u4e0d\u6613\u9057\u5fd8\u7684\u8868\u793a<\/li>\n<\/ul>\n<p>\u5728Agent\u5de5\u4f5c\u6d41\u4f18\u5316\u4e2d&#xff0c;\u6301\u7eed\u5b66\u4e60\u610f\u5473\u7740Agent\u80fd\u591f\u4ece\u6bcf\u4e2a\u65b0\u4efb\u52a1\u4e2d\u5b66\u4e60&#xff0c;\u4e0d\u65ad\u6539\u8fdb\u5176SOP\u5e93\u3002\u6709\u6548\u7684\u6301\u7eed\u5b66\u4e60\u673a\u5236\u5c06\u4f7fAgent\u8d8a\u7528\u8d8a\u806a\u660e&#xff0c;\u771f\u6b63\u5b9e\u73b0\u81ea\u6211\u8fdb\u5316[56]\u3002<\/p>\n<h3>8.3 \u5e94\u7528\u524d\u666f\u4e0e\u793e\u4f1a\u5f71\u54cd<\/h3>\n<h4>8.3.1 
\u81ea\u52a8\u5316\u5de5\u4f5c\u6d41\u7f16\u6392\u7684\u5de5\u4e1a\u5e94\u7528<\/h4>\n<p>Agent\u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\u6280\u672f\u5728\u5de5\u4e1a\u9886\u57df\u5177\u6709\u5e7f\u9614\u7684\u5e94\u7528\u524d\u666f\u3002\u5728\u8f6f\u4ef6\u5f00\u53d1\u4e2d&#xff0c;\u80fd\u591f\u81ea\u4e3b\u89c4\u5212\u3001\u7f16\u7801\u3001\u6d4b\u8bd5\u3001\u90e8\u7f72\u7684AI\u5de5\u7a0b\u5e08\u5c06\u5927\u5e45\u63d0\u5347\u5f00\u53d1\u6548\u7387&#xff1b;\u5728\u5ba2\u6237\u670d\u52a1\u4e2d&#xff0c;\u80fd\u591f\u52a8\u6001\u8c03\u6574\u7b56\u7565\u7684\u667a\u80fd\u5ba2\u670d\u5c06\u63d0\u4f9b\u66f4\u4f18\u8d28\u7684\u7528\u6237\u4f53\u9a8c&#xff1b;\u5728\u79d1\u5b66\u7814\u7a76\u4e2d&#xff0c;\u80fd\u591f\u81ea\u4e3b\u8bbe\u8ba1\u5b9e\u9a8c\u3001\u5206\u6790\u6570\u636e\u7684AI\u52a9\u624b\u5c06\u52a0\u901f\u53d1\u73b0\u8fdb\u7a0b[141]\u3002<\/p>\n<p>\u88683 Agent\u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\u6280\u672f\u7684\u6f5c\u5728\u5e94\u7528\u9886\u57df<\/p>\n<table>\n<thead>\n<tr>\n<th>\u5e94\u7528\u9886\u57df<\/th>\n<th>\u5f53\u524d\u75db\u70b9<\/th>\n<th>Agent\u89e3\u51b3\u65b9\u6848<\/th>\n<th>\u9884\u671f\u6548\u76ca<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u8f6f\u4ef6\u5f00\u53d1<\/td>\n<td>\u91cd\u590d\u6027\u7f16\u7801\u4efb\u52a1\u8017\u65f6\u3001\u4eba\u5de5\u6d4b\u8bd5\u8986\u76d6\u4e0d\u5168<\/td>\n<td>\u81ea\u52a8\u751f\u6210\u4ee3\u7801\u3001\u81ea\u4e3b\u6d4b\u8bd5\u4fee\u590d<\/td>\n<td>\u5f00\u53d1\u6548\u7387\u63d0\u534750%&#043;&#xff0c;\u7f3a\u9677\u7387\u964d\u4f4e30%<\/td>\n<\/tr>\n<tr>\n<td>\u5ba2\u6237\u670d\u52a1<\/td>\n<td>\u4eba\u5de5\u5ba2\u670d\u6210\u672c\u9ad8\u3001\u54cd\u5e94\u6162\u3001\u8d28\u91cf\u4e0d\u7a33\u5b9a<\/td>\n<td>7&#215;24\u5c0f\u65f6\u667a\u80fd\u670d\u52a1\u3001\u4e2a\u6027\u5316\u5e94\u7b54<\/td>\n<td>\u6210\u672c\u964d\u4f4e60%&#xff0c;\u6ee1\u610f\u5ea6\u63d0\u534720%<\/td>\n<\/tr>\n<tr>\n<td>\u6570\u636e\u5206\u6790<\/td>\n<td>\u6570\u636e\u6e05\u6d17\u7e41\u7410\u3001\u5206\u6790\u6a21\u578b\u9009\u62e9\u56f0\u96be<\/td>\n<td>\u81ea\u52a8\u6570\u636e\u9884\u5904\u7406\u3001\
u6a21\u578b\u81ea\u52a8\u8c03\u4f18<\/td>\n<td>\u5206\u6790\u5468\u671f\u7f29\u77ed70%<\/td>\n<\/tr>\n<tr>\n<td>\u5185\u5bb9\u521b\u4f5c<\/td>\n<td>\u521b\u610f\u67af\u7aed\u3001\u98ce\u683c\u4e00\u81f4\u6027\u96be\u4fdd\u8bc1<\/td>\n<td>\u8f85\u52a9\u521b\u610f\u751f\u6210\u3001\u98ce\u683c\u81ea\u52a8\u9002\u914d<\/td>\n<td>\u4ea7\u51fa\u6548\u7387\u63d0\u53473-5\u500d<\/td>\n<\/tr>\n<tr>\n<td>\u6559\u80b2\u57f9\u8bad<\/td>\n<td>\u4e2a\u6027\u5316\u6559\u5b66\u96be\u4ee5\u89c4\u6a21\u5316<\/td>\n<td>\u81ea\u9002\u5e94\u5b66\u4e60\u8def\u5f84\u3001\u667a\u80fd\u7b54\u7591<\/td>\n<td>\u5b66\u4e60\u6548\u679c\u63d0\u534740%<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4>8.3.2 \u4eba\u673a\u534f\u4f5c\u7684\u65b0\u8303\u5f0f<\/h4>\n<p>Agent\u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\u5c06\u91cd\u5851\u4eba\u673a\u534f\u4f5c\u6a21\u5f0f\u3002\u4f20\u7edf\u7684\u4eba\u673a\u4ea4\u4e92\u662f\u547d\u4ee4-\u54cd\u5e94\u5f0f\u7684&#xff1a;\u4eba\u53d1\u51fa\u6307\u4ee4&#xff0c;\u673a\u5668\u6267\u884c\u3002\u672a\u6765\u7684\u534f\u4f5c\u5c06\u66f4\u52a0\u5bf9\u7b49\u548c\u52a8\u6001&#xff1a;\u4eba\u4e0eAgent\u5171\u540c\u89c4\u5212\u4efb\u52a1\u3001\u5206\u5de5\u6267\u884c\u3001\u76f8\u4e92\u5b66\u4e60[42]\u3002<\/p>\n<p>\u5728\u4eba\u673a\u534f\u4f5c\u4e2d&#xff0c;Agent\u9700\u8981\u5177\u5907\u4ee5\u4e0b\u80fd\u529b&#xff1a;<\/p>\n<ul>\n<li>\u610f\u56fe\u7406\u89e3&#xff1a;\u51c6\u786e\u7406\u89e3\u4eba\u7c7b\u7684\u76ee\u6807\u548c\u7ea6\u675f<\/li>\n<li>\u4e3b\u52a8\u6c9f\u901a&#xff1a;\u5728\u4e0d\u786e\u5b9a\u65f6\u4e3b\u52a8\u8be2\u95ee&#xff0c;\u5728\u6267\u884c\u4e2d\u4e3b\u52a8\u6c47\u62a5<\/li>\n<li>\u9002\u5e94\u6027&#xff1a;\u6839\u636e\u4eba\u7c7b\u53cd\u9988\u5feb\u901f\u8c03\u6574\u884c\u4e3a<\/li>\n<li>\u53ef\u6559\u6027&#xff1a;\u80fd\u591f\u4ece\u4eba\u7c7b\u793a\u8303\u548c\u7ea0\u6b63\u4e2d\u5b66\u4e60<\/li>\n<\/ul>\n<p>\u5f3a\u5316\u5b66\u4e60\u4e3a\u8bad\u7ec3\u534f\u4f5c\u578bAgent\u63d0\u4f9b\u4e86\u6846\u67b6\u3002\u901a\u8fc7\u5c06\u4eba\u7c7b\u5efa\u6a21\u4e3a\u
73af\u5883\u7684\u4e00\u90e8\u5206&#xff0c;Agent\u53ef\u4ee5\u5b66\u4e60\u6700\u5927\u5316\u8054\u5408\u4efb\u52a1\u6210\u529f\u7387\u7684\u6700\u4f18\u7b56\u7565\u3002\u9006\u5f3a\u5316\u5b66\u4e60&#xff08;Inverse RL&#xff09;\u8fd8\u53ef\u4ee5\u4ece\u4eba\u7c7b\u884c\u4e3a\u4e2d\u63a8\u65ad\u5956\u52b1\u51fd\u6570&#xff0c;\u4f7fAgent\u66f4\u597d\u5730\u5bf9\u9f50\u4eba\u7c7b\u610f\u56fe[66]\u3002<\/p>\n<h4>8.3.3 \u5b89\u5168\u5bf9\u9f50\u4e0e\u4ef7\u503c\u7ea6\u675f<\/h4>\n<p>\u968f\u7740Agent\u81ea\u4e3b\u6027\u7684\u63d0\u5347&#xff0c;\u5b89\u5168\u5bf9\u9f50&#xff08;Safety Alignment&#xff09;\u6210\u4e3a\u4e0d\u53ef\u5ffd\u89c6\u7684\u95ee\u9898\u3002\u81ea\u6211\u8fdb\u5316\u7684Agent\u53ef\u80fd\u53d1\u73b0\u4eba\u7c7b\u672a\u9884\u671f\u5230\u7684\u7b56\u7565&#xff0c;\u5176\u4e2d\u4e00\u4e9b\u53ef\u80fd\u662f\u6709\u5bb3\u7684\u6216\u8fdd\u80cc\u4eba\u7c7b\u4ef7\u503c\u89c2\u7684[61]\u3002<\/p>\n<p>\u786e\u4fddAgent\u5b89\u5168\u7684\u5173\u952e\u63aa\u65bd\u5305\u62ec&#xff1a;<\/p>\n<ul>\n<li>\u4ef7\u503c\u5bf9\u9f50&#xff1a;\u5c06\u4eba\u7c7b\u4ef7\u503c\u89c2\u7f16\u7801\u4e3a\u7ea6\u675f\u6761\u4ef6\u6216\u5956\u52b1\u51fd\u6570\u7684\u4e00\u90e8\u5206<\/li>\n<li>\u80fd\u529b\u63a7\u5236&#xff1a;\u9650\u5236Agent\u7684\u884c\u52a8\u8303\u56f4&#xff0c;\u9632\u6b62\u5176\u83b7\u5f97\u5371\u9669\u80fd\u529b<\/li>\n<li>\u53ef\u4e2d\u65ad\u6027&#xff1a;\u786e\u4fdd\u4eba\u7c7b\u53ef\u4ee5\u968f\u65f6\u63a5\u7ba1\u6216\u505c\u6b62Agent<\/li>\n<li>\u900f\u660e\u6027&#xff1a;Agent\u80fd\u591f\u89e3\u91ca\u5176\u884c\u4e3a\u4f9d\u636e&#xff0c;\u63a5\u53d7\u4eba\u7c7b\u5ba1\u67e5<\/li>\n<\/ul>\n<p>Constitutional AI\u7b49\u65b9\u6cd5\u5c1d\u8bd5\u901a\u8fc7\u539f\u5219\u7ea6\u675f\u5f15\u5bfcAgent\u884c\u4e3a\u3002Agent\u5728\u8bad\u7ec3\u65f6\u4e0d\u4ec5\u6700\u5927\u5316\u4efb\u52a1\u5956\u52b1&#xff0c;\u8fd8\u9700\u9075\u5b88\u9884\u5b9a\u4e49\u7684\u884c\u4e3a\u51c6\u5219\u3002\u5f3a\u5316\u5b66\u4e60\u4e2d\u7684\u7ea6\u675fMDP&#xff08;Constrained 
MDP&#xff09;\u6846\u67b6\u4e3a\u5f62\u5f0f\u5316\u5b89\u5168\u7ea6\u675f\u63d0\u4f9b\u4e86\u6570\u5b66\u5de5\u5177[145]\u3002<\/p>\n<p>Agent\u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\u6280\u672f\u7684\u53d1\u5c55\u5fc5\u987b\u4e0e\u5b89\u5168\u7814\u7a76\u540c\u6b65\u63a8\u8fdb\u3002\u53ea\u6709\u5728\u786e\u4fdd\u53ef\u63a7\u3001\u53ef\u4fe1\u7684\u524d\u63d0\u4e0b&#xff0c;\u8fd9\u9879\u6280\u672f\u624d\u80fd\u771f\u6b63\u9020\u798f\u793e\u4f1a\u3002<\/p>\n<p>#mermaid-svg-MSZpvMoWBHaPfPcC{font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;font-size:16px;fill:#333;}@keyframes edge-animation-frame{from{stroke-dashoffset:0;}}@keyframes dash{to{stroke-dashoffset:0;}}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-animation-slow{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 50s linear infinite;stroke-linecap:round;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-animation-fast{stroke-dasharray:9,5!important;stroke-dashoffset:900;animation:dash 20s linear infinite;stroke-linecap:round;}#mermaid-svg-MSZpvMoWBHaPfPcC .error-icon{fill:#552222;}#mermaid-svg-MSZpvMoWBHaPfPcC .error-text{fill:#552222;stroke:#552222;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-thickness-normal{stroke-width:1px;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-thickness-thick{stroke-width:3.5px;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-pattern-solid{stroke-dasharray:0;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-thickness-invisible{stroke-width:0;fill:none;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-pattern-dashed{stroke-dasharray:3;}#mermaid-svg-MSZpvMoWBHaPfPcC .edge-pattern-dotted{stroke-dasharray:2;}#mermaid-svg-MSZpvMoWBHaPfPcC .marker{fill:#333333;stroke:#333333;}#mermaid-svg-MSZpvMoWBHaPfPcC .marker.cross{stroke:#333333;}#mermaid-svg-MSZpvMoWBHaPfPcC svg{font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;font-size:16px;}#mermaid-svg-MSZpvMoWBHaPfPcC p{margin:0;}#mermaid-svg-MSZpvMoWBHaPfPcC .label{font-family:\\&#8221;trebuchet 
ms\\&#8221;,verdana,arial,sans-serif;color:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster-label text{fill:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster-label span{color:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster-label span p{background-color:transparent;}#mermaid-svg-MSZpvMoWBHaPfPcC .label text,#mermaid-svg-MSZpvMoWBHaPfPcC span{fill:#333;color:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC .node rect,#mermaid-svg-MSZpvMoWBHaPfPcC .node circle,#mermaid-svg-MSZpvMoWBHaPfPcC .node ellipse,#mermaid-svg-MSZpvMoWBHaPfPcC .node polygon,#mermaid-svg-MSZpvMoWBHaPfPcC .node path{fill:#ECECFF;stroke:#9370DB;stroke-width:1px;}#mermaid-svg-MSZpvMoWBHaPfPcC .rough-node .label text,#mermaid-svg-MSZpvMoWBHaPfPcC .node .label text,#mermaid-svg-MSZpvMoWBHaPfPcC .image-shape .label,#mermaid-svg-MSZpvMoWBHaPfPcC .icon-shape .label{text-anchor:middle;}#mermaid-svg-MSZpvMoWBHaPfPcC .node .katex path{fill:#000;stroke:#000;stroke-width:1px;}#mermaid-svg-MSZpvMoWBHaPfPcC .rough-node .label,#mermaid-svg-MSZpvMoWBHaPfPcC .node .label,#mermaid-svg-MSZpvMoWBHaPfPcC .image-shape .label,#mermaid-svg-MSZpvMoWBHaPfPcC .icon-shape .label{text-align:center;}#mermaid-svg-MSZpvMoWBHaPfPcC .node.clickable{cursor:pointer;}#mermaid-svg-MSZpvMoWBHaPfPcC .root .anchor path{fill:#333333!important;stroke-width:0;stroke:#333333;}#mermaid-svg-MSZpvMoWBHaPfPcC .arrowheadPath{fill:#333333;}#mermaid-svg-MSZpvMoWBHaPfPcC .edgePath .path{stroke:#333333;stroke-width:2.0px;}#mermaid-svg-MSZpvMoWBHaPfPcC .flowchart-link{stroke:#333333;fill:none;}#mermaid-svg-MSZpvMoWBHaPfPcC .edgeLabel{background-color:rgba(232,232,232, 0.8);text-align:center;}#mermaid-svg-MSZpvMoWBHaPfPcC .edgeLabel p{background-color:rgba(232,232,232, 0.8);}#mermaid-svg-MSZpvMoWBHaPfPcC .edgeLabel rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#mermaid-svg-MSZpvMoWBHaPfPcC .labelBkg{background-color:rgba(232, 232, 232, 0.5);}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster 
rect{fill:#ffffde;stroke:#aaaa33;stroke-width:1px;}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster text{fill:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC .cluster span{color:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;font-size:12px;background:hsl(80, 100%, 96.2745098039%);border:1px solid #aaaa33;border-radius:2px;pointer-events:none;z-index:100;}#mermaid-svg-MSZpvMoWBHaPfPcC .flowchartTitleText{text-anchor:middle;font-size:18px;fill:#333;}#mermaid-svg-MSZpvMoWBHaPfPcC rect.text{fill:none;stroke-width:0;}#mermaid-svg-MSZpvMoWBHaPfPcC .icon-shape,#mermaid-svg-MSZpvMoWBHaPfPcC .image-shape{background-color:rgba(232,232,232, 0.8);text-align:center;}#mermaid-svg-MSZpvMoWBHaPfPcC .icon-shape p,#mermaid-svg-MSZpvMoWBHaPfPcC .image-shape p{background-color:rgba(232,232,232, 0.8);padding:2px;}#mermaid-svg-MSZpvMoWBHaPfPcC .icon-shape rect,#mermaid-svg-MSZpvMoWBHaPfPcC .image-shape rect{opacity:0.5;background-color:rgba(232,232,232, 0.8);fill:rgba(232,232,232, 0.8);}#mermaid-svg-MSZpvMoWBHaPfPcC .label-icon{display:inline-block;height:1em;overflow:visible;vertical-align:-0.125em;}#mermaid-svg-MSZpvMoWBHaPfPcC .node .label-icon path{fill:currentColor;stroke:revert;stroke-width:revert;}#mermaid-svg-MSZpvMoWBHaPfPcC :root{&#8211;mermaid-font-family:\\&#8221;trebuchet ms\\&#8221;,verdana,arial,sans-serif;}<span class=\"nodeLabel\"><\/p>\n<p>\u5e94\u7528\u524d\u666f<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u8f6f\u4ef6\u5f00\u53d1<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5de5\u4e1a\u5e94\u7528<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5ba2\u6237\u670d\u52a1<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u79d1\u5b66\u7814\u7a76<\/p>\n<p><\/span><span 
class=\"nodeLabel\"><\/p>\n<p>\u6559\u80b2\u57f9\u8bad<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u672a\u6765\u53d1\u5c55\u65b9\u5411<\/p>\n<p><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"edgeLabel\"><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5f53\u524d\u5c40\u9650<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6837\u672c\u6548\u7387<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5956\u52b1\u8bbe\u8ba1<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6cdb\u5316\u80fd\u529b<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u4e16\u754c\u6a21\u578b<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u81ea\u52a8\u5956\u52b1\u5b66\u4e60<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u795e\u7ecf\u7b26\u53f7\u7ed3\u5408<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u6301\u7eed\u5b66\u4e60<\/p>\n<p><\/span><span class=\"nodeLabel\"><\/p>\n<p>\u5b89\u5168\u5bf9\u9f50<\/p>\n<p><\/span><\/p>\n<hr \/>\n<h2>\u53c2\u8003\u6587\u732e<\/h2>\n<p>[1] Yao S, Zhao J, Yu D, et al. ReAct: Synergizing reasoning and acting in language models[C]\/\/International Conference on Learning Representations. 2023.<\/p>\n<p>[2] Lewis P, Perez E, Piktus A, et al. Retrieval-augmented generation for knowledge-intensive NLP tasks[J]. Advances in Neural Information Processing Systems, 2020, 33: 9459-9474.<\/p>\n<p>[3] Tao Z, Lin T E, Chen X, et al. A survey on self-evolution of large language models[J]. arXiv preprint arXiv:2404.14387, 2024.<\/p>\n<p>[4] Shinn N, Cassano F, Berman E, et al. Reflexion: Language agents with verbal reinforcement learning[C]\/\/Advances in Neural Information Processing Systems. 2023, 36.<\/p>\n<p>[5] Zhou Y, Levine S, Weston J, et al. 
Self-challenging language model agents[J]. arXiv preprint arXiv:2502.02392, 2025.<\/p>\n<p>[6] Putta P, Mills E, Garg N, et al. Agent Q: Advanced reasoning and learning for autonomous AI agents[J]. arXiv preprint arXiv:2408.07199, 2024.<\/p>\n<p>[7] Lightman H, Kosaraju V, Burda Y, et al. Let\u2019s verify step by step[C]\/\/International Conference on Learning Representations. 2024.<\/p>\n<p>[8] Shao Z, Wang P, Zhu Q, et al. DeepSeekMath: Pushing the limits of mathematical reasoning in open language models[J]. arXiv preprint arXiv:2402.03300, 2024.<\/p>\n<p>[9] Vezhnevets A S, Osindero S, Schaul T, et al. FeUdal networks for hierarchical reinforcement learning[C]\/\/International Conference on Machine Learning. PMLR, 2017: 3540-3549.<\/p>\n<p>[10] Lake B M, Ullman T D, Tenenbaum J B, et al. Building machines that learn and think like people[J]. Behavioral and Brain Sciences, 2017, 40.<\/p>\n<p>[11] Jimenez C E, Yang J, Wettig A, et al. SWE-bench: Can language models resolve real-world github issues?[C]\/\/The Twelfth International Conference on Learning Representations. 2024.<\/p>\n<p>[12] Boiko D A, MacKnight R, Gomes G. Emergent autonomous scientific research capabilities of large language models[J]. arXiv preprint arXiv:2304.05332, 2023.<\/p>\n<p>[13] Qin Y, Liang S, Ye Y, et al. ToolLLM: Facilitating large language models to master 16000&#043; real-world APIs[J]. arXiv preprint arXiv:2307.16789, 2023.<\/p>\n<p>[14] Sutton R S, Barto A G. Reinforcement learning: An introduction[M]. MIT press, 2018.<\/p>\n<p>[15] Wang L, Ma C, Feng X, et al. A survey on large language model based autonomous agents[J]. Frontiers of Computer Science, 2024, 18(6): 186345.<\/p>\n<p>[16] Zhang C, Zhang C, Li C, et al. Small language models need strong verifiers to self-correct reasoning[J]. arXiv preprint arXiv:2404.17140, 2024.<\/p>\n<p>[17] Bellman R. Dynamic programming[J]. Science, 1966, 153(3731): 34-37.<\/p>\n<p>[18] Schulman J, Moritz P, Levine S, et al. 
High-dimensional continuous control using generalized advantage estimation[C]\/\/International Conference on Learning Representations. 2016.<\/p>\n<p>[19] Kaelbling L P, Littman M L, Cassandra A R. Planning and acting in partially observable stochastic domains[J]. Artificial Intelligence, 1998, 101(1-2): 99-134.<\/p>\n<p>[20] Wang G, Xie Y, Jiang Y, et al. Voyager: An open-ended embodied agent with large language models[J]. arXiv preprint arXiv:2305.16291, 2023.<\/p>\n<p>[21] Sutton R S, McAllester D, Singh S, et al. Policy gradient methods for reinforcement learning with function approximation[C]\/\/Advances in Neural Information Processing Systems. 1999, 12.<\/p>\n<p>[22] Williams R J. Simple statistical gradient-following algorithms for connectionist reinforcement learning[J]. Machine Learning, 1992, 8: 229-256.<\/p>\n<p>[23] Greensmith E, Bartlett P L, Baxter J. Variance reduction techniques for gradient estimates in reinforcement learning[J]. Journal of Machine Learning Research, 2004, 5(9).<\/p>\n<p>[24] Mnih V, Badia A P, Mirza M, et al. Asynchronous methods for deep reinforcement learning[C]\/\/International Conference on Machine Learning. PMLR, 2016: 1928-1937.<\/p>\n<p>[25] Stiennon N, Ouyang L, Wu J, et al. Learning to summarize with human feedback[C]\/\/Advances in Neural Information Processing Systems. 2020, 33: 3008-3021.<\/p>\n<p>[26] Schulman J, Levine S, Abbeel P, et al. Trust region policy optimization[C]\/\/International Conference on Machine Learning. PMLR, 2015: 1889-1897.<\/p>\n<p>[27] Kakade S, Langford J. Approximately optimal approximate reinforcement learning[C]\/\/International Conference on Machine Learning. 2002, 2: 267-274.<\/p>\n<p>[28] Schulman J, Wolski F, Dhariwal P, et al. Proximal policy optimization algorithms[J]. arXiv preprint arXiv:1707.06347, 2017.<\/p>\n<p>[29] Engstrom L, Ilyas A, Santurkar S, et al.
Implementation matters in deep RL: A case study on PPO and TRPO[C]\/\/International Conference on Learning Representations. 2020.<\/p>\n<p>[30] DeepSeek-AI. DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning[J]. arXiv preprint arXiv:2501.12948, 2025.<\/p>\n<p>[31] Liu Z, Qiao A, Neiswanger W. Q*: Improving multi-step reasoning for LLMs with deliberative planning[J]. arXiv preprint arXiv:2410.14255, 2024.<\/p>\n<p>[32] Guo D, Yang D, Zhang H, et al. DeepSeek-Coder-V2: Breaking the barrier of closed-source models in code intelligence[J]. arXiv preprint arXiv:2406.11931, 2024.<\/p>\n<p>[33] Hao S, Gu Y, Ma H, et al. Reasoning with language model is planning with world model[J]. arXiv preprint arXiv:2305.14992, 2023.<\/p>\n<p>[34] Bacon P L, Harb J, Precup D. The option-critic architecture[C]\/\/Proceedings of the AAAI Conference on Artificial Intelligence. 2017, 31(1).<\/p>\n<p>[35] Sutton R S, Precup D, Singh S. Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning[J]. Artificial Intelligence, 1999, 112(1-2): 181-211.<\/p>\n<p>[36] Xi Z, Chen W, Guo X, et al. The rise and potential of large language model based agents: A survey[J]. Science China Information Sciences, 2023, 66(8): 181201.<\/p>\n<p>[37] Lu J, Zhong W, Huang W, et al. SELF: Self-evolution with language feedback[J]. arXiv preprint arXiv:2310.00533, 2023.<\/p>\n<p>[38] Bellman R. Dynamic programming[M]. Princeton University Press, 1957.<\/p>\n<p>[39] Wang X, Zhu C, Zheng Z. Instruction polisher: Refine task instructions to eliminate ambiguity[J]. arXiv preprint arXiv:2402.09674, 2024.<\/p>\n<p>[40] Kingma D P, Welling M. Auto-encoding variational bayes[C]\/\/International Conference on Learning Representations. 2014.<\/p>\n<p>[41] Wang P, Li L, Shao Z, et al. Math-shepherd: Verify and reinforce LLMs step-by-step without human annotations[C]\/\/Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 
2024: 9426-9439.<\/p>\n<p>[42] Qin Y, Hu S, Lin Y, et al. Tool learning with foundation models[J]. ACM Computing Surveys, 2024, 57(4): 1-40.<\/p>\n<p>[43] Song K, Moeini A, Wang P, et al. Reward is enough: LLMs are in-context reinforcement learners[J]. arXiv preprint arXiv:2506.06303, 2025.<\/p>\n<p>[44] Miettinen K. Nonlinear multiobjective optimization[M]. Springer Science &amp; Business Media, 1999.<\/p>\n<p>[45] Deb K. Multi-objective optimization using evolutionary algorithms[M]. John Wiley &amp; Sons, 2001.<\/p>\n<p>[46] Hayes C F, R\u0103dulescu R, Bargiacchi E, et al. A practical guide to multi-objective reinforcement learning and planning[J]. Autonomous Agents and Multi-Agent Systems, 2022, 36(1): 26.<\/p>\n<p>[47] Arjona-Medina J A, Gillhofer M, Widrich M, et al. RUDDER: Return decomposition for delayed rewards[C]\/\/Advances in Neural Information Processing Systems. 2019, 32.<\/p>\n<p>[48] Sutton R S. Learning to predict by the methods of temporal differences[J]. Machine Learning, 1988, 3: 9-44.<\/p>\n<p>[49] Seijen H V, Sutton R S. True online TD(lambda)[C]\/\/International Conference on Machine Learning. PMLR, 2014: 692-700.<\/p>\n<p>[50] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[C]\/\/Advances in Neural Information Processing Systems. 2017, 30.<\/p>\n<p>[51] Parisotto E, Song H F, Rae J W, et al. Stabilizing transformers for reinforcement learning[C]\/\/International Conference on Machine Learning. PMLR, 2020: 7487-7498.<\/p>\n<p>[52] Cobbe K, Hesse C, Hilton J, et al. Training verifiers to solve math word problems[J]. arXiv preprint arXiv:2110.14168, 2021.<\/p>\n<p>[53] Uesato J, Kushman N, Kumar R, et al. Solving math word problems with process-and outcome-based feedback[J]. arXiv preprint arXiv:2211.14275, 2022.<\/p>\n<p>[54] Setlur A, Garg S, Geng X, et al. Rewarding progress: Scaling automated process verifiers for LLM reasoning[J]. arXiv preprint arXiv:2410.08146, 2024.<\/p>\n<p>[55] Snell C, Lee J, Xu K, et al. 
Scaling LLM test-time compute optimally can be more effective than scaling model parameters[J]. arXiv preprint arXiv:2408.03314, 2024.<\/p>\n<p>[56] Luo L, Liu Y, Liu R, et al. An empirical study of autoregressive pre-training from videos[J]. arXiv preprint arXiv:2405.01212, 2024.<\/p>\n<p>[57] Precup D, Sutton R S, Dasgupta S. Off-policy temporal-difference learning with function approximation[C]\/\/International Conference on Machine Learning. 2001.<\/p>\n<p>[58] Chen X, Zhong Z, Chen Z, et al. Meta learning for natural language processing: A survey[C]\/\/Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing. 2023: 6272-6289.<\/p>\n<p>[59] Xia R, Pan L, Luo J. Learning to rank for information retrieval and natural language processing[J]. Synthesis Lectures on Human Language Technologies, 2024, 17(3): 1-121.<\/p>\n<p>[60] Feng X, Zhang Z, Guo D. Towards large reasoning models: A survey of reinforced reasoning with large language models[J]. arXiv preprint arXiv:2501.09686, 2025.<\/p>\n<p>[61] Luo H, Sun Q, Xu C, et al. WizardMath: Empowering mathematical reasoning for large language models via reinforced evol-instruct[J]. arXiv preprint arXiv:2308.09583, 2023.<\/p>\n<p>[62] An S, Ma Y, Zhang Y, et al. Making language models better reasoners with step-aware verifier[J]. arXiv preprint arXiv:2206.02336, 2022.<\/p>\n<p>[63] Luo L, Liu Y, Liu R, et al. OmegaPRM: Omega-shaped process reward model for mathematical reasoning[J]. arXiv preprint arXiv:2406.07394, 2024.<\/p>\n<p>[64] Busoniu L, Babuska R, De Schutter B. A comprehensive survey of multiagent reinforcement learning[J]. IEEE Transactions on Systems, Man, and Cybernetics, Part C, 2008, 38(2): 156-172.<\/p>\n<p>[65] Nash J. Non-cooperative games[J]. Annals of Mathematics, 1951: 286-295.<\/p>\n<p>[66] Hernandez-Leal P, Kaisers M, Baarslag T, et al. A survey of learning in multiagent environments: Dealing with non-stationarity[J]. 
arXiv preprint arXiv:1707.09183, 2017.<\/p>\n<p>[67] Von Neumann J, Morgenstern O. Theory of games and economic behavior[M]. Princeton University Press, 1944.<\/p>\n<p>[68] Silver D, Hubert T, Schrittwieser J, et al. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play[J]. Science, 2018, 362(6419): 1140-1144.<\/p>\n<p>[69] Stone P, Veloso M. Multiagent systems: A survey from a machine learning perspective[J]. Autonomous Robots, 2000, 8(3): 345-383.<\/p>\n<p>[70] Shapley L S. A value for n-person games[J]. Contributions to the Theory of Games, 1953, 2(28): 307-317.<\/p>\n<p>[71] Ghorbani A, Zou J. Data shapley: Equitable valuation of data for machine learning[C]\/\/International Conference on Machine Learning. PMLR, 2019: 2242-2251.<\/p>\n<p>[72] Tan M. Multi-agent reinforcement learning: Independent vs. cooperative agents[C]\/\/International Conference on Machine Learning. 1993.<\/p>\n<p>[73] Lowe R, Wu Y, Tamar A, et al. Multi-agent actor-critic for mixed cooperative-competitive environments[C]\/\/Advances in Neural Information Processing Systems. 2017, 30.<\/p>\n<p>[74] Sunehag P, Lever G, Gruslys A, et al. Value-decomposition networks for cooperative multi-agent learning based on team reward[C]\/\/Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems. 2018: 2085-2087.<\/p>\n<p>[75] Rashid T, Samvelyan M, De Witt C S, et al. QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning[C]\/\/International Conference on Machine Learning. PMLR, 2018: 4295-4304.<\/p>\n<p>[76] Foerster J N, Assael Y M, de Freitas N, et al. Learning to communicate with deep multi-agent reinforcement learning[C]\/\/Advances in Neural Information Processing Systems. 2016, 29.<\/p>\n<p>[77] Das A, Gervet T, Romoff J, et al. TarMAC: Targeted multi-agent communication[C]\/\/International Conference on Machine Learning. 
PMLR, 2019: 1538-1546.<\/p>\n<p>[78] Hong S, Zheng X, Chen J, et al. MetaGPT: Meta programming for multi-agent collaborative framework[J]. arXiv preprint arXiv:2308.00352, 2023.<\/p>\n<p>[79] Silver D, Schrittwieser J, Simonyan K, et al. Mastering the game of Go without human knowledge[J]. Nature, 2017, 550(7676): 354-359.<\/p>\n<p>[80] Bengio Y, Louradour J, Collobert R, et al. Curriculum learning[C]\/\/International Conference on Machine Learning. 2009: 41-48.<\/p>\n<p>[81] Heinrich J, Lanctot M, Silver D. Fictitious self-play in extensive-form games[C]\/\/International Conference on Machine Learning. PMLR, 2015: 805-813.<\/p>\n<p>[82] Portelas R, Colas L, Weng L, et al. Automatic curriculum learning for deep RL: A short survey[C]\/\/International Joint Conference on Artificial Intelligence. 2020.<\/p>\n<p>[83] Qi Z, Liu X, Iong I L, et al. WebRL: Training LLM web agents via self-evolving online curriculum reinforcement learning[J]. arXiv preprint arXiv:2411.02337, 2024.<\/p>\n<p>[84] Baker B, Kanitscheider I, Markov T, et al. Emergent tool use from multi-agent autocurricula[C]\/\/International Conference on Learning Representations. 2020.<\/p>\n<p>[85] Singh A, Jain T, Sukhbaatar S. Learning when to communicate at scale in multiagent cooperative and competitive tasks[C]\/\/International Conference on Learning Representations. 2019.<\/p>\n<p>[86] Qian C, Cong X, Yang C, et al. Communicative agents for software development[C]\/\/Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024: 15174-15186.<\/p>\n<p>[87] Atkinson R C, Shiffrin R M. Human memory: A proposed system and its control processes[J]. Psychology of Learning and Motivation, 1968, 2: 89-195.<\/p>\n<p>[88] Packer C, Wooders S, Lin K, et al. MemGPT: Towards LLMs as operating systems[J]. arXiv preprint arXiv:2310.08560, 2023.<\/p>\n<p>[89] Zhong W, Guo L, Gao Q, et al. MemoryBank: Enhancing large language models with long-term memory[J].
arXiv preprint arXiv:2305.10250, 2023.<\/p>\n<p>[90] Tulving E. Elements of episodic memory[M]. Oxford University Press, 1983.<\/p>\n<p>[91] Wang Z, Zhang S, Li Y, et al. APIGen: Automated pipeline for generating verifiable and diverse function-calling datasets[J]. arXiv preprint arXiv:2406.18518, 2024.<\/p>\n<p>[92] Miller G A. WordNet: A lexical database for English[J]. Communications of the ACM, 1995, 38(11): 39-41.<\/p>\n<p>[93] Anderson J R. Skill acquisition: Compilation of weak-method problem solutions[J]. Psychological Review, 1987, 94(2): 192.<\/p>\n<p>[94] Manning C D, Raghavan P, Sch\u00fctze H. Introduction to information retrieval[M]. Cambridge University Press, 2008.<\/p>\n<p>[95] Malkov Y A, Yashunin D A. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs[J]. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2018, 42(4): 824-836.<\/p>\n<p>[96] Hogan A, Blomqvist E, Cochez M, et al. Knowledge graphs[J]. ACM Computing Surveys, 2021, 54(4): 1-37.<\/p>\n<p>[97] Wang S, Liu J, Wei Y. Hybrid index structures for fast approximate nearest neighbor search[J]. IEEE Transactions on Knowledge and Data Engineering, 2023, 35(5): 4567-4581.<\/p>\n<p>[98] Jurafsky D, Martin J H. Speech and language processing[M]. Pearson Education India, 2000.<\/p>\n<p>[99] Zhang S, Chen C, Liu Z. Knowledge extraction from neural networks for nlp tasks[J]. Natural Language Engineering, 2023, 29(3): 567-589.<\/p>\n<p>[100] Hinton G, Vinyals O, Dean J. Distilling the knowledge in a neural network[J]. arXiv preprint arXiv:1503.02531, 2015.<\/p>\n<p>[101] Mikolov T, Sutskever I, Chen K, et al. Distributed representations of words and phrases and their compositionality[C]\/\/Advances in Neural Information Processing Systems. 2013, 26.<\/p>\n<p>[102] Oord A, Li Y, Vinyals O. Representation learning with contrastive predictive coding[J]. 
arXiv preprint arXiv:1807.03748, 2018.<\/p>\n<p>[103] Johnson J, Douze M, J\u00e9gou H. Billion-scale similarity search with GPUs[J]. IEEE Transactions on Big Data, 2019, 7(3): 535-547.<\/p>\n<p>[104] French R M. Catastrophic forgetting in connectionist networks[J]. Trends in Cognitive Sciences, 1999, 3(4): 128-135.<\/p>\n<p>[105] O\u2019Neil E J, O\u2019Neil P E, Weikum G. The LRU-K page replacement algorithm for database disk buffering[C]\/\/ACM SIGMOD Record. 1993, 22(2): 297-306.<\/p>\n<p>[106] Korb K B, Nicholson A E. Bayesian artificial intelligence[M]. CRC Press, 2010.<\/p>\n<p>[107] Kolodner J L. An introduction to case-based reasoning[J]. Artificial Intelligence Review, 1994, 6(1): 3-34.<\/p>\n<p>[108] Han J, Pei J, Tong H. Data mining: Concepts and techniques[M]. Morgan Kaufmann, 2022.<\/p>\n<p>[109] Aamodt A, Plaza E. Case-based reasoning: Foundational issues, methodological variations, and system approaches[J]. AI Communications, 1994, 7(1): 39-59.<\/p>\n<p>[110] Watson I, Marir F. Case-based reasoning: A review[J]. Knowledge Engineering Review, 1994, 9(4): 355-381.<\/p>\n<p>[111] Richter M M, Weber R O. Case-based reasoning: A textbook[M]. Springer Science &amp; Business Media, 2016.<\/p>\n<p>[112] Weber R O, Ashley K D, Br\u00fcninghaus S. Textual case-based reasoning[J]. Knowledge Engineering Review, 2006, 20(3): 255-260.<\/p>\n<p>[113] Bergmann R, Kolodner J, Plaza E. Representation in case-based reasoning[C]\/\/Proceedings of the 15th International Joint Conference on Artificial Intelligence. 2005.<\/p>\n<p>[114] Pathak D, Agrawal P, Efros A A, et al. Curiosity-driven exploration by self-supervised prediction[C]\/\/International Conference on Machine Learning. PMLR, 2017: 2778-2787.<\/p>\n<p>[115] Auer P, Cesa-Bianchi N, Fischer P. Finite-time analysis of the multiarmed bandit problem[J]. Machine Learning, 2002, 47: 235-256.<\/p>\n<p>[116] Frazier P I. A tutorial on Bayesian optimization[J]. 
arXiv preprint arXiv:1807.02811, 2018.<\/p>\n<p>[117] Lattimore T, Szepesv\u00e1ri C. Bandit algorithms[M]. Cambridge University Press, 2020.<\/p>\n<p>[118] Bubeck S, Cesa-Bianchi N. Regret analysis of stochastic and nonstochastic multi-armed bandit problems[J]. Foundations and Trends in Machine Learning, 2012, 5(1): 1-122.<\/p>\n<p>[119] Lindley D V. On a measure of the information provided by an experiment[J]. The Annals of Mathematical Statistics, 1956, 27(4): 986-1005.<\/p>\n<p>[120] Russo D J, Van Roy B, Kazerouni A, et al. A tutorial on Thompson sampling[J]. Foundations and Trends in Machine Learning, 2018, 11(1): 1-96.<\/p>\n<p>[121] Gal Y, Ghahramani Z. Dropout as a Bayesian approximation: Representing model uncertainty in deep learning[C]\/\/International Conference on Machine Learning. PMLR, 2016: 1050-1059.<\/p>\n<p>[122] Lai T L, Robbins H. Asymptotically efficient adaptive allocation rules[J]. Advances in Applied Mathematics, 1985, 6(1): 4-22.<\/p>\n<p>[123] Auer P. Using confidence bounds for exploitation-exploration trade-offs[J]. Journal of Machine Learning Research, 2002, 3(Nov): 397-422.<\/p>\n<p>[124] Kuleshov V, Precup D. Algorithms for multi-armed bandit problems[J]. arXiv preprint arXiv:1402.6028, 2014.<\/p>\n<p>[125] Watkins C J, Dayan P. Q-learning[J]. Machine Learning, 1992, 8: 279-292.<\/p>\n<p>[126] Even-Dar E, Mannor S, Mansour Y. Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems[J]. Journal of Machine Learning Research, 2006, 7(6).<\/p>\n<p>[127] Auer P, Cesa-Bianchi N, Freund Y, et al. Gambling in a rigged casino: The adversarial multi-armed bandit problem[C]\/\/Proceedings of IEEE 36th Annual Foundations of Computer Science. 1995: 322-331.<\/p>\n<p>[128] Haarnoja T, Zhou A, Abbeel P, et al. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor[C]\/\/International Conference on Machine Learning. PMLR, 2018: 1861-1870.<\/p>\n<p>[129] Schmidhuber J.
Formal theory of creativity, fun, and intrinsic motivation (1990-2010)[J]. IEEE Transactions on Autonomous Mental Development, 2010, 2(3): 230-247.<\/p>\n<p>[130] Burda Y, Edwards H, Storkey A, et al. Exploration by random network distillation[C]\/\/International Conference on Learning Representations. 2019.<\/p>\n<p>[131] Ilyas A, Engstrom L, Santurkar S, et al. A closer look at deep policy gradients[C]\/\/International Conference on Learning Representations. 2020.<\/p>\n<p>[132] Kirkpatrick J, Pascanu R, Rabinowitz N, et al. Overcoming catastrophic forgetting in neural networks[J]. Proceedings of the National Academy of Sciences, 2017, 114(13): 3521-3526.<\/p>\n<p>[133] Lin L J. Self-improving reactive agents based on reinforcement learning, planning and teaching[J]. Machine Learning, 1992, 8: 293-321.<\/p>\n<p>[134] Schaul T, Quan J, Antonoglou I, et al. Prioritized experience replay[C]\/\/International Conference on Learning Representations. 2016.<\/p>\n<p>[135] Taylor M E, Stone P. Transfer learning for reinforcement learning domains: A survey[J]. Journal of Machine Learning Research, 2009, 10(7).<\/p>\n<p>[136] Ecoffet A, Huizinga J, Lehman J, et al. Go-explore: a new approach for hard-exploration problems[J]. arXiv preprint arXiv:1901.10995, 2019.<\/p>\n<p>[137] Yu T, Quillen D, He Z, et al. Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning[C]\/\/Conference on Robot Learning. PMLR, 2020: 1094-1100.<\/p>\n<p>[138] Patterson D, Gonzalez J, Le Q, et al. Carbon emissions and large neural network training[J]. arXiv preprint arXiv:2104.10350, 2021.<\/p>\n<p>[139] Amodei D, Olah C, Steinhardt J, et al. Concrete problems in AI safety[J]. arXiv preprint arXiv:1606.06565, 2016.<\/p>\n<p>[140] Ziegler D M, Stiennon N, Wu J, et al. Fine-tuning language models from human preferences[J]. arXiv preprint arXiv:1909.08593, 2019.<\/p>\n<p>[141] Kirk R, Zhang A, Grefenstette E, et al. A survey on generalisation in reinforcement learning[J]. 
arXiv preprint arXiv:2111.09794, 2021.<\/p>\n<p>[142] Cobbe K, Klimov O, Hesse C, et al. Quantifying generalization in reinforcement learning[C]\/\/International Conference on Machine Learning. PMLR, 2019: 1282-1289.<\/p>\n<p>[143] Ha D, Schmidhuber J. World models[J]. arXiv preprint arXiv:1803.10122, 2018.<\/p>\n<p>[144] Chua K, Calandra R, McAllister R, et al. Deep reinforcement learning in a handful of trials using probabilistic dynamics models[C]\/\/Advances in Neural Information Processing Systems. 2018, 31.<\/p>\n<p>[145] Hafner D, Lillicrap T, Ba J, et al. Dream to control: Learning behaviors by latent imagination[C]\/\/International Conference on Learning Representations. 2020.<\/p>\n<p>[146] Garcez A d, Lamb L C. Neurosymbolic AI: The 3rd wave[J]. Artificial Intelligence Review, 2023, 56(11): 12387-12406.<\/p>\n<p>[147] Gunning D, Stefik M, Choi J, et al. XAI\u2014Explainable artificial intelligence[J]. Science Robotics, 2019, 4(37).<\/p>\n<p>[148] Camburu O M, Rockt\u00e4schel T, Lukasiewicz T, et al. e-SNLI: Natural language inference with natural language explanations[C]\/\/Advances in Neural Information Processing Systems. 2018, 31.<\/p>\n<p>[149] Parisi G I, Kemker R, Part J L, et al. Continual lifelong learning with neural networks: A review[J]. Neural Networks, 2019, 113: 54-71.<\/p>\n<p>[150] Thrun S. Lifelong learning algorithms[M]\/\/Learning to Learn. Springer, 1998: 181-209.<\/p>\n<p>[151] Bommasani R, Hudson D A, Adeli E, et al. On the opportunities and risks of foundation models[J]. arXiv preprint arXiv:2108.07258, 2021.<\/p>\n<p>[152] Amershi S, Weld D, Vorvoreanu M, et al. Guidelines for human-AI interaction[C]\/\/Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems. 2019: 1-13.<\/p>\n<p>[153] Arora S, Doshi P. A survey of inverse reinforcement learning: Challenges, methods and progress[J]. Artificial Intelligence, 2021, 297: 103500.<\/p>\n<p>[154] Russell S. 
Human compatible: Artificial intelligence and the problem of control[M]. Penguin, 2019.<\/p>\n<p>[155] Bai Y, Kadavath S, Kundu S, et al. Constitutional AI: Harmlessness from AI feedback[J]. arXiv preprint arXiv:2212.08073, 2022.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>1 \u5f15\u8a00&#xff1a;\u4ece\u9759\u6001\u7f16\u6392\u5230\u52a8\u6001\u6f14\u5316\u7684\u8303\u5f0f\u8dc3\u8fc1<br \/>\n1.1 \u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u6f14\u8fdb\u8109\u7edc<br \/>\n\u4eba\u5de5\u667a\u80fd\u9886\u57df\u6b63\u7ecf\u5386\u7740\u4ece\u5355\u4e00\u6a21\u578b\u5230\u590d\u5408\u667a\u80fd\u7cfb\u7edf\u7684\u6df1\u523b\u53d8\u9769\u3002\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;Large Language Models, LLMs&#xff09;\u7684\u7a81\u7834\u6027\u8fdb\u5c55\u4e3a\u6784\u5efa\u901a\u7528\u667a\u80fd\u4f53&#xff08;Agent&#xff09;\u5960\u5b9a\u4e86\u575a\u5b9e\u57fa\u7840&#xff0c;\u7136\u800c&#xff0c;\u5982\u4f55\u4f7f\u8fd9\u4e9b\u667a\u80fd\u4f53\u5728\u590d\u6742\u591a\u53d8\u7684\u73af\u5883\u4e2d\u6301\u7eed\u4f18\u5316\u5176\u884c\u4e3a\u7b56\u7565&#xff0c;\u6210\u4e3a\u5f53\u524d\u7814\u7a76\u7684\u6838\u5fc3\u6311\u6218\u3002\u4f20\u7edf\u7684\u624b\u52a8\u8bbe\u8ba1\u5de5\u4f5c\u6d41&amp;#x<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[50,207,427],"topic":[],"class_list":["post-77041","post","type-post","status-publish","format-standard","hentry","category-server","tag-50","tag-207","tag-427"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, 
max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/77041.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"1 \u5f15\u8a00&#xff1a;\u4ece\u9759\u6001\u7f16\u6392\u5230\u52a8\u6001\u6f14\u5316\u7684\u8303\u5f0f\u8dc3\u8fc1 1.1 \u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u6f14\u8fdb\u8109\u7edc \u4eba\u5de5\u667a\u80fd\u9886\u57df\u6b63\u7ecf\u5386\u7740\u4ece\u5355\u4e00\u6a21\u578b\u5230\u590d\u5408\u667a\u80fd\u7cfb\u7edf\u7684\u6df1\u523b\u53d8\u9769\u3002\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;Large Language Models, LLMs&#xff09;\u7684\u7a81\u7834\u6027\u8fdb\u5c55\u4e3a\u6784\u5efa\u901a\u7528\u667a\u80fd\u4f53&#xff08;Agent&#xff09;\u5960\u5b9a\u4e86\u575a\u5b9e\u57fa\u7840&#xff0c;\u7136\u800c&#xff0c;\u5982\u4f55\u4f7f\u8fd9\u4e9b\u667a\u80fd\u4f53\u5728\u590d\u6742\u591a\u53d8\u7684\u73af\u5883\u4e2d\u6301\u7eed\u4f18\u5316\u5176\u884c\u4e3a\u7b56\u7565&#xff0c;\u6210\u4e3a\u5f53\u524d\u7814\u7a76\u7684\u6838\u5fc3\u6311\u6218\u3002\u4f20\u7edf\u7684\u624b\u52a8\u8bbe\u8ba1\u5de5\u4f5c\u6d41&amp;#x\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/77041.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2026-02-23T14:10:46+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" 
content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"67 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/77041.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/77041.html\",\"name\":\"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2026-02-23T14:10:46+00:00\",\"dateModified\":\"2026-02-23T14:10:46+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/77041.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/77041.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/77041.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 
SOP\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/77041.html","og_locale":"zh_CN","og_type":"article","og_title":"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"1 \u5f15\u8a00&#xff1a;\u4ece\u9759\u6001\u7f16\u6392\u5230\u52a8\u6001\u6f14\u5316\u7684\u8303\u5f0f\u8dc3\u8fc1 1.1 \u667a\u80fd\u4f53\u5de5\u4f5c\u6d41\u7684\u6f14\u8fdb\u8109\u7edc \u4eba\u5de5\u667a\u80fd\u9886\u57df\u6b63\u7ecf\u5386\u7740\u4ece\u5355\u4e00\u6a21\u578b\u5230\u590d\u5408\u667a\u80fd\u7cfb\u7edf\u7684\u6df1\u523b\u53d8\u9769\u3002\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;Large Language Models, LLMs&#xff09;\u7684\u7a81\u7834\u6027\u8fdb\u5c55\u4e3a\u6784\u5efa\u901a\u7528\u667a\u80fd\u4f53&#xff08;Agent&#xff09;\u5960\u5b9a\u4e86\u575a\u5b9e\u57fa\u7840&#xff0c;\u7136\u800c&#xff0c;\u5982\u4f55\u4f7f\u8fd9\u4e9b\u667a\u80fd\u4f53\u5728\u590d\u6742\u591a\u53d8\u7684\u73af\u5883\u4e2d\u6301\u7eed\u4f18\u5316\u5176\u884c\u4e3a\u7b56\u7565&#xff0c;\u6210\u4e3a\u5f53\u524d\u7814\u7a76\u7684\u6838\u5fc3\u6311\u6218\u3002\u4f20\u7edf\u7684\u624b\u52a8\u8bbe\u8ba1\u5de5\u4f5c\u6d41&amp;#x","og_url":"https:\/\/www.wsisp.com\/helps\/77041.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2026-02-23T14:10:46+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"67 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/77041.html","url":"https:\/\/www.wsisp.com\/helps\/77041.html","name":"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2026-02-23T14:10:46+00:00","dateModified":"2026-02-23T14:10:46+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/77041.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/77041.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/77041.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"Agent \u5de5\u4f5c\u6d41\u81ea\u6211\u8fdb\u5316\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u8ba9\u667a\u80fd\u4f53\u81ea\u52a8\u5bfb\u627e\u6700\u4f18 SOP"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/77041","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=77041"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/77041\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=77041"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=77041"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=77041"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=77041"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}