{"id":38365,"date":"2025-05-20T09:18:24","date_gmt":"2025-05-20T01:18:24","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/38365.html"},"modified":"2025-05-20T09:18:24","modified_gmt":"2025-05-20T01:18:24","slug":"%e3%80%90%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0%e5%9f%ba%e7%a1%80%e3%80%91%e6%8d%9f%e5%a4%b1%e5%87%bd%e6%95%b0%e4%b8%8e%e4%bc%98%e5%8c%96%e7%ae%97%e6%b3%95%e8%af%a6%e8%a7%a3%ef%bc%9a%e4%bb%8e%e7%90%86","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/38365.html","title":{"rendered":"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5"},"content":{"rendered":"<h3>\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3&#xff1a;\u4ece\u7406\u8bba\u5230\u5b9e\u8df5<\/h3>\n<p><img decoding=\"async\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250520011823-682bd85f31042.jpg\" alt=\"\u5728\u8fd9\u91cc\u63d2\u5165\u56fe\u7247\u63cf\u8ff0\" \/><\/p>\n<hr \/>\n<h4>\u4e00\u3001\u5f15\u8a00<\/h4>\n<h5>1. 
\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u5728\u6df1\u5ea6\u5b66\u4e60\u4e2d\u7684\u6838\u5fc3\u4f5c\u7528<\/h5>\n<p>\u5728\u6df1\u5ea6\u5b66\u4e60\u4e2d&#xff0c;\u6a21\u578b\u8bad\u7ec3\u7684\u672c\u8d28\u662f\u901a\u8fc7\u4e0d\u65ad\u8c03\u6574\u53c2\u6570&#xff0c;\u4f7f\u6a21\u578b\u8f93\u51fa\u5c3d\u53ef\u80fd\u63a5\u8fd1\u771f\u5b9e\u503c\u3002\u8fd9\u4e00\u8fc7\u7a0b\u7684\u6838\u5fc3\u9a71\u52a8\u529b\u662f\u635f\u5931\u51fd\u6570&#xff08;Loss Function&#xff09;\u548c\u4f18\u5316\u7b97\u6cd5&#xff08;Optimization Algorithm&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u635f\u5931\u51fd\u6570&#xff1a;\u91cf\u5316\u6a21\u578b\u9884\u6d4b\u503c\u4e0e\u771f\u5b9e\u503c\u7684\u5dee\u5f02&#xff0c;\u662f\u6a21\u578b\u6027\u80fd\u7684\u201c\u8bc4\u5206\u6807\u51c6\u201d\u3002\u4f8b\u5982&#xff0c;\u56de\u5f52\u4efb\u52a1\u4e2d\u5e38\u7528\u7684\u5747\u65b9\u8bef\u5dee&#xff08;MSE&#xff09;\u76f4\u63a5\u8861\u91cf\u9884\u6d4b\u503c\u4e0e\u771f\u5b9e\u503c\u7684\u8ddd\u79bb&#xff0c;\u800c\u5206\u7c7b\u4efb\u52a1\u4e2d\u7684\u4ea4\u53c9\u71b5\u635f\u5931&#xff08;Cross-Entropy&#xff09;\u5219\u8bc4\u4f30\u6982\u7387\u5206\u5e03\u7684\u5339\u914d\u7a0b\u5ea6\u3002<\/li>\n<li>\u4f18\u5316\u7b97\u6cd5&#xff1a;\u6839\u636e\u635f\u5931\u51fd\u6570\u7684\u68af\u5ea6\u4fe1\u606f&#xff0c;\u6307\u5bfc\u53c2\u6570\u66f4\u65b0\u7684\u65b9\u5411\u548c\u6b65\u957f\u3002\u4f8b\u5982&#xff0c;\u68af\u5ea6\u4e0b\u964d\u901a\u8fc7\u53cd\u5411\u4f20\u64ad\u8ba1\u7b97\u68af\u5ea6&#xff0c;\u9010\u6b65\u903c\u8fd1\u635f\u5931\u51fd\u6570\u7684\u6781\u5c0f\u503c\u70b9\u3002<\/li>\n<\/ul>\n<p>\u53ef\u4ee5\u8bf4&#xff0c;\u635f\u5931\u51fd\u6570\u5b9a\u4e49\u4e86\u6a21\u578b\u7684\u201c\u76ee\u6807\u201d&#xff0c;\u800c\u4f18\u5316\u7b97\u6cd5\u51b3\u5b9a\u4e86\u5982\u4f55\u9ad8\u6548\u5730\u201c\u62b5\u8fbe\u76ee\u6807\u201d\u3002\u4e8c\u8005\u5171\u540c\u51b3\u5b9a\u4e86\u6a21\u578b\u7684\u6536\u655b\u901f\u5ea6\u3001\u6cdb\u5316\u80fd\u529b\u4ee5\u53ca\u6700\u7ec8\u6027\u80fd\u3002<\/p>\n<h5>2. 
\u6587\u7ae0\u76ee\u6807&#xff1a;\u7cfb\u7edf\u638c\u63e1\u5e38\u89c1\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u539f\u7406\u3001\u5b9e\u73b0\u53ca\u8c03\u53c2\u6280\u5de7<\/h5>\n<p>\u672c\u6587\u5c06\u4ece\u7406\u8bba\u63a8\u5bfc\u3001\u4ee3\u7801\u5b9e\u73b0\u548c\u5b9e\u6218\u8c03\u53c2\u4e09\u4e2a\u7ef4\u5ea6\u5c55\u5f00&#xff1a;<\/p>\n<ul>\n<li>\u7406\u8bba&#xff1a;\u89e3\u6790\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u6570\u5b66\u539f\u7406&#xff0c;\u7406\u89e3\u5176\u9002\u7528\u573a\u666f\u4e0e\u5c40\u9650\u6027\u3002<\/li>\n<li>\u5b9e\u8df5&#xff1a;\u901a\u8fc7Python\u4ee3\u7801&#xff08;NumPy\/PyTorch&#xff09;\u624b\u5199\u6838\u5fc3\u7b97\u6cd5&#xff0c;\u5e76\u7ed3\u5408\u6846\u67b6API\u6f14\u793a\u5b9e\u9645\u5e94\u7528\u3002<\/li>\n<li>\u8c03\u53c2&#xff1a;\u603b\u7ed3\u5b66\u4e60\u7387\u8bbe\u7f6e\u3001\u6279\u91cf\u5927\u5c0f\u9009\u62e9\u7b49\u5173\u952e\u6280\u5de7&#xff0c;\u5e2e\u52a9\u8bfb\u8005\u907f\u5f00\u8bad\u7ec3\u4e2d\u7684\u5e38\u89c1\u201c\u5751\u201d\u3002<\/li>\n<\/ul>\n<p>\u901a\u8fc7\u672c\u6587&#xff0c;\u8bfb\u8005\u4e0d\u4ec5\u80fd\u638c\u63e1\u7ecf\u5178\u65b9\u6cd5&#xff08;\u5982MSE\u3001SGD\u3001Adam&#xff09;&#xff0c;\u8fd8\u80fd\u4e86\u89e3\u524d\u6cbf\u6539\u8fdb&#xff08;\u5982Focal Loss\u3001\u81ea\u9002\u5e94\u4f18\u5316\u5668&#xff09;&#xff0c;\u6700\u7ec8\u5177\u5907\u6839\u636e\u4efb\u52a1\u9700\u6c42\u7075\u6d3b\u8bbe\u8ba1\u8bad\u7ec3\u7b56\u7565\u7684\u80fd\u529b\u3002<\/p>\n<hr \/>\n<h4>\u4e8c\u3001\u635f\u5931\u51fd\u6570&#xff1a;\u6a21\u578b\u8bad\u7ec3\u7684\u201c\u6307\u5357\u9488\u201d<\/h4>\n<h5>1. 
\u56de\u5f52\u4efb\u52a1\u4e2d\u7684\u635f\u5931\u51fd\u6570<\/h5>\n<p>\u56de\u5f52\u4efb\u52a1\u7684\u76ee\u6807\u662f\u9884\u6d4b\u8fde\u7eed\u503c&#xff08;\u5982\u623f\u4ef7\u3001\u6e29\u5ea6&#xff09;&#xff0c;\u5176\u635f\u5931\u51fd\u6570\u9700\u8861\u91cf\u9884\u6d4b\u503c\u4e0e\u771f\u5b9e\u503c\u7684\u8ddd\u79bb\u3002\u4ee5\u4e0b\u662f\u51e0\u7c7b\u7ecf\u5178\u635f\u5931\u51fd\u6570&#xff1a;<\/p>\n<h6>1.1 \u5747\u65b9\u8bef\u5dee&#xff08;MSE, Mean Squared Error&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\\text{MSE} &#061; \\frac{1}{n} \\sum_{i&#061;1}^n (y_i - \\hat{y}_i)^2<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord text\"><span class=\"mord\">MSE<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span 
class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1141em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span 
class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">y_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f\u771f\u5b9e\u503c&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\\hat{y}_i<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f\u9884\u6d4b\u503c&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">n<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">n<\/span><\/span><\/span><\/span><\/span> \u662f\u6837\u672c\u6570\u91cf\u3002<\/p>\n<\/li>\n<li>\n<p>\u76f4\u89c2\u89e3\u91ca&#xff1a; MSE 
\u901a\u8fc7\u5e73\u65b9\u653e\u5927\u8f83\u5927\u8bef\u5dee\u7684\u5f71\u54cd&#xff08;\u5982\u9884\u6d4b\u8bef\u5dee\u4e3a2\u65f6&#xff0c;\u635f\u5931\u4e3a4&#xff1b;\u8bef\u5dee\u4e3a3\u65f6&#xff0c;\u635f\u5931\u4e3a9&#xff09;&#xff0c;\u56e0\u6b64\u5bf9\u5f02\u5e38\u503c\u654f\u611f\u3002<\/p>\n<\/li>\n<li>\n<p>\u9002\u7528\u573a\u666f&#xff1a;<\/p>\n<ul>\n<li>\u6570\u636e\u5206\u5e03\u63a5\u8fd1\u9ad8\u65af\u5206\u5e03&#xff08;\u65e0\u660e\u663e\u5f02\u5e38\u503c&#xff09;\u65f6\u6548\u679c\u6700\u4f73\u3002<\/li>\n<li>\u5e38\u7528\u4e8e\u7ebf\u6027\u56de\u5f52\u3001\u795e\u7ecf\u7f51\u7edc\u56de\u5f52\u4efb\u52a1\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u5c40\u9650\u6027&#xff1a;<\/p>\n<ul>\n<li>\u5bf9\u5f02\u5e38\u503c\u654f\u611f&#xff0c;\u53ef\u80fd\u5bfc\u81f4\u6a21\u578b\u8fc7\u5ea6\u62df\u5408\u566a\u58f0\u3002<\/li>\n<li>\u68af\u5ea6\u968f\u8bef\u5dee\u7ebf\u6027\u589e\u957f&#xff08;\u68af\u5ea6\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">2(y_i - \\hat{y}_i)<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\">2<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff09;&#xff0c;\u53ef\u80fd\u5f15\u53d1\u8bad\u7ec3\u4e0d\u7a33\u5b9a\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> <span class=\"token keyword\">import<\/span> 
torch<span class=\"token punctuation\">.<\/span>nn <span class=\"token keyword\">as<\/span> nn<br \/>\nmse_loss <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>MSELoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\nloss <span class=\"token operator\">&#061;<\/span> mse_loss<span class=\"token punctuation\">(<\/span>predictions<span class=\"token punctuation\">,<\/span> targets<span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<h6>1.2 \u5e73\u5747\u7edd\u5bf9\u8bef\u5dee&#xff08;MAE, Mean Absolute Error&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\\text{MAE} &#061; \\frac{1}{n} \\sum_{i&#061;1}^n |y_i - \\hat{y}_i|<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord text\"><span class=\"mord\">MAE<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">\u2223<\/span><span 
class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<\/li>\n<li>\n<p>\u4e0eMSE\u7684\u5bf9\u6bd4&#xff1a;<\/p>\n<ul>\n<li>\u9c81\u68d2\u6027&#xff1a;MAE \u5bf9\u5f02\u5e38\u503c\u4e0d\u654f\u611f&#xff08;\u635f\u5931\u968f\u8bef\u5dee\u7ebf\u6027\u589e\u957f&#xff09;\u3002<\/li>\n<li>\u68af\u5ea6\u7279\u6027&#xff1a;MAE \u7684\u68af\u5ea6\u4e3a\u5e38\u6570&#xff08;\u00b11&#xff09;&#xff0c;\u8bad\u7ec3\u66f4\u7a33\u5b9a\u4f46\u6536\u655b\u901f\u5ea6\u8f83\u6162\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u9002\u7528\u573a\u666f&#xff1a;<\/p>\n<ul>\n<li>\u6570\u636e\u4e2d\u5b58\u5728\u663e\u8457\u5f02\u5e38\u503c&#xff08;\u5982\u91d1\u878d\u98ce\u63a7\u4e2d\u7684\u6781\u7aef\u503c&#xff09;\u3002<\/li>\n<li>\u9700\u8981\u7a33\u5b9a\u8bad\u7ec3\u8fc7\u7a0b\u7684\u573a\u666f\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> mae_loss <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>L1Loss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># L1\u635f\u5931\u5373MAE  <\/span><br \/>\nloss <span class=\"token operator\">&#061;<\/span> mae_loss<span class=\"token punctuation\">(<\/span>predictions<span class=\"token punctuation\">,<\/span> targets<span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<h6>1.3 Huber Loss&#xff08;\u5e73\u6ed1\u5e73\u5747\u7edd\u5bf9\u8bef\u5dee&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">L_{\\delta}(y, 
\\hat{y}) &#061; \\begin{cases} \\frac{1}{2}(y - \\hat{y})^2 &amp; \\text{\u5f53 } |y - \\hat{y}| \\leq \\delta \\\\ \\delta |y - \\hat{y}| - \\frac{1}{2}\\delta^2 &amp; \\text{\u5426\u5219} \\end{cases}<\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord 
mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3em;vertical-align: -1.25em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\"><span class=\"delimsizing size4\">{<\/span><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.69em\"><span class=\"\" style=\"top: -3.69em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span 
class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8451em\"><span class=\"\" style=\"top: -2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.394em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.345em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span 
class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8141em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8451em\"><span class=\"\" style=\"top: 
-2.655em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.394em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.345em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8141em\"><span class=\"\" style=\"top: -3.063em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.19em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"arraycolsep\" style=\"width: 1em\"><\/span><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.69em\"><span class=\"\" style=\"top: -3.69em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord cjk_fallback\">\u5f53<\/span><span class=\"mord\">\u00a0<\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2264<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><span class=\"\" style=\"top: -2.25em\"><span class=\"pstrut\" style=\"height: 3.008em\"><\/span><span class=\"mord\"><span class=\"mord text\"><span class=\"mord cjk_fallback\">\u5426\u5219<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.19em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span> <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>           \u03b4 <\/p>\n<p>          \\\\delta <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6944em\"><\/span><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0379em\">\u03b4<\/span><\/span><\/span><\/span><\/span> \u662f\u8d85\u53c2\u6570&#xff0c;\u63a7\u5236 MSE \u4e0e MAE \u7684\u5207\u6362\u9608\u503c\u3002<\/p>\n<\/li>\n<li>\n<p>\u8bbe\u8ba1\u52a8\u673a&#xff1a; \u7ed3\u5408 MSE \u7684\u5e73\u6ed1\u6027\u548c MAE \u7684\u9c81\u68d2\u6027&#xff0c;\u5728\u8bef\u5dee\u8f83\u5c0f\u65f6\u4f7f\u7528 MSE \u52a0\u901f\u6536\u655b&#xff0c;\u8bef\u5dee\u8f83\u5927\u65f6\u4f7f\u7528 MAE \u51cf\u5c11\u5f02\u5e38\u503c\u5f71\u54cd\u3002<\/p>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;\u624b\u52a8\u5b9e\u73b0&#xff09;&#xff1a;<\/p>\n<p> <span class=\"token keyword\">def<\/span> <span class=\"token function\">huber_loss<\/span><span class=\"token punctuation\">(<\/span>y_true<span class=\"token punctuation\">,<\/span> y_pred<span class=\"token punctuation\">,<\/span> delta<span class=\"token operator\">&#061;<\/span><span class=\"token number\">1.0<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    error <span class=\"token operator\">&#061;<\/span> y_true <span class=\"token operator\">-<\/span> y_pred<br \/>\n    condition <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span><span class=\"token builtin\">abs<\/span><span class=\"token punctuation\">(<\/span>error<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&lt;<\/span> delta<br \/>\n    squared_loss <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">0.5<\/span> <span class=\"token operator\">*<\/span> torch<span class=\"token punctuation\">.<\/span>square<span class=\"token punctuation\">(<\/span>error<span class=\"token punctuation\">)<\/span><br \/>\n    linear_loss <span class=\"token operator\">&#061;<\/span> delta <span class=\"token operator\">*<\/span> <span class=\"token punctuation\">(<\/span>torch<span class=\"token punctuation\">.<\/span><span class=\"token 
builtin\">abs<\/span><span class=\"token punctuation\">(<\/span>error<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">-<\/span> <span class=\"token number\">0.5<\/span> <span class=\"token operator\">*<\/span> delta<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">return<\/span> torch<span class=\"token punctuation\">.<\/span>mean<span class=\"token punctuation\">(<\/span>torch<span class=\"token punctuation\">.<\/span>where<span class=\"token punctuation\">(<\/span>condition<span class=\"token punctuation\">,<\/span> squared_loss<span class=\"token punctuation\">,<\/span> linear_loss<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<hr \/>\n<h5>2. \u5206\u7c7b\u4efb\u52a1\u4e2d\u7684\u635f\u5931\u51fd\u6570<\/h5>\n<p>\u5206\u7c7b\u4efb\u52a1\u7684\u76ee\u6807\u662f\u9884\u6d4b\u79bb\u6563\u7c7b\u522b\u6807\u7b7e&#xff0c;\u635f\u5931\u51fd\u6570\u9700\u8861\u91cf\u9884\u6d4b\u6982\u7387\u5206\u5e03\u4e0e\u771f\u5b9e\u5206\u5e03\u7684\u5dee\u5f02\u3002<\/p>\n<h6>2.1 \u4ea4\u53c9\u71b5\u635f\u5931&#xff08;Cross-Entropy Loss&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff08;\u4e8c\u5206\u7c7b&#xff09;&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            L <\/p>\n<p>            &#061; <\/p>\n<p>            \u2212 <\/p>\n<p>             1 <\/p>\n<p>             n <\/p>\n<p>             \u2211 <\/p>\n<p>              i <\/p>\n<p>              &#061; <\/p>\n<p>              1 <\/p>\n<p>             n <\/p>\n<p>             [ <\/p>\n<p>              y <\/p>\n<p>              i <\/p>\n<p>             log <\/p>\n<p>             \u2061 <\/p>\n<p>             ( <\/p>\n<p>               y <\/p>\n<p>               ^ <\/p>\n<p>              i <\/p>\n<p>             ) <\/p>\n<p>             &#043; <\/p>\n<p>             ( <\/p>\n<p>             1 <\/p>\n<p>       
      \u2212 <\/p>\n<p>              y <\/p>\n<p>              i <\/p>\n<p>             ) <\/p>\n<p>             log <\/p>\n<p>             \u2061 <\/p>\n<p>             ( <\/p>\n<p>             1 <\/p>\n<p>             \u2212 <\/p>\n<p>               y <\/p>\n<p>               ^ <\/p>\n<p>              i <\/p>\n<p>             ) <\/p>\n<p>             ] <\/p>\n<p>           L &#061; -\\\\frac{1}{n} \\\\sum_{i&#061;1}^n \\\\left[ y_i \\\\log(\\\\hat{y}_i) &#043; (1 - y_i) \\\\log(1 - \\\\hat{y}_i) \\\\right] <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\">L<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 
0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top: 0em\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span 
style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top: 0em\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            y <\/p>\n<p>            i <\/p>\n<p>           \u2208 <\/p>\n<p>           { <\/p>\n<p>           0 <\/p>\n<p>           , <\/p>\n<p>           1 <\/p>\n<p>           } <\/p>\n<p>          y_i \\\\in \\\\{0, 1\\\\} <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7335em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>&#xff0c;<span class=\"katex--inline\"><span 
class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>             y <\/p>\n<p>             ^ <\/p>\n<p>            i <\/p>\n<p>          \\\\hat{y}_i <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f\u6a21\u578b\u9884\u6d4b\u7684\u6982\u7387\u3002<\/p>\n<\/li>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff08;\u591a\u5206\u7c7b&#xff09;&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            L <\/p>\n<p>            &#061; <\/p>\n<p>            \u2212 <\/p>\n<p>             1 <\/p>\n<p>            
 n <\/p>\n<p>             \u2211 <\/p>\n<p>              i <\/p>\n<p>              &#061; <\/p>\n<p>              1 <\/p>\n<p>             n <\/p>\n<p>             \u2211 <\/p>\n<p>              c <\/p>\n<p>              &#061; <\/p>\n<p>              1 <\/p>\n<p>             C <\/p>\n<p>             y <\/p>\n<p>              i <\/p>\n<p>              , <\/p>\n<p>              c <\/p>\n<p>            log <\/p>\n<p>            \u2061 <\/p>\n<p>            ( <\/p>\n<p>              y <\/p>\n<p>              ^ <\/p>\n<p>              i <\/p>\n<p>              , <\/p>\n<p>              c <\/p>\n<p>            ) <\/p>\n<p>           L &#061; -\\\\frac{1}{n} \\\\sum_{i&#061;1}^n \\\\sum_{c&#061;1}^C y_{i,c} \\\\log(\\\\hat{y}_{i,c}) <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\">L<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 3.106em;vertical-align: -1.2777em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.8283em\"><span class=\"\" style=\"top: -1.8829em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">c<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol 
large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0715em\">C<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2671em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">c<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span 
class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">c<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>           C <\/p>\n<p>          C <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0715em\">C<\/span><\/span><\/span><\/span><\/span> \u4e3a\u7c7b\u522b\u6570&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            y <\/p>\n<p>             i <\/p>\n<p>             , <\/p>\n<p>             c <\/p>\n<p>          y_{i,c} <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7167em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">c<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f one-hot \u7f16\u7801\u7684\u771f\u5b9e\u6807\u7b7e&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>             y <\/p>\n<p>             ^ <\/p>\n<p>             i <\/p>\n<p>             , <\/p>\n<p>             c <\/p>\n<p>          \\\\hat{y}_{i,c} <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9805em;vertical-align: -0.2861em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 
2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mpunct mtight\">,<\/span><span class=\"mord mathnormal mtight\">c<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2861em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f Softmax \u8f93\u51fa\u7684\u9884\u6d4b\u6982\u7387\u3002<\/p>\n<\/li>\n<li>\n<p>\u4e0e Softmax \u7684\u7ed3\u5408&#xff1a; Softmax \u5c06\u6a21\u578b\u8f93\u51fa\u8f6c\u6362\u4e3a\u6982\u7387\u5206\u5e03&#xff0c;\u4ea4\u53c9\u71b5\u8861\u91cf\u4e24\u4e2a\u5206\u5e03\u7684\u5dee\u5f02\u3002\u4e8c\u8005\u8054\u5408\u4f7f\u7528\u53ef\u907f\u514d\u6570\u503c\u4e0d\u7a33\u5b9a\u3002<\/p>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u4e8c\u5206\u7c7b&#xff1a;bce_loss <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>BCELoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u8f93\u5165\u9700\u7ecf\u8fc7 Sigmoid  <\/span><br \/>\nloss <span class=\"token operator\">&#061;<\/span> bce_loss<span class=\"token punctuation\">(<\/span>predictions<span class=\"token punctuation\">,<\/span> targets<span class=\"token punctuation\">)<\/span>\n <\/li>\n<li>\u591a\u5206\u7c7b&#xff1a;ce_loss <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>CrossEntropyLoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u8f93\u5165\u4e3a\u539f\u59cblogits&#xff08;\u65e0\u9700Softmax&#xff09;  <\/span><br \/>\nloss <span class=\"token operator\">&#061;<\/span> ce_loss<span class=\"token punctuation\">(<\/span>logits<span class=\"token punctuation\">,<\/span> 
target_labels<span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h6>2.2 \u5408\u9875\u635f\u5931&#xff08;Hinge Loss&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            L <\/p>\n<p>            &#061; <\/p>\n<p>             1 <\/p>\n<p>             n <\/p>\n<p>             \u2211 <\/p>\n<p>              i <\/p>\n<p>              &#061; <\/p>\n<p>              1 <\/p>\n<p>             n <\/p>\n<p>            max <\/p>\n<p>            \u2061 <\/p>\n<p>            ( <\/p>\n<p>            0 <\/p>\n<p>            , <\/p>\n<p>            1 <\/p>\n<p>            \u2212 <\/p>\n<p>             y <\/p>\n<p>             i <\/p>\n<p>            \u22c5 <\/p>\n<p>              y <\/p>\n<p>              ^ <\/p>\n<p>             i <\/p>\n<p>            ) <\/p>\n<p>           L &#061; \\\\frac{1}{n} \\\\sum_{i&#061;1}^n \\\\max(0, 1 - y_i \\\\cdot \\\\hat{y}_i) <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\">L<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 
0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">max<\/span><span class=\"mopen\">(<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 
0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6389em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span 
class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            y <\/p>\n<p>            i <\/p>\n<p>           \u2208 <\/p>\n<p>           { <\/p>\n<p>           \u2212 <\/p>\n<p>           1 <\/p>\n<p>           , <\/p>\n<p>           1 <\/p>\n<p>           } <\/p>\n<p>          y_i \\\\in \\\\{-1, 1\\\\} <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7335em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">\u2208<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">{<\/span><span class=\"mord\">\u2212<\/span><span class=\"mord\">1<\/span><span 
class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\">1<\/span><span class=\"mclose\">}<\/span><\/span><\/span><\/span><\/span>&#xff0c;<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>             y <\/p>\n<p>             ^ <\/p>\n<p>            i <\/p>\n<p>          \\\\hat{y}_i <\/p>\n<p>      <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> 
\u662f\u6a21\u578b\u8f93\u51fa\u7684\u539f\u59cb\u5f97\u5206&#xff08;\u975e\u6982\u7387&#xff09;\u3002<\/p>\n<\/li>\n<li>\n<p>\u5e94\u7528\u573a\u666f&#xff1a;<\/p>\n<ul>\n<li>\u4e3b\u8981\u7528\u4e8e\u652f\u6301\u5411\u91cf\u673a&#xff08;SVM&#xff09;&#xff0c;\u5f3a\u8c03\u5206\u7c7b\u8fb9\u754c\u7684\u201c\u95f4\u9694\u201d\u6700\u5927\u5316\u3002<\/li>\n<li>\u5bf9\u9884\u6d4b\u7ed3\u679c\u7684\u7f6e\u4fe1\u5ea6\u8981\u6c42\u8f83\u9ad8&#xff08;\u5982\u4eba\u8138\u8bc6\u522b&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4e0e\u4ea4\u53c9\u71b5\u7684\u5bf9\u6bd4&#xff1a;<\/p>\n<ul>\n<li>Hinge Loss \u5173\u6ce8\u4e8e\u5206\u7c7b\u6b63\u786e\u4e14\u7f6e\u4fe1\u5ea6\u9ad8\u4e8e\u9608\u503c\u7684\u6837\u672c&#xff0c;\u5bf9\u201c\u63a5\u8fd1\u6b63\u786e\u201d\u7684\u9884\u6d4b\u66f4\u5bbd\u5bb9\u3002<\/li>\n<li>\u4ea4\u53c9\u71b5\u5bf9\u6240\u6709\u9884\u6d4b\u6982\u7387\u8fdb\u884c\u7ec6\u7c92\u5ea6\u4f18\u5316&#xff0c;\u9002\u5408\u9700\u8981\u6982\u7387\u6821\u51c6\u7684\u4efb\u52a1&#xff08;\u5982\u533b\u5b66\u8bca\u65ad&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;\u624b\u52a8\u5b9e\u73b0&#xff09;&#xff1a;<\/p>\n<p> <span class=\"token keyword\">def<\/span> <span class=\"token function\">hinge_loss<\/span><span class=\"token punctuation\">(<\/span>y_true<span class=\"token punctuation\">,<\/span> y_pred<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token comment\"># \u5047\u8bbe y_true \u4e3a \u00b11 \u7684\u6807\u7b7e  <\/span><br \/>\n    <span class=\"token keyword\">return<\/span> torch<span class=\"token punctuation\">.<\/span>mean<span class=\"token punctuation\">(<\/span>torch<span class=\"token punctuation\">.<\/span>clamp<span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> y_true <span class=\"token operator\">*<\/span> y_pred<span class=\"token punctuation\">,<\/span> <span class=\"token 
builtin\">min<\/span><span class=\"token operator\">&#061;<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<hr \/>\n<h5>3. \u5176\u4ed6\u635f\u5931\u51fd\u6570<\/h5>\n<h6>3.1 Focal Loss&#xff08;\u7126\u70b9\u635f\u5931&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u8bbe\u8ba1\u52a8\u673a&#xff1a; \u89e3\u51b3\u7c7b\u522b\u4e0d\u5e73\u8861\u95ee\u9898&#xff08;\u5982\u76ee\u6807\u68c0\u6d4b\u4e2d\u80cc\u666f\u4e0e\u524d\u666f\u7684\u6781\u7aef\u4e0d\u5e73\u8861&#xff09;&#xff0c;\u901a\u8fc7\u8c03\u8282\u56e0\u5b50\u964d\u4f4e\u6613\u5206\u7c7b\u6837\u672c\u7684\u6743\u91cd&#xff0c;\u4f7f\u6a21\u578b\u805a\u7126\u4e8e\u96be\u6837\u672c\u3002<\/p>\n<\/li>\n<li>\n<p>\u6570\u5b66\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            L <\/p>\n<p>            &#061; <\/p>\n<p>            \u2212 <\/p>\n<p>             1 <\/p>\n<p>             n <\/p>\n<p>             \u2211 <\/p>\n<p>              i <\/p>\n<p>              &#061; <\/p>\n<p>              1 <\/p>\n<p>             n <\/p>\n<p>            \u03b1 <\/p>\n<p>            ( <\/p>\n<p>            1 <\/p>\n<p>            \u2212 <\/p>\n<p>              y <\/p>\n<p>              ^ <\/p>\n<p>             i <\/p>\n<p>             ) <\/p>\n<p>             \u03b3 <\/p>\n<p>             y <\/p>\n<p>             i <\/p>\n<p>            log <\/p>\n<p>            \u2061 <\/p>\n<p>            ( <\/p>\n<p>              y <\/p>\n<p>              ^ <\/p>\n<p>             i <\/p>\n<p>            ) <\/p>\n<p>           L &#061; -\\\\frac{1}{n} \\\\sum_{i&#061;1}^n \\\\alpha (1 - \\\\hat{y}_i)^\\\\gamma y_i \\\\log(\\\\hat{y}_i) <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6833em\"><\/span><span class=\"mord mathnormal\">L<\/span><span class=\"mspace\" style=\"margin-right: 
0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.9291em;vertical-align: -1.2777em\"><\/span><span class=\"mord\">\u2212<\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.3214em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">n<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.686em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.6514em\"><span class=\"\" style=\"top: -1.8723em;margin-left: 0em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">&#061;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.05em\"><span class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"\"><span class=\"mop op-symbol large-op\">\u2211<\/span><\/span><\/span><span class=\"\" style=\"top: -4.3em;margin-left: 0em\"><span 
class=\"pstrut\" style=\"height: 3.05em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.2777em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7144em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mop\">lo<span style=\"margin-right: 0.0139em\">g<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">y<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.1944em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1944em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3117em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">i<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<ul>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>            \u03b3 <\/p>\n<p>           \\\\gamma <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span>&#xff08;\u805a\u7126\u53c2\u6570&#xff09;&#xff1a;\u589e\u5927 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            \u03b3 <\/p>\n<p>           \\\\gamma <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span> \u4f1a\u66f4\u591a\u5173\u6ce8\u96be\u6837\u672c&#xff08;\u901a\u5e38\u53d62&#xff09;\u3002<\/li>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>            \u03b1 <\/p>\n<p>           \\\\alpha <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" 
style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span>&#xff08;\u5e73\u8861\u53c2\u6570&#xff09;&#xff1a;\u7f13\u89e3\u7c7b\u522b\u4e0d\u5e73\u8861&#xff08;\u5982\u6b63\u6837\u672c\u5360\u6bd4\u5c11\u65f6&#xff0c;\u589e\u5927\u6b63\u6837\u672c\u7684 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            \u03b1 <\/p>\n<p>           \\\\alpha <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span>&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> <span class=\"token keyword\">class<\/span> <span class=\"token class-name\">FocalLoss<\/span><span class=\"token punctuation\">(<\/span>nn<span class=\"token punctuation\">.<\/span>Module<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">def<\/span> <span class=\"token function\">__init__<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> alpha<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.25<\/span><span class=\"token punctuation\">,<\/span> gamma<span class=\"token operator\">&#061;<\/span><span class=\"token number\">2<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token builtin\">super<\/span><span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>__init__<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>alpha <span class=\"token 
operator\">&#061;<\/span> alpha<br \/>\n        self<span class=\"token punctuation\">.<\/span>gamma <span class=\"token operator\">&#061;<\/span> gamma<\/p>\n<p>    <span class=\"token keyword\">def<\/span> <span class=\"token function\">forward<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> inputs<span class=\"token punctuation\">,<\/span> targets<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        bce_loss <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>functional<span class=\"token punctuation\">.<\/span>binary_cross_entropy_with_logits<span class=\"token punctuation\">(<\/span>inputs<span class=\"token punctuation\">,<\/span> targets<span class=\"token punctuation\">,<\/span> reduction<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;none&#039;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        p_t <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>exp<span class=\"token punctuation\">(<\/span><span class=\"token operator\">-<\/span>bce_loss<span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u8ba1\u7b97\u6982\u7387  <\/span><br \/>\n        focal_loss <span class=\"token operator\">&#061;<\/span> self<span class=\"token punctuation\">.<\/span>alpha <span class=\"token operator\">*<\/span> <span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> p_t<span class=\"token punctuation\">)<\/span><span class=\"token operator\">**<\/span>self<span class=\"token punctuation\">.<\/span>gamma <span class=\"token operator\">*<\/span> bce_loss<br \/>\n        <span class=\"token keyword\">return<\/span> focal_loss<span class=\"token punctuation\">.<\/span>mean<span class=\"token punctuation\">(<\/span><span class=\"token 
punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<hr \/>\n<h4>\u4e09\u3001\u4f18\u5316\u7b97\u6cd5&#xff1a;\u6a21\u578b\u53c2\u6570\u7684\u201c\u5bfc\u822a\u4eea\u201d<\/h4>\n<h5>1. \u68af\u5ea6\u4e0b\u964d&#xff08;Gradient Descent&#xff09;\u57fa\u7840<\/h5>\n<p>\u68af\u5ea6\u4e0b\u964d\u662f\u4f18\u5316\u795e\u7ecf\u7f51\u7edc\u53c2\u6570\u7684\u6838\u5fc3\u65b9\u6cd5&#xff0c;\u5176\u6838\u5fc3\u601d\u60f3\u662f\u901a\u8fc7\u8fed\u4ee3\u8c03\u6574\u53c2\u6570&#xff0c;\u4f7f\u635f\u5931\u51fd\u6570\u6700\u5c0f\u5316\u3002<\/p>\n<h6>1.1 \u6570\u5b66\u539f\u7406<\/h6>\n<ul>\n<li>\n<p>\u53c2\u6570\u66f4\u65b0\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>             \u03b8 <\/p>\n<p>              t <\/p>\n<p>              &#043; <\/p>\n<p>              1 <\/p>\n<p>            &#061; <\/p>\n<p>             \u03b8 <\/p>\n<p>             t <\/p>\n<p>            \u2212 <\/p>\n<p>            \u03b7 <\/p>\n<p>            \u22c5 <\/p>\n<p>             \u2207 <\/p>\n<p>             \u03b8 <\/p>\n<p>            J <\/p>\n<p>            ( <\/p>\n<p>             \u03b8 <\/p>\n<p>             t <\/p>\n<p>            ) <\/p>\n<p>           \\\\theta_{t&#043;1} &#061; \\\\theta_t - \\\\eta \\\\cdot \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin 
mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6389em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d&#xff1a;<\/p>\n<ul>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>             \u03b8 <\/p>\n<p>             t <\/p>\n<p>           \\\\theta_t <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: 
-2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff1a;\u7b2c <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            t <\/p>\n<p>           t <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6151em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><\/span><\/span> \u6b21\u8fed\u4ee3\u7684\u53c2\u6570\u503c\u3002<\/li>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>            \u03b7 <\/p>\n<p>           \\\\eta <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><\/span><\/span><\/span><\/span>&#xff1a;\u5b66\u4e60\u7387&#xff08;Learning Rate&#xff09;&#xff0c;\u63a7\u5236\u53c2\u6570\u66f4\u65b0\u6b65\u957f\u3002<\/li>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>             \u2207 <\/p>\n<p>             \u03b8 <\/p>\n<p>            J <\/p>\n<p>            ( <\/p>\n<p>             \u03b8 <\/p>\n<p>             t <\/p>\n<p>            ) <\/p>\n<p>           \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span>&#xff1a;\u635f\u5931\u51fd\u6570\u5bf9\u53c2\u6570\u7684\u68af\u5ea6\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u68af\u5ea6\u65b9\u5411\u7684\u610f\u4e49&#xff1a; \u68af\u5ea6\u6307\u5411\u635f\u5931\u51fd\u6570\u589e\u957f\u6700\u5feb\u7684\u65b9\u5411&#xff0c;\u53cd\u5411\u66f4\u65b0\u53c2\u6570\u4ee5\u903c\u8fd1\u6700\u5c0f\u503c\u70b9\u3002<\/p>\n<\/li>\n<\/ul>\n<h6>1.2 
\u5b66\u4e60\u7387\u7684\u4f5c\u7528\u4e0e\u9009\u62e9<\/h6>\n<ul>\n<li>\u5b66\u4e60\u7387\u7684\u5f71\u54cd&#xff1a;\n<ul>\n<li>\u8fc7\u5927&#xff1a;\u53c2\u6570\u66f4\u65b0\u6b65\u957f\u8fc7\u5927&#xff0c;\u53ef\u80fd\u5bfc\u81f4\u9707\u8361\u751a\u81f3\u53d1\u6563&#xff08;\u5982\u635f\u5931\u503c\u5ffd\u5927\u5ffd\u5c0f&#xff09;\u3002<\/li>\n<li>\u8fc7\u5c0f&#xff1a;\u6536\u655b\u901f\u5ea6\u6162&#xff0c;\u8bad\u7ec3\u65f6\u95f4\u957f\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u5b66\u4e60\u7387\u9009\u62e9\u7b56\u7565&#xff1a;\n<ul>\n<li>\u7ecf\u9a8c\u503c&#xff1a;\u5e38\u7528\u521d\u59cb\u5b66\u4e60\u7387\u4e3a <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>            0.1 <\/p>\n<p>           0.1 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0.1<\/span><\/span><\/span><\/span><\/span>\u3001<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            0.01 <\/p>\n<p>           0.01 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0.01<\/span><\/span><\/span><\/span><\/span> \u6216 <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            0.001 <\/p>\n<p>           0.001 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">0.001<\/span><\/span><\/span><\/span><\/span>\u3002<\/li>\n<li>\u5b66\u4e60\u7387\u8870\u51cf&#xff08;Learning Rate Decay&#xff09;&#xff1a; \u968f\u7740\u8bad\u7ec3\u8f6e\u6b21\u589e\u52a0\u9010\u6b65\u51cf\u5c0f\u5b66\u4e60\u7387&#xff0c;\u4f8b\u5982&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              \u03b7 <\/p>\n<p>              t <\/p>\n<p>            
 &#061; <\/p>\n<p>               \u03b7 <\/p>\n<p>               0 <\/p>\n<p>               1 <\/p>\n<p>               &#043; <\/p>\n<p>               decay_rate <\/p>\n<p>               \u22c5 <\/p>\n<p>               t <\/p>\n<p>            \\\\eta_t &#061; \\\\frac{\\\\eta_0}{1 &#043; \\\\text{decay\\\\_rate} \\\\cdot t} <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.1036em;vertical-align: -0.996em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord text\"><span 
class=\"mord\">decay_rate<\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">t<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">0<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.996em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u81ea\u9002\u5e94\u5b66\u4e60\u7387&#xff1a;\u7531\u4f18\u5316\u7b97\u6cd5\u81ea\u52a8\u8c03\u6574&#xff08;\u5982Adam&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<hr \/>\n<h5>2. 
\u68af\u5ea6\u4e0b\u964d\u7684\u4e09\u79cd\u53d8\u4f53<\/h5>\n<h6>2.1 \u6279\u91cf\u68af\u5ea6\u4e0b\u964d&#xff08;BGD, Batch Gradient Descent&#xff09;<\/h6>\n<ul>\n<li>\u539f\u7406&#xff1a;\u6bcf\u6b21\u8fed\u4ee3\u4f7f\u7528\u5168\u90e8\u8bad\u7ec3\u6570\u636e\u8ba1\u7b97\u68af\u5ea6\u3002<\/li>\n<li>\u4f18\u70b9&#xff1a;\u68af\u5ea6\u65b9\u5411\u51c6\u786e&#xff0c;\u66f4\u65b0\u7a33\u5b9a\u3002<\/li>\n<li>\u7f3a\u70b9&#xff1a;\n<ul>\n<li>\u8ba1\u7b97\u6210\u672c\u9ad8&#xff0c;\u5185\u5b58\u5360\u7528\u5927\u3002<\/li>\n<li>\u65e0\u6cd5\u5728\u7ebf\u66f4\u65b0\u6a21\u578b&#xff08;\u9700\u904d\u5386\u5168\u91cf\u6570\u636e&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u4ee3\u7801\u793a\u4f8b&#xff1a;<span class=\"token keyword\">for<\/span> epoch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span>num_epochs<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token comment\"># \u904d\u5386\u6574\u4e2a\u6570\u636e\u96c6\u8ba1\u7b97\u68af\u5ea6  <\/span><br \/>\n    gradients <span class=\"token operator\">&#061;<\/span> compute_gradient<span class=\"token punctuation\">(<\/span>entire_dataset<span class=\"token punctuation\">,<\/span> params<span class=\"token punctuation\">)<\/span><br \/>\n    params <span class=\"token operator\">&#061;<\/span> params <span class=\"token operator\">-<\/span> learning_rate <span class=\"token operator\">*<\/span> gradients\n <\/li>\n<\/ul>\n<h6>2.2 \u968f\u673a\u68af\u5ea6\u4e0b\u964d&#xff08;SGD, Stochastic Gradient 
Descent&#xff09;<\/h6>\n<ul>\n<li>\u539f\u7406&#xff1a;\u6bcf\u6b21\u8fed\u4ee3\u968f\u673a\u9009\u53d6\u4e00\u4e2a\u6837\u672c\u8ba1\u7b97\u68af\u5ea6\u3002<\/li>\n<li>\u4f18\u70b9&#xff1a;\n<ul>\n<li>\u8ba1\u7b97\u901f\u5ea6\u5feb&#xff0c;\u5185\u5b58\u5360\u7528\u4f4e\u3002<\/li>\n<li>\u9002\u5408\u5728\u7ebf\u5b66\u4e60&#xff08;\u5b9e\u65f6\u66f4\u65b0\u6a21\u578b&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u7f3a\u70b9&#xff1a;\n<ul>\n<li>\u68af\u5ea6\u4f30\u8ba1\u566a\u58f0\u5927&#xff0c;\u66f4\u65b0\u65b9\u5411\u6ce2\u52a8\u5267\u70c8\u3002<\/li>\n<li>\u6536\u655b\u8def\u5f84\u66f2\u6298&#xff0c;\u53ef\u80fd\u9700\u8981\u66f4\u591a\u8fed\u4ee3\u6b21\u6570\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u4ee3\u7801\u793a\u4f8b&#xff1a;<span class=\"token keyword\">for<\/span> epoch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span>num_epochs<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    shuffle<span class=\"token punctuation\">(<\/span>dataset<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">for<\/span> sample <span class=\"token keyword\">in<\/span> dataset<span class=\"token punctuation\">:<\/span><br \/>\n        gradients <span class=\"token operator\">&#061;<\/span> compute_gradient<span class=\"token punctuation\">(<\/span>sample<span class=\"token punctuation\">,<\/span> params<span class=\"token punctuation\">)<\/span><br \/>\n        params <span class=\"token operator\">&#061;<\/span> params <span class=\"token operator\">-<\/span> learning_rate <span class=\"token operator\">*<\/span> gradients\n <\/li>\n<\/ul>\n<h6>2.3 \u5c0f\u6279\u91cf\u68af\u5ea6\u4e0b\u964d&#xff08;Mini-batch 
GD&#xff09;<\/h6>\n<ul>\n<li>\u539f\u7406&#xff1a;\u6bcf\u6b21\u8fed\u4ee3\u4f7f\u7528<strong>\u4e00\u5c0f\u6279\u6837\u672c&#xff08;Batch&#xff09;<\/strong>\u8ba1\u7b97\u68af\u5ea6&#xff08;\u598232\u300164\u4e2a\u6837\u672c&#xff09;\u3002<\/li>\n<li>\u4f18\u70b9&#xff1a;\n<ul>\n<li>\u5e73\u8861\u8ba1\u7b97\u6548\u7387\u4e0e\u68af\u5ea6\u7a33\u5b9a\u6027\u3002<\/li>\n<li>\u9002\u5408GPU\u5e76\u884c\u8ba1\u7b97\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u6279\u91cf\u5927\u5c0f\u9009\u62e9\u6280\u5de7&#xff1a;\n<ul>\n<li>\u8f83\u5c0f\u6279\u91cf&#xff08;\u598232&#xff09;&#xff1a;\u68af\u5ea6\u566a\u58f0\u5927&#xff0c;\u53ef\u80fd\u5e26\u6765\u6b63\u5219\u5316\u6548\u679c\u3002<\/li>\n<li>\u8f83\u5927\u6279\u91cf&#xff08;\u59821024&#xff09;&#xff1a;\u5185\u5b58\u5360\u7528\u9ad8&#xff0c;\u4f46\u68af\u5ea6\u65b9\u5411\u66f4\u51c6\u786e\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u4ee3\u7801\u793a\u4f8b&#xff1a;batch_size <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">64<\/span><br \/>\n<span class=\"token keyword\">for<\/span> epoch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span>num_epochs<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    shuffle<span class=\"token punctuation\">(<\/span>dataset<span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">for<\/span> i <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">len<\/span><span class=\"token punctuation\">(<\/span>dataset<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> batch_size<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        batch <span class=\"token operator\">&#061;<\/span> dataset<span class=\"token 
[<\/span>i">
punctuation\">[<\/span>i<span class=\"token punctuation\">:<\/span>i<span class=\"token operator\">&#043;<\/span>batch_size<span class=\"token punctuation\">]<\/span><br \/>\n        gradients <span class=\"token operator\">&#061;<\/span> compute_gradient<span class=\"token punctuation\">(<\/span>batch<span class=\"token punctuation\">,<\/span> params<span class=\"token punctuation\">)<\/span><br \/>\n        params <span class=\"token operator\">&#061;<\/span> params <span class=\"token operator\">-<\/span> learning_rate <span class=\"token operator\">*<\/span> gradients\n <\/li>\n<\/ul>\n<hr \/>\n<h5>3. \u6539\u8fdb\u578b\u4f18\u5316\u7b97\u6cd5<\/h5>\n<h6>3.1 \u52a8\u91cf\u6cd5&#xff08;Momentum&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u539f\u7406&#xff1a;\u5f15\u5165\u201c\u52a8\u91cf\u201d\u6a21\u62df\u7269\u7406\u60ef\u6027&#xff0c;\u52a0\u901f\u6536\u655b\u5e76\u51cf\u5c11\u9707\u8361\u3002<\/p>\n<ul>\n<li>\u53c2\u6570\u66f4\u65b0\u516c\u5f0f&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              v <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>             \u03b3 <\/p>\n<p>              v <\/p>\n<p>               t <\/p>\n<p>               \u2212 <\/p>\n<p>               1 <\/p>\n<p>             &#043; <\/p>\n<p>             \u03b7 <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             ) <\/p>\n<p>            v_{t} &#061; \\\\gamma v_{t-1} &#043; \\\\eta \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.7917em;vertical-align: -0.2083em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 
0.0359em\">\u03b7<\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>              \u03b8 <\/p>\n<p>               t <\/p>\n<p>               &#043; <\/p>\n<p>               1 <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             \u2212 <\/p>\n<p>              v <\/p>\n<p>              t <\/p>\n<p>            \\\\theta_{t&#043;1} &#061; \\\\theta_t 
- v_t <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span 
class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            \u03b3 <\/p>\n<p>           \\\\gamma <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.625em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0556em\">\u03b3<\/span><\/span><\/span><\/span><\/span> \u662f\u52a8\u91cf\u7cfb\u6570&#xff08;\u901a\u5e38\u53d60.9&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4f5c\u7528&#xff1a;<\/p>\n<ul>\n<li>\u5728\u68af\u5ea6\u65b9\u5411\u53d8\u5316\u65f6&#xff0c;\u52a8\u91cf\u9879\u6291\u5236\u9707\u8361\u3002<\/li>\n<li>\u5728\u68af\u5ea6\u65b9\u5411\u4e00\u81f4\u65f6&#xff0c;\u52a8\u91cf\u9879\u52a0\u901f\u66f4\u65b0\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> optimizer <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>SGD<span class=\"token 
(<\/span>model">
punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.01<\/span><span class=\"token punctuation\">,<\/span> momentum<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<h6>3.2 AdaGrad &amp; RMSProp<\/h6>\n<ul>\n<li>\n<p>AdaGrad&#xff08;\u81ea\u9002\u5e94\u68af\u5ea6&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u539f\u7406&#xff1a;\u4e3a\u6bcf\u4e2a\u53c2\u6570\u81ea\u9002\u5e94\u8c03\u6574\u5b66\u4e60\u7387&#xff0c;\u7d2f\u79ef\u5386\u53f2\u68af\u5ea6\u5e73\u65b9\u548c\u3002 <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              \u03b8 <\/p>\n<p>               t <\/p>\n<p>               &#043; <\/p>\n<p>               1 <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             \u2212 <\/p>\n<p>              \u03b7 <\/p>\n<p>                 G <\/p>\n<p>                 t <\/p>\n<p>                &#043; <\/p>\n<p>                \u03f5 <\/p>\n<p>             \u22c5 <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             ) <\/p>\n<p>            \\\\theta_{t&#043;1} &#061; \\\\theta_t - \\\\frac{\\\\eta}{\\\\sqrt{G_t &#043; \\\\epsilon}} \\\\cdot \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.0376em;vertical-align: -0.93em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.2583em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8517em\"><span class=\"svg-align\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\" style=\"padding-left: 0.833em\"><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><span class=\"\" style=\"top: -2.8117em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"hide-tail\" style=\"min-width: 0.853em;height: 1.08em\"> <\/p>\n<p>                      <\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1883em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 
3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.93em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>             G <\/p>\n<p>             t <\/p>\n<p>           G_t <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> \u662f\u5386\u53f2\u68af\u5ea6\u5e73\u65b9\u7684\u7d2f\u52a0\u3002<\/li>\n<li>\u7f3a\u70b9&#xff1a;\u968f\u7740\u8bad\u7ec3\u8fdb\u884c&#xff0c;\u5206\u6bcd\u8fc7\u5927\u5bfc\u81f4\u5b66\u4e60\u7387\u8d8b\u8fd1\u4e8e\u96f6\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>RMSProp&#xff1a;<\/p>\n<ul>\n<li>\u6539\u8fdb&#xff1a;\u5f15\u5165\u6307\u6570\u8870\u51cf\u5e73\u5747&#xff0c;\u4ec5\u5173\u6ce8\u8fd1\u671f\u68af\u5ea6\u3002 <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              G <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>             \u03b2 <\/p>\n<p>              G <\/p>\n<p>               t <\/p>\n<p>               \u2212 <\/p>\n<p>               1 <\/p>\n<p>             &#043; <\/p>\n<p>             ( <\/p>\n<p>             1 <\/p>\n<p>             \u2212 <\/p>\n<p>  
           \u03b2 <\/p>\n<p>             ) <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>              ) <\/p>\n<p>              2 <\/p>\n<p>            G_t &#061; \\\\beta G_{t-1} &#043; (1-\\\\beta) \\\\nabla_\\\\theta J(\\\\theta_t)^2 <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8333em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin 
mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1141em;vertical-align: -0.25em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" 
style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>              \u03b8 <\/p>\n<p>               t <\/p>\n<p>               &#043; <\/p>\n<p>               1 <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             \u2212 <\/p>\n<p>              \u03b7 <\/p>\n<p>                 G <\/p>\n<p>                 t <\/p>\n<p>                &#043; <\/p>\n<p>                \u03f5 <\/p>\n<p>             \u22c5 <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             ) <\/p>\n<p>            \\\\theta_{t&#043;1} &#061; \\\\theta_t - \\\\frac{\\\\eta}{\\\\sqrt{G_t &#043; \\\\epsilon}} \\\\cdot \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord 
mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.0376em;vertical-align: -0.93em\"><\/span><span class=\"mord\"><span 
class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.2583em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8517em\"><span class=\"svg-align\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\" style=\"padding-left: 0.833em\"><span class=\"mord\"><span class=\"mord mathnormal\">G<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><span class=\"\" style=\"top: -2.8117em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"hide-tail\" style=\"min-width: 0.853em;height: 1.08em\"> <\/p>\n<p>                      <\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1883em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 
0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.93em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span> \u5176\u4e2d <span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            \u03b2 <\/p>\n<p>           \\\\beta <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><\/span><\/span><\/span><\/span> \u662f\u8870\u51cf\u56e0\u5b50&#xff08;\u901a\u5e38\u53d60.9&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> <span class=\"token comment\"># AdaGrad  <\/span><br \/>\noptimizer <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>Adagrad<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.01<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># RMSProp  <\/span><br \/>\noptimizer <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>RMSprop<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.01<\/span><span class=\"token 
punctuation\">,<\/span> alpha<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<h6>3.3 Adam&#xff08;Adaptive Moment Estimation&#xff09;<\/h6>\n<ul>\n<li>\n<p>\u539f\u7406&#xff1a;\u7ed3\u5408\u52a8\u91cf\u6cd5\u4e0eRMSProp&#xff0c;\u540c\u65f6\u8003\u8651\u68af\u5ea6\u7684\u4e00\u9636\u77e9&#xff08;\u5747\u503c&#xff09;\u548c\u4e8c\u9636\u77e9&#xff08;\u65b9\u5dee&#xff09;\u3002<\/p>\n<ul>\n<li>\u4e00\u9636\u77e9&#xff08;\u52a8\u91cf&#xff09;&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              m <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b2 <\/p>\n<p>              1 <\/p>\n<p>              m <\/p>\n<p>               t <\/p>\n<p>               \u2212 <\/p>\n<p>               1 <\/p>\n<p>             &#043; <\/p>\n<p>             ( <\/p>\n<p>             1 <\/p>\n<p>             \u2212 <\/p>\n<p>              \u03b2 <\/p>\n<p>              1 <\/p>\n<p>             ) <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             ) <\/p>\n<p>            m_t &#061; \\\\beta_1 m_{t-1} &#043; (1-\\\\beta_1) \\\\nabla_\\\\theta J(\\\\theta_t) <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">m<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal 
mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">m<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span 
class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span 
class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u4e8c\u9636\u77e9&#xff08;\u81ea\u9002\u5e94\u5b66\u4e60\u7387&#xff09;&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              v <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b2 <\/p>\n<p>              2 <\/p>\n<p>              v <\/p>\n<p>               t <\/p>\n<p>               \u2212 <\/p>\n<p>               1 <\/p>\n<p>             &#043; <\/p>\n<p>             ( <\/p>\n<p>             1 <\/p>\n<p>             \u2212 <\/p>\n<p>              \u03b2 <\/p>\n<p>              2 <\/p>\n<p>             ) <\/p>\n<p>              \u2207 <\/p>\n<p>              \u03b8 <\/p>\n<p>             J <\/p>\n<p>             ( <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>              ) <\/p>\n<p>              2 <\/p>\n<p>            v_t &#061; \\\\beta_2 v_{t-1} &#043; (1-\\\\beta_2) \\\\nabla_\\\\theta J(\\\\theta_t)^2 <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.5806em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin 
mtight\">\u2212<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1em;vertical-align: -0.25em\"><\/span><span class=\"mopen\">(<\/span><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 1.1141em;vertical-align: -0.25em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3361em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right: 
0.0278em\">\u03b8<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0962em\">J<\/span><span class=\"mopen\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8641em\"><span class=\"\" style=\"top: -3.113em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u504f\u5dee\u6821\u6b63&#xff08;\u5e94\u5bf9\u521d\u59cb\u96f6\u504f\u95ee\u9898&#xff09;&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>               m <\/p>\n<p>               ^ <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>               m <\/p>\n<p>               t <\/p>\n<p>               1 <\/p>\n<p>               \u2212 <\/p>\n<p>                \u03b2 <\/p>\n<p>                1 <\/p>\n<p>         
       t <\/p>\n<p>             , <\/p>\n<p>               v <\/p>\n<p>               ^ <\/p>\n<p>              t <\/p>\n<p>             &#061; <\/p>\n<p>               v <\/p>\n<p>               t <\/p>\n<p>               1 <\/p>\n<p>               \u2212 <\/p>\n<p>                \u03b2 <\/p>\n<p>                2 <\/p>\n<p>                t <\/p>\n<p>            \\\\hat{m}_t &#061; \\\\frac{m_t}{1 - \\\\beta_1^t}, \\\\quad \\\\hat{v}_t &#061; \\\\frac{v_t}{1 - \\\\beta_2^t} <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.25em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.0599em;vertical-align: -0.9523em\"><\/span><span 
class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7754em\"><span class=\"\" style=\"top: -2.4337em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><span class=\"\" style=\"top: -3.0448em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2663em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\">m<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" 
style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9523em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right: 1em\"><\/span><span class=\"mspace\" style=\"margin-right: 0.1667em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.2222em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 
0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.0599em;vertical-align: -0.9523em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 1.1076em\"><span class=\"\" style=\"top: -2.314em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.7754em\"><span class=\"\" style=\"top: -2.4337em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><span class=\"\" style=\"top: -3.0448em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2663em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.9523em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<li>\u53c2\u6570\u66f4\u65b0&#xff1a; <span class=\"katex--display\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>              \u03b8 <\/p>\n<p>               t <\/p>\n<p>               &#043; <\/p>\n<p>               1 <\/p>\n<p>             &#061; <\/p>\n<p>              \u03b8 <\/p>\n<p>              t <\/p>\n<p>             \u2212 <\/p>\n<p>               \u03b7 <\/p>\n<p>               \u22c5 <\/p>\n<p>                 m <\/p>\n<p>                 ^ <\/p>\n<p>                t <\/p>\n<p>                  v <\/p>\n<p>                  ^ <\/p>\n<p>                 t <\/p>\n<p>               &#043; <\/p>\n<p>               \u03f5 <\/p>\n<p>            \\\\theta_{t&#043;1} &#061; \\\\theta_t - \\\\frac{\\\\eta \\\\cdot \\\\hat{m}_t}{\\\\sqrt{\\\\hat{v}_t} &#043; \\\\epsilon} <\/p>\n<p>        <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.9028em;vertical-align: -0.2083em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">&#043;<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2083em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><span class=\"mrel\">&#061;<\/span><span class=\"mspace\" style=\"margin-right: 0.2778em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.8444em;vertical-align: -0.15em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0278em\">\u03b8<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0278em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 2.3014em;vertical-align: -0.93em\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 
1.3714em\"><span class=\"\" style=\"top: -2.2528em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.8572em\"><span class=\"svg-align\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\" style=\"padding-left: 0.833em\"><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">v<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.2222em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0359em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"\" style=\"top: -2.8172em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"hide-tail\" style=\"min-width: 0.853em;height: 1.08em\"> <\/p>\n<p>                      <\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.1828em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right: 
0.2222em\"><\/span><span class=\"mbin\">&#043;<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><span class=\"\" style=\"top: -3.23em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"frac-line\" style=\"border-bottom-width: 0.04em\"><\/span><\/span><span class=\"\" style=\"top: -3.677em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0359em\">\u03b7<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u22c5<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.6944em\"><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"mord mathnormal\">m<\/span><\/span><span class=\"\" style=\"top: -3em\"><span class=\"pstrut\" style=\"height: 3em\"><\/span><span class=\"accent-body\" style=\"left: -0.25em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.2806em\"><span class=\"\" style=\"top: -2.55em;margin-left: 0em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.93em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><span class=\"mclose 
nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u8d85\u53c2\u6570\u8c03\u4f18&#xff1a;<\/p>\n<ul>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>             \u03b2 <\/p>\n<p>             1 <\/p>\n<p>           \\\\beta_1 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff1a;\u901a\u5e38\u53d60.9&#xff0c;\u63a7\u5236\u52a8\u91cf\u8870\u51cf\u3002<\/li>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>             \u03b2 <\/p>\n<p>             2 <\/p>\n<p>           \\\\beta_2 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.8889em;vertical-align: -0.1944em\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right: 0.0528em\">\u03b2<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.3011em\"><span class=\"\" style=\"top: -2.55em;margin-left: -0.0528em;margin-right: 0.05em\"><span class=\"pstrut\" style=\"height: 2.7em\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height: 0.15em\"><span class=\"\"><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xff1a;\u901a\u5e38\u53d60.999&#xff0c;\u63a7\u5236\u4e8c\u9636\u77e9\u8870\u51cf\u3002<\/li>\n<li><span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\">\n<p>            \u03f5 <\/p>\n<p>           \\\\epsilon <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.4306em\"><\/span><span class=\"mord mathnormal\">\u03f5<\/span><\/span><\/span><\/span><\/span>&#xff1a;\u9632\u6b62\u9664\u96f6&#xff08;\u5982<span class=\"katex--inline\"><span class=\"katex\"><span class=\"katex-mathml\"> <\/p>\n<p>            1 <\/p>\n<p>            e <\/p>\n<p>            \u2212 <\/p>\n<p>            8 <\/p>\n<p>           1e-8 <\/p>\n<p>       <\/span><span class=\"katex-html\"><span class=\"base\"><span class=\"strut\" style=\"height: 0.7278em;vertical-align: -0.0833em\"><\/span><span class=\"mord\">1<\/span><span class=\"mord mathnormal\">e<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right: 0.2222em\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height: 0.6444em\"><\/span><span class=\"mord\">8<\/span><\/span><\/span><\/span><\/span>&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4ee3\u7801\u5b9e\u73b0&#xff08;PyTorch&#xff09;&#xff1a;<\/p>\n<p> optimizer <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>Adam<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token 
punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.001<\/span><span class=\"token punctuation\">,<\/span> betas<span class=\"token operator\">&#061;<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">0.999<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<hr \/>\n<h5>4. \u8c03\u53c2\u6280\u5de7<\/h5>\n<h6>4.1 \u5b66\u4e60\u7387\u8bbe\u7f6e<\/h6>\n<ul>\n<li>\u5b66\u4e60\u7387\u9884\u70ed&#xff08;Warmup&#xff09;&#xff1a; \u8bad\u7ec3\u521d\u671f\u9010\u6b65\u589e\u5927\u5b66\u4e60\u7387&#xff08;\u5982\u4ece0\u7ebf\u6027\u589e\u957f\u5230\u521d\u59cb\u503c&#xff09;&#xff0c;\u907f\u514d\u53c2\u6570\u66f4\u65b0\u5267\u70c8\u9707\u8361\u3002<\/li>\n<li>\u5b66\u4e60\u7387\u8870\u51cf\u7b56\u7565&#xff1a;\n<ul>\n<li>\u4f59\u5f26\u9000\u706b&#xff08;Cosine Annealing&#xff09;&#xff1a;\u5468\u671f\u6027\u8c03\u6574\u5b66\u4e60\u7387\u3002<\/li>\n<li>\u6309\u9700\u8870\u51cf&#xff08;ReduceLROnPlateau&#xff09;&#xff1a;\u5f53\u9a8c\u8bc1\u635f\u5931\u505c\u6ede\u65f6\u81ea\u52a8\u964d\u4f4e\u5b66\u4e60\u7387\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h6>4.2 \u6279\u91cf\u5927\u5c0f\u7684\u6743\u8861<\/h6>\n<ul>\n<li>\u5c0f\u6279\u91cf&#xff1a;\u66f4\u9002\u5408\u975e\u51f8\u4f18\u5316&#xff0c;\u53ef\u80fd\u627e\u5230\u66f4\u4f18\u7684\u5c40\u90e8\u6781\u5c0f\u503c\u3002<\/li>\n<li>\u5927\u6279\u91cf&#xff1a;\u9700\u589e\u5927\u5b66\u4e60\u7387&#xff0c;\u4f46\u53ef\u80fd\u964d\u4f4e\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002<\/li>\n<\/ul>\n<h6>4.3 \u65e9\u505c\u6cd5&#xff08;Early 
Stopping&#xff09;\u4e0e\u68af\u5ea6\u88c1\u526a<\/h6>\n<ul>\n<li>\u65e9\u505c\u6cd5&#xff1a;\u76d1\u63a7\u9a8c\u8bc1\u96c6\u635f\u5931&#xff0c;\u5f53\u8fde\u7eed\u591a\u8f6e\u4e0d\u4e0b\u964d\u65f6\u7ec8\u6b62\u8bad\u7ec3&#xff0c;\u9632\u6b62\u8fc7\u62df\u5408\u3002<\/li>\n<li>\u68af\u5ea6\u88c1\u526a&#xff1a;\u9650\u5236\u68af\u5ea6\u6700\u5927\u503c&#xff08;\u5982torch.nn.utils.clip_grad_norm_&#xff09;&#xff0c;\u9632\u6b62\u68af\u5ea6\u7206\u70b8\u3002<\/li>\n<\/ul>\n<hr \/>\n<h4>\u56db\u3001\u5b9e\u6218&#xff1a;\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u8c03\u53c2\u6280\u5de7<\/h4>\n<h5>1. \u635f\u5931\u51fd\u6570\u7684\u9009\u62e9\u539f\u5219<\/h5>\n<h6>1.1 \u6839\u636e\u4efb\u52a1\u7c7b\u578b\u5339\u914d\u635f\u5931\u51fd\u6570<\/h6>\n<ul>\n<li>\n<p>\u56de\u5f52\u4efb\u52a1&#xff1a;<\/p>\n<ul>\n<li>\u6570\u636e\u5206\u5e03\u63a5\u8fd1\u6b63\u6001\u5206\u5e03\u4e14\u65e0\u663e\u8457\u5f02\u5e38\u503c \u2192 MSE\u3002<\/li>\n<li>\u6570\u636e\u5b58\u5728\u5f02\u5e38\u503c \u2192 MAE \u6216 Huber Loss\u3002<\/li>\n<li>\u9700\u8981\u5e73\u8861\u9c81\u68d2\u6027\u4e0e\u6536\u655b\u901f\u5ea6 \u2192 Huber Loss&#xff08;\u8c03\u6574 \u03b4 \u53c2\u6570&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u5206\u7c7b\u4efb\u52a1&#xff1a;<\/p>\n<ul>\n<li>\u4e8c\u5206\u7c7b\u6216\u591a\u5206\u7c7b \u2192 \u4ea4\u53c9\u71b5\u635f\u5931&#xff08;\u642d\u914d Softmax\/Sigmoid&#xff09;\u3002<\/li>\n<li>\u7c7b\u522b\u4e25\u91cd\u4e0d\u5e73\u8861&#xff08;\u5982\u76ee\u6807\u68c0\u6d4b&#xff09; \u2192 Focal Loss&#xff08;\u8c03\u8282 \u03b3 \u548c \u03b1&#xff09;\u3002<\/li>\n<li>\u5f3a\u8c03\u5206\u7c7b\u8fb9\u754c\u95f4\u9694 \u2192 Hinge Loss&#xff08;\u5982 SVM&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u7279\u6b8a\u4efb\u52a1&#xff1a;<\/p>\n<ul>\n<li>\u751f\u6210\u5bf9\u6297\u7f51\u7edc&#xff08;GAN&#xff09; \u2192 Wasserstein Loss&#xff08;\u7f13\u89e3\u6a21\u5f0f\u5d29\u6e83&#xff09;\u3002<\/li>\n<li>\u5f3a\u5316\u5b66\u4e60 \u2192 TD 
Error&#xff08;\u65f6\u5e8f\u5dee\u5206\u8bef\u5dee&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h6>1.2 \u5904\u7406\u566a\u58f0\u4e0e\u4e0d\u5e73\u8861\u6570\u636e\u7684\u7b56\u7565<\/h6>\n<ul>\n<li>\u5f02\u5e38\u503c\u5904\u7406&#xff1a;\n<ul>\n<li>\u4f7f\u7528\u9c81\u68d2\u635f\u5931\u51fd\u6570&#xff08;\u5982 MAE\u3001Huber Loss&#xff09;\u3002<\/li>\n<li>\u5bf9\u6570\u636e\u9884\u5904\u7406&#xff08;\u5982 Winsorizing \u7f29\u5c3e\u5904\u7406&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u7c7b\u522b\u4e0d\u5e73\u8861&#xff1a;\n<ul>\n<li>\u635f\u5931\u51fd\u6570\u5c42\u9762&#xff1a;Focal Loss\u3001\u52a0\u6743\u4ea4\u53c9\u71b5&#xff08;nn.CrossEntropyLoss(weight&#061;class_weights)&#xff09;\u3002<\/li>\n<li>\u6570\u636e\u5c42\u9762&#xff1a;\u8fc7\u91c7\u6837\u5c11\u6570\u7c7b&#xff08;\u5982 SMOTE&#xff09;\u3001\u6b20\u91c7\u6837\u591a\u6570\u7c7b\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<hr \/>\n<h5>2. \u4f18\u5316\u7b97\u6cd5\u7684\u8c03\u53c2\u7ecf\u9a8c<\/h5>\n<h6>2.1 \u5b66\u4e60\u7387\u8bbe\u7f6e\u6280\u5de7<\/h6>\n<ul>\n<li>\n<p>\u5b66\u4e60\u7387\u9884\u70ed&#xff08;Warmup&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u4f5c\u7528&#xff1a;\u907f\u514d\u8bad\u7ec3\u521d\u671f\u53c2\u6570\u66f4\u65b0\u8fc7\u5927\u5bfc\u81f4\u9707\u8361\u3002<\/li>\n<li>\u5b9e\u73b0&#xff1a;\u5728\u524d (k) \u6b65&#xff08;\u5982 1000 \u6b65&#xff09;\u7ebf\u6027\u589e\u52a0\u5b66\u4e60\u7387\u81f3\u521d\u59cb\u503c\u3002<\/li>\n<\/ul>\n<p> <span class=\"token keyword\">def<\/span> <span class=\"token function\">warmup_lr<\/span><span class=\"token punctuation\">(<\/span>step<span class=\"token punctuation\">,<\/span> warmup_steps<span class=\"token punctuation\">,<\/span> initial_lr<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">return<\/span> initial_lr <span class=\"token operator\">*<\/span> <span class=\"token builtin\">min<\/span><span class=\"token punctuation\">(<\/span>step <span class=\"token operator\">\/<\/span> 
warmup_steps<span class=\"token punctuation\">,<\/span> <span class=\"token number\">1.0<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<li>\n<p>\u5b66\u4e60\u7387\u8870\u51cf\u7b56\u7565&#xff1a;<\/p>\n<ul>\n<li>\u4f59\u5f26\u9000\u706b&#xff08;Cosine Annealing&#xff09;&#xff1a;\u5468\u671f\u6027\u91cd\u7f6e\u5b66\u4e60\u7387&#xff0c;\u8df3\u51fa\u5c40\u90e8\u6781\u5c0f\u3002scheduler <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>lr_scheduler<span class=\"token punctuation\">.<\/span>CosineAnnealingLR<span class=\"token punctuation\">(<\/span>optimizer<span class=\"token punctuation\">,<\/span> T_max<span class=\"token operator\">&#061;<\/span><span class=\"token number\">50<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<li>\u6309\u9700\u8870\u51cf&#xff08;ReduceLROnPlateau&#xff09;&#xff1a;\u5f53\u9a8c\u8bc1\u635f\u5931\u505c\u6ede\u65f6\u81ea\u52a8\u964d\u4f4e\u5b66\u4e60\u7387\u3002scheduler <span class=\"token operator\">&#061;<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>lr_scheduler<span class=\"token punctuation\">.<\/span>ReduceLROnPlateau<span class=\"token punctuation\">(<\/span>optimizer<span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#039;min&#039;<\/span><span class=\"token punctuation\">,<\/span> patience<span class=\"token operator\">&#061;<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h6>2.2 
\u6279\u91cf\u5927\u5c0f\u7684\u9009\u62e9\u4e0e\u5f71\u54cd<\/h6>\n<ul>\n<li>\u7ecf\u9a8c\u6cd5\u5219&#xff1a;\n<ul>\n<li>\u5c0f\u6279\u91cf&#xff08;32~256&#xff09;&#xff1a;\u9002\u5408\u5927\u591a\u6570\u4efb\u52a1&#xff0c;\u5e73\u8861\u5185\u5b58\u4e0e\u6536\u655b\u901f\u5ea6\u3002<\/li>\n<li>\u5927\u6279\u91cf&#xff08;&gt;1024&#xff09;&#xff1a;\u9700\u589e\u5927\u5b66\u4e60\u7387&#xff08;\u5982\u7ebf\u6027\u7f29\u653e\u89c4\u5219&#xff1a;lr &#061; base_lr * batch_size \/ 256&#xff09;\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u5185\u5b58\u4e0d\u8db3\u65f6\u7684\u89e3\u51b3\u65b9\u6848&#xff1a;\n<ul>\n<li>\u4f7f\u7528\u68af\u5ea6\u7d2f\u79ef&#xff08;Gradient Accumulation&#xff09;&#xff1a;<span class=\"token keyword\">for<\/span> i<span class=\"token punctuation\">,<\/span> batch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">enumerate<\/span><span class=\"token punctuation\">(<\/span>dataloader<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    loss <span class=\"token operator\">&#061;<\/span> model<span class=\"token punctuation\">(<\/span>batch<span class=\"token punctuation\">)<\/span><br \/>\n    loss<span class=\"token punctuation\">.<\/span>backward<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">if<\/span> <span class=\"token punctuation\">(<\/span>i<span class=\"token operator\">&#043;<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token operator\">%<\/span> accumulation_steps <span class=\"token operator\">&#061;&#061;<\/span> <span class=\"token number\">0<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        optimizer<span class=\"token punctuation\">.<\/span>step<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        optimizer<span class=\"token punctuation\">.<\/span>zero_grad<span class=\"token 
punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h6>2.3 \u65e9\u505c\u6cd5\u4e0e\u68af\u5ea6\u88c1\u526a<\/h6>\n<ul>\n<li>\n<p>\u65e9\u505c\u6cd5&#xff08;Early Stopping&#xff09;&#xff1a;<\/p>\n<p> best_loss <span class=\"token operator\">&#061;<\/span> <span class=\"token builtin\">float<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;inf&#039;<\/span><span class=\"token punctuation\">)<\/span><br \/>\npatience <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">5<\/span><br \/>\ncounter <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">0<\/span>  <\/p>\n<p><span class=\"token keyword\">for<\/span> epoch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">100<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    train_model<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    val_loss <span class=\"token operator\">&#061;<\/span> evaluate<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">if<\/span> val_loss <span class=\"token operator\">&lt;<\/span> best_loss<span class=\"token punctuation\">:<\/span><br \/>\n        best_loss <span class=\"token operator\">&#061;<\/span> val_loss<br \/>\n        counter <span class=\"token operator\">&#061;<\/span> <span class=\"token number\">0<\/span><br \/>\n        torch<span class=\"token punctuation\">.<\/span>save<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>state_dict<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#039;best_model.pth&#039;<\/span><span 
class=\"token punctuation\">)<\/span><br \/>\n    <span class=\"token keyword\">else<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        counter <span class=\"token operator\">&#043;&#061;<\/span> <span class=\"token number\">1<\/span><br \/>\n        <span class=\"token keyword\">if<\/span> counter <span class=\"token operator\">&gt;&#061;<\/span> patience<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">break<\/span>\n <\/li>\n<li>\n<p>\u68af\u5ea6\u88c1\u526a&#xff08;Gradient Clipping&#xff09;&#xff1a;<\/p>\n<ul>\n<li>\u9632\u6b62\u68af\u5ea6\u7206\u70b8&#xff08;\u5e38\u89c1\u4e8eRNN&#xff09;\u3002<\/li>\n<\/ul>\n<p> torch<span class=\"token punctuation\">.<\/span>nn<span class=\"token punctuation\">.<\/span>utils<span class=\"token punctuation\">.<\/span>clip_grad_norm_<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> max_norm<span class=\"token operator\">&#061;<\/span><span class=\"token number\">1.0<\/span><span class=\"token punctuation\">)<\/span>\n <\/li>\n<\/ul>\n<hr \/>\n<h5>3. 
\u4ee3\u7801\u793a\u4f8b&#xff1a;\u4ece\u96f6\u5b9e\u73b0\u4f18\u5316\u7b97\u6cd5<\/h5>\n<h6>3.1 \u624b\u5199SGD\u4f18\u5316\u5668<\/h6>\n<p><span class=\"token keyword\">class<\/span> <span class=\"token class-name\">SGD<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">def<\/span> <span class=\"token function\">__init__<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> params<span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.01<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>params <span class=\"token operator\">&#061;<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span>params<span class=\"token punctuation\">)<\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>lr <span class=\"token operator\">&#061;<\/span> lr<\/p>\n<p>    <span class=\"token keyword\">def<\/span> <span class=\"token function\">step<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> param <span class=\"token keyword\">in<\/span> self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">:<\/span><br \/>\n            param<span class=\"token punctuation\">.<\/span>data <span class=\"token operator\">-&#061;<\/span> self<span class=\"token punctuation\">.<\/span>lr <span class=\"token operator\">*<\/span> param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>data<\/p>\n<p>    <span class=\"token keyword\">def<\/span> <span class=\"token function\">zero_grad<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span 
class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> param <span class=\"token keyword\">in<\/span> self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">if<\/span> param<span class=\"token punctuation\">.<\/span>grad <span class=\"token keyword\">is<\/span> <span class=\"token keyword\">not<\/span> <span class=\"token boolean\">None<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>detach_<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>zero_<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u4f7f\u7528\u793a\u4f8b  <\/span><br \/>\nmodel <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">.<\/span><span class=\"token punctuation\">.<\/span><span class=\"token punctuation\">.<\/span>  <span class=\"token comment\"># \u5b9a\u4e49\u6a21\u578b  <\/span><br \/>\noptimizer <span class=\"token operator\">&#061;<\/span> SGD<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.01<\/span><span class=\"token punctuation\">)<\/span><br \/>\nloss_fn <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">.<\/span><span class=\"token punctuation\">.<\/span><span class=\"token punctuation\">.<\/span><br \/>\n<span class=\"token keyword\">for<\/span> x<span class=\"token punctuation\">,<\/span> y 
<span class=\"token keyword\">in<\/span> dataloader<span class=\"token punctuation\">:<\/span><br \/>\n    optimizer<span class=\"token punctuation\">.<\/span>zero_grad<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    loss <span class=\"token operator\">&#061;<\/span> loss_fn<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">(<\/span>x<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> y<span class=\"token punctuation\">)<\/span><br \/>\n    loss<span class=\"token punctuation\">.<\/span>backward<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    optimizer<span class=\"token punctuation\">.<\/span>step<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<h6>3.2 \u624b\u5199Adam\u4f18\u5316\u5668<\/h6>\n<p><span class=\"token keyword\">class<\/span> <span class=\"token class-name\">Adam<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">def<\/span> <span class=\"token function\">__init__<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">,<\/span> params<span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.001<\/span><span class=\"token punctuation\">,<\/span> betas<span class=\"token operator\">&#061;<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">0.9<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">0.999<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> eps<span class=\"token operator\">&#061;<\/span><span class=\"token number\">1e<\/span><span class=\"token operator\">-<\/span><span class=\"token number\">8<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n 
       self<span class=\"token punctuation\">.<\/span>params <span class=\"token operator\">&#061;<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span>params<span class=\"token punctuation\">)<\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>lr <span class=\"token operator\">&#061;<\/span> lr<br \/>\n        self<span class=\"token punctuation\">.<\/span>beta1<span class=\"token punctuation\">,<\/span> self<span class=\"token punctuation\">.<\/span>beta2 <span class=\"token operator\">&#061;<\/span> betas<br \/>\n        self<span class=\"token punctuation\">.<\/span>eps <span class=\"token operator\">&#061;<\/span> eps<br \/>\n        self<span class=\"token punctuation\">.<\/span>m <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span>torch<span class=\"token punctuation\">.<\/span>zeros_like<span class=\"token punctuation\">(<\/span>p<span class=\"token punctuation\">.<\/span>data<span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">for<\/span> p <span class=\"token keyword\">in<\/span> self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">]<\/span>  <span class=\"token comment\"># \u4e00\u9636\u77e9  <\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>v <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span>torch<span class=\"token punctuation\">.<\/span>zeros_like<span class=\"token punctuation\">(<\/span>p<span class=\"token punctuation\">.<\/span>data<span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">for<\/span> p <span class=\"token keyword\">in<\/span> self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">]<\/span>  <span class=\"token comment\"># \u4e8c\u9636\u77e9  <\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>t <span class=\"token operator\">&#061;<\/span> <span 
class=\"token number\">0<\/span><\/p>\n<p>    <span class=\"token keyword\">def<\/span> <span class=\"token function\">step<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        self<span class=\"token punctuation\">.<\/span>t <span class=\"token operator\">&#043;&#061;<\/span> <span class=\"token number\">1<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> i<span class=\"token punctuation\">,<\/span> param <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">enumerate<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n            self<span class=\"token punctuation\">.<\/span>m<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#061;<\/span> self<span class=\"token punctuation\">.<\/span>beta1 <span class=\"token operator\">*<\/span> self<span class=\"token punctuation\">.<\/span>m<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> self<span class=\"token punctuation\">.<\/span>beta1<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">*<\/span> param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>data<br \/>\n            self<span class=\"token punctuation\">.<\/span>v<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#061;<\/span> self<span class=\"token punctuation\">.<\/span>beta2 <span class=\"token operator\">*<\/span> self<span class=\"token punctuation\">.<\/span>v<span 
class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">&#043;<\/span> <span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> self<span class=\"token punctuation\">.<\/span>beta2<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">*<\/span> param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>data <span class=\"token operator\">**<\/span> <span class=\"token number\">2<\/span><br \/>\n            <span class=\"token comment\"># \u504f\u5dee\u6821\u6b63  <\/span><br \/>\n            m_hat <span class=\"token operator\">&#061;<\/span> self<span class=\"token punctuation\">.<\/span>m<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">\/<\/span> <span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> self<span class=\"token punctuation\">.<\/span>beta1 <span class=\"token operator\">**<\/span> self<span class=\"token punctuation\">.<\/span>t<span class=\"token punctuation\">)<\/span><br \/>\n            v_hat <span class=\"token operator\">&#061;<\/span> self<span class=\"token punctuation\">.<\/span>v<span class=\"token punctuation\">[<\/span>i<span class=\"token punctuation\">]<\/span> <span class=\"token operator\">\/<\/span> <span class=\"token punctuation\">(<\/span><span class=\"token number\">1<\/span> <span class=\"token operator\">-<\/span> self<span class=\"token punctuation\">.<\/span>beta2 <span class=\"token operator\">**<\/span> self<span class=\"token punctuation\">.<\/span>t<span class=\"token punctuation\">)<\/span><br \/>\n            <span class=\"token comment\"># \u66f4\u65b0\u53c2\u6570  <\/span><br \/>\n            param<span class=\"token punctuation\">.<\/span>data <span class=\"token 
operator\">-&#061;<\/span> self<span class=\"token punctuation\">.<\/span>lr <span class=\"token operator\">*<\/span> m_hat <span class=\"token operator\">\/<\/span> <span class=\"token punctuation\">(<\/span>torch<span class=\"token punctuation\">.<\/span>sqrt<span class=\"token punctuation\">(<\/span>v_hat<span class=\"token punctuation\">)<\/span> <span class=\"token operator\">&#043;<\/span> self<span class=\"token punctuation\">.<\/span>eps<span class=\"token punctuation\">)<\/span><\/p>\n<p>    <span class=\"token keyword\">def<\/span> <span class=\"token function\">zero_grad<\/span><span class=\"token punctuation\">(<\/span>self<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n        <span class=\"token keyword\">for<\/span> param <span class=\"token keyword\">in<\/span> self<span class=\"token punctuation\">.<\/span>params<span class=\"token punctuation\">:<\/span><br \/>\n            <span class=\"token keyword\">if<\/span> param<span class=\"token punctuation\">.<\/span>grad <span class=\"token keyword\">is<\/span> <span class=\"token keyword\">not<\/span> <span class=\"token boolean\">None<\/span><span class=\"token punctuation\">:<\/span><br \/>\n                param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>detach_<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n                param<span class=\"token punctuation\">.<\/span>grad<span class=\"token punctuation\">.<\/span>zero_<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u4f7f\u7528\u793a\u4f8b  <\/span><br \/>\noptimizer <span class=\"token operator\">&#061;<\/span> Adam<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token 
punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.001<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<h6>3.3 PyTorch\u6846\u67b6\u5b9e\u6218<\/h6>\n<p><span class=\"token keyword\">import<\/span> torch<br \/>\n<span class=\"token keyword\">from<\/span> torch <span class=\"token keyword\">import<\/span> nn<span class=\"token punctuation\">,<\/span> optim  <\/p>\n<p><span class=\"token comment\"># \u5b9a\u4e49\u6a21\u578b  <\/span><br \/>\nmodel <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>Sequential<span class=\"token punctuation\">(<\/span><br \/>\n    nn<span class=\"token punctuation\">.<\/span>Linear<span class=\"token punctuation\">(<\/span><span class=\"token number\">784<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">256<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span><br \/>\n    nn<span class=\"token punctuation\">.<\/span>ReLU<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span><br \/>\n    nn<span class=\"token punctuation\">.<\/span>Linear<span class=\"token punctuation\">(<\/span><span class=\"token number\">256<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">10<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u9009\u62e9\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u5668  <\/span><br \/>\nloss_fn <span class=\"token operator\">&#061;<\/span> nn<span class=\"token punctuation\">.<\/span>CrossEntropyLoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\noptimizer <span class=\"token operator\">&#061;<\/span> optim<span class=\"token punctuation\">.<\/span>Adam<span class=\"token punctuation\">(<\/span>model<span class=\"token 
punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">&#061;<\/span><span class=\"token number\">1e<\/span><span class=\"token operator\">-<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\">,<\/span> weight_decay<span class=\"token operator\">&#061;<\/span><span class=\"token number\">1e<\/span><span class=\"token operator\">-<\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u5e26L2\u6b63\u5219\u5316  <\/span><\/p>\n<p><span class=\"token comment\"># \u5b66\u4e60\u7387\u8c03\u5ea6\u5668  <\/span><br \/>\nscheduler <span class=\"token operator\">&#061;<\/span> optim<span class=\"token punctuation\">.<\/span>lr_scheduler<span class=\"token punctuation\">.<\/span>StepLR<span class=\"token punctuation\">(<\/span>optimizer<span class=\"token punctuation\">,<\/span> step_size<span class=\"token operator\">&#061;<\/span><span class=\"token number\">30<\/span><span class=\"token punctuation\">,<\/span> gamma<span class=\"token operator\">&#061;<\/span><span class=\"token number\">0.1<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p><span class=\"token comment\"># \u8bad\u7ec3\u5faa\u73af  <\/span><br \/>\n<span class=\"token keyword\">for<\/span> epoch <span class=\"token keyword\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span><span class=\"token number\">100<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">for<\/span> x<span class=\"token punctuation\">,<\/span> y <span class=\"token keyword\">in<\/span> dataloader<span class=\"token punctuation\">:<\/span><br \/>\n        optimizer<span class=\"token punctuation\">.<\/span>zero_grad<span class=\"token 
punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        logits <span class=\"token operator\">&#061;<\/span> model<span class=\"token punctuation\">(<\/span>x<span class=\"token punctuation\">)<\/span><br \/>\n        loss <span class=\"token operator\">&#061;<\/span> loss_fn<span class=\"token punctuation\">(<\/span>logits<span class=\"token punctuation\">,<\/span> y<span class=\"token punctuation\">)<\/span><br \/>\n        loss<span class=\"token punctuation\">.<\/span>backward<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n        torch<span class=\"token punctuation\">.<\/span>nn<span class=\"token punctuation\">.<\/span>utils<span class=\"token punctuation\">.<\/span>clip_grad_norm_<span class=\"token punctuation\">(<\/span>model<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">1.0<\/span><span class=\"token punctuation\">)<\/span>  <span class=\"token comment\"># \u68af\u5ea6\u88c1\u526a  <\/span><br \/>\n        optimizer<span class=\"token punctuation\">.<\/span>step<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n    scheduler<span class=\"token punctuation\">.<\/span>step<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<hr \/>\n<h5>4. 
\u8c03\u53c2\u6d41\u7a0b\u603b\u7ed3<\/h5>\n<ol>\n<li>\u4efb\u52a1\u5206\u6790&#xff1a;\u786e\u5b9a\u4efb\u52a1\u7c7b\u578b&#xff08;\u56de\u5f52\/\u5206\u7c7b&#xff09;\u4e0e\u6570\u636e\u7279\u70b9&#xff08;\u662f\u5426\u542b\u5f02\u5e38\u503c\u3001\u7c7b\u522b\u5206\u5e03&#xff09;\u3002<\/li>\n<li>\u635f\u5931\u51fd\u6570\u9009\u62e9&#xff1a;\u6839\u636e\u4efb\u52a1\u7279\u70b9\u9009\u62e9\u57fa\u7840\u635f\u5931\u51fd\u6570&#xff0c;\u5fc5\u8981\u65f6\u6dfb\u52a0\u6743\u91cd\u6216\u6539\u8fdb&#xff08;\u5982Focal Loss&#xff09;\u3002<\/li>\n<li>\u4f18\u5316\u7b97\u6cd5\u9009\u62e9&#xff1a;\n<ul>\n<li>\u9ed8\u8ba4\u4ece Adam \u5f00\u59cb&#xff08;\u5b66\u4e60\u7387\u8bbe\u4e3a3e-4&#xff09;\u3002<\/li>\n<li>\u5bf9\u51f8\u4f18\u5316\u95ee\u9898&#xff08;\u5982\u7ebf\u6027\u56de\u5f52&#xff09;\u53ef\u5c1d\u8bd5 SGD &#043; Momentum\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u5b66\u4e60\u7387\u8c03\u4f18&#xff1a;\n<ul>\n<li>\u521d\u59cb\u5b66\u4e60\u7387\u901a\u8fc7\u7f51\u683c\u641c\u7d22&#xff08;\u59821e-5\u52301e-1&#xff09;\u3002<\/li>\n<li>\u6dfb\u52a0\u5b66\u4e60\u7387\u9884\u70ed\u4e0e\u8870\u51cf\u7b56\u7565\u3002<\/li>\n<\/ul>\n<\/li>\n<li>\u76d1\u63a7\u4e0e\u8c03\u6574&#xff1a;\n<ul>\n<li>\u4f7f\u7528TensorBoard\u76d1\u63a7\u8bad\u7ec3\/\u9a8c\u8bc1\u635f\u5931\u66f2\u7ebf\u3002<\/li>\n<li>\u65e9\u505c\u6cd5\u7ec8\u6b62\u8bad\u7ec3&#xff0c;\u68af\u5ea6\u88c1\u526a\u9632\u6b62\u7206\u70b8\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n<h4>\u4e94\u3001\u603b\u7ed3\u4e0e\u5c55\u671b<\/h4>\n<hr \/>\n<h5>1. 
\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u6838\u5fc3\u5173\u8054\u6027<\/h5>\n<p>\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u662f\u6df1\u5ea6\u5b66\u4e60\u6a21\u578b\u8bad\u7ec3\u7684\u4e24\u5927\u652f\u67f1&#xff0c;\u4e8c\u8005\u7684\u534f\u540c\u4f5c\u7528\u51b3\u5b9a\u4e86\u6a21\u578b\u7684\u6700\u7ec8\u6027\u80fd&#xff1a;<\/p>\n<ul>\n<li>\n<p>\u635f\u5931\u51fd\u6570\u4e3a\u4f18\u5316\u63d0\u4f9b\u65b9\u5411&#xff1a; \u635f\u5931\u51fd\u6570\u5b9a\u4e49\u4e86\u6a21\u578b\u9700\u8981\u6700\u5c0f\u5316\u7684\u76ee\u6807&#xff08;\u5982\u56de\u5f52\u4efb\u52a1\u7684\u8bef\u5dee\u3001\u5206\u7c7b\u4efb\u52a1\u7684\u6982\u7387\u5dee\u5f02&#xff09;&#xff0c;\u5176\u6570\u5b66\u6027\u8d28&#xff08;\u5982\u51f8\u6027\u3001\u5e73\u6ed1\u6027&#xff09;\u76f4\u63a5\u5f71\u54cd\u4f18\u5316\u96be\u5ea6\u3002\u4f8b\u5982&#xff0c;\u4ea4\u53c9\u71b5\u635f\u5931\u4e0eSoftmax\u7ed3\u5408\u65f6\u5177\u6709\u826f\u597d\u7684\u51f8\u6027&#xff0c;\u800cHinge Loss\u7684\u975e\u5e73\u6ed1\u6027\u53ef\u80fd\u9700\u7279\u5b9a\u4f18\u5316\u7b56\u7565\u3002<\/p>\n<\/li>\n<li>\n<p>\u4f18\u5316\u7b97\u6cd5\u51b3\u5b9a\u6536\u655b\u6548\u7387&#xff1a; \u68af\u5ea6\u4e0b\u964d\u7c7b\u7b97\u6cd5\u901a\u8fc7\u68af\u5ea6\u4fe1\u606f\u8fed\u4ee3\u66f4\u65b0\u53c2\u6570&#xff0c;\u5176\u53d8\u4f53&#xff08;\u5982Adam\u3001\u52a8\u91cf\u6cd5&#xff09;\u901a\u8fc7\u5f15\u5165\u52a8\u91cf\u3001\u81ea\u9002\u5e94\u5b66\u4e60\u7387\u7b49\u673a\u5236&#xff0c;\u52a0\u901f\u6536\u655b\u5e76\u907f\u514d\u5c40\u90e8\u6781\u5c0f\u3002\u4f8b\u5982&#xff0c;Adam\u5728\u975e\u51f8\u4f18\u5316\u95ee\u9898\u4e2d\u8868\u73b0\u4f18\u5f02&#xff0c;\u800cSGD&#043;Momentum\u5728\u7279\u5b9a\u4efb\u52a1&#xff08;\u5982\u56fe\u50cf\u5206\u7c7b&#xff09;\u4e2d\u4ecd\u5177\u7ade\u4e89\u529b\u3002<\/p>\n<\/li>\n<li>\n<p>\u5b9e\u8df5\u4e2d\u7684\u52a8\u6001\u5e73\u8861&#xff1a; 
\u635f\u5931\u51fd\u6570\u7684\u9009\u62e9\u9700\u4e0e\u4f18\u5316\u7b97\u6cd5\u53c2\u6570&#xff08;\u5982\u5b66\u4e60\u7387\u3001\u52a8\u91cf\u7cfb\u6570&#xff09;\u76f8\u5339\u914d\u3002\u4f8b\u5982&#xff1a;<\/p>\n<ul>\n<li>\u4f7f\u7528MSE\u65f6&#xff0c;\u56e0\u68af\u5ea6\u968f\u8bef\u5dee\u7ebf\u6027\u589e\u957f&#xff0c;\u9700\u8f83\u5c0f\u7684\u5b66\u4e60\u7387\u9632\u6b62\u9707\u8361\u3002<\/li>\n<li>\u4f7f\u7528Focal Loss\u65f6&#xff0c;\u56e0\u635f\u5931\u52a8\u6001\u8c03\u6574\u6837\u672c\u6743\u91cd&#xff0c;\u9700\u914d\u5408\u7a33\u5b9a\u7684\u4f18\u5316\u5668&#xff08;\u5982Adam&#xff09;\u907f\u514d\u8bad\u7ec3\u4e0d\u7a33\u5b9a\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<hr \/>\n<h5>2. \u672a\u6765\u65b9\u5411&#xff1a;\u81ea\u52a8\u5316\u4e0e\u65b0\u578b\u65b9\u6cd5\u63a2\u7d22<\/h5>\n<h6>2.1 \u81ea\u52a8\u5316\u8c03\u53c2&#xff08;AutoML&#xff09;<\/h6>\n<ul>\n<li>\u795e\u7ecf\u67b6\u6784\u641c\u7d22&#xff08;NAS&#xff09;\u4e0e\u4f18\u5316\u5668\u8054\u5408\u4f18\u5316&#xff1a; \u73b0\u6709AutoML\u5de5\u5177&#xff08;\u5982Google\u7684AutoML-Zero&#xff09;\u5df2\u5c1d\u8bd5\u81ea\u52a8\u8bbe\u8ba1\u4f18\u5316\u7b97\u6cd5&#xff0c;\u672a\u6765\u53ef\u80fd\u5b9e\u73b0\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u5668\u7684\u7aef\u5230\u7aef\u8054\u5408\u641c\u7d22\u3002<\/li>\n<li>\u5143\u5b66\u4e60&#xff08;Meta-Learning&#xff09;&#xff1a; \u901a\u8fc7\u5143\u5b66\u4e60\u6846\u67b6&#xff08;\u5982MAML&#xff09;&#xff0c;\u6a21\u578b\u53ef\u81ea\u52a8\u9002\u5e94\u4e0d\u540c\u4efb\u52a1\u7684\u6700\u4f18\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b56\u7565\u3002<\/li>\n<\/ul>\n<h6>2.2 \u81ea\u9002\u5e94\u4f18\u5316\u7b97\u6cd5\u7684\u6539\u8fdb<\/h6>\n<ul>\n<li>\u52a8\u6001\u73af\u5883\u9002\u5e94&#xff1a; 
\u9488\u5bf9\u5728\u7ebf\u5b66\u4e60\u3001\u6301\u7eed\u5b66\u4e60\u573a\u666f&#xff0c;\u5f00\u53d1\u81ea\u9002\u5e94\u8c03\u6574\u5b66\u4e60\u7387\u4e0e\u52a8\u91cf\u53c2\u6570\u7684\u7b97\u6cd5&#xff08;\u5982Adan\u3001Lion&#xff09;\u3002<\/li>\n<li>\u4e8c\u9636\u4f18\u5316\u5668\u7684\u5b9e\u7528\u5316&#xff1a; \u4f20\u7edf\u4e8c\u9636\u65b9\u6cd5&#xff08;\u5982\u725b\u987f\u6cd5&#xff09;\u8ba1\u7b97\u6210\u672c\u9ad8&#xff0c;\u4f46\u8fd1\u4f3c\u4e8c\u9636\u4f18\u5316\u5668&#xff08;\u5982K-FAC&#xff09;\u5728\u5206\u5e03\u5f0f\u8bad\u7ec3\u4e2d\u5c55\u73b0\u51fa\u6f5c\u529b\u3002<\/li>\n<\/ul>\n<h6>2.3 \u9762\u5411\u65b0\u578b\u4efb\u52a1\u7684\u635f\u5931\u51fd\u6570\u8bbe\u8ba1<\/h6>\n<ul>\n<li>\u591a\u6a21\u6001\u4e0e\u8de8\u57df\u4efb\u52a1&#xff1a; \u8bbe\u8ba1\u7edf\u4e00\u635f\u5931\u51fd\u6570\u5904\u7406\u591a\u6a21\u6001\u6570\u636e&#xff08;\u5982\u56fe\u6587\u68c0\u7d22\u4e2d\u7684\u5bf9\u6bd4\u5b66\u4e60\u635f\u5931&#xff09;\u3002<\/li>\n<li>\u53ef\u89e3\u91ca\u6027\u4e0e\u9c81\u68d2\u6027&#xff1a; \u5f00\u53d1\u517c\u987e\u6a21\u578b\u53ef\u89e3\u91ca\u6027\u4e0e\u5bf9\u6297\u9c81\u68d2\u6027\u7684\u635f\u5931\u51fd\u6570&#xff08;\u5982\u5bf9\u6297\u8bad\u7ec3\u4e2d\u7684TRADES Loss&#xff09;\u3002<\/li>\n<\/ul>\n<h6>2.4 \u7eff\u8272AI\u4e0e\u80fd\u6548\u4f18\u5316<\/h6>\n<ul>\n<li>\u4f4e\u529f\u8017\u4f18\u5316\u7b56\u7565&#xff1a; \u7814\u7a76\u7a00\u758f\u68af\u5ea6\u66f4\u65b0&#xff08;\u5982AdaGrad with Momentum&#xff09;\u6216\u91cf\u5316\u8bad\u7ec3\u6280\u672f&#xff0c;\u964d\u4f4e\u8ba1\u7b97\u8d44\u6e90\u6d88\u8017\u3002<\/li>\n<\/ul>\n<hr \/>\n<h5>3. 
\u7ed3\u8bed<\/h5>\n<p>\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u7814\u7a76\u8d2f\u7a7f\u6df1\u5ea6\u5b66\u4e60\u53d1\u5c55\u53f2&#xff0c;\u4ece\u7ecf\u5178\u7684SGD\u5230\u5982\u4eca\u7684AutoML&#xff0c;\u6bcf\u4e00\u6b21\u7a81\u7834\u5747\u63a8\u52a8\u6a21\u578b\u6027\u80fd\u7684\u8dc3\u5347\u3002\u672a\u6765&#xff0c;\u968f\u7740\u8ba1\u7b97\u786c\u4ef6\u7684\u5347\u7ea7\u4e0e\u7406\u8bba\u5de5\u5177\u7684\u5b8c\u5584&#xff0c;\u4e8c\u8005\u7684\u534f\u540c\u521b\u65b0\u5c06\u7ee7\u7eed\u89e3\u51b3\u66f4\u590d\u6742\u7684\u73b0\u5b9e\u95ee\u9898&#xff08;\u5982\u81ea\u52a8\u9a7e\u9a76\u3001\u86cb\u767d\u8d28\u7ed3\u6784\u9884\u6d4b&#xff09;\u3002\u8bfb\u8005\u53ef\u901a\u8fc7\u5b9e\u8df5\u6587\u4e2d\u4ee3\u7801\u793a\u4f8b&#xff0c;\u7ed3\u5408\u524d\u6cbf\u8bba\u6587&#xff08;\u5982ICML\u3001NeurIPS\u6700\u65b0\u6210\u679c&#xff09;&#xff0c;\u6df1\u5165\u53c2\u4e0e\u8fd9\u4e00\u5145\u6ee1\u6d3b\u529b\u7684\u9886\u57df\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb647\u6b21\uff0c\u70b9\u8d5e62\u6b21\uff0c\u6536\u85cf45\u6b21\u3002\u672c\u6587\u5c06\u4ece\u7406\u8bba\u63a8\u5bfc\u3001\u4ee3\u7801\u5b9e\u73b0\u548c\u5b9e\u6218\u8c03\u53c2\u4e09\u4e2a\u7ef4\u5ea6\u5c55\u5f00\uff1a  &#8211; **\u7406\u8bba**\uff1a\u89e3\u6790\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u6570\u5b66\u539f\u7406\uff0c\u7406\u89e3\u5176\u9002\u7528\u573a\u666f\u4e0e\u5c40\u9650\u6027\u3002  &#8211; **\u5b9e\u8df5**\uff1a\u901a\u8fc7Python\u4ee3\u7801\uff08NumPy\/PyTorch\uff09\u624b\u5199\u6838\u5fc3\u7b97\u6cd5\uff0c\u5e76\u7ed3\u5408\u6846\u67b6API\u6f14\u793a\u5b9e\u9645\u5e94\u7528\u3002  &#8211; 
**\u8c03\u53c2**\uff1a\u603b\u7ed3\u5b66\u4e60\u7387\u8bbe\u7f6e\u3001\u6279\u91cf\u5927\u5c0f\u9009\u62e9\u7b49\u5173\u952e\u6280\u5de7\uff0c\u5e2e\u52a9\u8bfb\u8005\u907f\u5f00\u8bad\u7ec3\u4e2d\u7684\u5e38\u89c1\u201c\u5751\u201d\u3002<\/p>\n","protected":false},"author":2,"featured_media":38364,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[50,3358,86,427],"topic":[],"class_list":["post-38365","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-50","tag-ai","tag-86","tag-427"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/38365.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb647\u6b21\uff0c\u70b9\u8d5e62\u6b21\uff0c\u6536\u85cf45\u6b21\u3002\u672c\u6587\u5c06\u4ece\u7406\u8bba\u63a8\u5bfc\u3001\u4ee3\u7801\u5b9e\u73b0\u548c\u5b9e\u6218\u8c03\u53c2\u4e09\u4e2a\u7ef4\u5ea6\u5c55\u5f00\uff1a - 
**\u7406\u8bba**\uff1a\u89e3\u6790\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u6570\u5b66\u539f\u7406\uff0c\u7406\u89e3\u5176\u9002\u7528\u573a\u666f\u4e0e\u5c40\u9650\u6027\u3002 - **\u5b9e\u8df5**\uff1a\u901a\u8fc7Python\u4ee3\u7801\uff08NumPy\/PyTorch\uff09\u624b\u5199\u6838\u5fc3\u7b97\u6cd5\uff0c\u5e76\u7ed3\u5408\u6846\u67b6API\u6f14\u793a\u5b9e\u9645\u5e94\u7528\u3002 - **\u8c03\u53c2**\uff1a\u603b\u7ed3\u5b66\u4e60\u7387\u8bbe\u7f6e\u3001\u6279\u91cf\u5927\u5c0f\u9009\u62e9\u7b49\u5173\u952e\u6280\u5de7\uff0c\u5e2e\u52a9\u8bfb\u8005\u907f\u5f00\u8bad\u7ec3\u4e2d\u7684\u5e38\u89c1\u201c\u5751\u201d\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/38365.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-05-20T01:18:24+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250520011823-682bd85f31042.jpg\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"11 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/38365.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/38365.html\",\"name\":\"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-05-20T01:18:24+00:00\",\"dateModified\":\"2025-05-20T01:18:24+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/38365.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/38365.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/38365.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/38365.html","og_locale":"zh_CN","og_type":"article","og_title":"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb647\u6b21\uff0c\u70b9\u8d5e62\u6b21\uff0c\u6536\u85cf45\u6b21\u3002\u672c\u6587\u5c06\u4ece\u7406\u8bba\u63a8\u5bfc\u3001\u4ee3\u7801\u5b9e\u73b0\u548c\u5b9e\u6218\u8c03\u53c2\u4e09\u4e2a\u7ef4\u5ea6\u5c55\u5f00\uff1a - **\u7406\u8bba**\uff1a\u89e3\u6790\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u7684\u6570\u5b66\u539f\u7406\uff0c\u7406\u89e3\u5176\u9002\u7528\u573a\u666f\u4e0e\u5c40\u9650\u6027\u3002 - 
**\u5b9e\u8df5**\uff1a\u901a\u8fc7Python\u4ee3\u7801\uff08NumPy\/PyTorch\uff09\u624b\u5199\u6838\u5fc3\u7b97\u6cd5\uff0c\u5e76\u7ed3\u5408\u6846\u67b6API\u6f14\u793a\u5b9e\u9645\u5e94\u7528\u3002 - **\u8c03\u53c2**\uff1a\u603b\u7ed3\u5b66\u4e60\u7387\u8bbe\u7f6e\u3001\u6279\u91cf\u5927\u5c0f\u9009\u62e9\u7b49\u5173\u952e\u6280\u5de7\uff0c\u5e2e\u52a9\u8bfb\u8005\u907f\u5f00\u8bad\u7ec3\u4e2d\u7684\u5e38\u89c1\u201c\u5751\u201d\u3002","og_url":"https:\/\/www.wsisp.com\/helps\/38365.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-05-20T01:18:24+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250520011823-682bd85f31042.jpg"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"11 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/38365.html","url":"https:\/\/www.wsisp.com\/helps\/38365.html","name":"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-05-20T01:18:24+00:00","dateModified":"2025-05-20T01:18:24+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/38365.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/38365.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/38365.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u3010\u6df1\u5ea6\u5b66\u4e60\u57fa\u7840\u3011\u635f\u5931\u51fd\u6570\u4e0e\u4f18\u5316\u7b97\u6cd5\u8be6\u89e3\uff1a\u4ece\u7406\u8bba\u5230\u5b9e\u8df5"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/38365","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=38365"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/38365\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/38364"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=38365"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=38365"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=38365"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=38365"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}