{"id":44516,"date":"2025-06-19T09:27:57","date_gmt":"2025-06-19T01:27:57","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/44516.html"},"modified":"2025-06-19T09:27:57","modified_gmt":"2025-06-19T01:27:57","slug":"llms%ef%bc%9a%e3%80%8awebdancer-towards-autonomous-information-seeking-agency%e3%80%8b%e7%bf%bb%e8%af%91%e4%b8%8e%e8%a7%a3%e8%af%bb","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/44516.html","title":{"rendered":"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb"},"content":{"rendered":"<p>LLMs&#xff1a;\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5bfc\u8bfb&#xff1a;\u8be5\u8bba\u6587\u63d0\u51fa\u4e86\u4e00\u4e2a<span style=\"color:#ff0000\">\u7cfb\u7edf<\/span>\u7684\u6846\u67b6&#xff0c;\u7528\u4e8e<span style=\"color:#ff0000\">\u4ece\u5934\u5f00\u59cb<\/span>\u6784\u5efa\u7aef\u5230\u7aef\u7684<span style=\"color:#ff0000\">\u591a\u6b65\u4fe1\u606f\u641c\u5bfb<\/span>Web Agent\u3002\u901a\u8fc7\u5f15\u5165\u53ef\u6269\u5c55\u7684QA\u6570\u636e\u5408\u6210\u65b9\u6cd5\u4ee5\u53ca<span style=\"color:#ff0000\">\u7ed3\u5408SFT<\/span>\u548c<span style=\"color:#ff0000\">On-Policy\u5f3a\u5316\u5b66\u4e60<\/span>\u7684\u4e24\u9636\u6bb5\u8bad\u7ec3pipeline&#xff0c;WebDancer Agent\u5728GAIA\u548cWebWalkerQA\u4e0a\u53d6\u5f97\u4e86\u5f3a\u5927\u7684\u6027\u80fd\u3002\u8fd9\u4e9b\u53d1\u73b0\u5f3a\u8c03\u4e86\u8be5\u8bba\u6587\u63d0\u51fa\u7684\u8bad\u7ec3\u7b56\u7565\u7684\u91cd\u8981\u6027&#xff0c;\u5e76\u4e3a\u793e\u533a\u63d0\u4f9b\u4e86\u53ef\u64cd\u4f5c\u548c\u7cfb\u7edf\u6027\u7684\u9014\u5f84&#xff0c;\u4ee5<span style=\"color:#ff0000\">\u63a8\u8fdb\u80fd\u591f\u5904\u7406\u590d\u6742\u73b0\u5b9e\u4e16\u754c\u4fe1\u606f<\/span>\u641c\u5bfb\u4efb\u52a1\u7684\u65e5\u76ca\u590d\u6742\u7684Agentic\u6a21\u578b\u7684\u5f00\u53d1\u3002<\/p>\n<p 
style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">&gt;&gt; \u80cc\u666f\u75db\u70b9<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u590d\u6742\u73b0\u5b9e\u4e16\u754c\u95ee\u9898\u9700\u8981\u6df1\u5165\u7684\u4fe1\u606f\u641c\u5bfb\u548c\u591a\u6b65\u63a8\u7406\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u73b0\u6709Agentic\u7cfb\u7edf\u7684\u4fe1\u606f\u641c\u5bfb\u80fd\u529b\u4e0d\u8db3&#xff0c;\u8bad\u7ec3\u548c\u8bc4\u4f30\u6570\u636e\u96c6\u76f8\u5bf9\u7b80\u5355&#xff0c;\u65e0\u6cd5\u6355\u6349\u771f\u5b9e\u4e16\u754c\u7684\u6311\u6218\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u6784\u5efa\u81ea\u4e3b\u4fe1\u606f\u641c\u5bfb\u4ee3\u7406\u9762\u4e34\u6311\u6218&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u83b7\u53d6\u9ad8\u8d28\u91cf\u3001\u7ec6\u7c92\u5ea6\u7684\u6d4f\u89c8\u6570\u636e&#xff0c;\u53cd\u6620\u591a\u6837\u5316\u7684\u7528\u6237\u610f\u56fe\u548c\u4e30\u5bcc\u7684\u4ea4\u4e92\u4e0a\u4e0b\u6587\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u6784\u5efa\u53ef\u9760\u7684\u8f68\u8ff9&#xff0c;\u652f\u6301\u957f\u7a0b\u63a8\u7406\u548c\u4efb\u52a1\u5206\u89e3\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u8bbe\u8ba1\u53ef\u6269\u5c55\u548c\u6cdb\u5316\u7684\u8bad\u7ec3\u7b56\u7565&#xff0c;\u4f7fWeb Agent\u5728\u5206\u5e03\u5916\u7684Web\u73af\u5883\u3001\u590d\u6742\u7684\u4ea4\u4e92\u6a21\u5f0f\u548c\u957f\u671f\u76ee\u6807\u4e2d\u5177\u5907\u9c81\u68d2\u7684\u884c\u4e3a\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">&gt;&gt; \u5177\u4f53\u7684\u89e3\u51b3\u65b9\u6848&#xff1a;\u63d0\u51fa\u4e86\u4e00\u4e2a\u6784\u5efa\u7aef\u5230\u7aefAgentic\u4fe1\u606f\u641c\u5bfb\u4ee3\u7406\u7684<span 
style=\"color:#ff0000\">\u7edf\u4e00\u8303\u5f0f<\/span>&#xff0c;\u4ece\u6570\u636e\u4e2d\u5fc3\u548c\u8bad\u7ec3\u9636\u6bb5\u7684\u89d2\u5ea6\u51fa\u53d1\u3002\u8be5\u65b9\u6cd5\u5305\u62ec\u56db\u4e2a\u5173\u952e\u9636\u6bb5&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u6d4f\u89c8\u6570\u636e\u6784\u5efa&#xff08;Browsing data construction&#xff09;\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u8f68\u8ff9\u62bd\u6837&#xff08;Trajectories sampling&#xff09;\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u76d1\u7763\u5f0f\u5fae\u8c03&#xff08;Supervised fine-tuning&#xff09;&#xff0c;\u7528\u4e8e\u6709\u6548\u7684\u51b7\u542f\u52a8\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5f3a\u5316\u5b66\u4e60&#xff08;Reinforcement learning&#xff09;&#xff0c;\u7528\u4e8e\u589e\u5f3a\u6cdb\u5316\u80fd\u529b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5728ReAct\u6846\u67b6\u7684\u57fa\u7840\u4e0a&#xff0c;\u5b9e\u4f8b\u5316\u4e86\u4e00\u4e2aWeb Agent&#xff0c;\u540d\u4e3aWebDancer\u3002\u63d0\u51fa\u4e86\u4e24\u79cd\u6570\u636e\u96c6\u5408\u6210\u65b9\u6cd5&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf CRAWLQA&#xff1a;\u901a\u8fc7\u722c\u53d6\u7f51\u9875\u6765\u6784\u5efa\u6df1\u5ea6\u67e5\u8be2&#xff0c;\u4ece\u800c\u901a\u8fc7\u70b9\u51fb\u52a8\u4f5c\u83b7\u53d6Web\u4fe1\u606f\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf E2HQA&#xff1a;\u901a\u8fc7\u589e\u5f3a\u7531\u6613\u5230\u96be\u7684QA\u5bf9\u5408\u6210&#xff0c;\u5c06\u7b80\u5355\u95ee\u9898\u8f6c\u5316\u4e3a\u590d\u6742\u95ee\u9898&#xff0c;\u4ece\u800c\u6fc0\u52b1\u4ece\u5f31\u5230\u5f3a\u7684\u4ee3\u7406\u53d1\u5c55\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf 
\u91c7\u7528\u4e24\u9636\u6bb5\u65b9\u6cd5&#xff0c;\u7ed3\u5408\u62d2\u7edd\u62bd\u6837\u5fae\u8c03&#xff08;Rejection Sampling Fine-Tuning&#xff0c;RFT&#xff09;\u548c\u540e\u7eed\u7684On-Policy\u5f3a\u5316\u5b66\u4e60\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5728\u5f3a\u5316\u5b66\u4e60\u9636\u6bb5&#xff0c;\u91c7\u7528\u89e3\u8026\u526a\u88c1\u548c\u52a8\u6001\u62bd\u6837\u7b56\u7565\u4f18\u5316&#xff08;Decoupled Clip and Dynamic Sampling Policy Optimization&#xff0c;DAPO&#xff09;\u7b97\u6cd5\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">&gt;&gt; \u6838\u5fc3\u601d\u8def\u6b65\u9aa4<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">* \u6570\u636e\u6784\u5efa (Step I)&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u57fa\u4e8e\u771f\u5b9e\u4e16\u754c\u7684Web\u73af\u5883&#xff0c;\u6784\u5efa\u591a\u6837\u5316\u4e14\u5177\u6709\u6311\u6218\u6027\u7684\u6df1\u5ea6\u4fe1\u606f\u641c\u5bfbQA\u5bf9\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u4f7f\u7528CRAWLQA\u548cE2HQA\u4e24\u79cd\u65b9\u6cd5\u751f\u6210QA\u5bf9\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">* \u8f68\u8ff9\u62bd\u6837 (Step II)&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u4f7f\u7528LLMs\u548cLRMs\u4eceQA\u5bf9\u4e2d\u62bd\u6837\u9ad8\u8d28\u91cf\u8f68\u8ff9&#xff0c;\u4ee5\u6307\u5bfc\u4ee3\u7406\u5b66\u4e60\u8fc7\u7a0b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u4f7f\u7528ReAct\u6846\u67b6&#xff0c;\u7ed3\u5408Short-CoT\u548cLong-CoT\u4e24\u79cd\u65b9\u5f0f\u751f\u6210\u8f68\u8ff9\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf 
\u901a\u8fc7\u6709\u6548\u6027\u63a7\u5236\u3001\u6b63\u786e\u6027\u9a8c\u8bc1\u548c\u8d28\u91cf\u8bc4\u4f30\u4e09\u4e2a\u9636\u6bb5\u7684\u6f0f\u6597\u5f0f\u8f68\u8ff9\u8fc7\u6ee4\u6846\u67b6&#xff0c;\u7b5b\u9009\u9ad8\u8d28\u91cf\u8f68\u8ff9\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">* \u76d1\u7763\u5fae\u8c03 (Step III)&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u6267\u884c\u5fae\u8c03&#xff0c;\u4ee5\u4f7f\u683c\u5f0f\u5316\u7684\u6307\u4ee4\u9075\u5faa\u9002\u5e94Agentic\u4efb\u52a1\u548c\u73af\u5883\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u4f7f\u7528\u76d1\u7763\u5f0f\u5fae\u8c03&#xff08;SFT&#xff09;\u6765\u8bad\u7ec3\u7b56\u7565\u6a21\u578b&#xff0c;\u4ee5\u5b9e\u73b0\u51b7\u542f\u52a8\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u63a9\u76d6\u6765\u81ea\u5916\u90e8\u53cd\u9988\u7684\u635f\u5931\u8d21\u732e&#xff0c;\u4ee5\u907f\u514d\u5728\u5b66\u4e60\u671f\u95f4\u53d7\u5230\u5e72\u6270\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">* \u5f3a\u5316\u5b66\u4e60 (Step IV)&#xff1a;<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5e94\u7528\u5f3a\u5316\u5b66\u4e60&#xff0c;\u4ee5\u4f18\u5316Agent\u5728\u771f\u5b9e\u4e16\u754cWeb\u73af\u5883\u4e2d\u7684\u51b3\u7b56\u5236\u5b9a\u548c\u6cdb\u5316\u80fd\u529b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u4f7f\u7528DAPO\u7b97\u6cd5\u6765\u4f18\u5316\u7b56\u7565\u6a21\u578b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u8bbe\u8ba1\u5956\u52b1\u673a\u5236&#xff0c;\u5305\u62ec\u683c\u5f0f\u5f97\u5206\u548c\u7b54\u6848\u5f97\u5206\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">&gt;&gt; \u4f18\u52bf<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf 
\u63d0\u51fa\u4e86\u4e00\u4e2a\u7cfb\u7edf\u7684\u3001\u7aef\u5230\u7aef\u7684pipeline&#xff0c;\u7528\u4e8e\u6784\u5efa\u957f\u671f\u4fe1\u606f\u641c\u5bfbWeb Agent\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u63d0\u51fa\u4e86\u53ef\u6269\u5c55\u7684QA\u6570\u636e\u5408\u6210\u65b9\u6cd5\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u7ed3\u5408\u4e86SFT\u548cOn-Policy\u5f3a\u5316\u5b66\u4e60\u7684\u4e24\u9636\u6bb5\u8bad\u7ec3pipeline\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf WebDancer\u5728GAIA\u548cWebWalkerQA\u4e24\u4e2aWeb\u4fe1\u606f\u641c\u5bfb\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u8868\u73b0\u51fa\u8272\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5bf9Agent\u8bad\u7ec3\u8fdb\u884c\u4e86\u6df1\u5165\u5206\u6790&#xff0c;\u63d0\u4f9b\u4e86\u6709\u4ef7\u503c\u7684\u89c1\u89e3\u548c\u53ef\u64cd\u4f5c\u7684\u3001\u7cfb\u7edf\u6027\u7684\u9014\u5f84&#xff0c;\u7528\u4e8e\u5f00\u53d1\u66f4\u5f3a\u5927\u7684Agentic\u6a21\u578b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf DAPO\u7684\u52a8\u6001\u62bd\u6837\u673a\u5236\u53ef\u4ee5\u6709\u6548\u5730\u5229\u7528\u5728SFT\u9636\u6bb5\u672a\u5145\u5206\u5229\u7528\u7684QA\u5bf9&#xff0c;\u4ece\u800c\u63d0\u9ad8\u6570\u636e\u6548\u7387\u548c\u7b56\u7565\u9c81\u68d2\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">&gt;&gt; \u7ed3\u8bba\u548c\u89c2\u70b9<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u6ca1\u6709Agentic\u80fd\u529b\u7684\u6846\u67b6\u5728GAIA\u548cWebWalkerQA\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u8868\u73b0\u4e0d\u4f73&#xff0c;\u7a81\u51fa\u4e86\u4e3b\u52a8\u4fe1\u606f\u641c\u5bfb\u548cAgentic\u51b3\u7b56\u5236\u5b9a\u7684\u5fc5\u8981\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf 
\u57fa\u4e8eQwen-32B\u7b49\u539f\u751f\u5f3a\u5927\u63a8\u7406\u6a21\u578b\u6784\u5efa\u7684Agentic\u65b9\u6cd5\u59cb\u7ec8\u4f18\u4e8e\u5176\u975eAgentic\u5bf9\u5e94\u65b9\u6cd5&#xff0c;\u8bc1\u660e\u4e86\u5728Agent\u6784\u5efa\u4e2d\u5229\u7528\u63a8\u7406\u4e13\u7528\u6a21\u578b\u7684\u6709\u6548\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5728\u9ad8\u5ea6\u53ef\u6269\u5c55\u7684ReAct\u6846\u67b6\u4e0b&#xff0c;WebDancer\u5728\u4e0d\u540c\u7684\u6a21\u578b\u89c4\u6a21\u4e0a\u90fd\u663e\u793a\u51fa\u4f18\u4e8eVanilla ReAct\u57fa\u7ebf\u7684\u663e\u8457\u4f18\u52bf\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf WebDancer\u5728BrowseComp\u548cBrowseComp-zh\u6570\u636e\u96c6\u4e0a\u8868\u73b0\u51fa\u6301\u7eed\u7684\u5f3a\u5927\u6027\u80fd&#xff0c;\u7a81\u51fa\u4e86\u5176\u5728\u5904\u7406\u56f0\u96be\u7684\u63a8\u7406\u548c\u4fe1\u606f\u641c\u5bfb\u4efb\u52a1\u65b9\u9762\u7684\u9c81\u68d2\u6027\u548c\u6709\u6548\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u9ad8\u8d28\u91cf\u7684\u8f68\u8ff9\u6570\u636e\u5bf9\u4e8eAgent\u7684\u6709\u6548SFT\u81f3\u5173\u91cd\u8981\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf SFT\u5bf9\u4e8e\u51b7\u542f\u52a8\u81f3\u5173\u91cd\u8981&#xff0c;\u56e0\u4e3aAgent\u4efb\u52a1\u9700\u8981\u5f3a\u5927\u7684\u591a\u6b65\u591a\u5de5\u5177\u6307\u4ee4\u9075\u5faa\u80fd\u529b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5f3a\u5316\u5b66\u4e60\u80fd\u591f\u5b9e\u73b0\u66f4\u957f\u7684\u63a8\u7406\u8fc7\u7a0b\u5e76\u652f\u6301\u66f4\u590d\u6742\u7684Agentic\u52a8\u4f5c\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf 
\u8c03\u6574\u89e3\u7801\u6e29\u5ea6\u5bf9\u6700\u7ec8\u6027\u80fd\u7684\u5f71\u54cd\u6700\u5c0f&#xff0c;\u8868\u660e\u89e3\u7801\u53ef\u53d8\u6027\u672c\u8eab\u5e76\u4e0d\u80fd\u89e3\u91caAgent\u7684\u4e0d\u7a33\u5b9a\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5b9e\u9645\u73af\u5883\u968f\u65f6\u95f4\u6f14\u53d8&#xff0c;\u9700\u8981Agent\u5728\u4e0d\u65ad\u53d8\u5316\u7684\u73af\u5883\u548c\u90e8\u5206\u53ef\u89c2\u5bdf\u6027\u4e0b\u4fdd\u6301\u9c81\u68d2\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u25cf \u5f3a\u63a8\u7406\u5668\u6a21\u578b\u4f7f\u7528\u7684\u601d\u7ef4\u6a21\u5f0f\u77e5\u8bc6\u5f88\u96be\u8f6c\u79fb\u5230\u5c0f\u578b\u6307\u4ee4\u6a21\u578b\u4e2d\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p id=\"main-toc\">\u76ee\u5f55<\/p>\n<p id=\"%E3%80%8AWebDancer%3A%20Towards%20Autonomous%20Information%20Seeking%20Agency%E3%80%8B%E7%BF%BB%E8%AF%91%E4%B8%8E%E8%A7%A3%E8%AF%BB-toc\" style=\"margin-left:0px\">\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb<\/p>\n<p id=\"Abstract-toc\" style=\"margin-left:0px\">Abstract<\/p>\n<p id=\"1%E3%80%81Introduction-toc\" style=\"margin-left:0px\">1\u3001Introduction<\/p>\n<p id=\"Conclusion-toc\" style=\"margin-left:0px\">Conclusion<\/p>\n<hr id=\"hr-toc\" \/>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<h2 id=\"%E3%80%8AWebDancer%3A%20Towards%20Autonomous%20Information%20Seeking%20Agency%E3%80%8B%E7%BF%BB%E8%AF%91%E4%B8%8E%E8%A7%A3%E8%AF%BB\" style=\"text-align:justify\">\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb<\/h2>\n<table border=\"1\" 
cellspacing=\"0\">\n<tbody>\n<tr>\n<td style=\"vertical-align:top;width:41.3000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5730\u5740<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:384.8000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5730\u5740&#xff1a;[2505.22648] WebDancer: Towards Autonomous Information Seeking Agency<\/p>\n<\/td>\n<\/tr>\n<tr>\n<td style=\"vertical-align:top;width:41.3000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u65f6\u95f4<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:384.8000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">2025\u5e745\u670828\u65e5<\/p>\n<\/td>\n<\/tr>\n<tr>\n<td style=\"vertical-align:top;width:41.3000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u4f5c\u8005<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:384.8000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">Tongyi Lab , Alibaba Group<\/p>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<h2 id=\"Abstract\" style=\"text-align:justify\">Abstract<\/h2>\n<table border=\"1\" cellspacing=\"0\">\n<tbody>\n<tr>\n<td style=\"vertical-align:top;width:248.8500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">Addressing intricate real-world problems necessitates in-depth information seeking and multi-step reasoning. Recent progress in agentic systems, exemplified by Deep Research, underscores the potential for autonomous multi-step research. In this work, we present a cohesive paradigm for building end-to-end agentic information seeking agents from a data-centric and training-stage perspective. 
Our approach consists of four key stages: (1) browsing data construction, (2) trajectories sampling, (3) supervised fine-tuning for effective cold start, and (4) reinforcement learning for enhanced generalisation. We instantiate this framework in a web agent based on the ReAct, WebDancer. Empirical evaluations on the challenging information seeking benchmarks, GAIA and WebWalkerQA, demonstrate the strong performance of WebDancer, achieving considerable results and highlighting the efficacy of our training paradigm. Further analysis of agent training provides valuable insights and actionable, systematic pathways for developing more capable agentic models. The codes and demo will be released in this https URL.<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:177.2500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u89e3\u51b3\u590d\u6742\u7684\u73b0\u5b9e\u4e16\u754c\u95ee\u9898\u9700\u8981\u6df1\u5165\u7684\u4fe1\u606f\u641c\u7d22\u548c\u591a\u6b65\u9aa4\u63a8\u7406\u3002\u4ee5 Deep Research \u4e3a\u4ee3\u8868\u7684\u4ee3\u7406\u7cfb\u7edf\u8fd1\u671f\u53d6\u5f97\u7684\u8fdb\u5c55\u8868\u660e\u4e86\u81ea\u4e3b\u591a\u6b65\u9aa4\u7814\u7a76\u7684\u6f5c\u529b\u3002\u5728\u672c\u7814\u7a76\u4e2d&#xff0c;\u6211\u4eec\u4ece\u6570\u636e\u4e3a\u4e2d\u5fc3\u548c\u8bad\u7ec3\u9636\u6bb5\u7684\u89d2\u5ea6\u51fa\u53d1&#xff0c;\u63d0\u51fa\u4e86\u4e00\u79cd\u6784\u5efa\u7aef\u5230\u7aef\u4ee3\u7406\u4fe1\u606f\u641c\u7d22\u4ee3\u7406\u7684\u8fde\u8d2f\u8303\u5f0f\u3002\u6211\u4eec\u7684\u65b9\u6cd5\u5305\u542b\u56db\u4e2a\u5173\u952e\u9636\u6bb5&#xff1a;&#xff08;1&#xff09;\u6d4f\u89c8\u6570\u636e\u6784\u5efa&#xff0c;&#xff08;2&#xff09;\u8f68\u8ff9\u91c7\u6837&#xff0c;&#xff08;3&#xff09;\u76d1\u7763\u5fae\u8c03\u4ee5\u5b9e\u73b0\u6709\u6548\u7684\u51b7\u542f\u52a8&#xff0c;\u4ee5\u53ca&#xff08;4&#xff09;\u5f3a\u5316\u5b66\u4e60\u4ee5\u589e\u5f3a\u6cdb\u5316\u80fd\u529b\u3002\u6211\u4eec\u57fa\u4e8e ReAct \u5b9e\u73b0\u4e86\u4e00\u4e2a\u7f51\u7edc\u4ee3\u7406 
WebDancer \u6765\u5b9e\u4f8b\u5316\u6b64\u6846\u67b6\u3002\u5728\u5177\u6709\u6311\u6218\u6027\u7684\u4fe1\u606f\u641c\u7d22\u57fa\u51c6 GAIA \u548c WebWalkerQA \u4e0a\u8fdb\u884c\u7684\u5b9e\u8bc1\u8bc4\u4f30\u8868\u660e&#xff0c;WebDancer \u8868\u73b0\u5f3a\u52b2&#xff0c;\u53d6\u5f97\u4e86\u663e\u8457\u6210\u679c&#xff0c;\u5e76\u7a81\u663e\u4e86\u6211\u4eec\u8bad\u7ec3\u8303\u5f0f\u7684\u6709\u6548\u6027\u3002\u5bf9\u4ee3\u7406\u8bad\u7ec3\u7684\u8fdb\u4e00\u6b65\u5206\u6790\u63d0\u4f9b\u4e86\u6709\u4ef7\u503c\u7684\u89c1\u89e3\u548c\u53ef\u64cd\u4f5c\u7684\u3001\u7cfb\u7edf\u7684\u8def\u5f84&#xff0c;\u4ee5\u5f00\u53d1\u66f4\u5f3a\u5927\u7684\u4ee3\u7406\u6a21\u578b\u3002<\/p>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\"><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"640\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/06\/20250619012755-6853679baa58b.png\" width=\"1252\" \/><\/p>\n<h4 style=\"margin-left:0.0001pt;margin-right:0px;text-align:justify\">Figure 1: Two web data generation pipelines. \u2776 For CRAWLQA, we first collect root url of knowledgeable websites. Then we mimic human behavior by systematically clicking and collecting subpages accessible through sublinks on the root\/&#8230; page. Using predefined rules, we leverage GPT-4o to generate synthetic QA pairs based on the gathered information. \u2777 For E2HQA, the initial question Q1 is iteratively evolved using the new information Ci retrieved from the entity Ei at iteration i, allowing the task to progressively scale in complexity, from simpler instances to more challenging ones. 
We use GPT-4o to rewrite the question until the iteration reaches n.\u56fe 1&#xff1a;\u4e24\u4e2a\u7f51\u7edc\u6570\u636e\u751f\u6210\u6d41\u7a0b\u3002\u2776 \u5bf9\u4e8e CRAWLQA&#xff0c;\u6211\u4eec\u9996\u5148\u6536\u96c6\u77e5\u8bc6\u578b\u7f51\u7ad9\u7684\u6839\u7f51\u5740\u3002\u7136\u540e\u901a\u8fc7\u7cfb\u7edf\u6027\u5730\u70b9\u51fb\u548c\u6536\u96c6\u6839\u9875\u9762\u53ca\u5176\u5b50\u94fe\u63a5\u53ef\u8bbf\u95ee\u7684\u5b50\u9875\u9762\u6765\u6a21\u62df\u4eba\u7c7b\u884c\u4e3a\u3002\u5229\u7528\u9884\u5b9a\u4e49\u89c4\u5219&#xff0c;\u6211\u4eec\u501f\u52a9 GPT4o \u6839\u636e\u6536\u96c6\u5230\u7684\u4fe1\u606f\u751f\u6210\u5408\u6210\u7684\u95ee\u7b54\u5bf9\u3002\u2777 \u5bf9\u4e8e E2HQA&#xff0c;\u521d\u59cb\u95ee\u9898 Q1 \u5728\u6bcf\u6b21\u8fed\u4ee3 i \u65f6\u4f7f\u7528\u4ece\u5b9e\u4f53 Ei \u68c0\u7d22\u5230\u7684\u65b0\u4fe1\u606f Ci \u8fdb\u884c\u8fed\u4ee3\u6f14\u5316&#xff0c;\u4f7f\u5f97\u4efb\u52a1\u7684\u590d\u6742\u5ea6\u9010\u6b65\u63d0\u5347&#xff0c;\u4ece\u7b80\u5355\u7684\u5b9e\u4f8b\u5230\u66f4\u5177\u6311\u6218\u6027\u7684\u5b9e\u4f8b\u3002\u6211\u4eec\u4f7f\u7528 GPT-4o \u91cd\u5199\u95ee\u9898&#xff0c;\u76f4\u5230\u8fed\u4ee3\u6b21\u6570\u8fbe\u5230 n\u3002<\/h4>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<h2 id=\"1%E3%80%81Introduction\" style=\"text-align:justify\">1\u3001Introduction<\/h2>\n<table border=\"1\" cellspacing=\"0\">\n<tbody>\n<tr>\n<td style=\"vertical-align:top;width:240.8500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">Web agents are autonomous systems that perceive their real-world web environment, make decisions, and take actions to accomplish specific and human-like tasks. 
Recent systems, such as ChatGPT Deep Research [1] and Grok DeepSearch [2], have demonstrated strong deep information-seeking capabilities through end-to-end reinforcement learning (RL) training.<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">The community\u2019s previous approaches for information seeking by agentic systems can be categorized into three types: (i) Directly leveraging prompting engineering techniques to guide Large Language Models (LLMs) or Large Reasoning Models (LRMs) [3\u20135] to execute complex tasks. (ii) Incorporating search or browser capabilities into the web agents through supervised fine-tuning (SFT) or RL [6, 5, 7\u201310]. The first training-free methods are unable to effectively leverage the reasoning capabilities enabled by the reasoning model. Although the latter methods internalize certain information-seeking capabilities through SFT or RL training, both the training and evaluation datasets are relatively simple and do not capture the real-world challenges, for instance, performance on the 2Wiki dataset has already reached over 80%. Moreover, the current SFT or RL training paradigm does not fully and efficiently exploit the potential of information-seeking behavior. 
Building autonomous information seeking agency involves addressing a set of challenges that span web environment perception and decision-making: (1) acquiring high-quality, fine-grained browsing data that reflects diverse user intents and rich interaction contexts, (2) constructing reliable trajectories that support long-horizon reasoning and task decomposition, and (3) designing scalable and generalizable training strategies capable of endowing the web agent with robust behavior across out-of-distribution web environments, complex interaction patterns, and long-term objectives.<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:185.2500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u7f51\u7edc\u4ee3\u7406\u662f\u80fd\u591f\u611f\u77e5\u5176\u771f\u5b9e\u7f51\u7edc\u73af\u5883\u3001\u505a\u51fa\u51b3\u7b56\u5e76\u91c7\u53d6\u884c\u52a8\u4ee5\u5b8c\u6210\u7279\u5b9a\u4e14\u7c7b\u4f3c\u4eba\u7c7b\u4efb\u52a1\u7684\u81ea\u4e3b\u7cfb\u7edf\u3002\u8fd1\u671f\u7684\u7cfb\u7edf&#xff0c;\u5982 ChatGPT Deep Research [1] \u548c Grok DeepSearch [2]&#xff0c;\u901a\u8fc7\u7aef\u5230\u7aef\u5f3a\u5316\u5b66\u4e60&#xff08;RL&#xff09;\u8bad\u7ec3\u5c55\u793a\u4e86\u5f3a\u5927\u7684\u6df1\u5ea6\u4fe1\u606f\u641c\u7d22\u80fd\u529b\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u4ee3\u7406\u7cfb\u7edf\u8fdb\u884c\u4fe1\u606f\u641c\u7d22\u7684\u793e\u533a\u5148\u524d\u65b9\u6cd5\u53ef\u5206\u4e3a\u4e09\u7c7b&#xff1a;&#xff08;i&#xff09;\u76f4\u63a5\u5229\u7528\u63d0\u793a\u5de5\u7a0b\u6280\u672f\u5f15\u5bfc\u5927\u578b\u8bed\u8a00\u6a21\u578b&#xff08;LLMs&#xff09;\u6216\u5927\u578b\u63a8\u7406\u6a21\u578b&#xff08;LRMs&#xff09;[3-5]\u6267\u884c\u590d\u6742\u4efb\u52a1\u3002&#xff08;ii&#xff09;\u901a\u8fc7\u76d1\u7763\u5fae\u8c03&#xff08;SFT&#xff09;\u6216\u5f3a\u5316\u5b66\u4e60&#xff08;RL&#xff09;\u5c06\u641c\u7d22\u6216\u6d4f\u89c8\u5668\u529f\u80fd\u96c6\u6210\u5230\u7f51\u7edc\u4ee3\u7406\u4e2d[6&#xff0c; 5&#xff0c; 
7-10]\u3002\u7b2c\u4e00\u79cd\u65e0\u9700\u8bad\u7ec3\u7684\u65b9\u6cd5\u65e0\u6cd5\u6709\u6548\u5229\u7528\u63a8\u7406\u6a21\u578b\u6240\u8d4b\u4e88\u7684\u63a8\u7406\u80fd\u529b\u3002\u5c3d\u7ba1\u540e\u4e00\u79cd\u65b9\u6cd5\u901a\u8fc7 SFT \u6216 RL \u8bad\u7ec3\u5185\u5316\u4e86\u67d0\u4e9b\u4fe1\u606f\u641c\u7d22\u80fd\u529b&#xff0c;\u4f46\u5176\u8bad\u7ec3\u548c\u8bc4\u4f30\u6570\u636e\u96c6\u76f8\u5bf9\u7b80\u5355&#xff0c;\u5e76\u672a\u6db5\u76d6\u73b0\u5b9e\u4e16\u754c\u7684\u6311\u6218&#xff0c;\u4f8b\u5982\u5728 2Wiki \u6570\u636e\u96c6\u4e0a\u7684\u6027\u80fd\u5df2\u8d85\u8fc7 80%\u3002\u6b64\u5916&#xff0c;\u5f53\u524d\u7684 SFT \u6216 RL \u8bad\u7ec3\u8303\u5f0f\u5e76\u672a\u5145\u5206\u4e14\u9ad8\u6548\u5730\u5229\u7528\u4fe1\u606f\u641c\u7d22\u884c\u4e3a\u7684\u6f5c\u529b\u3002\u6784\u5efa\u81ea\u4e3b\u7684\u4fe1\u606f\u641c\u7d22\u4ee3\u7406\u6d89\u53ca\u89e3\u51b3\u4e00\u7cfb\u5217\u8de8\u8d8a\u7f51\u7edc\u73af\u5883\u611f\u77e5\u548c\u51b3\u7b56\u7684\u6311\u6218&#xff1a;&#xff08;1&#xff09;\u83b7\u53d6\u9ad8\u8d28\u91cf\u3001\u7ec6\u7c92\u5ea6\u7684\u6d4f\u89c8\u6570\u636e&#xff0c;\u8fd9\u4e9b\u6570\u636e\u80fd\u53cd\u6620\u591a\u6837\u5316\u7684\u7528\u6237\u610f\u56fe\u548c\u4e30\u5bcc\u7684\u4ea4\u4e92\u60c5\u5883&#xff1b;&#xff08;2&#xff09;\u6784\u5efa\u53ef\u9760\u7684\u8f68\u8ff9&#xff0c;\u4ee5\u652f\u6301\u957f\u671f\u63a8\u7406\u548c\u4efb\u52a1\u5206\u89e3&#xff1b;&#xff08;3&#xff09;\u8bbe\u8ba1\u53ef\u6269\u5c55\u4e14\u901a\u7528\u7684\u8bad\u7ec3\u7b56\u7565&#xff0c;\u4f7f\u7f51\u7edc\u4ee3\u7406\u80fd\u591f\u5728\u5206\u5e03\u5916\u7684\u7f51\u7edc\u73af\u5883\u3001\u590d\u6742\u7684\u4ea4\u4e92\u6a21\u5f0f\u548c\u957f\u671f\u76ee\u6807\u4e2d\u5c55\u73b0\u51fa\u7a33\u5065\u7684\u884c\u4e3a\u3002<\/p>\n<\/td>\n<\/tr>\n<tr>\n<td style=\"vertical-align:top;width:240.8500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">To address these challenges, our objective is to unlock the autonomous multi-turn information-seeking agency, 
exploring how to build a web agent like Deep Research from scratch. An agent model like Deep Research produces sequences of interleaved reasoning and action steps, where each action invokes a tool to interact with the external environment autonomously. Observations from these interactions guide subsequent reasoning and actions until the task is completed. This process is optimized through end-to-end tool-augmented training. The ReAct framework [11] is the most suitable paradigm, as it tightly couples reasoning with action to facilitate effective learning and generalization in interactive settings.<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">We aim to provide the research community with a systematic guideline for building such agents from a data-centric and training-stage perspective.<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:185.2500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u4e3a\u4e86\u89e3\u51b3\u8fd9\u4e9b\u6311\u6218&#xff0c;\u6211\u4eec\u7684\u76ee\u6807\u662f\u89e3\u9501\u81ea\u4e3b\u7684\u591a\u8f6e\u4fe1\u606f\u641c\u7d22\u4ee3\u7406\u80fd\u529b&#xff0c;\u63a2\u7d22\u5982\u4f55\u4ece\u96f6\u5f00\u59cb\u6784\u5efa\u4e00\u4e2a\u50cf Deep Research \u8fd9\u6837\u7684\u7f51\u7edc\u4ee3\u7406\u3002\u50cf Deep Research \u8fd9\u6837\u7684\u4ee3\u7406\u6a21\u578b\u4f1a\u751f\u6210\u4e00\u7cfb\u5217\u4ea4\u9519\u7684\u63a8\u7406\u548c\u884c\u52a8\u6b65\u9aa4\u5e8f\u5217&#xff0c;\u5176\u4e2d\u6bcf\u4e2a\u884c\u52a8\u90fd\u4f1a\u81ea\u4e3b\u8c03\u7528\u4e00\u4e2a\u5de5\u5177\u4e0e\u5916\u90e8\u73af\u5883\u8fdb\u884c\u4ea4\u4e92\u3002\u8fd9\u4e9b\u4ea4\u4e92\u7684\u89c2\u5bdf\u7ed3\u679c\u4f1a\u5f15\u5bfc\u540e\u7eed\u7684\u63a8\u7406\u548c\u884c\u52a8&#xff0c;\u76f4\u81f3\u4efb\u52a1\u5b8c\u6210\u3002\u8fd9\u4e00\u8fc7\u7a0b\u901a\u8fc7\u7aef\u5230\u7aef\u5de5\u5177\u589e\u5f3a\u8bad\u7ec3\u5f97\u4ee5\u4f18\u5316\u3002ReAct 
\u6846\u67b6[11]\u662f\u6700\u5408\u9002\u7684\u8303\u5f0f&#xff0c;\u56e0\u4e3a\u5b83\u5c06\u63a8\u7406\u4e0e\u884c\u52a8\u7d27\u5bc6\u7ed3\u5408&#xff0c;\u4ece\u800c\u5728\u4ea4\u4e92\u73af\u5883\u4e2d\u4fc3\u8fdb\u6709\u6548\u7684\u5b66\u4e60\u548c\u6cdb\u5316\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u6211\u4eec\u7684\u76ee\u6807\u662f\u4e3a\u7814\u7a76\u754c\u63d0\u4f9b\u4e00\u4e2a\u4ece\u6570\u636e\u4e3a\u4e2d\u5fc3\u548c\u8bad\u7ec3\u9636\u6bb5\u7684\u89d2\u5ea6\u6784\u5efa\u6b64\u7c7b\u4ee3\u7406\u7684\u7cfb\u7edf\u6027\u6307\u5357\u3002<\/p>\n<\/td>\n<\/tr>\n<tr>\n<td style=\"vertical-align:top;width:240.8500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">From a data-centric perspective, constructing web QA data is crucial to building web agents, regardless of whether the training paradigm is SFT or RL. Widely used QA datasets are often shallow, typically consisting of problems that can be solved with a single or a few-turn search. Previous works often filter the difficult QA pairs from open-sourced human-labeled datasets using prompting techniques [7]. Additionally, challenging web-based QA datasets typically only have test or validation sets, and their data size is relatively small. For example, GAIA [12] only has 466, WebWalkerQA [3] contains 680 examples, and BrowseComp [13] has 1,266, making them insufficient for effective training. Therefore, the automatic synthesis of high-quality datasets becomes crucial [14, 15]. We synthesise the datasets in two ways: 1). By crawling web pages to construct deep queries, referred to as CRAWLQA, enabling the acquisition of web information through click actions. 2). 
By enhancing easy-to-hard QA pairs synthesis to incentivize the progression from weak-to-strong agency, transforming simple questions into complex ones, termed E2HQA.<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">From a training-stage perspective, prior work has explored SFT or off-policy RL, but these approaches often face generalization issues, particularly in complex, real-world search environments. Other methods adopt on-policy RL directly [6], but in multi-tool settings, early training steps tend to focus primarily on learning tool usage via instruction following. To enable more efficient and effective training, we adopt a two-stage approach combining rejection sampling fine-tuning (RFT) with subsequent on-policy RL. For the trajectory sampling, we restrict the action space to two commonly effective web information-seeking tools as action: search and click. Building on this setup, we employ rejection sampling to generate trajectories using two prompting strategies: one with a strong instruction LLMs for Short-CoT and another leveraging the LRMs for Long-CoT. These yield high-quality trajectories containing either short or long thought, respectively. 
In the RL stage, we adopt the Decoupled Clip and Dynamic Sampling Policy Optimization (DAPO) algorithm [16], whose dynamic sampling mechanism can effectively exploit QA pairs that remain underutilized during the SFT phase, thereby enhancing data efficiency and policy robustness.<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:185.2500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u4ece\u4ee5\u6570\u636e\u4e3a\u4e2d\u5fc3\u7684\u89d2\u5ea6\u6765\u770b&#xff0c;\u6784\u5efa\u7f51\u7edc\u95ee\u7b54\u6570\u636e\u5bf9\u4e8e\u6784\u5efa\u7f51\u7edc\u4ee3\u7406\u81f3\u5173\u91cd\u8981&#xff0c;\u65e0\u8bba\u8bad\u7ec3\u8303\u5f0f\u662f\u57fa\u4e8e\u76d1\u7763\u7684\u5fae\u8c03&#xff08;SFT&#xff09;\u8fd8\u662f\u57fa\u4e8e\u5f3a\u5316\u5b66\u4e60&#xff08;RL&#xff09;\u3002\u5e7f\u6cdb\u4f7f\u7528\u7684\u95ee\u7b54\u6570\u636e\u96c6\u901a\u5e38\u6bd4\u8f83\u6d45\u663e&#xff0c;\u901a\u5e38\u7531\u53ef\u4ee5\u901a\u8fc7\u5355\u6b21\u6216\u51e0\u6b21\u641c\u7d22\u89e3\u51b3\u7684\u95ee\u9898\u7ec4\u6210\u3002\u5148\u524d\u7684\u5de5\u4f5c\u5e38\u5e38\u4f7f\u7528\u63d0\u793a\u6280\u672f\u4ece\u5f00\u6e90\u7684\u4eba\u5de5\u6807\u6ce8\u6570\u636e\u96c6\u4e2d\u7b5b\u9009\u51fa\u56f0\u96be\u7684\u95ee\u7b54\u5bf9[7]\u3002\u6b64\u5916&#xff0c;\u5177\u6709\u6311\u6218\u6027\u7684\u57fa\u4e8e\u7f51\u7edc\u7684\u95ee\u7b54\u6570\u636e\u96c6\u901a\u5e38\u53ea\u6709\u6d4b\u8bd5\u96c6\u6216\u9a8c\u8bc1\u96c6&#xff0c;\u800c\u4e14\u6570\u636e\u89c4\u6a21\u76f8\u5bf9\u8f83\u5c0f\u3002\u4f8b\u5982&#xff0c;GAIA [12] \u53ea\u6709 466 \u4e2a\u6837\u672c&#xff0c;WebWalkerQA [3] \u5305\u542b 680 \u4e2a\u793a\u4f8b&#xff0c;BrowseComp [13] \u6709 1266 \u4e2a&#xff0c;\u8fd9\u4f7f\u5f97\u5b83\u4eec\u4e0d\u8db3\u4ee5\u8fdb\u884c\u6709\u6548\u7684\u8bad\u7ec3\u3002\u56e0\u6b64&#xff0c;\u9ad8\u8d28\u91cf\u6570\u636e\u96c6\u7684\u81ea\u52a8\u5408\u6210\u53d8\u5f97\u81f3\u5173\u91cd\u8981[14&#xff0c; 
15]\u3002\u6211\u4eec\u901a\u8fc7\u4e24\u79cd\u65b9\u5f0f\u5408\u6210\u6570\u636e\u96c6&#xff1a;1&#xff09;\u901a\u8fc7\u6293\u53d6\u7f51\u9875\u6765\u6784\u5efa\u6df1\u5ea6\u67e5\u8be2&#xff0c;\u79f0\u4e3a CRAWLQA&#xff0c;\u8fd9\u4f7f\u5f97\u80fd\u591f\u901a\u8fc7\u70b9\u51fb\u64cd\u4f5c\u83b7\u53d6\u7f51\u7edc\u4fe1\u606f\u30022&#xff09;\u901a\u8fc7\u589e\u5f3a\u4ece\u7b80\u5355\u5230\u56f0\u96be\u7684\u95ee\u7b54\u5bf9\u5408\u6210&#xff0c;\u4ee5\u6fc0\u52b1\u4ece\u5f31\u4ee3\u7406\u5230\u5f3a\u4ee3\u7406\u7684\u6f14\u8fdb&#xff0c;\u5c06\u7b80\u5355\u95ee\u9898\u8f6c\u5316\u4e3a\u590d\u6742\u95ee\u9898&#xff0c;\u79f0\u4e3a E2HQA\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u4ece\u8bad\u7ec3\u9636\u6bb5\u7684\u89d2\u5ea6\u6765\u770b&#xff0c;\u5148\u524d\u7684\u5de5\u4f5c\u63a2\u7d22\u4e86\u76d1\u7763\u5fae\u8c03&#xff08;SFT&#xff09;\u6216\u79bb\u7b56\u7565\u5f3a\u5316\u5b66\u4e60&#xff08;RL&#xff09;&#xff0c;\u4f46\u8fd9\u4e9b\u65b9\u6cd5\u5f80\u5f80\u9762\u4e34\u6cdb\u5316\u95ee\u9898&#xff0c;\u5c24\u5176\u662f\u5728\u590d\u6742\u7684\u73b0\u5b9e\u4e16\u754c\u641c\u7d22\u73af\u5883\u4e2d\u3002\u5176\u4ed6\u65b9\u6cd5\u76f4\u63a5\u91c7\u7528\u5728\u7b56\u7565 RL [6]&#xff0c;\u4f46\u5728\u591a\u5de5\u5177\u8bbe\u7f6e\u4e2d&#xff0c;\u65e9\u671f\u8bad\u7ec3\u6b65\u9aa4\u5f80\u5f80\u4e3b\u8981\u96c6\u4e2d\u5728\u901a\u8fc7\u9075\u5faa\u6307\u4ee4\u6765\u5b66\u4e60\u5de5\u5177\u4f7f\u7528\u3002\u4e3a\u4e86\u5b9e\u73b0\u66f4\u9ad8\u6548\u548c\u6709\u6548\u7684\u8bad\u7ec3&#xff0c;\u6211\u4eec\u91c7\u7528\u4e86\u4e00\u4e2a\u4e24\u9636\u6bb5\u7684\u65b9\u6cd5&#xff0c;\u5c06\u62d2\u7edd\u91c7\u6837\u5fae\u8c03&#xff08;RFT&#xff09;\u4e0e\u540e\u7eed\u7684\u5728\u7b56\u7565 RL 
\u76f8\u7ed3\u5408\u3002\u5bf9\u4e8e\u8f68\u8ff9\u91c7\u6837&#xff0c;\u6211\u4eec\u5c06\u52a8\u4f5c\u7a7a\u95f4\u9650\u5236\u4e3a\u4e24\u4e2a\u5e38\u7528\u7684\u6709\u6548\u7684\u7f51\u7edc\u4fe1\u606f\u641c\u7d22\u5de5\u5177\u4f5c\u4e3a\u52a8\u4f5c&#xff1a;\u641c\u7d22\u548c\u70b9\u51fb\u3002\u5728\u6b64\u57fa\u7840\u4e0a&#xff0c;\u6211\u4eec\u91c7\u7528\u62d2\u7edd\u91c7\u6837&#xff0c;\u4f7f\u7528\u4e24\u79cd\u63d0\u793a\u7b56\u7565\u751f\u6210\u8f68\u8ff9&#xff1a;\u4e00\u79cd\u662f\u4f7f\u7528\u5f3a\u5927\u7684\u6307\u4ee4 LLM \u8fdb\u884c\u77ed\u94fe\u5f0f\u601d\u8003&#xff08;Short-CoT&#xff09;&#xff0c;\u53e6\u4e00\u79cd\u662f\u5229\u7528 LRM \u8fdb\u884c\u957f\u94fe\u5f0f\u601d\u8003&#xff08;Long-CoT&#xff09;\u3002\u8fd9\u4e9b\u5206\u522b\u4ea7\u751f\u4e86\u5305\u542b\u77ed\u6216\u957f\u601d\u8003\u7684\u9ad8\u8d28\u91cf\u8f68\u8ff9\u3002\u5728\u5f3a\u5316\u5b66\u4e60\u9636\u6bb5&#xff0c;\u6211\u4eec\u91c7\u7528\u89e3\u8026\u526a\u8f91\u548c\u52a8\u6001\u91c7\u6837\u7b56\u7565\u4f18\u5316&#xff08;DAPO&#xff09;\u7b97\u6cd5[16]&#xff0c;\u5176\u52a8\u6001\u91c7\u6837\u673a\u5236\u80fd\u591f\u6709\u6548\u5229\u7528\u5728 SFT \u9636\u6bb5\u672a\u5145\u5206\u5229\u7528\u7684\u95ee\u7b54\u5bf9&#xff0c;\u4ece\u800c\u63d0\u9ad8\u6570\u636e\u6548\u7387\u548c\u7b56\u7565\u7684\u7a33\u5065\u6027\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<\/td>\n<\/tr>\n<tr>\n<td style=\"vertical-align:top;width:240.8500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">Our key contributions can be summarized as follows: we abstract the end-to-end web agents building pipeline into four key stages: Step I: Construct diverse and challenging deep information seeking QA pairs based on the real-world web environment (\u00a72.1); Step II: Sample high-quality trajectories from QA pairs using both LLMs and LRMs to guide the agency learning process (\u00a72.2); Step III: Perform fine-tuning to adapt the format instruction following to agentic tasks and 
environments (\u00a73.1); Step IV: Apply RL to optimize the agent\u2019s decision-making and generalization capabilities in real-world web environments (\u00a73.2). We offer a systematic, end-to-end pipeline for building long-term information-seeking web agents.<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">Extensive experiments on two web information seeking benchmarks, GAIA and WebWalkerQA, show the effectiveness of our pipeline and WebDancer (\u00a74). We further present a comprehensive analysis covering data efficiency, agentic system evaluation, and agent learning (\u00a75).<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:185.2500pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u6211\u4eec\u7684\u4e3b\u8981\u8d21\u732e\u53ef\u603b\u7ed3\u5982\u4e0b&#xff1a;\u6211\u4eec\u5c06\u7aef\u5230\u7aef\u7684\u7f51\u7edc\u4ee3\u7406\u6784\u5efa\u6d41\u7a0b\u62bd\u8c61\u4e3a\u56db\u4e2a\u5173\u952e\u9636\u6bb5&#xff1a;\u6b65\u9aa4\u4e00&#xff1a;\u57fa\u4e8e\u771f\u5b9e\u4e16\u754c\u7684\u7f51\u7edc\u73af\u5883\u6784\u5efa\u591a\u6837\u4e14\u5177\u6709\u6311\u6218\u6027\u7684\u6df1\u5ea6\u4fe1\u606f\u641c\u7d22\u95ee\u7b54\u5bf9&#xff08;\u00a72.1&#xff09;&#xff1b;\u6b65\u9aa4\u4e8c&#xff1a;\u5229\u7528\u5927\u8bed\u8a00\u6a21\u578b&#xff08;LLMs&#xff09;\u548c\u5927\u578b\u63a8\u7406\u6a21\u578b&#xff08;LRMs&#xff09;\u4ece\u95ee\u7b54\u5bf9\u4e2d\u91c7\u6837\u9ad8\u8d28\u91cf\u8f68\u8ff9&#xff0c;\u4ee5\u5f15\u5bfc\u4ee3\u7406\u5b66\u4e60\u8fc7\u7a0b&#xff08;\u00a72.2&#xff09;&#xff1b;\u6b65\u9aa4\u4e09&#xff1a;\u8fdb\u884c\u5fae\u8c03\u4ee5\u9002\u5e94\u683c\u5f0f\u6307\u4ee4\u9075\u5faa\u5230\u4ee3\u7406\u4efb\u52a1\u548c\u73af\u5883&#xff08;\u00a73.1&#xff09;&#xff1b;\u6b65\u9aa4\u56db&#xff1a;\u5e94\u7528\u5f3a\u5316\u5b66\u4e60\u4f18\u5316\u4ee3\u7406\u5728\u771f\u5b9e\u7f51\u7edc\u73af\u5883\u4e2d\u7684\u51b3\u7b56\u548c\u6cdb\u5316\u80fd\u529b&#xff08;\u00a73.2&#xff09;\u3002\u6211\u4eec\u63d0\u4f9b\u4e86\u4e00\u4e2a\u7cfb\u7edf\u6027\
u7684\u7aef\u5230\u7aef\u6d41\u7a0b&#xff0c;\u7528\u4e8e\u6784\u5efa\u957f\u671f\u7684\u4fe1\u606f\u641c\u7d22\u7f51\u7edc\u4ee3\u7406\u3002<\/p>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5728\u4e24\u4e2a\u7f51\u7edc\u4fe1\u606f\u641c\u7d22\u57fa\u51c6\u6d4b\u8bd5 GAIA \u548c WebWalkerQA \u4e0a\u8fdb\u884c\u7684\u5927\u91cf\u5b9e\u9a8c\u8868\u660e\u4e86\u6211\u4eec\u7684\u6d41\u7a0b\u548c WebDancer \u7684\u6709\u6548\u6027&#xff08;\u00a74&#xff09;\u3002\u6211\u4eec\u8fdb\u4e00\u6b65\u63d0\u4f9b\u4e86\u6db5\u76d6\u6570\u636e\u6548\u7387\u3001\u4ee3\u7406\u7cfb\u7edf\u8bc4\u4f30\u548c\u4ee3\u7406\u5b66\u4e60\u7684\u5168\u9762\u5206\u6790&#xff08;\u7b2c 5 \u8282&#xff09;\u3002<\/p>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<h2 id=\"Conclusion\" style=\"text-align:justify\">Conclusion<\/h2>\n<table border=\"1\" cellspacing=\"0\">\n<tbody>\n<tr>\n<td style=\"vertical-align:top;width:237.3000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">In this work, we propose a systematic framework for building end-to-end multi-step information-seeking web agents from scratch. By introducing scalable QA data synthesis methods and a two-stage training pipeline combining SFT and on-policy RL, our WebDancer agent achieves strong performance on GAIA and WebWalkerQA. These findings underscore the significance of our proposed training strategy and provide valuable insights into the critical aspects of agent training. 
Moving forward, this research offers actionable and systematic pathways for the community to advance the development of increasingly sophisticated agentic models capable of tackling complex real-world information-seeking tasks.<\/p>\n<\/td>\n<td style=\"vertical-align:top;width:188.8000pt\">\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\u5728\u672c\u7814\u7a76\u4e2d&#xff0c;\u6211\u4eec\u63d0\u51fa\u4e86\u4e00\u79cd\u4ece\u96f6\u5f00\u59cb\u6784\u5efa\u7aef\u5230\u7aef\u591a\u6b65\u4fe1\u606f\u68c0\u7d22\u7f51\u7edc\u4ee3\u7406\u7684\u7cfb\u7edf\u6846\u67b6\u3002\u901a\u8fc7\u5f15\u5165\u53ef\u6269\u5c55\u7684\u95ee\u7b54\u6570\u636e\u5408\u6210\u65b9\u6cd5\u4ee5\u53ca\u7ed3\u5408\u6709\u76d1\u7763\u5fae\u8c03&#xff08;SFT&#xff09;\u548c\u7b56\u7565\u5185\u5f3a\u5316\u5b66\u4e60&#xff08;RL&#xff09;\u7684\u4e24\u9636\u6bb5\u8bad\u7ec3\u6d41\u7a0b&#xff0c;\u6211\u4eec\u7684 WebDancer \u4ee3\u7406\u5728 GAIA \u548c WebWalkerQA \u4e0a\u53d6\u5f97\u4e86\u51fa\u8272\u7684\u8868\u73b0\u3002\u8fd9\u4e9b\u53d1\u73b0\u7a81\u663e\u4e86\u6211\u4eec\u6240\u63d0\u51fa\u7684\u8bad\u7ec3\u7b56\u7565\u7684\u91cd\u8981\u6027&#xff0c;\u5e76\u4e3a\u4ee3\u7406\u8bad\u7ec3\u7684\u5173\u952e\u65b9\u9762\u63d0\u4f9b\u4e86\u5b9d\u8d35\u7684\u89c1\u89e3\u3002\u5c55\u671b\u672a\u6765&#xff0c;\u8fd9\u9879\u7814\u7a76\u4e3a\u793e\u533a\u63d0\u4f9b\u4e86\u5207\u5b9e\u53ef\u884c\u4e14\u7cfb\u7edf\u7684\u9014\u5f84&#xff0c;\u4ee5\u63a8\u8fdb\u8d8a\u6765\u8d8a\u590d\u6742\u7684\u4ee3\u7406\u6a21\u578b\u7684\u53d1\u5c55&#xff0c;\u4f7f\u5176\u80fd\u591f\u5e94\u5bf9\u590d\u6742\u7684\u73b0\u5b9e\u4e16\u754c\u4fe1\u606f\u68c0\u7d22\u4efb\u52a1\u3002<\/p>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<p 
style=\"margin-left:.0001pt;margin-right:0;text-align:justify\">\n<\/p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1k\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf11\u6b21\u3002\u200bLLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb\u76ee\u5f55\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfbAbstract1\u3001IntroductionConclusion\u300aWebDancer: Towards Auton<\/p>\n","protected":false},"author":2,"featured_media":44515,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[75,1706],"topic":[],"class_list":["post-44516","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-llm","tag-nlp-llms"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/44516.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1k\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf11\u6b21\u3002\u200bLLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb\u76ee\u5f55\u300aWebDancer: 
Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfbAbstract1\u3001IntroductionConclusion\u300aWebDancer: Towards Auton\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/44516.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-06-19T01:27:57+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/06\/20250619012755-6853679baa58b.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"9 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/44516.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/44516.html\",\"name\":\"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-06-19T01:27:57+00:00\",\"dateModified\":\"2025-06-19T01:27:57+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/44516.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/44516.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/44516.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/44516.html","og_locale":"zh_CN","og_type":"article","og_title":"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb1k\u6b21\uff0c\u70b9\u8d5e10\u6b21\uff0c\u6536\u85cf11\u6b21\u3002\u200bLLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb\u76ee\u5f55\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfbAbstract1\u3001IntroductionConclusion\u300aWebDancer: Towards 
Auton","og_url":"https:\/\/www.wsisp.com\/helps\/44516.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-06-19T01:27:57+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/06\/20250619012755-6853679baa58b.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"9 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/44516.html","url":"https:\/\/www.wsisp.com\/helps\/44516.html","name":"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-06-19T01:27:57+00:00","dateModified":"2025-06-19T01:27:57+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/44516.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/44516.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/44516.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"LLMs\uff1a\u300aWebDancer: Towards Autonomous Information Seeking 
Agency\u300b\u7ffb\u8bd1\u4e0e\u89e3\u8bfb"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/44516","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=44516"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/44516\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/44515"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=44516"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www
.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=44516"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=44516"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=44516"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}