{"id":53566,"date":"2025-08-12T13:29:57","date_gmt":"2025-08-12T05:29:57","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/53566.html"},"modified":"2025-08-12T13:29:57","modified_gmt":"2025-08-12T05:29:57","slug":"python%e5%b8%b8%e7%94%a8%e7%9a%845%e7%a7%8d%e4%b8%ad%e6%96%87%e5%88%86%e8%af%8d%e5%b7%a5%e5%85%b7","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/53566.html","title":{"rendered":"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177"},"content":{"rendered":"<\/p>\n<h4>\u6587\u7ae0\u76ee\u5f55<\/h4>\n<ul>\n<li>\u4e00\u3001jieba<\/li>\n<li>\n<ul>\n<li>1. \u5b89\u88c5<\/li>\n<li>2. \u57fa\u672c\u529f\u80fd<\/li>\n<li>\n<ul>\n<li>\u4e09\u79cd\u5206\u8bcd\u6a21\u5f0f<\/li>\n<li>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/li>\n<li>\u8bcd\u6027\u6807\u6ce8<\/li>\n<li>\u5173\u952e\u8bcd\u63d0\u53d6<\/li>\n<li>\u8fd4\u56de\u8bcd\u8bed\u4f4d\u7f6e<\/li>\n<\/ul>\n<\/li>\n<li>3. \u8bcd\u6027\u6807\u6ce8\u8bf4\u660e<\/li>\n<\/ul>\n<\/li>\n<li>\u4e8c\u3001jiagu<\/li>\n<li>\n<ul>\n<li>1. \u5b89\u88c5<\/li>\n<li>2. \u57fa\u672c\u529f\u80fd<\/li>\n<li>\n<ul>\n<li>\u5206\u8bcd<\/li>\n<li>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/li>\n<li>\u8bcd\u6027\u6807\u6ce8<\/li>\n<li>\u547d\u540d\u5b9e\u4f53\u8bc6\u522b<\/li>\n<li>\u5173\u952e\u8bcd\u63d0\u53d6<\/li>\n<li>\u77e5\u8bc6\u56fe\u8c31\u5173\u7cfb\u62bd\u53d6<\/li>\n<li>\u6587\u672c\u6458\u8981<\/li>\n<li>\u65b0\u8bcd\u53d1\u73b0<\/li>\n<li>\u60c5\u611f\u5206\u6790<\/li>\n<li>\u6587\u672c\u805a\u7c7b<\/li>\n<\/ul>\n<\/li>\n<li>3. \u8bcd\u6027\u6807\u6ce8\u8bf4\u660e<\/li>\n<\/ul>\n<\/li>\n<li>\u4e09\u3001snownlp<\/li>\n<li>\n<ul>\n<li>1. \u5b89\u88c5<\/li>\n<li>2. 
\u57fa\u672c\u529f\u80fd<\/li>\n<li>\n<ul>\n<li>\u5206\u8bcd<\/li>\n<li>\u8bcd\u6027\u6807\u6ce8<\/li>\n<li>\u62fc\u97f3\u8f6c\u6362<\/li>\n<li>\u60c5\u611f\u5206\u6790<\/li>\n<li>\u7b80\u7e41\u8f6c\u6362<\/li>\n<li>\u5173\u952e\u5b57\u62bd\u53d6<\/li>\n<li>\u6458\u8981<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>\u56db\u3001thulac<\/li>\n<li>\n<ul>\n<li>1. \u5b89\u88c5<\/li>\n<li>2. \u57fa\u672c\u529f\u80fd<\/li>\n<li>\n<ul>\n<li>\u5206\u8bcd<\/li>\n<li>\u8bcd\u6027\u6807\u6ce8<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/li>\n<li>\u4e94\u3001lac<\/li>\n<li>\n<ul>\n<li>1. \u5b89\u88c5<\/li>\n<li>2. \u57fa\u672c\u529f\u80fd<\/li>\n<li>\n<ul>\n<li>\u5206\u8bcd<\/li>\n<li>\u8bcd\u6027\u6807\u6ce8\u4e0e\u5b9e\u4f53\u8bc6\u522b<\/li>\n<li>\u8bcd\u8bed\u91cd\u8981\u6027<\/li>\n<li>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/li>\n<\/ul>\n<\/li>\n<li>3. \u8bcd\u6027\u548c\u4e13\u540d\u7c7b\u522b\u6807\u7b7e<\/li>\n<\/ul>\n<\/li>\n<li>\u76f8\u5173\u94fe\u63a5<\/li>\n<\/ul>\n<hr \/>\n<p>\u672c\u6587\u4ecb\u7ecd\u4e86\u4e94\u79cd\u5e38\u7528\u7684\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177&#xff1a;jieba\u3001jiagu\u3001snownlp\u3001thulac\u548cLAC&#xff0c;\u91cd\u70b9\u8bb2\u89e3\u4e86\u5b83\u4eec\u7684\u5206\u8bcd\u529f\u80fd\u53ca\u5176\u4ed6\u6838\u5fc3\u7279\u6027\u3002<\/p>\n<ul>\n<li>jieba&#xff1a;\u652f\u6301\u7cbe\u786e\u6a21\u5f0f\u3001\u5168\u6a21\u5f0f\u548c\u641c\u7d22\u5f15\u64ce\u6a21\u5f0f\u4e09\u79cd\u5206\u8bcd\u65b9\u5f0f&#xff0c;\u63d0\u4f9b\u81ea\u5b9a\u4e49\u8bcd\u5178\u3001\u8bcd\u6027\u6807\u6ce8\u3001\u5173\u952e\u8bcd\u63d0\u53d6\u3001\u8bcd\u8bed\u4f4d\u7f6e\u5b9a\u4f4d\u7b49\u529f\u80fd&#xff0c;\u9002\u7528\u4e8e\u591a\u79cd\u6587\u672c\u5904\u7406\u573a\u666f\u3002<\/li>\n<li>jiagu&#xff1a;\u96c6\u6210\u5206\u8bcd\u3001\u8bcd\u6027\u6807\u6ce8\u3001\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u3001\u60c5\u611f\u5206\u6790\u3001\u77e5\u8bc6\u56fe\u8c31\u5173\u7cfb\u62bd\u53d6\u7b49\u529f\u80fd&#xff0c;\u9002\u5408\u7efc\u5408\u6027NLP\u4efb\u52a1&#xff0c;\u5982\u65b0\u8b
cd\u53d1\u73b0\u548c\u6587\u672c\u805a\u7c7b\u3002<\/li>\n<li>snownlp&#xff1a;\u63d0\u4f9b\u5206\u8bcd\u3001\u60c5\u611f\u5206\u6790\u3001\u7b80\u7e41\u8f6c\u6362\u3001\u62fc\u97f3\u8f6c\u6362\u3001\u6587\u672c\u6458\u8981\u7b49\u529f\u80fd&#xff0c;\u9002\u5408\u7b80\u5355\u7684\u60c5\u611f\u5206\u6790\u6216\u6587\u672c\u8f6c\u6362\u9700\u6c42\u3002<\/li>\n<li>thulac&#xff1a;\u7531\u6e05\u534e\u5927\u5b66\u5f00\u53d1&#xff0c;\u652f\u6301\u5206\u8bcd\u548c\u8bcd\u6027\u6807\u6ce8&#xff0c;\u8f93\u51fa\u683c\u5f0f\u7075\u6d3b&#xff0c;\u9002\u5408\u5b66\u672f\u548c\u7814\u7a76\u7528\u9014\u3002<\/li>\n<li>LAC&#xff1a;\u767e\u5ea6\u5f00\u53d1\u7684\u8bcd\u6cd5\u5206\u6790\u5de5\u5177&#xff0c;\u96c6\u6210\u5206\u8bcd\u3001\u8bcd\u6027\u6807\u6ce8\u3001\u5b9e\u4f53\u8bc6\u522b\u548c\u8bcd\u8bed\u91cd\u8981\u6027\u5206\u6790&#xff0c;\u652f\u6301\u81ea\u5b9a\u4e49\u8bcd\u5178&#xff0c;\u9002\u5408\u9700\u8981\u5b9e\u4f53\u8bc6\u522b\u548c\u5173\u952e\u8bcd\u8bed\u63d0\u53d6\u7684\u573a\u666f\u3002<\/li>\n<\/ul>\n<p>\u6bcf\u79cd\u5de5\u5177\u5404\u6709\u4fa7\u91cd&#xff0c;\u53ef\u6839\u636e\u5177\u4f53\u9700\u6c42\u9009\u62e9\u5408\u9002\u7684\u5de5\u5177\u3002<\/p>\n<h2>\u4e00\u3001jieba<\/h2>\n<p>jieba\u662f\u6700\u5e38\u7528\u7684\u4e2d\u6587\u5206\u8bcd\u5de5\u5177\u4e4b\u4e00\u3002<\/p>\n<h3>1. \u5b89\u88c5<\/h3>\n<p>pip <span class=\"token function\">install<\/span> jieba<\/p>\n<h3>2. 
\u57fa\u672c\u529f\u80fd<\/h3>\n<h4>\u4e09\u79cd\u5206\u8bcd\u6a21\u5f0f<\/h4>\n<ul>\n<li>\u7cbe\u786e\u6a21\u5f0f&#xff08;\u9ed8\u8ba4&#xff09;&#xff1a;\u6700\u5e38\u7528\u7684\u6a21\u5f0f&#xff0c;\u4f18\u5148\u8f93\u51fa\u6700\u51c6\u786e\u7684\u5207\u5206\u7ed3\u679c<\/li>\n<li>\u5168\u6a21\u5f0f&#xff1a;\u679a\u4e3e\u6240\u6709\u53ef\u80fd\u7684\u8bcd\u8bed\u7ec4\u5408&#xff0c;\u8f93\u51fa\u6240\u6709\u6210\u8bcd\u60c5\u51b5<\/li>\n<li>\u641c\u7d22\u5f15\u64ce\u6a21\u5f0f&#xff1a;\u5728\u7cbe\u786e\u6a21\u5f0f\u57fa\u7840\u4e0a&#xff0c;\u5bf9\u957f\u8bcd\u518d\u6b21\u5207\u5206<\/li>\n<\/ul>\n<p>\u793a\u4f8b<\/p>\n<p><span class=\"token keyword\">import<\/span> jieba<\/p>\n<p>text <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u6211\u7231\u81ea\u7136\u8bed\u8a00\u5904\u7406\u6280\u672f&#034;<\/span><\/p>\n<p><span class=\"token comment\"># \u7cbe\u786e\u6a21\u5f0f&#xff08;\u9ed8\u8ba4&#xff09;<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u7cbe\u786e\u6a21\u5f0f:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;\/ &#034;<\/span><span class=\"token punctuation\">.<\/span>join<span class=\"token punctuation\">(<\/span>jieba<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u7cbe\u786e\u6a21\u5f0f: \u6211\/ \u7231\/ \u81ea\u7136\u8bed\u8a00\/ \u5904\u7406\/ \u6280\u672f<\/span><\/p>\n<p><span class=\"token comment\"># \u5168\u6a21\u5f0f<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u5168\u6a21\u5f0f:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;\/ 
&#034;<\/span><span class=\"token punctuation\">.<\/span>join<span class=\"token punctuation\">(<\/span>jieba<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> cut_all<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u5168\u6a21\u5f0f: \u6211\/ \u7231\/ \u81ea\u7136\/ \u81ea\u7136\u8bed\u8a00\/ \u8bed\u8a00\/ \u5904\u7406\/ \u6280\u672f<\/span><\/p>\n<p><span class=\"token comment\"># \u641c\u7d22\u5f15\u64ce\u6a21\u5f0f<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u641c\u7d22\u5f15\u64ce\u6a21\u5f0f:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;\/ &#034;<\/span><span class=\"token punctuation\">.<\/span>join<span class=\"token punctuation\">(<\/span>jieba<span class=\"token punctuation\">.<\/span>cut_for_search<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u641c\u7d22\u5f15\u64ce\u6a21\u5f0f: \u6211\/ \u7231\/ \u81ea\u7136\/ \u8bed\u8a00\/ \u81ea\u7136\u8bed\u8a00\/ \u5904\u7406\/ \u6280\u672f<\/span><\/p>\n<p>\u6a21\u5f0f\u5bf9\u6bd4<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6a21\u5f0f<\/th>\n<th>\u5207\u5206\u7c92\u5ea6<\/th>\n<th>\u901f\u5ea6<\/th>\n<th>\u5197\u4f59\u5ea6<\/th>\n<th>\u5178\u578b\u5e94\u7528\u573a\u666f<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u7cbe\u786e\u6a21\u5f0f<\/td>\n<td>\u6700\u51c6\u786e<\/td>\n<td>\u4e2d<\/td>\n<td>\u4f4e<\/td>\n<td>\u6587\u672c\u5206\u6790\u3001\u5e38\u89c4 
NLP<\/td>\n<\/tr>\n<tr>\n<td>\u5168\u6a21\u5f0f<\/td>\n<td>\u6240\u6709\u53ef\u80fd\u7ec4\u5408<\/td>\n<td>\u6700\u5feb<\/td>\n<td>\u9ad8<\/td>\n<td>\u65b0\u8bcd\u53d1\u73b0\u3001\u7c97\u7c92\u5ea6\u7edf\u8ba1<\/td>\n<\/tr>\n<tr>\n<td>\u641c\u7d22\u5f15\u64ce\u6a21\u5f0f<\/td>\n<td>\u9002\u4e2d&#xff08;\u957f\u8bcd\u5207\u5206&#xff09;<\/td>\n<td>\u8f83\u6162<\/td>\n<td>\u4e2d<\/td>\n<td>\u641c\u7d22\u7d22\u5f15\u3001\u77ed\u6587\u672c\u53ec\u56de<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/h4>\n<p>\u8bcd\u5178\u6587\u4ef6\u683c\u5f0f<\/p>\n<p>\u8bcd\u8bed [\u8bcd\u9891] [\u8bcd\u6027]<\/p>\n<p>\u793a\u4f8b<\/p>\n<p>\u51a0\u72b6\u52a8\u8109 2000 n<br \/>\n\u8840\u5c0f\u677f\u51cf\u5c11\u75c7 1800 n<br \/>\nMRI\u68c0\u67e5 1500 n<\/p>\n<p>\u8bf4\u660e \u8bcd\u9891&#xff1a;\u53ef\u7701\u7565&#xff0c;\u9ed8\u8ba4\u503c\u4f1a\u4f7f\u8be5\u8bcd\u88ab\u5206\u51fa&#xff08;\u6570\u503c\u8d8a\u9ad8&#xff0c;\u8bcd\u8bed\u88ab\u5206\u51fa\u7684\u6982\u7387\u8d8a\u5927&#xff09;\u3002 \u8bcd\u6027&#xff1a;\u53ef\u7701\u7565&#xff0c;\u7528\u4e8e\u8bcd\u6027\u6807\u6ce8&#xff08;\u7528\u4e8e\u540e\u7eed\u7684\u8bcd\u6027\u6807\u6ce8\u4efb\u52a1&#xff0c;\u4e0d\u5f71\u54cd\u5206\u8bcd\u7ed3\u679c&#xff09;<\/p>\n<p>\u793a\u4f8b<\/p>\n<p><span class=\"token comment\"># \u6dfb\u52a0\u81ea\u5b9a\u4e49\u8bcd\u5178<\/span><br \/>\n<span class=\"token comment\"># \u6587\u4ef6\u683c\u5f0f&#xff1a;\u8bcd\u8bed \u8bcd\u9891(\u53ef\u9009) \u8bcd\u6027(\u53ef\u9009)<\/span><br \/>\njieba<span class=\"token punctuation\">.<\/span>load_userdict<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;userdict.txt&#034;<\/span><span class=\"token punctuation\">)<\/span>  <\/p>\n<p><span class=\"token comment\"># \u52a8\u6001\u8c03\u6574\u8bcd\u5178<\/span><br \/>\njieba<span class=\"token punctuation\">.<\/span>add_word<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u81ea\u7136\u8bed\u8a00\u5904\u7406&#034;<\/span><span 
class=\"token punctuation\">,<\/span> freq<span class=\"token operator\">&#061;<\/span><span class=\"token number\">20000<\/span><span class=\"token punctuation\">,<\/span> tag<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;n&#039;<\/span><span class=\"token punctuation\">)<\/span><br \/>\njieba<span class=\"token punctuation\">.<\/span>del_word<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u67d0\u4e2a\u8bcd&#034;<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u8c03\u6574\u8bcd\u9891<\/span><br \/>\njieba<span class=\"token punctuation\">.<\/span>suggest_freq<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u81ea\u7136\u8bed\u8a00&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;\u5904\u7406&#034;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> tune<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u8bcd\u6027\u6807\u6ce8<\/h4>\n<p><span class=\"token keyword\">import<\/span> jieba<span class=\"token punctuation\">.<\/span>posseg <span class=\"token keyword\">as<\/span> pseg<\/p>\n<p>words <span class=\"token operator\">&#061;<\/span> pseg<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">for<\/span> word<span class=\"token punctuation\">,<\/span> flag <span class=\"token keyword\">in<\/span> words<span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;<\/span><span class=\"token interpolation\"><span class=\"token 
punctuation\">{<\/span>word<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">(<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>flag<span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">)&#034;<\/span><\/span><span class=\"token punctuation\">,<\/span> end<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#034; &#034;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u6211(r) \u7231(v) \u81ea\u7136\u8bed\u8a00(l) \u5904\u7406(v) \u6280\u672f(n)<\/span><\/p>\n<h4>\u5173\u952e\u8bcd\u63d0\u53d6<\/h4>\n<p>\u539f\u7406 TF-IDF&#xff1a;\u8bc4\u4f30\u8bcd\u8bed\u7684\u91cd\u8981\u6027&#xff0c;\u4e0e\u8bcd\u9891\u6210\u6b63\u6bd4&#xff0c;\u4e0e\u6587\u6863\u9891\u7387\u6210\u53cd\u6bd4 TextRank&#xff1a;\u57fa\u4e8ePageRank\u7684\u56fe\u6392\u5e8f\u7b97\u6cd5&#xff0c;\u5c06\u6587\u672c\u6784\u5efa\u4e3a\u8bcd\u8bed\u56fe<\/p>\n<p>\u53c2\u6570\u8c03\u6574 topK&#xff1a;\u8fd4\u56de\u5173\u952e\u8bcd\u6570\u91cf withWeight&#xff1a;\u662f\u5426\u8fd4\u56de\u6743\u91cd\u503c allowPOS&#xff1a;\u5141\u8bb8\u7684\u8bcd\u6027\u5217\u8868<\/p>\n<p>\u9009\u62e9 \u77ed\u6587\u672c\u4f18\u5148\u4f7f\u7528TextRank \u957f\u6587\u6863\u4f18\u5148\u4f7f\u7528TF-IDF<\/p>\n<p>\u793a\u4f8b<\/p>\n<p><span class=\"token keyword\">from<\/span> jieba <span class=\"token keyword\">import<\/span> analyse<\/p>\n<p><span class=\"token comment\"># TF-IDF\u5173\u952e\u8bcd\u63d0\u53d6<\/span><br \/>\n<span class=\"token comment\"># \u53ef\u81ea\u5b9a\u4e49IDF\u8bed\u6599\u5e93<\/span><br \/>\n<span class=\"token comment\"># analyse.set_idf_path(&#034;idf.txt&#034;)  <\/span><br \/>\ntfidf_result <span class=\"token operator\">&#061;<\/span> analyse<span class=\"token punctuation\">.<\/span>extract_tags<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> topK<span class=\"token operator\">&#061;<\/span><span 
class=\"token number\">5<\/span><span class=\"token punctuation\">,<\/span> withWeight<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">,<\/span> allowPOS<span class=\"token operator\">&#061;<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;n&#039;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#039;nr&#039;<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;tfidf_result&#039;<\/span><span class=\"token punctuation\">,<\/span> tfidf_result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># tfidf_result [(&#039;\u6280\u672f&#039;, 4.71945717857)]<\/span><\/p>\n<p><span class=\"token comment\"># TextRank\u5173\u952e\u8bcd\u63d0\u53d6<\/span><br \/>\ntextrank_result <span class=\"token operator\">&#061;<\/span> analyse<span class=\"token punctuation\">.<\/span>textrank<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> topK<span class=\"token operator\">&#061;<\/span><span class=\"token number\">5<\/span><span class=\"token punctuation\">,<\/span> withWeight<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;textrank_result&#039;<\/span><span class=\"token punctuation\">,<\/span> textrank_result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># textrank_result [(&#039;\u6280\u672f&#039;, 1.0), (&#039;\u5904\u7406&#039;, 
0.9961264494011037)]<\/span><\/p>\n<h4>\u8fd4\u56de\u8bcd\u8bed\u4f4d\u7f6e<\/h4>\n<p>tokenize\u65b9\u6cd5\u8fd4\u56de\u8bcd\u8bed\u53ca\u5176\u5728\u6587\u672c\u4e2d\u7684\u8d77\u6b62\u4f4d\u7f6e&#xff0c;\u5bf9\u4e8e\u9700\u8981\u5b9a\u4f4d\u8bcd\u8bed\u5728\u6587\u672c\u4e2d\u4f4d\u7f6e\u7684\u5e94\u7528\u573a\u666f\u5f88\u6709\u7528&#xff0c;\u5982\u9ad8\u4eae\u663e\u793a\u3001\u6587\u672c\u6807\u6ce8\u7b49\u3002<\/p>\n<p>\u8f93\u51fa\u683c\u5f0f word&#xff1a;\u8bcd\u8bed\u5185\u5bb9 start&#xff1a;\u8d77\u59cb\u4f4d\u7f6e&#xff08;\u4ece0\u5f00\u59cb&#xff09; end&#xff1a;\u7ed3\u675f\u4f4d\u7f6e&#xff08;Python\u98ce\u683c\u7684\u534a\u5f00\u533a\u95f4&#xff09;<\/p>\n<p>\u793a\u4f8b<\/p>\n<p>result <span class=\"token operator\">&#061;<\/span> jieba<span class=\"token punctuation\">.<\/span>tokenize<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">for<\/span> tk <span class=\"token keyword\">in<\/span> result<span class=\"token punctuation\">:<\/span><br \/>\n    <span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&#034;word:<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>tk<span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> start:<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>tk<span class=\"token punctuation\">[<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\"> end:<\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>tk<span class=\"token punctuation\">[<\/span><span class=\"token number\">2<\/span><span 
class=\"token punctuation\">]<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&#034;<\/span><\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p>\u8f93\u51fa<\/p>\n<p>word:\u6211 start:0 end:1<br \/>\nword:\u7231 start:1 end:2<br \/>\nword:\u81ea\u7136\u8bed\u8a00 start:2 end:6<br \/>\nword:\u5904\u7406 start:6 end:8<br \/>\nword:\u6280\u672f start:8 end:10<\/p>\n<h3>3. \u8bcd\u6027\u6807\u6ce8\u8bf4\u660e<\/h3>\n<p>jieba\u7684\u8bcd\u6027\u6807\u6ce8\u96c6\u57fa\u4e8eICTCLAS\/NLPIR\u7684\u6807\u6ce8\u4f53\u7cfb&#xff0c;\u8bed\u7d20\u6807\u7b7e&#xff08;\u5982Ag\u3001Bg\u7b49&#xff09;\u7528\u4e8e\u8868\u793a\u6784\u8bcd\u8bed\u7d20&#xff0c;\u5e38\u89c1\u8bcd\u6027\u5982\u540d\u8bcd(n)\u3001\u52a8\u8bcd(v)\u3001\u5f62\u5bb9\u8bcd(a)\u7b49\u6709\u7ec6\u5206\u7c7b\u578b&#xff0c;\u7279\u6b8a\u7b26\u53f7\u548c\u672a\u77e5\u8bcd\u4e5f\u6709\u76f8\u5e94\u6807\u6ce8&#xff0c;\u80fd\u591f\u8f83\u597d\u5730\u53cd\u6620\u6c49\u8bed\u8bcd\u6c47\u7684\u8bed\u6cd5\u7279\u5f81\u548c\u6784\u8bcd\u89c4\u5f8b\u3002<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>Ag<\/td>\n<td>\u5f62\u8bed\u7d20<\/td>\n<td>g<\/td>\n<td>\u8bed\u7d20<\/td>\n<td>ns<\/td>\n<td>\u5730\u540d<\/td>\n<td>u<\/td>\n<td>\u52a9\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>a<\/td>\n<td>\u5f62\u5bb9\u8bcd<\/td>\n<td>h<\/td>\n<td>\u524d\u63a5\u6210\u5206<\/td>\n<td>nt<\/td>\n<td>\u673a\u6784\u56e2\u4f53<\/td>\n<td>Vg<\/td>\n<td>\u52a8\u8bed\u7d20<\/td>\n<\/tr>\n<tr>\n<td>ad<\/td>\n<td>\u526f\u5f62\u8bcd<\/td>\n<td>i<\/td>\n<td>\u6210\u8bed<\/td>\n<td>nz<\/td>\n<td>\u5176\u5b83\u4e13\u540d<\/td>\n<td>v<\/td>\n<td>\u52a8\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>an<\/td>\n<td>\u540d\u5f62\u8bcd<\/td>\n<td>j<\/td>\n<td>\u7b80\u7565\u8bed<\/td>\n<td>o<\/td>\n<td>\u62df\u58f0\u8bcd<\/td>\n<td>vd<\/td>\n<td>\u526f\u52a8\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>Bg<\/td>\n<td>\u533a\u522b\u8bed\u7d20<\/td>\n<td>k<\/td>\n<td>\u5
40e\u63a5\u6210\u5206<\/td>\n<td>p<\/td>\n<td>\u4ecb\u8bcd<\/td>\n<td>vn<\/td>\n<td>\u540d\u52a8\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>b<\/td>\n<td>\u533a\u522b\u8bcd<\/td>\n<td>l<\/td>\n<td>\u4e60\u7528\u8bed<\/td>\n<td>q<\/td>\n<td>\u91cf\u8bcd<\/td>\n<td>w<\/td>\n<td>\u6807\u70b9\u7b26\u53f7<\/td>\n<\/tr>\n<tr>\n<td>c<\/td>\n<td>\u8fde\u8bcd<\/td>\n<td>Mg<\/td>\n<td>\u6570\u8bed\u7d20<\/td>\n<td>r<\/td>\n<td>\u4ee3\u8bcd<\/td>\n<td>x<\/td>\n<td>\u975e\u8bed\u7d20\u5b57<\/td>\n<\/tr>\n<tr>\n<td>Dg<\/td>\n<td>\u526f\u8bed\u7d20<\/td>\n<td>m<\/td>\n<td>\u6570\u8bcd<\/td>\n<td>s<\/td>\n<td>\u5904\u6240\u8bcd<\/td>\n<td>Yg<\/td>\n<td>\u8bed\u6c14\u8bed\u7d20<\/td>\n<\/tr>\n<tr>\n<td>d<\/td>\n<td>\u526f\u8bcd<\/td>\n<td>Ng<\/td>\n<td>\u540d\u8bed\u7d20<\/td>\n<td>Tg<\/td>\n<td>\u65f6\u95f4\u8bed\u7d20<\/td>\n<td>y<\/td>\n<td>\u8bed\u6c14\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>e<\/td>\n<td>\u53f9\u8bcd<\/td>\n<td>n<\/td>\n<td>\u540d\u8bcd<\/td>\n<td>t<\/td>\n<td>\u65f6\u95f4\u8bcd<\/td>\n<td>z<\/td>\n<td>\u72b6\u6001\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>f<\/td>\n<td>\u65b9\u4f4d\u8bcd<\/td>\n<td>nr<\/td>\n<td>\u4eba\u540d<\/td>\n<td>Ug<\/td>\n<td>\u52a9\u8bed\u7d20<\/td>\n<td>un<\/td>\n<td>\u672a\u77e5\u8bcd<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h2>\u4e8c\u3001jiagu<\/h2>\n<p>jiagu\u662f\u4e00\u4e2a\u7b80\u5355\u9ad8\u6548\u7684\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177&#xff0c;\u63d0\u4f9b\u4e2d\u6587\u5206\u8bcd\u3001\u8bcd\u6027\u6807\u6ce8\u3001\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u3001\u60c5\u611f\u5206\u6790\u3001\u77e5\u8bc6\u56fe\u8c31\u5173\u7cfb\u62bd\u53d6\u3001\u5173\u952e\u8bcd\u62bd\u53d6\u3001\u6587\u672c\u6458\u8981\u3001\u65b0\u8bcd\u53d1\u73b0\u3001\u6587\u672c\u805a\u7c7b\u7b49\u529f\u80fd\u3002<\/p>\n<h3>1. \u5b89\u88c5<\/h3>\n<p>pip <span class=\"token function\">install<\/span> <span class=\"token parameter variable\">-U<\/span> jiagu<\/p>\n<h3>2. 
\u57fa\u672c\u529f\u80fd<\/h3>\n<h4>\u5206\u8bcd<\/h4>\n<p><span class=\"token keyword\">import<\/span> jiagu<\/p>\n<p>text <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u6df1\u5ea6\u5b66\u4e60\u662f\u4eba\u5de5\u667a\u80fd\u7684\u6838\u5fc3\u6280\u672f&#034;<\/span><br \/>\nwords <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>seg<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>words<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/h4>\n<p>Jiagu\u652f\u6301\u901a\u8fc7\u6587\u4ef6\u548c\u4ee3\u7801\u4e24\u79cd\u65b9\u5f0f\u6dfb\u52a0\u81ea\u5b9a\u4e49\u8bcd\u5178&#xff0c;\u63d0\u5347\u7279\u5b9a\u9886\u57df\u7684\u5206\u8bcd\u51c6\u786e\u7387\u3002<\/p>\n<p><span class=\"token comment\"># \u52a0\u8f7d\u81ea\u5b9a\u4e49\u8bcd\u5178<\/span><br \/>\njiagu<span class=\"token punctuation\">.<\/span>load_userdict<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;userdict.dict&#039;<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># jiagu.load_userdict([&#039;\u6c49\u670d\u548c\u670d\u88c5&#039;])<\/span><\/p>\n<h4>\u8bcd\u6027\u6807\u6ce8<\/h4>\n<p>Jiagu\u7684\u8bcd\u6027\u6807\u6ce8\u91c7\u7528\u901a\u7528\u7684\u8bcd\u6027\u6807\u8bb0\u96c6\u3002<\/p>\n<p>pos <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>pos<span class=\"token punctuation\">(<\/span>words<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>pos<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u547d\u540d\u5b9e\u4f53\u8bc6\u522b<\/h4>\n<p>Jiagu\u5185\u7f6e\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u529f\u80fd&#xff0c;\u53ef\u8bc6\u522b\u4eba\u540d 
(PER)\u3001\u5730\u540d (LOC)\u3001\u673a\u6784\u540d (ORG)\u3001\u5176\u4ed6\u4e13\u6709\u540d\u8bcd (MISC)\u3002<\/p>\n<p>ner <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>ner<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>ner<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u5173\u952e\u8bcd\u63d0\u53d6<\/h4>\n<p><span class=\"token comment\"># \u63d0\u53d63\u4e2a\u5173\u952e\u8bcd<\/span><br \/>\nkeywords <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>keywords<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> <span class=\"token number\">3<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>keywords<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u77e5\u8bc6\u56fe\u8c31\u5173\u7cfb\u62bd\u53d6<\/h4>\n<p><span class=\"token keyword\">import<\/span> jiagu<\/p>\n<p>text <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#039;\u59da\u660e1980\u5e749\u670812\u65e5\u51fa\u751f\u4e8e\u4e0a\u6d77\u5e02\u5f90\u6c47\u533a&#xff0c;\u7956\u7c4d\u6c5f\u82cf\u7701\u82cf\u5dde\u5e02\u5434\u6c5f\u533a\u9707\u6cfd\u9547&#xff0c;\u524d\u4e2d\u56fd\u804c\u4e1a\u7bee\u7403\u8fd0\u52a8\u5458&#xff0c;\u53f8\u804c\u4e2d\u950b&#xff0c;\u73b0\u4efb\u4e2d\u804c\u8054\u516c\u53f8\u8463\u4e8b\u957f\u517c\u603b\u7ecf\u7406\u3002&#039;<\/span><br \/>\nknowledge <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>knowledge<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>knowledge<span class=\"token 
punctuation\">)<\/span><\/p>\n<h4>\u6587\u672c\u6458\u8981<\/h4>\n<p><span class=\"token keyword\">import<\/span> jiagu<\/p>\n<p><span class=\"token keyword\">with<\/span> <span class=\"token builtin\">open<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;input.txt&#039;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#039;r&#039;<\/span><span class=\"token punctuation\">,<\/span> encoding<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;utf-8&#039;<\/span><span class=\"token punctuation\">)<\/span> <span class=\"token keyword\">as<\/span> f<span class=\"token punctuation\">:<\/span><br \/>\n    text <span class=\"token operator\">&#061;<\/span> f<span class=\"token punctuation\">.<\/span>read<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u6458\u8981<\/span><br \/>\nsummarize <span class=\"token operator\">&#061;<\/span> jiagu<span class=\"token punctuation\">.<\/span>summarize<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> <span class=\"token number\">3<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>summarize<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u65b0\u8bcd\u53d1\u73b0<\/h4>\n<p><span class=\"token keyword\">import<\/span> jiagu<\/p>\n<p><span class=\"token comment\"># \u6839\u636e\u6587\u672c&#xff0c;\u5229\u7528\u4fe1\u606f\u71b5\u505a\u65b0\u8bcd\u53d1\u73b0\u3002<\/span><br \/>\njiagu<span class=\"token punctuation\">.<\/span>findword<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;input.txt&#039;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#039;output.txt&#039;<\/span><span class=\"token punctuation\">)<\/span> <\/p>\n<h4>\u60c5\u611f\u5206\u6790<\/h4>\n<p>import 
jiagu<\/p>\n<p>text &#061; &#039;\u6211\u4e0d\u559c\u6b22\u5403\u82f9\u679c\u3002&#039;<br \/>\nsentiment &#061; jiagu.sentiment(text)<br \/>\nprint(sentiment)<\/p>\n<h4>\u6587\u672c\u805a\u7c7b<\/h4>\n<p>import jiagu<\/p>\n<p>docs &#061; [<br \/>\n    &#034;\u82f9\u679c\u662f\u4e00\u79cd\u5e38\u89c1\u7684\u6c34\u679c&#xff0c;\u5bcc\u542b\u7ef4\u751f\u7d20\u548c\u81b3\u98df\u7ea4\u7ef4&#034;,<br \/>\n    &#034;\u9999\u8549\u662f\u70ed\u5e26\u6c34\u679c&#xff0c;\u542b\u6709\u4e30\u5bcc\u7684\u94be\u5143\u7d20&#xff0c;\u6709\u52a9\u4e8e\u7f13\u89e3\u75b2\u52b3&#034;,<br \/>\n    &#034;\u6c34\u679c\u5206\u7c7b\u7814\u7a76&#xff1a;\u6d46\u679c\u3001\u6838\u679c\u548c\u67d1\u6a58\u7c7b\u6c34\u679c\u7684\u8425\u517b\u4ef7\u503c\u6bd4\u8f83&#034;,<br \/>\n    &#034;\u5982\u4f55\u6311\u9009\u65b0\u9c9c\u7684\u6c34\u679c&#xff1f;\u4ece\u989c\u8272\u3001\u6c14\u5473\u548c\u786c\u5ea6\u4e09\u4e2a\u65b9\u9762\u6559\u4f60\u9009\u8d2d\u6280\u5de7&#034;,<br \/>\n    &#034;\u8292\u679c\u7684\u683d\u57f9\u6280\u672f\u53ca\u75c5\u866b\u5bb3\u9632\u6cbb\u65b9\u6cd5\u5206\u4eab&#034;,<br \/>\n    &#034;\u4e0d\u540c\u6210\u719f\u5ea6\u7684\u6c34\u679c\u5728\u50a8\u5b58\u8fc7\u7a0b\u4e2d\u7cd6\u5206\u548c\u9178\u5ea6\u7684\u53d8\u5316\u7814\u7a76&#034;,<br \/>\n    &#034;\u6c34\u679c\u69a8\u6c41\u4e0e\u76f4\u63a5\u98df\u7528\u7684\u8425\u517b\u5dee\u5f02\u5206\u6790&#034;,<br \/>\n    &#034;\u8fdb\u53e3\u6c34\u679c\u4e0e\u672c\u5730\u6c34\u679c\u5728\u4ef7\u683c\u548c\u53e3\u611f\u4e0a\u7684\u5bf9\u6bd4\u8bc4\u6d4b&#034;<br \/>\n]<br \/>\ncluster &#061; jiagu.text_cluster(docs)<br \/>\nfor group_id, values in cluster.items():<br \/>\n    print(group_id, values)<\/p>\n<h3>3. 
\u8bcd\u6027\u6807\u6ce8\u8bf4\u660e<\/h3>\n<p>jiagu\u7684\u8bcd\u6027\u6807\u6ce8\u96c6\u76f8\u5bf9\u7b80\u6d01\u5b9e\u7528&#xff0c;\u540d\u8bcd\u6709\u7ec6\u81f4\u5206\u7c7b&#xff0c;\u52a8\u8bcd\u533a\u5206\u666e\u901a\u52a8\u8bcd\u548c\u7279\u6b8a\u52a8\u8bcd\u7c7b\u578b&#xff0c;\u5305\u542b\u524d\u540e\u63a5\u6210\u5206\u7b49\u6c49\u8bed\u7279\u6709\u8bcd\u7c7b&#xff0c;\u5bf9\u975e\u6c49\u5b57\u5185\u5bb9\u4e5f\u6709\u4e13\u95e8\u6807\u6ce8\u3002<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>n<\/td>\n<td>\u666e\u901a\u540d\u8bcd<\/td>\n<td>ni<\/td>\n<td>\u673a\u6784\u540d<\/td>\n<td>q<\/td>\n<td>\u91cf\u8bcd<\/td>\n<td>j<\/td>\n<td>\u7f29\u7565\u8bed<\/td>\n<\/tr>\n<tr>\n<td>nt<\/td>\n<td>\u65f6\u95f4\u540d\u8bcd<\/td>\n<td>nz<\/td>\n<td>\u5176\u4ed6\u4e13\u540d<\/td>\n<td>d<\/td>\n<td>\u526f\u8bcd<\/td>\n<td>h<\/td>\n<td>\u524d\u63a5\u6210\u5206<\/td>\n<\/tr>\n<tr>\n<td>nd<\/td>\n<td>\u65b9\u4f4d\u540d\u8bcd<\/td>\n<td>v<\/td>\n<td>\u52a8\u8bcd<\/td>\n<td>r<\/td>\n<td>\u4ee3\u8bcd<\/td>\n<td>k<\/td>\n<td>\u540e\u63a5\u6210\u5206<\/td>\n<\/tr>\n<tr>\n<td>nl<\/td>\n<td>\u5904\u6240\u540d\u8bcd<\/td>\n<td>vd<\/td>\n<td>\u8d8b\u5411\u52a8\u8bcd<\/td>\n<td>p<\/td>\n<td>\u4ecb\u8bcd<\/td>\n<td>g<\/td>\n<td>\u8bed\u7d20\u5b57<\/td>\n<\/tr>\n<tr>\n<td>nh<\/td>\n<td>\u4eba\u540d<\/td>\n<td>vl<\/td>\n<td>\u8054\u7cfb\u52a8\u8bcd<\/td>\n<td>c<\/td>\n<td>\u8fde\u8bcd<\/td>\n<td>x<\/td>\n<td>\u975e\u8bed\u7d20\u5b57<\/td>\n<\/tr>\n<tr>\n<td>nhf<\/td>\n<td>\u59d3<\/td>\n<td>vu<\/td>\n<td>\u80fd\u613f\u52a8\u8bcd<\/td>\n<td>u<\/td>\n<td>\u52a9\u8bcd<\/td>\n<td>w<\/td>\n<td>\u6807\u70b9\u7b26\u53f7<\/td>\n<\/tr>\n<tr>\n<td>nhs<\/td>\n<td>\u540d<\/td>\n<td>a<\/td>\n<td>\u5f62\u5bb9\u8bcd<\/td>\n<td>e<\/td>\n<td>\u53f9\u8bcd<\/td>\n<td>ws<\/td>\n<td>\u975e\u6c49\u5b57\u5b57\u7b26\u4e32<\/td>\n<\/tr>\n<tr>\n<td>ns<\/td>\n<td>\u5730\u540d<\/td>\n<td>f<\/td>\n<td>\u533a\u522b\u8bcd<\/td>\n<t
d>o<\/td>\n<td>\u62df\u58f0\u8bcd<\/td>\n<td>wu<\/td>\n<td>\u5176\u4ed6\u672a\u77e5\u7684\u7b26\u53f7<\/td>\n<\/tr>\n<tr>\n<td>nn<\/td>\n<td>\u65cf\u540d<\/td>\n<td>m<\/td>\n<td>\u6570\u8bcd<\/td>\n<td>i<\/td>\n<td>\u4e60\u7528\u8bed<\/td>\n<td><\/td>\n<td><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h2>\u4e09\u3001snownlp<\/h2>\n<p>SnowNLP\u662f\u4e00\u4e2a\u7528\u4e8e\u5904\u7406\u4e2d\u6587\u6587\u672c\u7684 Python \u5e93&#xff0c;\u529f\u80fd\u5305\u62ec\u4e2d\u6587\u5206\u8bcd\u3001\u60c5\u611f\u5206\u6790\u3001\u7b80\u7e41\u8f6c\u6362\u3001\u6587\u672c\u6458\u8981\u3001\u62fc\u97f3\u8f6c\u6362\u7b49\u3002<\/p>\n<h3>1. \u5b89\u88c5<\/h3>\n<p>pip <span class=\"token function\">install<\/span> snownlp<\/p>\n<h3>2. \u57fa\u672c\u529f\u80fd<\/h3>\n<h4>\u5206\u8bcd<\/h4>\n<p><span class=\"token keyword\">from<\/span> snownlp <span class=\"token keyword\">import<\/span> SnowNLP<\/p>\n<p>text <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u8fd9\u4e2a\u7535\u5f71\u771f\u7684\u5f88\u68d2&#xff01;\u5267\u60c5\u8dcc\u5b95\u8d77\u4f0f&#xff0c;\u5f15\u4eba\u5165\u80dc&#xff0c;\u6211\u5f88\u559c\u6b22\u3002&#034;<\/span><br \/>\ns <span class=\"token operator\">&#061;<\/span> SnowNLP<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u5206\u8bcd:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span>s<span class=\"token punctuation\">.<\/span>words<span class=\"token punctuation\">)<\/span><span class=\"token 
punctuation\">)<\/span><\/p>\n<h4>\u8bcd\u6027\u6807\u6ce8<\/h4>\n<p>snownlp\u7684\u8bcd\u6027\u6807\u6ce8\u529f\u80fd\u57fa\u4e8e\u5176\u5206\u8bcd\u7ed3\u679c&#xff0c;\u6807\u6ce8\u96c6\u76f8\u5bf9\u7b80\u5355&#xff0c;\u9002\u5408\u4e0d\u9700\u8981\u590d\u6742\u8bed\u6cd5\u5206\u6790\u7684\u5e94\u7528\u573a\u666f\u3002<\/p>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u8bcd\u6027\u6807\u6ce8:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span>s<span class=\"token punctuation\">.<\/span>tags<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u62fc\u97f3\u8f6c\u6362<\/h4>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u62fc\u97f3:&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span>s<span class=\"token punctuation\">.<\/span>pinyin<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u60c5\u611f\u5206\u6790<\/h4>\n<p><span class=\"token comment\"># 0-1\u4e4b\u95f4&#xff0c;\u8d8a\u63a5\u8fd11\u8868\u793a\u8d8a\u79ef\u6781<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u60c5\u611f\u5206\u6570:&#034;<\/span><span class=\"token punctuation\">,<\/span> s<span class=\"token punctuation\">.<\/span>sentiments<span class=\"token punctuation\">)<\/span>  <\/p>\n<h4>\u7b80\u7e41\u8f6c\u6362<\/h4>\n<p>text<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;\u5b78\u7fd2\u7a0b\u5f0f\u8a2d\u8a08\u8b93\u6211\u7684\u751f\u6d3b\u66f4\u8c50\u5bcc\u591a\u5f69&#039;<\/span><br \/>\ns <span class=\"token operator\">&#061;<\/span> SnowNLP<span 
class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\ntraditional <span class=\"token operator\">&#061;<\/span> s<span class=\"token punctuation\">.<\/span>han<br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u7b80\u4f53\u5b57:&#034;<\/span><span class=\"token punctuation\">,<\/span> traditional<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># \u7b80\u4f53\u5b57: \u5b66\u4e60\u7a0b\u5f0f\u8bbe\u8ba1\u8ba9\u6211\u7684\u751f\u6d3b\u66f4\u4e30\u5bcc\u591a\u5f69<\/span><\/p>\n<h4>\u5173\u952e\u5b57\u62bd\u53d6<\/h4>\n<p>\u5173\u952e\u8bcd\u62bd\u53d6\u57fa\u4e8eTextRank\u7b97\u6cd5&#xff0c;\u8fd4\u56de\u6743\u91cd\u6700\u9ad8\u7684\u82e5\u5e72\u4e2a\u8bcd\u8bed&#xff0c;\u9002\u7528\u4e8e\u5feb\u901f\u83b7\u53d6\u6587\u672c\u4e3b\u9898\u3002<\/p>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u5173\u952e\u8bcd:&#034;<\/span><span class=\"token punctuation\">,<\/span> s<span class=\"token punctuation\">.<\/span>keywords<span class=\"token punctuation\">(<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u6458\u8981<\/h4>\n<p><span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string\">&#034;\u6458\u8981:&#034;<\/span><span class=\"token punctuation\">,<\/span> s<span class=\"token punctuation\">.<\/span>summary<span class=\"token punctuation\">(<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h2>\u56db\u3001thulac<\/h2>\n<p>THULAC\u7531\u6e05\u534e\u5927\u5b66\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e0e\u793e\u4f1a\u4eba\u6587\u8ba1\u7b97\u5b9e\u9a8c\u5ba4\u7814\u5236\u3002<\/p>\n<h3>1. 
\u5b89\u88c5<\/h3>\n<p>pip <span class=\"token function\">install<\/span> thulac<\/p>\n<h3>2. \u57fa\u672c\u529f\u80fd<\/h3>\n<h4>\u5206\u8bcd<\/h4>\n<p><span class=\"token keyword\">import<\/span> thulac<br \/>\n<span class=\"token comment\"># \u9ed8\u8ba4\u6a21\u5f0f<\/span><br \/>\nthu <span class=\"token operator\">&#061;<\/span> thulac<span class=\"token punctuation\">.<\/span>thulac<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\ntext <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u6211\u7231\u5317\u4eac\u5929\u5b89\u95e8&#034;<\/span><\/p>\n<p><span class=\"token comment\"># \u5206\u8bcd<\/span><br \/>\nresult <span class=\"token operator\">&#061;<\/span> thu<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>result<span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u4ec5\u5206\u8bcd<\/span><br \/>\nthu <span class=\"token operator\">&#061;<\/span> thulac<span class=\"token punctuation\">.<\/span>thulac<span class=\"token punctuation\">(<\/span>seg_only<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>thu<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u8bcd\u6027\u6807\u6ce8<\/h4>\n<p>thulac\u7684\u8bcd\u6027\u6807\u6ce8\u529f\u80fd\u652f\u6301\u8fd4\u56de\u5143\u7ec4\u6216\u62fc\u63a5\u5b57\u7b26\u4e32\u4e24\u79cd\u8f93\u51fa\u683c\u5f0f&#xff0c;\u65b9\u4fbf\u4e0d\u540c\u573a\u666f\u4e0b\u7684\u4f7f\u7528\u3002<\/p>\n<p><span class=\"token 
keyword\">import<\/span> thulac<br \/>\nthu <span class=\"token operator\">&#061;<\/span> thulac<span class=\"token punctuation\">.<\/span>thulac<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\ntext <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u6e05\u534e\u5927\u5b66\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5b9e\u9a8c\u5ba4&#034;<\/span><br \/>\nseg_result <span class=\"token operator\">&#061;<\/span> thu<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>seg_result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># [[&#039;\u6e05\u534e\u5927\u5b66&#039;, &#039;ni&#039;], [&#039;\u81ea\u7136&#039;, &#039;n&#039;], [&#039;\u8bed\u8a00&#039;, &#039;n&#039;], [&#039;\u5904\u7406&#039;, &#039;v&#039;], [&#039;\u5b9e\u9a8c\u5ba4&#039;, &#039;n&#039;]]<\/span><br \/>\nseg_result <span class=\"token operator\">&#061;<\/span> thu<span class=\"token punctuation\">.<\/span>cut<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">,<\/span> text<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">.<\/span>split<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>seg_result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># [&#039;\u6e05\u534e\u5927\u5b66_ni&#039;, &#039;\u81ea\u7136_n&#039;, &#039;\u8bed\u8a00_n&#039;, &#039;\u5904\u7406_v&#039;, 
&#039;\u5b9e\u9a8c\u5ba4_n&#039;]<\/span><\/p>\n<h2>\u4e94\u3001lac<\/h2>\n<p>\u767e\u5ea6\u81ea\u7136\u8bed\u8a00\u5904\u7406\u90e8\u7684\u8bcd\u6cd5\u5206\u6790\u5de5\u5177\u3002<\/p>\n<h3>1. \u5b89\u88c5<\/h3>\n<p>pip <span class=\"token function\">install<\/span> LAC<\/p>\n<h3>2. \u57fa\u672c\u529f\u80fd<\/h3>\n<h4>\u5206\u8bcd<\/h4>\n<p><span class=\"token keyword\">from<\/span> LAC <span class=\"token keyword\">import<\/span> LAC<\/p>\n<p><span class=\"token comment\"># \u521d\u59cb\u5316\u6a21\u578b&#xff08;\u9ed8\u8ba4\u4f1a\u4e0b\u8f7d\u6a21\u578b\u6587\u4ef6&#xff09;<\/span><br \/>\nlac <span class=\"token operator\">&#061;<\/span> LAC<span class=\"token punctuation\">(<\/span>mode<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;seg&#039;<\/span><span class=\"token punctuation\">)<\/span> <\/p>\n<p><span class=\"token comment\"># \u5206\u8bcd<\/span><br \/>\ntext <span class=\"token operator\">&#061;<\/span> <span class=\"token string\">&#034;\u767e\u5ea6\u662f\u4e00\u5bb6\u9ad8\u79d1\u6280\u516c\u53f8&#034;<\/span><br \/>\nresult <span class=\"token operator\">&#061;<\/span> lac<span class=\"token punctuation\">.<\/span>run<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># [&#039;\u767e\u5ea6&#039;, &#039;\u662f&#039;, &#039;\u4e00\u5bb6&#039;, &#039;\u9ad8\u79d1\u6280&#039;, &#039;\u516c\u53f8&#039;]<\/span><\/p>\n<p><span class=\"token comment\"># \u6279\u91cf\u6837\u672c\u8f93\u5165<\/span><br \/>\ntexts <span class=\"token operator\">&#061;<\/span> <span class=\"token punctuation\">[<\/span><span class=\"token string\">&#034;\u767e\u5ea6\u662f\u4e00\u5bb6\u9ad8\u79d1\u6280\u516c\u53f8&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token 
string\">&#034;\u963f\u91cc\u5df4\u5df4\u662f\u4e00\u5bb6\u9ad8\u79d1\u6280\u516c\u53f8&#034;<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token string\">&#034;\u817e\u8baf\u662f\u4e00\u5bb6\u9ad8\u79d1\u6280\u516c\u53f8&#034;<\/span><span class=\"token punctuation\">]<\/span><br \/>\nresults <span class=\"token operator\">&#061;<\/span> lac<span class=\"token punctuation\">.<\/span>run<span class=\"token punctuation\">(<\/span>texts<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>results<span class=\"token punctuation\">)<\/span><\/p>\n<h4>\u8bcd\u6027\u6807\u6ce8\u4e0e\u5b9e\u4f53\u8bc6\u522b<\/h4>\n<p>\u767e\u5ea6LAC\u5c06\u8bcd\u6027\u6807\u6ce8\u548c\u5b9e\u4f53\u8bc6\u522b\u529f\u80fd\u96c6\u6210\u5728\u4e00\u8d77&#xff0c;\u80fd\u540c\u65f6\u8f93\u51fa\u8bcd\u8bed\u7684\u8bcd\u6027\u6807\u7b7e\u548c\u5b9e\u4f53\u7c7b\u578b\u6807\u7b7e&#xff08;\u5982\u4eba\u540d\u3001\u5730\u540d\u7b49&#xff09;\u3002<\/p>\n<p><span class=\"token comment\"># \u88c5\u8f7dLAC\u6a21\u578b<\/span><br \/>\nlac <span class=\"token operator\">&#061;<\/span> LAC<span class=\"token punctuation\">(<\/span>mode<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;lac&#039;<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token comment\"># \u5355\u4e2a\u6837\u672c\u8f93\u5165<\/span><br \/>\nresult <span class=\"token operator\">&#061;<\/span> lac<span class=\"token punctuation\">.<\/span>run<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># [[&#039;\u767e\u5ea6&#039;, &#039;\u662f&#039;, &#039;\u4e00\u5bb6&#039;, &#039;\u9ad8\u79d1\u6280&#039;, &#039;\u516c\u53f8&#039;], [&#039;ORG&#039;, &#039;v&#039;, 
&#039;m&#039;, &#039;n&#039;, &#039;n&#039;]]<\/span><\/p>\n<h4>\u8bcd\u8bed\u91cd\u8981\u6027<\/h4>\n<p>\u8bcd\u8bed\u91cd\u8981\u6027\u5206\u6790\u529f\u80fd\u53ef\u4ee5\u8bc6\u522b\u6587\u672c\u4e2d\u7684\u5173\u952e\u8bcd\u8bed&#xff0c;\u6570\u503c\u8d8a\u9ad8\u8868\u793a\u8bcd\u8bed\u5728\u6587\u672c\u4e2d\u8d8a\u91cd\u8981\u3002<\/p>\n<p><span class=\"token comment\"># \u8bcd\u8bed\u91cd\u8981\u6027<\/span><br \/>\nlac <span class=\"token operator\">&#061;<\/span> LAC<span class=\"token punctuation\">(<\/span>mode<span class=\"token operator\">&#061;<\/span><span class=\"token string\">&#039;rank&#039;<\/span><span class=\"token punctuation\">)<\/span><br \/>\nresult <span class=\"token operator\">&#061;<\/span> lac<span class=\"token punctuation\">.<\/span>run<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token keyword\">print<\/span><span class=\"token punctuation\">(<\/span>result<span class=\"token punctuation\">)<\/span><br \/>\n<span class=\"token comment\"># [[&#039;\u767e\u5ea6&#039;, &#039;\u662f&#039;, &#039;\u4e00\u5bb6&#039;, &#039;\u9ad8\u79d1\u6280&#039;, &#039;\u516c\u53f8&#039;], [&#039;ORG&#039;, &#039;v&#039;, &#039;m&#039;, &#039;n&#039;, &#039;n&#039;], [3, 0, 2, 3, 1]]<\/span><\/p>\n<h4>\u81ea\u5b9a\u4e49\u8bcd\u5178<\/h4>\n<p><span class=\"token comment\"># \u88c5\u8f7d\u81ea\u5b9a\u4e49\u8bcd\u5178&#xff0c;sep\u53c2\u6570\u8868\u793a\u8bcd\u5178\u6587\u4ef6\u91c7\u7528\u7684\u5206\u9694\u7b26&#xff0c;\u4e3aNone\u65f6\u9ed8\u8ba4\u4f7f\u7528\u7a7a\u683c\u6216\u5236\u8868\u7b26&#039;\\t&#039;<\/span><br \/>\nlac<span class=\"token punctuation\">.<\/span>load_customization<span class=\"token punctuation\">(<\/span><span class=\"token string\">&#039;custom.txt&#039;<\/span><span class=\"token punctuation\">,<\/span> sep<span class=\"token operator\">&#061;<\/span><span class=\"token boolean\">None<\/span><span class=\"token punctuation\">)<\/span><\/p>\n<p><span class=\"token 
comment\"># \u4f7f\u7528\u81ea\u5b9a\u4e49\u8bcd\u5178\u8fdb\u884c\u5206\u8bcd<\/span><br \/>\nresult <span class=\"token operator\">&#061;<\/span> lac<span class=\"token punctuation\">.<\/span>run<span class=\"token punctuation\">(<\/span>text<span class=\"token punctuation\">)<\/span><\/p>\n<h3>3. \u8bcd\u6027\u548c\u4e13\u540d\u7c7b\u522b\u6807\u7b7e<\/h3>\n<p>LAC\u7684\u6807\u6ce8\u4f53\u7cfb\u5305\u542b\u4e30\u5bcc\u7684\u4e13\u6709\u540d\u8bcd\u7c7b\u522b&#xff0c;\u52a8\u8bcd\u548c\u5f62\u5bb9\u8bcd\u6709\u7ec6\u5206\u7c7b\u578b&#xff0c;\u5b9e\u4f53\u8bc6\u522b\u6807\u7b7e\u4e0e\u8bcd\u6027\u6807\u7b7e\u7edf\u4e00\u8f93\u51fa&#xff0c;\u5bf9\u6807\u70b9\u7b26\u53f7\u548c\u975e\u6c49\u5b57\u5185\u5bb9\u4e5f\u6709\u4e13\u95e8\u5904\u7406\u3002<\/p>\n<table>\n<thead>\n<tr>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<th>\u6807\u7b7e<\/th>\n<th>\u542b\u4e49<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>n<\/td>\n<td>\u666e\u901a\u540d\u8bcd<\/td>\n<td>f<\/td>\n<td>\u65b9\u4f4d\u540d\u8bcd<\/td>\n<td>s<\/td>\n<td>\u5904\u6240\u540d\u8bcd<\/td>\n<td>nw<\/td>\n<td>\u4f5c\u54c1\u540d<\/td>\n<\/tr>\n<tr>\n<td>nz<\/td>\n<td>\u5176\u4ed6\u4e13\u540d<\/td>\n<td>v<\/td>\n<td>\u666e\u901a\u52a8\u8bcd<\/td>\n<td>vd<\/td>\n<td>\u52a8\u526f\u8bcd<\/td>\n<td>vn<\/td>\n<td>\u540d\u52a8\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>a<\/td>\n<td>\u5f62\u5bb9\u8bcd<\/td>\n<td>ad<\/td>\n<td>\u526f\u5f62\u8bcd<\/td>\n<td>an<\/td>\n<td>\u540d\u5f62\u8bcd<\/td>\n<td>d<\/td>\n<td>\u526f\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>m<\/td>\n<td>\u6570\u91cf\u8bcd<\/td>\n<td>q<\/td>\n<td>\u91cf\u8bcd<\/td>\n<td>r<\/td>\n<td>\u4ee3\u8bcd<\/td>\n<td>p<\/td>\n<td>\u4ecb\u8bcd<\/td>\n<\/tr>\n<tr>\n<td>c<\/td>\n<td>\u8fde\u8bcd<\/td>\n<td>u<\/td>\n<td>\u52a9\u8bcd<\/td>\n<td>xc<\/td>\n<td>\u5176\u4ed6\u865a\u8bcd<\/td>\n<td>w<\/td>\n<td>\u6807\u70b9\u7b26\u53f7<\/td>\n<\/tr>\n<tr>\n<td>PER<\/td>\n<td>\u4eba\u540d<\/td>\n<td>LOC<\/td>\n<td>\u5730\u540d<\/td>\n<td>ORG<\/td>\n<td>\u673a\u6784\u540d<\/td>\n<td>TIME<\/td>\n<td>\u65f6\u95f4<\/td>\n<\
/tr>\n<\/tbody>\n<\/table>\n<h2>\u76f8\u5173\u94fe\u63a5<\/h2>\n<p>jieba-Python \u4e2d\u6587\u5206\u8bcd\u7ec4\u4ef6 NLPIR\u5927\u6570\u636e\u8bed\u4e49\u589e\u5f3a\u5206\u6790\u5e73\u53f0\u7684\u76f8\u5173\u7684\u6587\u4ef6 Jiagu\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177 LAC\u767e\u5ea6\u81ea\u7136\u8bed\u8a00\u5904\u7406\u8bcd\u6cd5\u5206\u6790\u5de5\u5177 \u81ea\u7136\u8bed\u8a00\u5904\u7406-\u6c49\u8bed\u8bcd\u6027\u5bf9\u7167\u8868-\u8bcd\u6027\u7f16\u7801\u4e0e\u540d\u79f0\u5bf9\u5e94\u5173\u7cfb<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb358\u6b21\uff0c\u70b9\u8d5e6\u6b21\uff0c\u6536\u85cf5\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u4e94\u79cd\u5e38\u7528\u7684\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177\uff1ajieba\u3001jiagu\u3001snownlp\u3001thulac\u548cLAC\uff0c\u91cd\u70b9\u8bb2\u89e3\u4e86\u5b83\u4eec\u7684\u5206\u8bcd\u529f\u80fd\u53ca\u5176\u4ed6\u6838\u5fc3\u7279\u6027\uff0c\u5e76\u63d0\u4f9b\u4e86\u76f8\u5173\u793a\u4f8b\u4ee3\u7801\u3002<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[5441,5440,5444,5442,5443,5439,207,224],"topic":[],"class_list":["post-53566","post","type-post","status-publish","format-standard","hentry","category-server","tag-jiagu","tag-jieba","tag-lac","tag-snownlp","tag-thulac","tag-5439","tag-207","tag-224"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/53566.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" 
\/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb358\u6b21\uff0c\u70b9\u8d5e6\u6b21\uff0c\u6536\u85cf5\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u4e94\u79cd\u5e38\u7528\u7684\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177\uff1ajieba\u3001jiagu\u3001snownlp\u3001thulac\u548cLAC\uff0c\u91cd\u70b9\u8bb2\u89e3\u4e86\u5b83\u4eec\u7684\u5206\u8bcd\u529f\u80fd\u53ca\u5176\u4ed6\u6838\u5fc3\u7279\u6027\uff0c\u5e76\u63d0\u4f9b\u4e86\u76f8\u5173\u793a\u4f8b\u4ee3\u7801\u3002\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/53566.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-08-12T05:29:57+00:00\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"4 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/53566.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/53566.html\",\"name\":\"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-08-12T05:29:57+00:00\",\"dateModified\":\"2025-08-12T05:29:57+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/53566.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/53566.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/53566.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/53566.html","og_locale":"zh_CN","og_type":"article","og_title":"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb358\u6b21\uff0c\u70b9\u8d5e6\u6b21\uff0c\u6536\u85cf5\u6b21\u3002\u672c\u6587\u4ecb\u7ecd\u4e86\u4e94\u79cd\u5e38\u7528\u7684\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5de5\u5177\uff1ajieba\u3001jiagu\u3001snownlp\u3001thulac\u548cLAC\uff0c\u91cd\u70b9\u8bb2\u89e3\u4e86\u5b83\u4eec\u7684\u5206\u8bcd\u529f\u80fd\u53ca\u5176\u4ed6\u6838\u5fc3\u7279\u6027\uff0c\u5e76\u63d0\u4f9b\u4e86\u76f8\u5173\u793a\u4f8b\u4ee3\u7801\u3002","og_url":"https:\/\/www.wsisp.com\/helps\/53566.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-08-12T05:29:57+00:00","author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"4 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/53566.html","url":"https:\/\/www.wsisp.com\/helps\/53566.html","name":"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-08-12T05:29:57+00:00","dateModified":"2025-08-12T05:29:57+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/53566.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/53566.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/53566.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"Python\u5e38\u7528\u76845\u79cd\u4e2d\u6587\u5206\u8bcd\u5de5\u5177"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/53566","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=53566"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/53566\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=53566"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=53566"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=53566"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=53566"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}