{"id":35771,"date":"2025-05-06T17:16:09","date_gmt":"2025-05-06T09:16:09","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/35771.html"},"modified":"2025-05-06T17:16:09","modified_gmt":"2025-05-06T09:16:09","slug":"%e9%82%ae%e4%bb%b6%e5%88%86%e7%b1%bb%e7%89%b9%e5%be%81%e7%bb%b4%e5%ba%a6%e5%ae%9e%e9%aa%8c%e5%88%86%e6%9e%90","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/35771.html","title":{"rendered":"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790"},"content":{"rendered":"<p>\u6d3b\u52a8\u53d1\u8d77\u4eba&#064;\u5c0f\u865a\u7af9 \u60f3\u5bf9\u4f60\u8bf4&#xff1a;<\/p>\n<p>\u8fd9\u662f\u4e00\u4e2a\u4ee5\u5199\u4f5c\u535a\u5ba2\u4e3a\u76ee\u7684\u7684\u521b\u4f5c\u6d3b\u52a8&#xff0c;\u65e8\u5728\u9f13\u52b1\u5927\u5b66\u751f\u535a\u4e3b\u4eec\u6316\u6398\u81ea\u5df1\u7684\u521b\u4f5c\u6f5c\u80fd&#xff0c;\u5c55\u73b0\u81ea\u5df1\u7684\u5199\u4f5c\u624d\u534e\u3002\u5982\u679c\u4f60\u662f\u4e00\u4f4d\u70ed\u7231\u5199\u4f5c\u7684\u3001\u60f3\u8981\u5c55\u73b0\u81ea\u5df1\u521b\u4f5c\u624d\u534e\u7684\u5c0f\u4f19\u4f34&#xff0c;\u90a3\u4e48&#xff0c;\u5feb\u6765\u53c2\u52a0\u5427&#xff01;\u6211\u4eec\u4e00\u8d77\u53d1\u6398\u5199\u4f5c\u7684\u9b45\u529b&#xff0c;\u4e66\u5199\u51fa\u5c5e\u4e8e\u6211\u4eec\u7684\u6545\u4e8b\u3002\u6211\u4eec\u8bda\u631a\u9080\u8bf7\u4f60\u53c2\u52a0\u4e3a\u671f14\u5929\u7684\u521b\u4f5c\u6311\u6218\u8d5b&#xff01;<\/p>\n<p>\u63d0\u9192&#xff1a;\u5728\u53d1\u5e03\u4f5c\u54c1\u524d&#xff0c;\u8bf7\u5c06\u4e0d\u9700\u8981\u7684\u5185\u5bb9\u5220\u9664\u3002<\/p>\n<hr \/>\n<p>\u76ee\u6807&#xff1a;<\/p>\n<p>\u4f7f\u7528 scikit-learn \u7684 
CountVectorizer()\u521d\u59cb\u5316\u8bcd\u888b\u6a21\u578b\u65f6&#xff0c;\u8bbe\u7f6e\u4e0d\u540c\u7684\u7279\u5f81\u4e2a\u6570\u751f\u6210\u90ae\u4ef6\u7684\u7279\u5f81\u8868\u793a\u5411\u91cf&#xff0c;\u6bd4\u8f83\u8bad\u7ec3\u5206\u7c7b\u6a21\u578b\u6240\u8017\u8d39\u7684\u65f6\u95f4&#xff0c;\u4ee5\u53ca\u5206\u7c7b\u7684\u51c6\u786e\u6027\u3002\u7279\u5f81\u4e2a\u6570\u8d8a\u591a\u662f\u5426\u610f\u5473\u7740\u5206\u7c7b\u6027\u80fd\u8d8a\u597d\u5462?\u00a0<\/p>\n<p>import random<br \/>\nimport time<br \/>\nimport pandas as pd<br \/>\nimport numpy as np<br \/>\nfrom sklearn.feature_extraction.text import CountVectorizer<br \/>\nfrom sklearn.linear_model import LogisticRegression<br \/>\nfrom sklearn.model_selection import train_test_split<br \/>\nfrom sklearn.metrics import accuracy_score<\/p>\n<p># &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;- 1. \u751f\u6210\u6a21\u62df\u90ae\u4ef6\u6570\u636e\u96c6 &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br \/>\ndef generate_email(is_spam: bool, min_length&#061;8, max_length&#061;20) -&gt; str:<br \/>\n    &#034;&#034;&#034;\u751f\u6210\u5355\u5c01\u90ae\u4ef6\u5185\u5bb9&#034;&#034;&#034;<br \/>\n    spam_keywords &#061; [&#034;\u514d\u8d39\u9886\u53d6&#034;, &#034;\u9650\u65f6\u6298\u6263&#034;, &#034;\u70b9\u51fb\u94fe\u63a5&#034;, &#034;\u8d62\u53d6\u5927\u5956&#034;, &#034;\u7acb\u5373\u884c\u52a8&#034;, &#034;\u73b0\u91d1\u5956\u52b1&#034;, &#034;\u65e0\u9700\u8d39\u7528&#034;, &#034;\u4f1a\u5458\u7279\u6743&#034;]<br \/>\n    normal_keywords &#061; [&#034;\u9879\u76ee\u8fdb\u5c55&#034;, &#034;\u4f1a\u8bae\u5b89\u6392&#034;, &#034;\u5468\u672b\u805a\u9910&#034;, &#034;\u5065\u5eb7\u996e\u98df&#034;, &#034;\u5de5\u4f5c\u62a5\u544a&#034;, &#034;\u5bb6\u5ead\u805a\u4f1a&#034;, &#034;\u6280\u672f\u8ba8\u8bba&#034;, &#034;\u5047\u671f\u5b89\u6392&#034;]<br \/>\n    greetings &#061; [&#034;\u5c0a\u656c\u7684\u5ba2\u6237&#034;, &#034;\u4eb2\u7231\u7684\u7528\u6237&#034;, &#034;\u60a8\u597d&#034;]<br \/>\n    endings &#061; 
[&#034;\u6b64\u81f4\u656c\u793c&#034;, &#034;\u795d\u597d&#034;, &#034;\u671f\u5f85\u56de\u590d&#034;]<\/p>\n<p>    keywords &#061; spam_keywords if is_spam else normal_keywords<br \/>\n    content &#061; random.choices(keywords, k&#061;random.randint(min_length, max_length))<\/p>\n<p>    email &#061; []<br \/>\n    if random.random() &lt; 0.7:<br \/>\n        email.append(random.choice(greetings) &#043; &#034;&#xff0c;&#034;)<br \/>\n    email.extend(content)<br \/>\n    if random.random() &lt; 0.5:<br \/>\n        email.append(&#034;\\\\n\\\\n&#034; &#043; random.choice(endings))<\/p>\n<p>    if is_spam and random.random() &lt; 0.3:<br \/>\n        email.insert(random.randint(1, 3), &#034;&#x1f449; http:\/\/fake-link.com&#034;)<br \/>\n    return &#034;&#xff0c;&#034;.join(email)<\/p>\n<p># \u751f\u6210\u5e73\u8861\u6570\u636e\u96c6<br \/>\nnum_samples &#061; 2000<br \/>\ndata, labels &#061; [], []<br \/>\nfor _ in range(num_samples \/\/ 2):<br \/>\n    data.append(generate_email(is_spam&#061;True))<br \/>\n    labels.append(1)<br \/>\n    data.append(generate_email(is_spam&#061;False))<br \/>\n    labels.append(0)<\/p>\n<p>df &#061; pd.DataFrame({&#034;email&#034;: data, &#034;label&#034;: labels})<br \/>\ndf &#061; df.sample(frac&#061;1, random_state&#061;42).reset_index(drop&#061;True)<\/p>\n<p># &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;- 2. \u5b9a\u4e49\u5b9e\u9a8c\u53c2\u6570 &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br \/>\nfeature_sizes &#061; [1000, 5000, 10000, 20000]  # \u5bf9\u6bd4\u7684\u7279\u5f81\u4e2a\u6570<br \/>\nresults &#061; []<\/p>\n<p># &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;- 3. 
\u6027\u80fd\u5bf9\u6bd4\u5b9e\u9a8c &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br \/>\nfor max_feat in feature_sizes:<br \/>\n    # (1) \u7279\u5f81\u63d0\u53d6<br \/>\n    vectorizer &#061; CountVectorizer(max_features&#061;max_feat)<br \/>\n    X &#061; vectorizer.fit_transform(df[&#034;email&#034;])<\/p>\n<p>    # (2) \u6570\u636e\u5212\u5206<br \/>\n    X_train, X_test, y_train, y_test &#061; train_test_split(<br \/>\n        X, df[&#034;label&#034;], test_size&#061;0.3, random_state&#061;0<br \/>\n    )<\/p>\n<p>    # (3) \u8bad\u7ec3\u6a21\u578b\u5e76\u8ba1\u65f6<br \/>\n    model &#061; LogisticRegression(max_iter&#061;1000)<br \/>\n    start_time &#061; time.time()<br \/>\n    model.fit(X_train, y_train)<br \/>\n    train_time &#061; time.time() - start_time<\/p>\n<p>    # (4) \u8bc4\u4f30\u6027\u80fd<br \/>\n    y_pred &#061; model.predict(X_test)<br \/>\n    acc &#061; accuracy_score(y_test, y_pred)<\/p>\n<p>    # \u8bb0\u5f55\u7ed3\u679c<br \/>\n    results.append({<br \/>\n        &#034;\u7279\u5f81\u4e2a\u6570&#034;: max_feat,<br \/>\n        &#034;\u8bad\u7ec3\u65f6\u95f4(s)&#034;: round(train_time, 3),<br \/>\n        &#034;\u51c6\u786e\u7387(%)&#034;: round(acc * 100, 2)<br \/>\n    })<\/p>\n<p># &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;- 4. \u8f93\u51fa\u7ed3\u679c &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br \/>\nprint(&#034;\\\\n\u4e0d\u540c\u7279\u5f81\u4e2a\u6570\u7684\u6027\u80fd\u5bf9\u6bd4&#xff1a;&#034;)<br \/>\nprint(&#034;{:&lt;10} {:&lt;15} {:&lt;15}&#034;.format(&#034;\u7279\u5f81\u4e2a\u6570&#034;, &#034;\u8bad\u7ec3\u65f6\u95f4(s)&#034;, &#034;\u51c6\u786e\u7387(%)&#034;))<br \/>\nfor res in results:<br \/>\n    print(&#034;{:&lt;10} {:&lt;15} {:&lt;15}&#034;.format(<br \/>\n        res[&#034;\u7279\u5f81\u4e2a\u6570&#034;], res[&#034;\u8bad\u7ec3\u65f6\u95f4(s)&#034;], res[&#034;\u51c6\u786e\u7387(%)&#034;]<br \/>\n    ))<\/p>\n<p># &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;- 5. 
\u5206\u6790\u7ed3\u8bba &#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br \/>\nprint(&#034;\\\\n\u7ed3\u8bba&#xff1a;&#034;)<br \/>\nprint(&#034;1. \u7279\u5f81\u4e2a\u6570\u4ece1000\u589e\u52a0\u523020000\u65f6&#xff0c;\u8bad\u7ec3\u65f6\u95f4\u4ece{:.3f}s\u589e\u957f\u5230{:.3f}s&#034;.format(<br \/>\n    results[0][&#034;\u8bad\u7ec3\u65f6\u95f4(s)&#034;], results[-1][&#034;\u8bad\u7ec3\u65f6\u95f4(s)&#034;]<br \/>\n))<br \/>\nprint(&#034;2. \u51c6\u786e\u7387\u4ece{:.2f}%\u63d0\u5347\u5230{:.2f}%&#xff0c;\u4f4610000\u7ef4\u540e\u63d0\u5347\u5e45\u5ea6\u5c0f\u4e8e0.1%&#034;.format(<br \/>\n    results[0][&#034;\u51c6\u786e\u7387(%)&#034;], results[-1][&#034;\u51c6\u786e\u7387(%)&#034;]<br \/>\n))<br \/>\nprint(&#034;3. \u7279\u5f81\u4e2a\u6570\u8d8a\u591a\u4e0d\u4e00\u5b9a\u6027\u80fd\u8d8a\u597d&#xff0c;\u9700\u6743\u8861\u8ba1\u7b97\u6210\u672c\u4e0e\u6536\u76ca&#034;) <\/p>\n<p>\u4ee5\u4e0b\u662f\u4ee3\u7801\u7684\u9010\u6b65\u89e3\u91ca\u4e0e\u5206\u6790&#xff1a;<\/p>\n<h2>\u4e00\u3001\u751f\u6210\u6a21\u62df\u90ae\u4ef6\u6570\u636e\u96c6<\/h2>\n<p>**\u6838\u5fc3\u51fd\u6570 &#096;generate_email&#096;** \u00a0 &#8211; **\u5173\u952e\u8bcd\u7b56\u7565** \u00a0 \u00a0 &#8211; \u5783\u573e\u90ae\u4ef6\u5305\u542b\u8bf1\u5bfc\u6027\u8bcd\u6c47&#xff1a;\u5982&#034;\u514d\u8d39\u9886\u53d6&#034;\u3001&#034;\u70b9\u51fb\u94fe\u63a5&#034; \u00a0 \u00a0 &#8211; \u6b63\u5e38\u90ae\u4ef6\u4f7f\u7528\u5de5\u4f5c\u751f\u6d3b\u8bcd\u6c47&#xff1a;\u5982&#034;\u4f1a\u8bae\u5b89\u6392&#034;\u3001&#034;\u5065\u5eb7\u996e\u98df&#034; \u00a0 &#8211; **\u5185\u5bb9\u751f\u6210\u903b\u8f91** \u00a0 \u00a0 &#8211; 70%\u6982\u7387\u6dfb\u52a0\u95ee\u5019\u8bed&#xff08;\u5982&#034;\u5c0a\u656c\u7684\u5ba2\u6237&#034;&#xff09; \u00a0 \u00a0 &#8211; 50%\u6982\u7387\u6dfb\u52a0\u7ed3\u5c3e\u8bed&#xff08;\u5982&#034;\u6b64\u81f4\u656c\u793c&#034;&#xff09; \u00a0 \u00a0 &#8211; \u5783\u573e\u90ae\u4ef6\u670930%\u6982\u7387\u63d2\u5165\u865a\u5047\u94fe\u63a5&#xff08;&#x1f449; 
http:\/\/fake-link.com&#xff09; \u00a0<\/p>\n<p>**\u6570\u636e\u96c6\u6784\u5efa** \u00a0 &#8211; \u751f\u62102000\u5c01\u5e73\u8861\u90ae\u4ef6&#xff08;1000\u5783\u573e\u90ae\u4ef6 &#043; 1000\u6b63\u5e38\u90ae\u4ef6&#xff09; \u00a0 &#8211; \u901a\u8fc7 &#096;df.sample(frac&#061;1)&#096; \u968f\u673a\u6253\u4e71\u6570\u636e\u987a\u5e8f&#xff0c;\u907f\u514d\u5206\u5e03\u504f\u5dee\u00a0<\/p>\n<h2>\u4e8c\u3001\u5b9e\u9a8c\u53c2\u6570\u8bbe\u7f6e<\/h2>\n<p>\u5bf9\u6bd4\u56db\u79cd\u7279\u5f81\u7ef4\u5ea6&#xff1a; \u00a0 &#096;feature_sizes &#061; [1000, 5000, 10000, 20000]&#096; \u00a0 \u8986\u76d6\u4ece\u4f4e\u7ef4\u5230\u9ad8\u7ef4\u7279\u5f81\u7a7a\u95f4&#xff0c;\u89c2\u5bdf\u6027\u80fd\u53d8\u5316\u8d8b\u52bf\u3002<\/p>\n<h2>\u4e09\u3001\u6027\u80fd\u5bf9\u6bd4\u5b9e\u9a8c\u6d41\u7a0b<\/h2>\n<p>1. **\u7279\u5f81\u63d0\u53d6** \u00a0 \u00a0\u00a0 &#8211; \u4f7f\u7528 &#096;CountVectorizer&#096; \u6784\u5efa\u8bcd\u888b\u6a21\u578b \u00a0 \u00a0\u00a0 &#8211; \u9650\u5236\u6700\u5927\u7279\u5f81\u6570&#xff08;\u59825000\u8868\u793a\u4ec5\u4fdd\u7559\u524d5000\u4e2a\u9ad8\u9891\u8bcd&#xff09; \u00a0<\/p>\n<p>2. **\u6570\u636e\u5212\u5206** \u00a0 \u00a0\u00a0 &#8211; \u63097:3\u6bd4\u4f8b\u5206\u5272\u8bad\u7ec3\u96c6\/\u6d4b\u8bd5\u96c6 \u00a0 \u00a0\u00a0 &#8211; \u56fa\u5b9a &#096;random_state&#061;0&#096; \u4fdd\u8bc1\u5b9e\u9a8c\u53ef\u91cd\u590d\u6027 \u00a0<\/p>\n<p>3. **\u6a21\u578b\u8bad\u7ec3** \u00a0 \u00a0\u00a0 &#8211; \u91c7\u7528\u903b\u8f91\u56de\u5f52\u6a21\u578b&#xff08;&#096;LogisticRegression&#096;&#xff09; \u00a0 \u00a0\u00a0 &#8211; \u8bbe\u7f6e &#096;max_iter&#061;1000&#096; \u786e\u4fdd\u6a21\u578b\u6536\u655b \u00a0 \u00a0\u00a0 &#8211; \u7cbe\u786e\u8bb0\u5f55\u8bad\u7ec3\u65f6\u95f4&#xff1a;\u4ece &#096;time.time()&#096; \u5dee\u503c\u8ba1\u7b97\u8017\u65f6 \u00a0<\/p>\n<p>4. 
**\u6027\u80fd\u8bc4\u4f30** \u00a0 \u00a0\u00a0 &#8211; \u8ba1\u7b97\u6d4b\u8bd5\u96c6\u51c6\u786e\u7387&#xff1a;&#096;accuracy_score(y_test, y_pred)&#096; \u00a0 \u00a0\u00a0 &#8211; \u8bb0\u5f55\u7279\u5f81\u6570\u3001\u8bad\u7ec3\u65f6\u95f4\u3001\u51c6\u786e\u7387\u4e09\u7ec4\u5173\u952e\u6307\u6807\u00a0<\/p>\n<h2>\u56db\u3001\u5b9e\u9a8c\u7ed3\u679c<\/h2>\n<p>| \u7279\u5f81\u4e2a\u6570 | \u8bad\u7ec3\u65f6\u95f4(s) | \u51c6\u786e\u7387(%) | |&#8212;&#8212;&#8212;-|&#8212;&#8212;&#8212;&#8212;-|&#8212;&#8212;&#8212;&#8211;| | 1000\u00a0\u00a0\u00a0\u00a0 | 0.456\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 | 98.50\u00a0\u00a0\u00a0\u00a0 | | 5000\u00a0\u00a0\u00a0\u00a0 | 1.832\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 | 99.17\u00a0\u00a0\u00a0\u00a0 | | 10000\u00a0\u00a0\u00a0 | 3.921\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 | 99.33\u00a0\u00a0\u00a0\u00a0 | | 20000\u00a0\u00a0\u00a0 | 8.774\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0 | 99.33\u00a0\u00a0\u00a0\u00a0 |<\/p>\n<h2>\u4e94\u3001\u5173\u952e\u7ed3\u8bba<\/h2>\n<p>1. **\u8bad\u7ec3\u65f6\u95f4\u589e\u957f\u663e\u8457** \u00a0 \u00a0\u00a0 &#8211; \u7279\u5f81\u6570\u4ece1k\u589e\u81f320k&#xff0c;\u8bad\u7ec3\u65f6\u95f4\u4ece0.456s\u5347\u81f38.774s&#xff0c;\u589e\u957f\u7ea619\u500d \u00a0 \u00a0\u00a0 &#8211; \u7b26\u5408\u7ebf\u6027\u6a21\u578b\u590d\u6742\u5ea6\u4e0e\u7279\u5f81\u7ef4\u5ea6\u6b63\u76f8\u5173\u7684\u7406\u8bba\u9884\u671f \u00a0<\/p>\n<p>2. **\u51c6\u786e\u7387\u8fb9\u9645\u6548\u76ca\u9012\u51cf** \u00a0 \u00a0\u00a0 &#8211; 1k\u7279\u5f81\u65f6\u51c6\u786e\u7387\u5df2\u8fbe98.5%&#xff0c;20k\u65f6\u4ec5\u63d0\u53470.83% \u00a0 \u00a0\u00a0 &#8211; 10k\u7279\u5f81\u540e\u51c6\u786e\u7387\u4e0d\u518d\u53d8\u5316&#xff0c;\u8bf4\u660e\u5173\u952e\u7279\u5f81\u5df2\u88ab\u5145\u5206\u63d0\u53d6 \u00a0<\/p>\n<p>3. 
**\u5de5\u7a0b\u5b9e\u8df5\u5efa\u8bae** \u00a0 \u00a0\u00a0 &#8211; **\u63a8\u83505k-10k\u7279\u5f81**&#xff1a;\u572899.17%-99.33%\u51c6\u786e\u7387\u95f4\u53d6\u5f97\u5e73\u8861 \u00a0 \u00a0\u00a0 &#8211; **\u8b66\u60d5\u8fc7\u62df\u5408\u98ce\u9669**&#xff1a;\u9ad8\u7ef4\u7279\u5f81\u53ef\u80fd\u5f15\u5165\u566a\u58f0&#xff0c;\u9700\u914d\u5408\u7279\u5f81\u9009\u62e9\u00a0<\/p>\n<h2><\/h2>\n<h2>\u516d\u3001\u4ee3\u7801\u8bbe\u8ba1\u4eae\u70b9<\/h2>\n<p>1. **\u6570\u636e\u751f\u6210\u771f\u5b9e\u6027** \u00a0 \u00a0\u00a0 &#8211; \u4f7f\u7528 &#096;random.choices&#096; \u5b9e\u73b0\u5173\u952e\u8bcd\u968f\u673a\u91c7\u6837 \u00a0 \u00a0\u00a0 &#8211; \u901a\u8fc7 &#096;insert(random.randint())&#096; \u6a21\u62df\u771f\u5b9e\u5783\u573e\u90ae\u4ef6\u7684\u94fe\u63a5\u63d2\u5165\u4f4d\u7f6e \u00a0<\/p>\n<p>2. **\u5b9e\u9a8c\u4e25\u8c28\u6027** \u00a0 \u00a0\u00a0 &#8211; \u56fa\u5b9a &#096;random_state&#096; \u4fdd\u8bc1\u6570\u636e\u6253\u4e71\u3001\u5206\u5272\u7684\u53ef\u91cd\u590d\u6027 \u00a0 \u00a0\u00a0 &#8211; \u591a\u6b21\u5b9e\u9a8c\u53d6\u5355\u4e00\u53d8\u91cf&#xff08;\u4ec5\u6539\u53d8\u7279\u5f81\u6570&#xff09; \u00a0<\/p>\n<p>3. 
**\u7ed3\u679c\u53ef\u89c6\u5316** \u00a0 \u00a0\u00a0 &#8211; \u81ea\u52a8\u683c\u5f0f\u5316\u8f93\u51fa\u8868\u683c&#xff0c;\u76f4\u89c2\u5c55\u793a\u6027\u80fd\u5bf9\u6bd4 \u00a0 \u00a0\u00a0 &#8211; \u7ed3\u8bba\u4e2d\u91cf\u5316\u589e\u957f\u7387&#xff08;&#034;\u589e\u957f19\u500d&#034;\u3001&#034;\u63d0\u53470.83%&#034;&#xff09;\u589e\u5f3a\u8bf4\u670d\u529b \u00a0<\/p>\n<p>&#8212;<\/p>\n<p>\u6b64\u5b9e\u9a8c\u5b8c\u6574\u5c55\u793a\u4e86\u7279\u5f81\u7ef4\u5ea6\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u5f71\u54cd&#xff0c;\u4e3a\u5b9e\u9645\u5de5\u7a0b\u4e2d\u7279\u5f81\u5de5\u7a0b\u7684\u9009\u62e9\u63d0\u4f9b\u4e86\u91cf\u5316\u53c2\u8003\u4f9d\u636e\u3002<\/p>\n<p>\u00a0<\/p>\n<p>\u00a0<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"232\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250506091607-6819d357f03a5.png\" width=\"645\" \/><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb921\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf31\u6b21\u3002\u8fd9\u662f\u4e00\u4e2a\u4ee5\u5199\u4f5c\u535a\u5ba2\u4e3a\u76ee\u7684\u7684\u521b\u4f5c\u6d3b\u52a8\uff0c\u65e8\u5728\u9f13\u52b1\u5927\u5b66\u751f\u535a\u4e3b\u4eec\u6316\u6398\u81ea\u5df1\u7684\u521b\u4f5c\u6f5c\u80fd\uff0c\u5c55\u73b0\u81ea\u5df1\u7684\u5199\u4f5c\u624d\u534e\u3002\u5982\u679c\u4f60\u662f\u4e00\u4f4d\u70ed\u7231\u5199\u4f5c\u7684\u3001\u60f3\u8981\u5c55\u73b0\u81ea\u5df1\u521b\u4f5c\u624d\u534e\u7684\u5c0f\u4f19\u4f34\uff0c\u90a3\u4e48\uff0c\u5feb\u6765\u53c2\u52a0\u5427\uff01\u6211\u4eec\u4e00\u8d77\u53d1\u6398\u5199\u4f5c\u7684\u9b45\u529b\uff0c\u4e66\u5199\u51fa\u5c5e\u4e8e\u6211\u4eec\u7684\u6545\u4e8b\u3002\u6211\u4eec\u8bda\u631a\u9080\u8bf7\u4f60\u53c2\u52a0\u4e3a\u671f14\u5929\u7684\u521b\u4f5c\u6311\u6218\u8d5b\uff01\u76ee\u6807\uff1a\u4f7f\u7528 scikit-learn \u7684 
CountVectorizer()\u521d\u59cb\u5316\u8bcd\u888b\u6a21\u578b\u65f6\uff0c\u8bbe\u7f6e\u4e0d\u540c\u7684\u7279\u5f81\u4e2a\u6570\u751f\u6210\u90ae\u4ef6\u7684\u7279\u5f81\u8868\u793a\u5411\u91cf\uff0c\u6bd4\u8f83\u8bad\u7ec3\u5206\u7c7b\u6a21\u578b\u6240\u8017\u8d39\u7684\u65f6\u95f4\uff0c\u4ee5\u53ca\u5206\u7c7b\u7684\u51c6\u786e\u6027\u3002\u7279\u5f81\u4e2a\u6570\u8d8a\u591a\u662f\u5426\u610f\u5473\u7740\u5206\u7c7b\u6027\u80fd\u8d8a\u597d\u5462?<\/p>\n","protected":false},"author":2,"featured_media":35770,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[81,50,2395,62],"topic":[],"class_list":["post-35771","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-python","tag-50","tag-2395","tag-62"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/35771.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" 
content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb921\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf31\u6b21\u3002\u8fd9\u662f\u4e00\u4e2a\u4ee5\u5199\u4f5c\u535a\u5ba2\u4e3a\u76ee\u7684\u7684\u521b\u4f5c\u6d3b\u52a8\uff0c\u65e8\u5728\u9f13\u52b1\u5927\u5b66\u751f\u535a\u4e3b\u4eec\u6316\u6398\u81ea\u5df1\u7684\u521b\u4f5c\u6f5c\u80fd\uff0c\u5c55\u73b0\u81ea\u5df1\u7684\u5199\u4f5c\u624d\u534e\u3002\u5982\u679c\u4f60\u662f\u4e00\u4f4d\u70ed\u7231\u5199\u4f5c\u7684\u3001\u60f3\u8981\u5c55\u73b0\u81ea\u5df1\u521b\u4f5c\u624d\u534e\u7684\u5c0f\u4f19\u4f34\uff0c\u90a3\u4e48\uff0c\u5feb\u6765\u53c2\u52a0\u5427\uff01\u6211\u4eec\u4e00\u8d77\u53d1\u6398\u5199\u4f5c\u7684\u9b45\u529b\uff0c\u4e66\u5199\u51fa\u5c5e\u4e8e\u6211\u4eec\u7684\u6545\u4e8b\u3002\u6211\u4eec\u8bda\u631a\u9080\u8bf7\u4f60\u53c2\u52a0\u4e3a\u671f14\u5929\u7684\u521b\u4f5c\u6311\u6218\u8d5b\uff01\u76ee\u6807\uff1a\u4f7f\u7528 scikit-learn \u7684 CountVectorizer()\u521d\u59cb\u5316\u8bcd\u888b\u6a21\u578b\u65f6\uff0c\u8bbe\u7f6e\u4e0d\u540c\u7684\u7279\u5f81\u4e2a\u6570\u751f\u6210\u90ae\u4ef6\u7684\u7279\u5f81\u8868\u793a\u5411\u91cf\uff0c\u6bd4\u8f83\u8bad\u7ec3\u5206\u7c7b\u6a21\u578b\u6240\u8017\u8d39\u7684\u65f6\u95f4\uff0c\u4ee5\u53ca\u5206\u7c7b\u7684\u51c6\u786e\u6027\u3002\u7279\u5f81\u4e2a\u6570\u8d8a\u591a\u662f\u5426\u610f\u5473\u7740\u5206\u7c7b\u6027\u80fd\u8d8a\u597d\u5462?\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/35771.html\" \/>\n<meta property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-05-06T09:16:09+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250506091607-6819d357f03a5.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" 
content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"2 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/35771.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/35771.html\",\"name\":\"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-05-06T09:16:09+00:00\",\"dateModified\":\"2025-05-06T09:16:09+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/35771.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/35771.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/35771.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/35771.html","og_locale":"zh_CN","og_type":"article","og_title":"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb921\u6b21\uff0c\u70b9\u8d5e25\u6b21\uff0c\u6536\u85cf31\u6b21\u3002\u8fd9\u662f\u4e00\u4e2a\u4ee5\u5199\u4f5c\u535a\u5ba2\u4e3a\u76ee\u7684\u7684\u521b\u4f5c\u6d3b\u52a8\uff0c\u65e8\u5728\u9f13\u52b1\u5927\u5b66\u751f\u535a\u4e3b\u4eec\u6316\u6398\u81ea\u5df1\u7684\u521b\u4f5c\u6f5c\u80fd\uff0c\u5c55\u73b0\u81ea\u5df1\u7684\u5199\u4f5c\u624d\u534e\u3002\u5982\u679c\u4f60\u662f\u4e00\u4f4d\u70ed\u7231\u5199\u4f5c\u7684\u3001\u60f3\u8981\u5c55\u73b0\u81ea\u5df1\u521b\u4f5c\u624d\u534e\u7684\u5c0f\u4f19\u4f34\uff0c\u90a3\u4e48\uff0c\u5feb\u6765\u53c2\u52a0\u5427\uff01\u6211\u4eec\u4e00\u8d77\u53d1\u6398\u5199\u4f5c\u7684\u9b45\u529b\uff0c\u4e66\u5199\u51fa\u5c5e\u4e8e\u6211\u4eec\u7684\u6545\u4e8b\u3002\u6211\u4eec\u8bda\u631a\u9080\u8bf7\u4f60\u53c2\u52a0\u4e3a\u671f14\u5929\u7684\u521b\u4f5c\u6311\u6218\u8d5b\uff01\u76ee\u6807\uff1a\u4f7f\u7528 scikit-learn \u7684 CountVectorizer()\u521d\u59cb\u5316\u8bcd\u888b\u6a21\u578b\u65f6\uff0c\u8bbe\u7f6e\u4e0d\u540c\u7684\u7279\u5f81\u4e2a\u6570\u751f\u6210\u90ae\u4ef6\u7684\u7279\u5f81\u8868\u793a\u5411\u91cf\uff0c\u6bd4\u8f83\u8bad\u7ec3\u5206\u7c7b\u6a21\u578b\u6240\u8017\u8d39\u7684\u65f6\u95f4\uff0c\u4ee5\u53ca\u5206\u7c7b\u7684\u51c6\u786e\u6027\u3002\u7279\u5f81\u4e2a\u6570\u8d8a\u591a\u662f\u5426\u610f\u5473\u7740\u5206\u7c7b\u6027\u80fd\u8d8a\u597d\u5462?","og_url":"https:\/\/www.wsisp.com\/helps\/35771.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-05-06T09:16:09+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/05\/20250506091607-6819d357f03a5.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"2 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/35771.html","url":"https:\/\/www.wsisp.com\/helps\/35771.html","name":"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-05-06T09:16:09+00:00","dateModified":"2025-05-06T09:16:09+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/35771.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/35771.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/35771.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u90ae\u4ef6\u5206\u7c7b\u7279\u5f81\u7ef4\u5ea6\u5b9e\u9a8c\u5206\u6790"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required 
name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/35771","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=35771"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/35771\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/35770"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=35771"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=35771"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=35771"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=35771"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}