{"id":1442,"date":"2018-01-12T15:54:35","date_gmt":"2018-01-12T07:54:35","guid":{"rendered":"https:\/\/yanjingang.com\/blog\/?p=1442"},"modified":"2018-01-12T16:31:17","modified_gmt":"2018-01-12T08:31:17","slug":"python%e4%bd%bf%e7%94%a8esmre%e4%bb%a3%e6%9b%bfahocorasick%e5%ae%9e%e7%8e%b0ac%e8%87%aa%e5%8a%a8%e6%9c%ba%e5%a4%9a%e6%a8%a1%e5%8c%b9%e9%85%8d","status":"publish","type":"post","link":"https:\/\/yanjingang.com\/blog\/?p=1442","title":{"rendered":"python\u4f7f\u7528esmre\u4ee3\u66ffahocorasick\u5b9e\u73b0ac\u81ea\u52a8\u673a[\u591a\u6a21\u5339\u914d]"},"content":{"rendered":"<p>\u4e3a\u4ec0\u4e48\u4f1a\u7528AC\u81ea\u52a8\u673a\uff1f \u5982\u679c\u4f60\u60f3\u77e5\u9053\u4e00\u7bc7\u6587\u7ae0\u6709\u6ca1\u6709\u4f60\u8981\u8fc7\u6ee4\u7684\u654f\u611f\u8bcd\uff0c\u600e\u4e48\u529e\uff1f \u4e0d\u53ef\u80fd\u7528\u6b63\u5219\u4e00\u4e2a\u4e2a\u7684\u5339\u914d\u5427\uff1f \u00a0\u654f\u611f\u8bcd\u8d85\u8fc7300\u4e2a\u4e4b\u540e\uff0c\u7528Trie\u6765\u6784\u5efa\u6a21\u5f0f\u6811 (\u5b57\u5178\u6811)\u7684\u901f\u5ea6\u4f18\u52bf\u76f8\u5f53\u7684\u660e\u663e\u2026 \u2026<\/p>\n<p>\u7279\u522b\u8bf4\u4e0b\uff0ctrie\u56fe\u4e5f\u662f\u4e00\u79cdDFA\uff0c\u53ef\u4ee5\u7531trie\u6811\u4e3a\u57fa\u7840\u6784\u9020\u51fa\u6765\uff0c\u5bf9\u4e8e\u63d2\u5165\u7684\u6bcf\u4e2a\u6a21\u5f0f\u4e32\uff0c\u5176\u63d2\u5165\u8fc7\u7a0b\u4e2d\u4f7f\u7528\u7684\u6700\u540e\u4e00\u4e2a\u8282\u70b9\u90fd\u4f5c\u4e3aDFA\u7684\u4e00\u4e2a\u7ec8\u6b62\u8282\u70b9\u3002<\/p>\n<p>\u5982\u679c\u8981\u6c42\u4e00\u4e2a\u6bcd\u4e32\u5305\u542b\u54ea\u4e9b\u6a21\u5f0f\u4e32\uff0c\u53ef\u4ee5\u7528\u6bcd\u4e32\u4f5c\u4e3aDFA\u7684\u8f93\u5165\uff0c\u5728DFA \u4e0a\u884c\u8d70\uff0c\u8d70\u5230\u7ec8\u6b62\u8282\u70b9\uff0c\u5c31\u610f\u5473\u7740\u5339\u914d\u4e86\u76f8\u5e94\u7684\u6a21\u5f0f\u4e32\u3002<\/p>\n<p>ps: 
AC\u81ea\u52a8\u673a\u662fTrie\u7684\u4e00\u79cd\u5b9e\u73b0\uff0c\u4e5f\u5c31\u662f\u8bf4AC\u81ea\u52a8\u673a\u662f\u6784\u9020Trie\u56fe\u7684DFA\u7684\u4e00\u79cd\u65b9\u6cd5\u3002\u8fd8\u6709\u522b\u7684\u6784\u9020DFA\u7684\u65b9\u6cd5\u2026<\/p>\n<p>\u4e0d\u626f\u6de1\u4e86\uff0c\u6211\u4eec\u540e\u7aef\u90fd\u662fpython\u5199\u7684\uff0cpython\u7684ahocorasick\u6a21\u5757\u8ddf\u6211\u4eec\u7684\u4e1a\u52a1\u4e0d\u592a\u5339\u914d\uff0c\u95ee\u9898\u662f\u8fd9\u6837\u7684 \uff01 \u00a0 \u00a0\u5982\u679c\u4f60\u7684\u670d\u52a1\u662f\u7528\u6765\u505a\u654f\u611f\u8bcd\u5339\u914d\uff0c\u4e5f\u5c31\u662f\u8bf4\u6240\u6709\u6587\u7ae0\u91cc\u9762\u53ea\u8981\u542b\u6709\u4e00\u4e2a\u5173\u952e\u8bcd\uff0c\u90a3\u5c31\u8bf4\u660e\u5339\u914d\u4e86\u3002 \u00a0\u4f46\u662f\u6211\u4eec\u7684\u4e1a\u52a1\u662f\u6587\u7ae0\u4e2d\u7684\u6240\u6709\u80fd\u5339\u914d\u5230\u7684\u5173\u952e\u8bcd\u90fd\u4e00\u4e00\u7684\u62bd\u53d6\u51fa\u6765\u3002 \u00a0 \u6211\u60f3\u6709\u4e9b\u670b\u53cb\u53ef\u80fd\u8fd8\u4e0d\u592a\u660e\u767d\uff0c\u90a3\u4e48\u6211\u4e3e\u4e2a\u4f8b\u5b50\uff0c \u5982\u679c\u6211\u7684\u5173\u952e\u8bcd\u91cc\u9762\u6709\u5b9d\u9a6c\u548c\u9a6c\uff0c\u90a3\u4e48\u7528python\u7684ahocorasick\u5e93\u53ea\u4f1a\u5f97\u5230\u5b9d\u9a6c\uff0c\u800c\u4e0d\u4f1a\u5f97\u5230\u9a6c\u3002 \u00a0\u95ee\u9898\u662f\u51fa\u5728\u9a6c\u8fd9\u4e2a\u5b57\u8282\u662f\u5728\u5b9d\u9a6c\u7684\u94fe\u6761\u91cc\u9762\u7684\u3002 \u00a0\u5982\u4f55\u907f\u5f00\u8fd9\u4e2a\u95ee\u9898\uff0c\u6211\u4eec\u4e5f\u61d2\u5f97\u81ea\u5df1\u91cd\u5199\u4e86\uff0c\u5df2\u7ecf\u6709\u4eba\u7ed9\u51fa\u4e86\u89e3\u51b3\u7684\u6a21\u5757\u3002 \u00a0\u5df2\u7ecf\u6d4b\u8bd5\u5b8c\u6210\uff0c\u5e76\u4e0a\u7ebf\u4f7f\u7528\u4e86\u3002<\/p>\n<p>\u5b89\u88c5\u7b80\u5355\uff0c\u76f4\u63a5pip install esmre<\/p>\n<div id=\"crayon-5a55bc199497c248461653\" class=\"crayon-syntax crayon-theme-solarized-light crayon-font-monaco crayon-os-pc print-yes notranslate\" data-settings=\" 
minimize scroll-always expand\">\n<div class=\"crayon-plain-wrap\">\n<pre class=\"pure-highlightjs\"><code class=\"\">import esm\r\nindex = esm.Index()\r\nindex.enter(\"\u5b9d\u9a6c\")\r\nindex.enter(\"\u9a6c\")\r\nindex.enter(\"\u5954\u9a70\")\r\nindex.enter(\"\u4fdd\u65f6\u6377\")\r\nindex.fix()\r\nindex.query(\"\u54ce\u5440\uff0c\u4eca\u5929\u5728\u697c\u4e0b\u770b\u5230\u4e86\u5b9d\u9a6c\uff0c\u6211\u8001\u5bb6\u5012\u662f\u6709\u517b\u9a6c\u7684\uff0c\u4ee5\u524d\u7684\u90bb\u5c45\u6709\u4e2a\u5954\u9a70\uff0c\u4e0d\u5bf9\u662f\u4fdd\u65f6\u6377\uff0c\u5927\u7237\u7684\uff0c\u90fd\u662f\u9a6c\")<\/code><\/pre>\n<\/div>\n<\/div>\n<p>\u518d\u6765\u4e00\u4e2a\u5b8c\u6574\u7684\u4f8b\u5b50. \u00a0\u540e\u7eed\u6709\u65f6\u95f4\u6211\u4f1a\u628aac\u81ea\u52a8\u673a\u7684\u670d\u52a1\u96c6\u6210\u5230rpc\u670d\u52a1\u91cc\u9762,\u7136\u540e\u7528docker\u6253\u5305\u3002<\/p>\n<div id=\"crayon-5a55bc199498a355613849\" class=\"crayon-syntax crayon-theme-solarized-light crayon-font-monaco crayon-os-pc print-yes notranslate\" data-settings=\" minimize scroll-always expand\">\n<div class=\"crayon-plain-wrap\">\n<pre class=\"pure-highlightjs\"><code class=\"\">#coding:utf-8\r\nimport esm\r\nindex = esm.Index()\r\nwith open('keyword.config','r') as f:\r\n\u00a0\u00a0\u00a0\u00a0for i in f.readlines():\r\n\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0\u00a0index.enter(i.strip())\r\n\u00a0\u00a0\u00a0\u00a0index.fix()\r\n\u00a0\r\nstr = \"\"\"\r\nhead&amp;shoulders\u6d77\u98de\u4e1dHershey\u2018s\r\nLoreal\u6b27\u83b1\u96c5LUX2\r\n\u529b\u58eb\r\nL\u2019OREALMagic2\r\n\u7f8e\u5373\r\nMysteryCity\r\n\u8c1c\u57ce\r\nNO.1BABY2\r\nRESIN\r\nSANXIAO\r\nSHISEIDO FINE TOILETRY2\r\nSKII\u7f8e\u4e4b\u5319\r\nTao yu tang2\r\n\u6dd8\u96e8\u5802\r\nwhoo\u500d\u52a0\u6d01\r\n\u5361\u5c14\u6797\u5948\r\n\u5a07\u97fb\u8bd7\r\n\u5fb7\u56fd\u5e2e\u5b9d\u9002\r\nPerfect puff2\r\nSee Young2\r\n\u843d\u5065\r\n\u9ad8\u592b\r\n\"\"\"\r\ndata = index.query(str)\r\nprint data<\/code><\/pre>\n<p><a 
href=\"https:\/\/yanjingang.com\/blog\/?attachment_id=1446\" rel=\"attachment wp-att-1446\"><img loading=\"lazy\" class=\"alignnone size-full wp-image-1446\" src=\"https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/1.jpg\" alt=\"\" width=\"861\" height=\"397\" srcset=\"https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/1.jpg 861w, https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/1-300x138.jpg 300w, https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/1-768x354.jpg 768w\" sizes=\"(max-width: 861px) 100vw, 861px\" \/><\/a><\/p>\n<\/div>\n<\/div>\n<p>\u6d4b\u4e86\u51e0\u5929\uff0c\u6027\u80fd\u8fd8\u662f\u53ef\u4ee5\u7684\uff0c500KB\u7684\u6587\u7ae0\uff0c6000\u591a\u4e2a\u5173\u952e\u8bcd\uff0c\u6d88\u8017\u7684\u65f6\u95f4\u57280.002\u79d2\u5de6\u53f3\uff0c\u76f8\u6bd4ahocorasick\u4e00\u70b9\u90fd\u4e0d\u5dee\u7684\u3002 \u00a0\u89c2\u5bdf\u4e86\u4e0b\uff0cesmre\u662f\u6ca1\u6709\u53d1\u73b0\u5185\u5b58\u5f02\u5e38\u6cc4\u9732\u7b49\u95ee\u9898\u3002<\/p>\n<p><a href=\"https:\/\/yanjingang.com\/blog\/?attachment_id=1447\" rel=\"attachment wp-att-1447\"><img loading=\"lazy\" class=\"alignnone size-large wp-image-1447\" src=\"https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/2-1024x430.jpg\" alt=\"\" width=\"660\" height=\"277\" srcset=\"https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/2-1024x430.jpg 1024w, https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/2-300x126.jpg 300w, https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/2-768x323.jpg 768w, https:\/\/yanjingang.com\/blog\/wp-content\/uploads\/2018\/01\/2.jpg 1600w\" sizes=\"(max-width: 660px) 100vw, 660px\" \/><\/a><\/p>\n<div id=\"crayon-5a55bc1994990271287594\" class=\"crayon-syntax crayon-theme-solarized-light crayon-font-monaco crayon-os-pc print-yes notranslate\" data-settings=\" minimize scroll-always expand\">\n<div class=\"crayon-plain-wrap\"><\/div>\n<div class=\"crayon-main\">\n<pre 
class=\"pure-highlightjs\"><code class=\"\">[2015-06-12 23:34:01,043] INFO extractor \"Get keywords takes 0.0003 seconds\"\r\n[2015-06-12 23:34:01,069] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:01,178] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:02,372] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:02,386] INFO extractor \"Get keywords takes 0.0012 seconds\"\r\n[2015-06-12 23:34:02,631] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:03,656] INFO extractor \"Get keywords takes 0.0021 seconds\"\r\n[2015-06-12 23:34:03,744] INFO extractor \"Get keywords takes 0.0001 seconds\"\r\n[2015-06-12 23:34:03,785] INFO extractor \"Get keywords takes 0.0001 seconds\"\r\n[2015-06-12 23:34:03,910] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:04,031] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:05,004] INFO extractor \"Get keywords takes 0.0035 seconds\"\r\n[2015-06-12 23:34:05,579] INFO extractor \"Get keywords takes 0.0055 seconds\"\r\n[2015-06-12 23:34:05,602] INFO extractor \"Get keywords takes 0.0005 seconds\"\r\n[2015-06-12 23:34:05,662] INFO extractor \"Get keywords takes 0.0010 seconds\"\r\n[2015-06-12 23:34:06,125] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:06,299] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:06,404] INFO extractor \"Get keywords takes 0.0003 seconds\"\r\n[2015-06-12 23:34:07,396] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:07,595] INFO extractor \"Get keywords takes 0.0004 seconds\"\r\n[2015-06-12 23:34:08,725] INFO extractor \"Get keywords takes 0.0015 seconds\"\r\n[2015-06-12 23:34:09,504] INFO extractor \"Get keywords takes 0.0004 seconds\"\r\n[2015-06-12 23:34:09,515] INFO extractor \"Get keywords takes 0.0005 seconds\"\r\n[2015-06-12 23:34:10,650] INFO extractor \"Get keywords takes 0.0002 
seconds\"\r\n[2015-06-12 23:34:11,206] INFO extractor \"Get keywords takes 0.0003 seconds\"\r\n[2015-06-12 23:34:12,298] INFO extractor \"Get keywords takes 0.0002 seconds\"\r\n[2015-06-12 23:34:12,319] INFO extractor \"Get keywords takes 0.0001 seconds\"\r\n[2015-06-12 23:34:13,547] INFO extractor \"Get keywords takes 0.0006 seconds\"\r\n[2015-06-12 23:34:13,853] INFO extractor \"Get keywords takes 0.0005 seconds\"<\/code><\/pre>\n<p>&nbsp;<\/p>\n<\/div>\n<\/div>\n<p>\u6458\u81ea\uff1a\u00a0<a href=\"http:\/\/xiaorui.cc\/\" target=\"_blank\" rel=\"noopener noreferrer\">http:\/\/xiaorui.cc<\/a>\u00a0 \u00a0<a href=\"http:\/\/xiaorui.cc\/?p=1649\" target=\"_blank\" rel=\"noopener noreferrer\">http:\/\/xiaorui.cc\/?p=1649<\/a><\/p>\n<div><\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u4e3a\u4ec0\u4e48\u4f1a\u7528AC\u81ea\u52a8\u673a\uff1f \u5982\u679c\u4f60\u60f3\u77e5\u9053\u4e00\u7bc7\u6587\u7ae0\u6709\u6ca1\u6709\u4f60\u8981\u8fc7\u6ee4\u7684\u654f\u611f\u8bcd\uff0c\u600e\u4e48\u529e\uff1f \u4e0d\u53ef\u80fd\u7528\u6b63\u5219\u4e00\u4e2a\u4e2a\u7684\u5339\u914d\u5427\uff1f \u00a0 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[255],"tags":[523,526,522,434,525,524],"_links":{"self":[{"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=\/wp\/v2\/posts\/1442"}],"collection":[{"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1442"}],"version-history":[{"count":0,"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=\/wp\/v2\/posts\/1442\/revisions"}],"wp:attachment":[{"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1442"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1442"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/yanjingang.com\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1442"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}