{"id":27022,"date":"2025-06-21T16:05:16","date_gmt":"2025-06-21T08:05:16","guid":{"rendered":"http:\/\/139.9.1.231\/?p=27022"},"modified":"2025-06-23T13:51:43","modified_gmt":"2025-06-23T05:51:43","slug":"kaldihot-word-recognition-function","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2025\/06\/21\/kaldihot-word-recognition-function\/","title":{"rendered":"\u65b0\u4e00\u4ee3 Kaldi \u70ed\u8bcd\u8bc6\u522b\u529f\u80fd"},"content":{"rendered":"\n<p><strong><em>\u8f6c\u81ea\uff1a<a href=\"https:\/\/mp.weixin.qq.com\/s\/d7Ab9u1_OAGLF76V1ymHmg\">https:\/\/mp.weixin.qq.com\/s\/d7Ab9u1_OAGLF76V1ymHmg<\/a><\/em><\/strong><\/p>\n\n\n\n<h2>\u4ec0\u4e48\u662f\u70ed\u8bcd<\/h2>\n\n\n\n<p><strong>\u70ed\u8bcd<\/strong>&nbsp;\u5176\u5b9e\u662f\u4e00\u4e2a\u7279\u522b\u5bb9\u6613\u5f15\u8d77\u6b67\u4e49\u7684\u8bf4\u6cd5\uff0c\u5c24\u5176\u662f\u5728\u8bed\u97f3\u9886\u57df\uff0c\u6bd4\u5982<code>\u5524\u9192\u8bcd\/\u547d\u4ee4\u8bcd\/\u65b0\u8bcd<\/code>\u90fd\u6709\u4eba\u79f0\u4e4b\u4e3a<strong>\u70ed\u8bcd<\/strong>\uff0c\u672c\u6587\u4e2d\u8981\u8ba8\u8bba\u7684\u70ed\u8bcd\u8bc6\u522b\u662f\u5728\u8bed\u97f3\u8bc6\u522b\u8bed\u5883\u4e0b\u7684<code>\u201c\u4e0a\u4e0b\u6587\u8bcd\u8bed\u504f\u7f6e\u201d<\/code>\u5bf9\u5e94\u7684\u82f1\u6587\u4e3a&nbsp;<code>contextual 
biasing<\/code>\u3002\u70ed\u8bcd\u8bc6\u522b\u5230\u5e95\u662f\u505a\u4ec0\u4e48\u7684\u5462\uff1f\u4e3e\u4e00\u4e2a\u4f8b\u5b50\u5c31\u975e\u5e38\u6e05\u695a\u4e86\uff0c\u6bd4\u5982\uff1a<strong>\u201c\u4eca\u5929\u6cb3\u5357\u7701\u6559\u80b2\u5385\u6709\u5173\u9886\u5bfc\u53c2\u89c2\u4e86\u5357\u9633\u7406\u5de5\u5927\u5b66\u201d<\/strong>&nbsp;\u8fd9\u6837\u4e00\u53e5\u8bdd\uff0c\u5f88\u591a\u7684\u8bed\u97f3\u8bc6\u522b\u7cfb\u7edf\u5e94\u8be5\u4f1a\u8bc6\u522b\u6210&nbsp;<strong>\u201c\u4eca\u5929\u6cb3\u5357\u7701\u6559\u80b2\u5385\u6709\u5173\u9886\u5bfc\u53c2\u89c2\u4e86\u5357\u6d0b\u7406\u5de5\u5927\u5b66\u201d<\/strong>\uff0c<code>\u201c\u5357\u9633\u7406\u5de5\u5927\u5b66\u201d<\/code>\u548c<code>\u201c\u5357\u6d0b\u7406\u5de5\u5927\u5b66\u201d<\/code>\u97f3\u540c\u5b57\u4e0d\u540c\uff0c\u8bad\u7ec3\u8bed\u6599\u4e2d<code>\u201c\u5357\u6d0b\u7406\u5de5\u5927\u5b66\u201d<\/code>\u53c8\u5927\u6982\u7387\u591a\u4e8e<code>\u201c\u5357\u9633\u7406\u5de5\u5927\u5b66\u201d<\/code>\uff0c\u6240\u4ee5\u6a21\u578b\u975e\u5e38\u503e\u5411\u4e8e\u8f93\u51fa<code>\u201c\u5357\u6d0b\u7406\u5de5\u5927\u5b66\u201d<\/code>\u3002\u70ed\u8bcd\u8bc6\u522b\u8981\u5b9e\u73b0\u7684\u5c31\u662f\uff0c\u7ed9\u5b9a\u4e00\u4e9b\u5916\u90e8\u6761\u4ef6\uff0c\u8ba9\u7cfb\u7edf\u4e86\u89e3\u6211\u4eec\u5f53\u524d\u60f3\u8981\u8bf4\u7684\u662f<code>\u201c\u5357\u9633\u7406\u5de5\u5927\u5b66\u201d<\/code>\u800c\u4e0d\u662f<code>\u201c\u5357\u6d0b\u7406\u5de5\u5927\u5b66\u201d<\/code>\u3002<\/p>\n\n\n\n<h2>\u70ed\u8bcd\u7684\u5b9e\u73b0\u65b9\u6cd5<\/h2>\n\n\n\n<p>\u70ed\u8bcd\u7684\u5b9e\u73b0\u65b9\u6cd5\u5927\u81f4\u53ef\u4ee5\u5206\u4e3a\u4e24\u5927\u7c7b\uff0c\u4e00\u7c7b\u662f<strong>\u7eaf\u5b57\u7b26\u4e32\u5339\u914d\u65b9\u6cd5<\/strong>\uff0c\u4e00\u7c7b\u662f<strong>NN <strong>\u795e\u7ecf\u7f51\u7edc<\/strong>  
\u65b9\u6cd5<\/strong>\u3002\u987e\u540d\u601d\u4e49\uff0c<code>\u7eaf\u5b57\u7b26\u4e32\u5339\u914d<\/code>\u7684\u65b9\u6cd5\u5c31\u662f\u5c06\u89e3\u7801\u8fc7\u7a0b\u4e2d\u7684\u6240\u6709\u53ef\u80fd\u8def\u5f84\u90fd\u4e00\u4e00\u53bb\u5339\u914d\u70ed\u8bcd\u5217\u8868\uff0c\u5982\u679c<strong>\u5339\u914d\u4e0a\u70ed\u8bcd\u5c31\u7ed9\u5bf9\u5e94\u7684\u8def\u5f84\u52a0\u4e0a\u5206\u6570\u5956\u52b1<\/strong>\uff0c\u8fd9\u6837\u8be5\u8def\u5f84\u5c31\u66f4\u6709\u53ef\u80fd\u5728 beam \u526a\u679d\u4e2d\u80dc\u51fa\uff0c\u4ece\u800c\u5b9e\u73b0\u8bc6\u522b\u70ed\u8bcd\u7684\u529f\u80fd\u3002\u8fd9\u79cd\u65b9\u6cd5\u4e00\u822c\u662f\u5728\u89e3\u7801\u9636\u6bb5\u5b9e\u73b0\uff0c\u5bf9\u58f0\u5b66\u90e8\u5206\u662f\u900f\u660e\u7684\uff0c\u800c\u4e14\u53ef\u4ee5\u968f\u610f\u8c03\u6574\u5956\u52b1\u7684\u5206\u6570\uff0c\u6bd4\u8f83\u7075\u6d3b\u3002\u9700\u8981\u89e3\u51b3\u7684\u6838\u5fc3\u95ee\u9898\u662f\u9ad8\u6548\u7684\u67e5\u627e\uff0c\u4e00\u822c\u90fd\u662f\u57fa\u4e8e\u81ea\u52a8\u673a\u6765\u5b9e\u73b0\uff0c\u5728\u89e3\u7801\u5668\u4e2d\u9644\u5e26\u4e00\u4e2a\u7c7b\u4f3c\u4e8e\u4e0b\u56fe\u7684\u70ed\u8bcd\u56fe\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-49.png\" alt=\"\" class=\"wp-image-27029\" width=\"463\" height=\"165\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-49.png 802w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-49-300x107.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-49-768x274.png 768w\" sizes=\"(max-width: 463px) 100vw, 463px\" \/><\/figure>\n\n\n\n<p><code><strong>NN \u65b9\u6cd5<\/strong><\/code>\u5176\u5b9e\u975e\u5e38\u591a\uff0c\u8fd1\u5e74\u4e5f\u662f\u5927\u5bb6\u53d1\u8bba\u6587\u7684\u70ed\u70b9\uff08\u8d34\u4e00\u4e2a 
awesome&nbsp;https:\/\/github.com\/stevenhillis\/awesome-asr-contextualization\uff0c\u6709\u5174\u8da3\u7684\u540c\u5b66\u53ef\u4ee5\u53bb\u770b\u8bba\u6587\uff09\uff0c\u4f46\u603b\u7684\u6765\u8bf4\u5c31\u662f\u5c06\u70ed\u8bcd\u5217\u8868\u4f5c\u4e3a\u795e\u7ecf\u7f51\u7edc\u7684\u5176\u4e2d\u4e00\u4e2a\u8f93\u5165\uff0c\u4ee5\u6b64<strong>\u6539\u53d8\u795e\u7ecf\u7f51\u7edc\u8f93\u51fa\u7684\u5206\u5e03<\/strong>\uff0c\u8fd9\u6837\u795e\u7ecf\u7f51\u7edc\u5c31\u80fd\u66f4\u5927\u6982\u7387\u8bc6\u522b\u51fa\u70ed\u8bcd\u3002\u6b64\u79cd\u65b9\u6cd5\u7684\u4f7f\u7528\u9700\u8981\u5728\u8bad\u7ec3\u6a21\u578b\u65f6\u8fdb\u884c\u5e72\u9884\uff0c\u4e5f\u5c31\u662f\u8bf4\u5982\u679c\u4f60\u9700\u8981\u4e00\u4e2a\u5e26\u70ed\u8bcd\u8bc6\u522b\u529f\u80fd\u7684\u6a21\u578b\uff0c\u4f60\u5c31\u5f97\u91cd\u65b0\u8bad\u4e00\u4e2a\u6a21\u578b\uff0c\u6700\u8d77\u7801\u5f97\u5728\u4e0d\u5e26\u70ed\u8bcd\u8bc6\u522b\u529f\u80fd\u6a21\u578b\u7684\u57fa\u7840\u4e0a\u505a finetune\u3002\u4e0b\u56fe\u662f\u4e00\u79cd\u53ef\u80fd\u7684\u5b9e\u73b0\u65b9\u5f0f\uff0c\u901a\u8fc7\u70ed\u8bcd\u5217\u8868\u6765\u5bf9 <strong>transducer \u7684 predictor <\/strong>\u7f51\u7edc\u8fdb\u884c\u504f\u7f6e\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-50.png\" alt=\"\" class=\"wp-image-27032\" width=\"384\" height=\"284\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-50.png 714w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-50-300x222.png 300w\" sizes=\"(max-width: 384px) 100vw, 384px\" \/><\/figure><\/div>\n\n\n\n<p>\u5b9e\u9645\u7684\u4f7f\u7528\u4e2d\u4e5f\u5e38\u5e38\u5c06\u4e24\u8005\u4e00\u8d77\u914d\u5408\u4f7f\u7528\uff0c\u672c\u6587\u8ba8\u8bba\u7684\u662f\u7b2c\u4e00\u79cd\u7eaf\u5b57\u7b26\u4e32\u5339\u914d\u7684\u70ed\u8bcd\u5b9e\u73b0\u65b9\u6cd5\u3002<\/p>\n\n\n\n<h2>\u57fa\u4e8e Aho-corasick 
\u7684\u70ed\u8bcd\u5b9e\u73b0<\/h2>\n\n\n\n<p>\u4e0a\u6587\u63d0\u5230\u57fa\u4e8e\u5339\u914d\u7684\u70ed\u8bcd\u8bc6\u522b\u4e3b\u8981\u89e3\u51b3\u7684\u662f\u5339\u914d\u6548\u7387\u95ee\u9898\uff0c\u6240\u4ee5\u57fa\u672c\u90fd\u4f7f\u7528\u81ea\u52a8\u673a\u6765\u5b9e\u73b0\uff0copenfst \u4f5c\u4e3a\u4e00\u4e2a\u9ad8\u6548\u7684\u81ea\u52a8\u673a\u5b9e\u73b0\u53d7\u5230\u7edd\u5927\u90e8\u5206\u4eba\u7684\u9752\u7750\uff0c\u4f46\u5bf9\u4e8e\u70ed\u8bcd\u8bc6\u522b\uff0c\u8fd8\u662f\u6709\u4e00\u4e9b\u6b20\u7f3a\u3002\u6bd4\u5982\uff0c<strong>\u5982\u679c\u4e0d\u8fdb\u884c\u8f83\u590d\u6742\u7684\u72b6\u6001\u7ba1\u7406\uff0c\u5219\u4e00\u6b21\u53ea\u80fd\u8fdb\u884c\u4e00\u4e2a\u70ed\u8bcd\u7684\u5339\u914d<\/strong>\uff0c\u8fd9\u4e2a\u95ee\u9898 wenet \u5728\u5176\u5b9e\u73b0\u4e2d\u6709\u4e3e\u4f8b\u8bf4\u660e\u3002\u5982\u4e0b\u6240\u793a\uff0c\u201c\u552f\u54c1\u4f1a\u201d\u548c\u201c\u6b27\u9633\u552f\u4e00\u201d\u90fd\u662f\u70ed\u8bcd\uff0c\u4f46\u201c\u6b27\u9633\u552f\u54c1\u4f1a\u201d\u8fd9\u6761\u8def\u5f84\u5374\u65e0\u6cd5\u5339\u914d\u5230\u201c\u552f\u54c1\u4f1a\u201d\u3002\uff08openfst \u5f53\u7136\u53ef\u4ee5\u5b9e\u73b0\u8fd9\u4e9b\u529f\u80fd\uff0c\u4f46\u4f1a\u589e\u52a0\u590d\u6742\u5ea6\u4ee5\u53ca\u5f71\u54cd\u6548\u7387\u3002\uff09<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"681\" height=\"178\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-51.png\" alt=\"\" class=\"wp-image-27038\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-51.png 681w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-51-300x78.png 300w\" sizes=\"(max-width: 681px) 100vw, 681px\" \/><\/figure>\n\n\n\n<p>\u70ed\u8bcd\u7684\u5b9e\u73b0\u672c\u8d28\u662f\u4e00\u4e2a<strong>\u591a\u6a21\u5339\u914d\u95ee\u9898<\/strong>\uff0c\u5b83\u9700\u8981\u5728 hypothesis 
\u4e2d\u641c\u7d22\u662f\u5426\u5305\u542b\u7ed9\u5b9a\u7684\u70ed\u8bcd\u5217\u8868\uff0c\u800c\u591a\u6a21\u5339\u914d\u7684\u6700\u4f73\u6570\u636e\u7ed3\u6784\u5c31\u662f Aho-corasick \u81ea\u52a8\u673a\u3002\u5173\u4e8e Aho-corasick \u7684\u6784\u5efa\u7ec6\u8282\u672c\u6587\u4e0d\u505a\u8fc7\u591a\u53d9\u8ff0\uff0c\u611f\u5174\u8da3\u7684\u540c\u5b66\u53ef\u4ee5\u9605\u8bfb \uff08https:\/\/en.wikipedia.org\/wiki\/Aho%E2%80%93Corasick_algorithm\uff09\u3002<\/p>\n\n\n\n<p>\u4e0b\u9762\u5c06\u4e00\u6b65\u4e00\u6b65\u53d9\u8ff0\u5176\u600e\u6837\u7528\u4e8e\u70ed\u8bcd\u8bc6\u522b\uff0c\u4e0b\u56fe\u662f\u4e00\u4e2a\u5305\u542b\u4e86\u70ed\u8bcd&nbsp;<code>{ \"S\", \"HE\", \"SHE\", \"SHELL\", \"HIS\", \"HERS\", \"HELLO\", \"THIS\", \"THEM\"}<\/code>&nbsp;\u7684\u72b6\u6001\u56fe\uff08<em>\u56fe\u8981\u6709\u4e00\u5b9a\u590d\u6742\u5ea6\u624d\u80fd\u591f\u8bf4\u660e\u95ee\u9898\uff0c\u7231\u5b66\u4e60\u7684\u4f60\u4e00\u5b9a\u4f1a\u8ba4\u771f\u770b\u7684<\/em>\uff09\uff0cAho-corasick \u56fe\u4e2d\u4e3b\u8981\u6709\u4e09\u79cd\u7c7b\u578b\u7684\u8fb9\uff0c<code>goto<\/code>&nbsp;\u8fb9\uff08\u9ed1\u7bad\u5934\uff09\uff0c<code>failure<\/code>&nbsp;\u8fb9\uff08\u7ea2\u7bad\u5934\uff09 \u548c&nbsp;<code>output<\/code>&nbsp;\u8fb9\uff08\u7eff\u7bad\u5934\uff09\uff0c\u7b80\u5355\u5730\u8bf4\uff0c\u5339\u914d\u8d70&nbsp;<code>goto<\/code>&nbsp;\u8fb9\uff0c\u5339\u914d\u5931\u8d25\u5219\u8d70&nbsp;<code>failure<\/code>&nbsp;\u8fb9<strong>\u76f4\u5230<\/strong>\u5339\u914d\u4e3a\u6b62\u6216\u8005\u56de\u5230 ROOT \u8282\u70b9\uff0c\u800c\u53ea\u8981&nbsp;<code>output<\/code>&nbsp;\u8fb9\u5b58\u5728\u5373\u8868\u793a\u547d\u4e2d\u70ed\u8bcd (\u7406\u8bba\u4e0a\u6bcf\u4e00\u4e2a\u7ec8\u6b62\u8282\u70b9\u90fd\u6709\u4e00\u4e2a\u6307\u5411\u81ea\u5df1\u7684 output \u8fb9\uff0c\u4e0b\u56fe\u4e2d\u672a\u4f53\u73b0\uff09\u800c\u4e14\u5f97\u6cbf\u7740 output \u8fb9\u7684\u8def\u5f84<strong>\u4e00\u76f4\u56de\u6eaf<\/strong>\u5230\u6ca1\u6709 output 
\u8fb9\u4e3a\u6b62\u3002<\/p>\n\n\n\n<p>\u56fe\u4e2d\u6bcf\u6761\u00a0<code>goto<\/code>\u00a0\u8fb9\u90fd\u6709\u4e00\u4e2a\u5206\u6570\uff0c\u6bcf\u4e2a\u8282\u70b9\u542b\u6709\u4e24\u4e2a\u5206\u6570\uff08<code>node_score<\/code>,\u00a0<code>local_node_score<\/code>)\uff0c<code>node_score<\/code>\u00a0\u4e3a\u5168\u5c40\u8282\u70b9\u7684\u5206\u6570\u5373\u4ece ROOT \u8282\u70b9\u5230\u76ee\u524d\u7684\u8def\u5f84\u5206\u6570\u548c\uff0c<code>local_node_score<\/code>\u4e3a\u5c40\u90e8\u8282\u70b9\u5206\u6570\u5373\u4ece\u4e0a\u4e00\u4e2a<strong>\u7ec8\u6b62\u8282\u70b9<\/strong>[\u5339\u914d\u5230\u70ed\u8bcd\u7684\u8282\u70b9]\u5230\u76ee\u524d\u7684\u8def\u5f84\u5206\u6570\u548c\uff0c<strong>\u5339\u914d\u00a0<code>failure<\/code>\u00a0\u7684\u5206\u6570\u4e3a\u00a0<code>dest.node_score - src.local_node_score<\/code><\/strong>\uff08\u56fe\u4e2d\u672a\u753b\u51fa\uff0c\u56e0\u4e3a dest \u53ef\u80fd\u9700\u8981\u56de\u6eaf\u51e0\u6761\u00a0<code>failure<\/code>\u00a0\u8fb9\u624d\u80fd\u5230\u8fbe\uff09\u3002\u6211\u4eec<strong>\u60f3\u5728\u70ed\u8bcd\u5c40\u90e8\u547d\u4e2d\u65f6\u5c31\u7ed9\u4e88\u4e00\u5b9a\u5206\u6570\u5956\u52b1\uff0c\u9632\u6b62 beam search \u8fc7\u7a0b\u5c06\u53ef\u80fd\u7684\u70ed\u8bcd\u8def\u5f84\u526a\u6389<\/strong>\uff0c\u6240\u4ee5\u4f1a\u6709\u5982\u6b64\u590d\u6742\u7684\u5206\u6570\u8bbe\u8ba1\uff0c<strong>\u90e8\u5206\u547d\u4e2d\u7ed9\u4e88\u5956\u52b1\u9700\u8981\u5728\u5339\u914d\u5931\u8d25\u65f6\u5bf9\u5df2\u65bd\u52a0\u7684\u5206\u6570\u8fdb\u884c<code>\u8865\u507f<\/code>\u6216<code>\u6d88\u9664<\/code><\/strong>\u3002<strong>\u5956\u52b1\u5206\u6570\u7a76\u7adf\u5e94\u8be5\u5728<code>\u5b8c\u5168\u547d\u4e2d<\/code>\u540e\u624d\u65bd\u52a0\u8fd8\u662f<code>\u5c40\u90e8\u547d\u4e2d<\/code>\u5c31\u9884\u5148\u7ed9\u4e88\uff0c\u6bcf\u4e2a\u4eba\u6709\u4e0d\u540c\u7684\u770b\u6cd5<\/strong>\uff0c\u7b14\u8005\u672a\u8fdb\u884c\u8fc7\u4e25\u683c\u7684\u6027\u80fd\u5bf9\u6bd4\uff0ck2 
\u4e2d\u76ee\u524d\u7684\u5b9e\u73b0\u53c2\u7167\u00a0<code>Deep context<\/code>\u00a0(https:\/\/arxiv.org\/pdf\/1808.02480.pdf) \u4e2d on the fly rescoring \u4e00\u8282\u6240\u8ff0\uff0c\u6bcf\u5339\u914d\u4e00\u4e2a token \u90fd\u4f1a\u65bd\u52a0\u5206\u6570\u5956\u52b1\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"811\" height=\"673\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-52.png\" alt=\"\" class=\"wp-image-27042\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-52.png 811w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-52-300x249.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-52-768x637.png 768w\" sizes=\"(max-width: 811px) 100vw, 811px\" \/><\/figure>\n\n\n\n<p>\u6211\u4eec\u4ee5&nbsp;<code>\u201cDID_HE_WANT_HERS_SHELF\u201d<\/code>&nbsp;(\u6ce8\u610f\u7a7a\u683c&nbsp;<code>_<\/code>&nbsp;\u4e5f\u662f\u5b57\u7b26\uff09\uff0c\u6765\u8bf4\u660e\u6574\u4e2a\u8fc7\u7a0b\u662f\u5982\u4f55\u5339\u914d\u7684\uff0c\u4ee5\u53ca\u5956\u52b1\u5206\u6570\u5982\u4f55\u4f5c\u7528\u5230\u8def\u5f84\u3002<code>\u201cDID_\u201d<\/code>&nbsp;\u51e0\u4e2a\u5b57\u7b26\u672a\u5339\u914d\u4efb\u4f55\u70ed\u8bcd\u7684\u524d\u7f00\uff0c\u72b6\u6001\u4e00\u76f4\u505c\u7559\u5728 ROOT \uff08ROOT \u7684 failure \u662f\u5b83\u81ea\u5df1\uff09\u3002<code>\u201cH\u201d<\/code>&nbsp;\u5339\u914d state 0 \u5230 state 2 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1\uff0c<code>\u201cE\u201d<\/code>&nbsp;\u5339\u914d state 2 \u5230 state 3 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1\uff08total \u4e3a2\uff09\uff0c\u6b64\u65f6\u547d\u4e2d&nbsp;<code>\u201cHE\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 2 \uff08total \u4e3a 4\uff09\uff0c<code>\u201c_\u201d<\/code>&nbsp;\u672a\u5339\u914d\u4e0a\u6cbf\u7740 state 3 \u7684 failure \u8fb9\u56de\u5230 ROOT \u51cf\u53bb\u5956\u52b1 2 \uff08total 
\u4e3a2\uff09\uff0c<code>\u201cWAN\u201d<\/code>&nbsp;\u672a\u5339\u914d\u4efb\u4f55\u524d\u7f00\u72b6\u6001\u4e00\u76f4\u505c\u7559\u5728 ROOT\uff0c<code>\u201cT\u201d<\/code>&nbsp;\u5339\u914d state 0 \u5230 state 15 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a 3\uff09\uff0c<code>\u201c_\u201d<\/code>&nbsp;\u672a\u5339\u914d\u4e0a\u6cbf\u7740 state 15 \u7684 failure \u8fb9\u56de\u5230 ROOT \u51cf\u53bb\u5956\u52b1 1 \uff08total \u4e3a2\uff09\uff0c<code>\u201cH\u201d<\/code>&nbsp;\u5339\u914d state 0 \u5230 state 2 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a3\uff09\uff0c<code>\u201cE\u201d<\/code>&nbsp;\u5339\u914d state 2 \u5230 state 3 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1\uff08total \u4e3a4\uff09\uff0c\u6b64\u65f6\u547d\u4e2d&nbsp;<code>\u201cHE\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 2 \uff08total \u4e3a 6\uff09\uff0c<code>\u201cR\u201d<\/code>&nbsp;\u5339\u914d state 3 \u5230 state 10 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1\uff08total \u4e3a7\uff09\uff0c<code>\u201cS\u201d<\/code>&nbsp;\u5339\u914d state 10 \u5230 state 11 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1\uff08total \u4e3a8\uff09\uff0c\u6b64\u65f6\u547d\u4e2d&nbsp;<code>\u201cHERS\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 4 \uff08total \u4e3a 12\uff09\uff0cstate 11 \u5305\u542b output \u8fb9\u6307\u5411 state 1 \u5373\u547d\u4e2d&nbsp;<code>\u201cS\u201d<\/code>\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a13\uff09\uff0c&nbsp;<code>\u201c_\u201d<\/code>&nbsp;\u672a\u5339\u914d\u4e0a\u6cbf\u7740 state 11 \u7684 failure \u8fb9\u56de\u5230 ROOT \u51cf\u53bb\u5956\u52b1 4 \uff08total \u4e3a9\uff09\uff0c<code>\u201cS\u201d<\/code>&nbsp;\u5339\u914d state 0 \u5230 state 1 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a 10\uff09\uff0c\u6b64\u65f6\u547d\u4e2d&nbsp;<code>\u201cS\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a11\uff09\uff0c<code>\u201cH\u201d<\/code>&nbsp;\u5339\u914d state 1 \u5230 state 4 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1 
\uff08total \u4e3a 12\uff09\uff0c<code>\u201cE\u201d<\/code>&nbsp;\u5339\u914d state 4 \u5230 state 5 \u7684\u8fb9\u83b7\u5f97\u5956\u52b1 1 \uff08total \u4e3a 13\uff09\uff0c\u6b64\u65f6\u547d\u4e2d&nbsp;<code>\u201cSHE\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 3 \uff08total \u4e3a 16\uff09\uff0cstate 5 \u8fd8\u6709 output \u8fb9\u6307\u5411 state 3 \u5373\u547d\u4e2d&nbsp;<code>\u201cHE\u201d<\/code>&nbsp;\u83b7\u5f97\u5956\u52b1 2 \uff08total \u4e3a 18\uff09\uff0c<code>\u201cL\u201d<\/code>&nbsp;\u5339\u914d state 5 \u5230 state 6 \u7684\u8fb9\u83b7\u5f97\u5956\u52b11 \uff08total \u4e3a 19\uff09\uff0c<code>\u201cF\u201d<\/code>&nbsp;\u672a\u5339\u914d\u4e0a\u6cbf\u7740 state 6 \u7684 failure \u8fb9\u5230\u8fbe state 12\uff0c state 12 \u4f9d\u7136\u6ca1\u80fd\u5339\u914d&nbsp;<code>\u201cF\u201d<\/code>&nbsp;\u6cbf\u7740 state 12 \u7684 failure \u8fb9\u56de\u5230 ROOT \u51cf\u53bb\u5956\u52b1 4 \uff08state 6 \u7684 node_score\uff09\uff08total 15\uff09\uff0c\u5339\u914d\u7ed3\u675f\u3002<code>\u201cDID_HE_WANT_HERS_SHELF\u201d<\/code>&nbsp;\u547d\u4e2d&nbsp;<code>\u201cHE\u201d\uff0c\u201cHE\u201d\uff0c\u201cHERS\u201d \uff0c\u201cS\u201d\uff0c\u201cS\u201d\uff0c\u201cSHE\u201d\uff0c \u201cHE\u201d<\/code>&nbsp;\u83b7\u5f97 15 \u7684\u5206\u6570\u5956\u52b1\u3002\u4e0b\u9762\u8fd8\u6709\u4e00\u4e9b\u6d4b\u8bd5\u6837\u4f8b\uff0c\u53ef\u4ee5\u5e2e\u52a9\u7406\u89e3\u6574\u4e2a\u5339\u914d\u8fc7\u7a0b\uff0c\u5b9e\u9645\u7684\u70ed\u8bcd\u8bc6\u522b\u5339\u914d\u4e0d\u4f1a\u8fd9\u4e48\u590d\u6742\uff0c\u80fd\u547d\u4e2d\u4e00\u4e24\u4e2a\u70ed\u8bcd\u5c31\u5df2\u7ecf\u8db3\u591f\u5728 beam search \u80dc\u51fa\u4e86\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>queries = {\n        \"HEHERSHE\": 14,  # \"HE\", \"HE\", \"HERS\", \"S\", \"SHE\", \"HE\"\n        \"HERSHE\": 12,  # \"HE\", \"HERS\", \"S\", \"SHE\", \"HE\"\n        \"HISHE\": 9,  # \"HIS\", \"S\", \"SHE\", \"HE\"\n        \"SHED\": 6,  # \"S\", \"SHE\", \"HE\"\n        \"HELL\": 2,  # \"HE\"\n       
 \"HELLO\": 7,  # \"HE\", \"HELLO\"\n        \"DHRHISQ\": 4,  # \"HIS\", \"S\"\n        \"THEN\": 2,  # \"HE\"\n    }\n    for query, expected_score in queries.items():\n        total_scores = 0\n        state = context_graph.root\n        for q in query:\n            score, state = context_graph.forward_one_step(state, ord(q))\n            total_scores += score\n        score, state = context_graph.finalize(state)\n        assert state.token == -1, state.token\n        total_scores += score\n        assert total_scores == expected_score, (\n            total_scores,\n            expected_score,\n            query,\n        )<\/code><\/pre>\n\n\n\n<blockquote class=\"wp-block-quote\"><p>Wenet \u4e2d\u4e5f\u6709\u57fa\u4e8e Aho-corasick \u5b9e\u73b0\u7684\u70ed\u8bcd\uff0c\u4f46\u6682\u65f6\u8fd8\u6ca1\u6709\u5408\u5e76\uff0c\u53ef\u4ee5\u5728 wenet \u4ed3\u5e93\u7684 pull requests \u91cc\u67e5\u627e\u3002<\/p><\/blockquote>\n\n\n\n<h2>\u4e00\u4e9b\u5b9e\u9a8c\u7ed3\u679c<\/h2>\n\n\n\n<p>\u70ed\u8bcd\u7684\u5b9e\u9a8c\u7ed3\u679c\u8ddf\u6d4b\u8bd5\u96c6\u5173\u7cfb\u5f88\u5927\uff0c\u4e0b\u9762\u653e\u7684\u662f\u65e9\u671f\u7684\u4e00\u4e9b\u6d4b\u8bd5\u7ed3\u679c\uff0c\u5177\u4f53\u6548\u679c\u600e\u6837\uff0c\u8bf7\u5728\u81ea\u5df1\u7684\u6d4b\u8bd5\u96c6\u4e0a\u5b9e\u9a8c\u3002\u4e0b\u9762\u6d4b\u8bd5\u4e2d\u7684\u70ed\u8bcd\u5747\u4e3a\u6d4b\u8bd5\u96c6\u5bf9\u5e94 transcript \u6587\u672c\u4e0a\u7528&nbsp;<code>NER<\/code>&nbsp;\u5de5\u5177\u63d0\u53d6\u7684\u77ed\u8bed\uff0c\u5e76\u505a\u4e86\u9002\u5f53\u7b5b\u9009\u53bb\u9664\u7279\u522b\u5bb9\u6613\u8bc6\u522b\u7684\u77ed\u8bed\u3002<\/p>\n\n\n\n<p>Aishell \u6d4b\u8bd5\u96c6\uff08\u5305\u542b 1073 \u6761\u70ed\u8bcd\uff09\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"828\" height=\"264\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-53.png\" alt=\"\" class=\"wp-image-27044\" 
srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-53.png 828w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-53-300x96.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-53-768x245.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-53-825x264.png 825w\" sizes=\"(max-width: 828px) 100vw, 828px\" \/><\/figure>\n\n\n\n<p>Librispeech \u6d4b\u8bd5\u96c6 (\u5305\u542b 487 \u6761\u70ed\u8bcd\uff09\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"819\" height=\"250\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-54.png\" alt=\"\" class=\"wp-image-27045\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-54.png 819w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-54-300x92.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-54-768x234.png 768w\" sizes=\"(max-width: 819px) 100vw, 819px\" \/><\/figure>\n\n\n\n<p>\u53ef\u4ee5\u770b\u51fa\uff0c\u8be5\u5b9e\u73b0\u5bf9 contexts \u5b50\u96c6\u6709\u8f83\u660e\u663e\u7684\u63d0\u5347\uff0c\u800c\u5bf9\u5176\u4ed6\u6d4b\u8bd5\u96c6\u57fa\u672c\u6ca1\u6709\u5f71\u54cd\u3002<\/p>\n\n\n\n<h2>k2 \u70ed\u8bcd\u529f\u80fd\u73b0\u72b6<\/h2>\n\n\n\n<p>k2 \u7684\u70ed\u8bcd\u529f\u80fd\u5b9e\u73b0\u5df2\u7ecf\u6709\u4e00\u6bb5\u65f6\u95f4\u4e86\uff0c\u7531\u4e8e\u4f5c\u8005\u6bd4\u8f83<s>\u61d2<\/s>\u5fd9\u4e00\u76f4\u6ca1\u6709\u5168\u9762\u652f\u6301\uff0c\u76ee\u524d icefall \u4e2d\u7684&nbsp;<code>librispeech pruned_transducer_stateless4 recipe<\/code>&nbsp;\u548c&nbsp;<code>wenetspeech pruned_transducer_stateless5 recipe<\/code>&nbsp;\u5df2\u7ecf\u652f\u6301\uff0c<code>zipformer<\/code>&nbsp;\u6a21\u578b\u6b63\u5728 PR 
\u4e2d\uff08\u5f88\u5feb\u5408\u5e76\uff09\u3002<code>sherpa<\/code>&nbsp;\u548c&nbsp;<code>sherpa-onnx<\/code>&nbsp;\u4e2d\u5df2\u7ecf\u5b9e\u73b0\u4e86\u6838\u5fc3\u529f\u80fd\uff0c\u5e76\u4e14\u5c01\u88c5\u4e86&nbsp;<code>python<\/code>&nbsp;\u7684 API\uff0c\u56e0\u4e3a\u5df2\u7ecf\u6709\u5f88\u597d\u7684\u6837\u4f8b\uff0c\u6240\u4ee5\u6211\u4eec\u5f53\u7136\u975e\u5e38\u5e0c\u671b\u793e\u533a\u7684\u5c0f\u4f19\u4f34\u80fd\u4e00\u8d77\u5e2e\u5fd9\u5b8c\u5584\uff0c\u4f46\u5982\u679c\u4f60\u4eec\u4e5f\u5f88<s>\u61d2<\/s>\u5fd9\uff0c\u4e5f\u53ef\u4ee5\u5728<code>\u5fae\u4fe1\u7fa4<\/code>\u544a\u8bc9\u6211\u4eec\u6216\u8005\u5728&nbsp;<code>github<\/code>&nbsp;\u4ed3\u5e93\u63d0&nbsp;<code>issue<\/code>\uff0c\u6211\u4eec\u4f1a\u6839\u636e\u9700\u8981\u6765\u5b89\u6392\u4f18\u5148\u7ea7\uff0c\u76ee\u524d\u6211\u4eec\u6536\u5230\u7684\u4e24\u4e2a\u63d0\u8bae\u662f\u652f\u6301 sherpa-onnx android \u5e73\u53f0 \u548c sherpa-ncnn\u3002<\/p>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8f6c\u81ea\uff1ahttps:\/\/mp.weixin.qq.com\/s\/d7Ab9u1_OAGLF76V1ymHmg \u4ec0\u4e48 &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2025\/06\/21\/kaldihot-word-recognition-function\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u65b0\u4e00\u4ee3 Kaldi 
\u70ed\u8bcd\u8bc6\u522b\u529f\u80fd<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[34],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27022"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=27022"}],"version-history":[{"count":25,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27022\/revisions"}],"predecessor-version":[{"id":27151,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27022\/revisions\/27151"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=27022"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=27022"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=27022"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}