{"id":19744,"date":"2024-09-27T18:23:38","date_gmt":"2024-09-27T10:23:38","guid":{"rendered":"http:\/\/139.9.1.231\/?p=19744"},"modified":"2025-08-18T16:49:07","modified_gmt":"2025-08-18T08:49:07","slug":"token-coende","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2024\/09\/27\/token-coende\/","title":{"rendered":"\u5e38\u89c1\u7684\u7f16\u89e3\u7801\u5668\u6a21\u578b\u4ee5\u53ca\u538b\u7f29token\u6570"},"content":{"rendered":"\n<p>\u76ee\u524d\u4e3b\u6d41\u7684\u97f3\u9891\u7f16\u89e3\u7801\u5668\u7684\u4e00\u4e9b\u6307\u6807<\/p>\n\n\n\n<p><strong>\u6570\u636e\u6765\u6e90\uff1a<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2408.16532\" target=\"_blank\">wavtokenizer: an efficient acoustic discrete codec tokenizer for audio language modeling<\/a><\/strong><\/p>\n\n\n\n<p>\u8ba1\u7b97\u516c\u5f0f\uff1a<\/p>\n\n\n\n<p>\u5047\u8bbe\u7801\u672c\u5927\u5c0f2^n\uff0c\u6bcf\u79d2\u8bed\u97f3\u7684hz=BW\/\uff08Nq*n\uff09\uff0ctoken=Nq* \u6bcf\u79d2\u8bed\u97f3\u7684hz \uff0c \u6bcf\u79d2\u8bed\u97f3\u7684hz =token\/Nq<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table><tbody><tr><td><strong>Model<\/strong><\/td><td><strong>Bandwidth\u00a0<\/strong><\/td><td><strong>Nq\u00a0<\/strong>\u2193<strong>\u91cf\u5316\u5668\u6570\u91cf<\/strong><br>(<strong>number of quantizers.<\/strong>)<\/td><td><strong>token\/s\u00a0\u2193<\/strong><\/td><td> <strong>\u7801\u672c\u5927\u5c0f <\/strong><\/td><\/tr><tr><td>GT<\/td><td>&#8211;<\/td><td>&#8211;<\/td><td>&#8211;<\/td><td><\/td><\/tr><tr><td>DAC <\/td><td>9.0kbps 
<\/td><td>9<\/td><td>900<\/td><td>1024<\/td><\/tr><tr><td>Encodec<\/td><td>6.0kbps<\/td><td>8<\/td><td>600<\/td><td>1024<\/td><\/tr><tr><td>Vocos<\/td><td>6.0kbps<\/td><td>8<\/td><td>600<\/td><td>1024<\/td><\/tr><tr><td>SpeechTokenizer<\/td><td>6.0kbps<\/td><td>8<\/td><td>600<\/td><td>1024<\/td><\/tr><tr><td>DAC<\/td><td>4.0kbps<\/td><td>4<\/td><td>400<\/td><td>1024<\/td><\/tr><tr><td>HiFi-Codec<\/td><td>3.0kbps<\/td><td>4<\/td><td>400<\/td><td>2^7.5<\/td><\/tr><tr><td>HiFi-Codec<\/td><td>4.0kbps<\/td><td>4<\/td><td>300<\/td><td>2^13<\/td><\/tr><tr><td>Encodec<\/td><td>3.0kbps<\/td><td>4<\/td><td>300<\/td><td>1024 <\/td><\/tr><tr><td>Vocos<\/td><td>3.0kbps <\/td><td>4<\/td><td>300<\/td><td>1024<\/td><\/tr><tr><td>SpeechTokenizer<\/td><td>3.0kbps  <\/td><td>4<\/td><td>300<\/td><td>1024<\/td><\/tr><tr><td>WavTokenizer-small<\/td><td>0.5kbps<\/td><td>1<\/td><td>40<\/td><td> 4096 <\/td><\/tr><tr><td>WavTokenizer-small<\/td><td>0.9kbps<\/td><td>1<\/td><td>75<\/td><td> 4096 <\/td><\/tr><tr><td> Mini <\/td><td>1.1kbps<\/td><td>8<\/td><td>100<\/td><td>2048<\/td><\/tr><\/tbody><\/table><\/figure>\n","protected":false},"excerpt":{"rendered":"<p>\u76ee\u524d\u4e3b\u6d41\u7684\u97f3\u9891\u7f16\u89e3\u7801\u5668\u7684\u4e00\u4e9b\u6307\u6807 \u6570\u636e\u6765\u6e90\uff1awavtokenizer: an efficient acoust &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2024\/09\/27\/token-coende\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span 
class=\"screen-reader-text\">\u5e38\u89c1\u7684\u7f16\u89e3\u7801\u5668\u6a21\u578b\u4ee5\u53ca\u538b\u7f29token\u6570<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[4,38,43,34],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/19744"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=19744"}],"version-history":[{"count":17,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/19744\/revisions"}],"predecessor-version":[{"id":19776,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/19744\/revisions\/19776"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=19744"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=19744"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=19744"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}