{"id":23534,"date":"2025-01-04T19:53:13","date_gmt":"2025-01-04T11:53:13","guid":{"rendered":"http:\/\/139.9.1.231\/?p=23534"},"modified":"2025-02-06T10:18:54","modified_gmt":"2025-02-06T02:18:54","slug":"deepseek-v3","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2025\/01\/04\/deepseek-v3\/","title":{"rendered":"DeepSeek-V3 \u6280\u672f\u62a5\u544a"},"content":{"rendered":"\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"680\" height=\"146\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2024\/12\/image-116.png\" alt=\"\" class=\"wp-image-23538\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2024\/12\/image-116.png 680w, http:\/\/139.9.1.231\/wp-content\/uploads\/2024\/12\/image-116-300x64.png 300w\" sizes=\"(max-width: 680px) 100vw, 680px\" \/><\/figure>\n\n\n\n<ul class=\"has-light-pink-background-color has-background\"><li><em><a href=\"https:\/\/github.com\/deepseek-ai\/DeepSeek-V3\/blob\/main\/DeepSeek_V3.pdf\">https:\/\/github.com\/deepseek-ai\/DeepSeek-V3\/blob\/main\/DeepSeek_V3.pdf<\/a><\/em><\/li><li><em><a href=\"https:\/\/arxiv.org\/abs\/2412.19437\">https:\/\/arxiv.org\/abs\/2412.19437<\/a><\/em><\/li><li><em><a href=\"https:\/\/github.com\/deepseek-ai\/DeepSeek-V3\">https:\/\/github.com\/deepseek-ai\/DeepSeek-V3<\/a><\/em><\/li><li><em><a href=\"https:\/\/huggingface.co\/collections\/deepseek-ai\/deepseek-v3-676bc4546fb4876383c4208b\">https:\/\/huggingface.co\/collections\/deepseek-ai\/deepseek-v3-676bc4546fb4876383c4208b<\/a><\/em><\/li><\/ul>\n\n\n\n<p><em>\u539f\u6587\uff1ahttps:\/\/zhuanlan.zhihu.com\/p\/14890557782<\/em><\/p>\n\n\n\n\n\n<p>DeepSeek-V3 \u662f\u4e00\u6b3e\u6027\u80fd\u5353\u8d8a\u7684<strong>\u6df7\u5408\u4e13\u5bb6\uff08MoE\uff09<\/strong>&nbsp;\u8bed\u8a00\u6a21\u578b\uff0c\u6574\u4f53\u53c2\u6570\u89c4\u6a21\u8fbe\u5230 671B\uff0c\u5176\u4e2d\u6bcf\u4e2a token 
\u6fc0\u6d3b\u7684\u53c2\u6570\u91cf\u4e3a&nbsp;<strong>37B<\/strong>\u3002\u8bc4\u4f30\u7ed3\u679c\u8868\u660e\uff0cDeepSeek-V3&nbsp;<strong>\u5728\u6027\u80fd\u4e0a\u8d85\u8d8a\u4e86\u5176\u4ed6\u5f00\u6e90\u6a21\u578b<\/strong>\uff0c\u5e76\u80fd\u591f\u4e0e\u4e3b\u6d41\u95ed\u6e90\u6a21\u578b\u76f8\u5ab2\u7f8e\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"985\" height=\"628\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-1.png\" alt=\"\" class=\"wp-image-23639\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-1.png 985w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-1-300x191.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-1-768x490.png 768w\" sizes=\"(max-width: 985px) 100vw, 985px\" \/><\/figure>\n\n\n\n<h2>\u5f15\u8a00<\/h2>\n\n\n\n<p>\u8fd1\u5e74\u6765\uff0cLLM \u7ecf\u5386\u4e86\u5feb\u901f\u8fed\u4ee3\u548c\u6f14\u8fdb\uff0c\u9010\u6b65\u7f29\u5c0f\u4e86\u4e0e<strong>\u901a\u7528\u4eba\u5de5\u667a\u80fd\uff08AGI\uff09<\/strong>&nbsp;\u7684\u5dee\u8ddd\u3002\u9664\u4e86\u95ed\u6e90\u6a21\u578b\u5916,\u5f00\u6e90\u6a21\u578b\u9635\u8425\u4e5f\u5728\u53d6\u5f97\u91cd\u5927\u8fdb\u5c55,\u5305\u62ec DeepSeek \u7cfb\u5217\u3001LLaMA \u7cfb\u5217\u3001Qwen \u7cfb\u5217\u548c Mistral \u7cfb\u5217\uff0c\u8fd9\u4e9b\u6a21\u578b\u6b63\u5728\u52aa\u529b\u7f29\u5c0f\u4e0e\u95ed\u6e90\u6a21\u578b\u7684\u6027\u80fd\u5dee\u8ddd\u3002<\/p>\n\n\n\n<p>\u4e3a\u4e86\u8fdb\u4e00\u6b65\u7a81\u7834\u5f00\u6e90\u6a21\u578b\u7684\u80fd\u529b\u8fb9\u754c,\u7814\u7a76\u56e2\u961f\u5f00\u53d1\u4e86 DeepSeek-V3\uff0c\u8fd9\u662f\u4e00\u4e2a\u57fa\u4e8e MoE \u67b6\u6784\u7684\u5927\u6a21\u578b\uff0c\u603b\u53c2\u6570\u91cf\u8fbe\u5230 671B\uff0c\u5176\u4e2d\u6bcf\u4e2a token \u4f1a\u6fc0\u6d3b 37B 
\u4e2a\u53c2\u6570\u3002<\/p>\n\n\n\n<p>\u57fa\u4e8e\u63d0\u5347\u6027\u80fd\u548c\u964d\u4f4e\u6210\u672c\u7684\u53cc\u91cd\u76ee\u6807\uff0c\u5728\u67b6\u6784\u8bbe\u8ba1\u65b9\u9762\uff0cDeepSeek-V3 \u91c7\u7528\u4e86MLA\u6765\u786e\u4fdd\u63a8\u7406\u6548\u7387\uff0c\u5e76\u4f7f\u7528 DeepSeekMoE\u6765\u5b9e\u73b0\u7ecf\u6d4e\u9ad8\u6548\u7684\u8bad\u7ec3\u3002\u8fd9\u4e24\u79cd\u67b6\u6784\u5728 DeepSeek-V2 \u4e2d\u5df2\u7ecf\u5f97\u5230\u9a8c\u8bc1\uff0c\u8bc1\u5b9e\u4e86\u5b83\u4eec\u80fd\u591f\u5728\u4fdd\u6301\u6a21\u578b\u6027\u80fd\u7684\u540c\u65f6\u5b9e\u73b0\u9ad8\u6548\u7684\u8bad\u7ec3\u548c\u63a8\u7406\u3002<\/p>\n\n\n\n<p>\u9664\u4e86\u5ef6\u7eed\u8fd9\u4e9b\u57fa\u7840\u67b6\u6784\u5916\uff0c\u7814\u7a76\u56e2\u961f\u8fd8\u5f15\u5165\u4e86\u4e24\u9879\u521b\u65b0\u7b56\u7565\u6765\u8fdb\u4e00\u6b65\u63d0\u5347\u6a21\u578b\u6027\u80fd\u3002<\/p>\n\n\n\n<p>\u9996\u5148\uff0cDeepSeek-V3 \u9996\u521b\u4e86<strong>\u65e0\u8f85\u52a9\u635f\u5931\u7684\u8d1f\u8f7d\u5747\u8861<\/strong>\u7b56\u7565\uff0c\u6709\u6548\u964d\u4f4e\u4e86\u8d1f\u8f7d\u5747\u8861\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u8d1f\u9762\u5f71\u54cd\u3002\u53e6\u5916\uff0cDeepSeek-V3 \u91c7\u7528\u4e86<strong>\u591a token \u9884\u6d4b\u8bad\u7ec3\u76ee\u6807<\/strong>\uff0c\u8fd9\u79cd\u65b9\u6cd5\u5728\u8bc4\u4f30\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5c55\u73b0\u51fa\u4e86\u663e\u8457\u7684\u6027\u80fd\u63d0\u5347\u3002<\/p>\n\n\n\n<p>\u4e3a\u4e86\u63d0\u9ad8\u8bad\u7ec3\u6548\u7387\uff0c\u8be5\u7814\u7a76\u91c7\u7528\u4e86<strong>FP8 \u6df7\u5408\u7cbe\u5ea6\u8bad\u7ec3\u6280\u672f<\/strong>\u5e76\u5bf9\u8bad\u7ec3\u6846\u67b6\u8fdb\u884c\u4e86\u5168\u9762\u4f18\u5316\u3002\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u4f5c\u4e3a\u4e00\u79cd\u9ad8\u6548\u7684\u8bad\u7ec3\u65b9\u6848\uff0c\u5176\u53d1\u5c55\u4e0e\u786c\u4ef6\u6027\u80fd\u7684\u63d0\u5347\u5bc6\u5207\u76f8\u5173\u3002\u672c\u7814\u7a76\u9996\u6b21\u5728\u8d85\u5927\u89c4\u6a21\u6a21\u578b\u4e0a\u6210\u529f\u9a8c\u8bc1\u4e86 FP8 
\u6df7\u5408\u7cbe\u5ea6\u8bad\u7ec3\u6846\u67b6\u7684\u6709\u6548\u6027\u3002\u901a\u8fc7\u91c7\u7528 FP8 \u8ba1\u7b97\u548c\u5b58\u50a8\u6280\u672f\uff0c\u663e\u8457\u63d0\u5347\u4e86\u8bad\u7ec3\u901f\u5ea6\u5e76\u964d\u4f4e\u4e86 GPU \u5185\u5b58\u5360\u7528\u3002<\/p>\n\n\n\n<p>\u5728\u8bad\u7ec3\u6846\u67b6\u65b9\u9762\uff0c\u7814\u7a76\u56e2\u961f\u5f00\u53d1\u7684 DualPipe \u7b97\u6cd5\u5b9e\u73b0\u4e86\u9ad8\u6548\u7684\u6d41\u6c34\u7ebf\u5e76\u884c\u5904\u7406\uff0c\u51cf\u5c11\u4e86\u6d41\u6c34\u7ebf\u505c\u6ede\uff0c\u5e76\u901a\u8fc7\u8ba1\u7b97\u548c\u901a\u4fe1\u5e76\u884c\u5904\u7406\u7684\u65b9\u5f0f\u964d\u4f4e\u4e86\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u7684\u901a\u4fe1\u5f00\u9500\u3002\u8fd9\u79cd\u4f18\u5316\u786e\u4fdd\u4e86\u5373\u4f7f\u5728\u6a21\u578b\u89c4\u6a21\u8fdb\u4e00\u6b65\u6269\u5927\u7684\u60c5\u51b5\u4e0b\uff0c\u53ea\u8981\u7ef4\u6301\u9002\u5f53\u7684\u8ba1\u7b97\u901a\u4fe1\u6bd4\u4f8b\uff0c\u5c31\u80fd\u5728\u4e0d\u540c\u8282\u70b9\u95f4\u5b9e\u73b0\u7ec6\u7c92\u5ea6\u4e13\u5bb6\u5206\u914d\uff0c\u540c\u65f6\u5c06\u5168\u5bf9\u5168\uff08all-to-all\uff09\u901a\u4fe1\u5f00\u9500\u964d\u81f3\u63a5\u8fd1\u4e8e\u96f6\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0c\u7814\u7a76\u56e2\u961f<strong>\u4f18\u5316\u4e86\u8de8\u8282\u70b9\u7684\u5168\u5bf9\u5168\uff08all-to-all\uff09\u901a\u4fe1\u5185\u6838<\/strong>\uff0c\u5145\u5206\u5229\u7528\u4e86 InfiniBand(IB) \u548c NVLink \u7684\u5e26\u5bbd\u6027\u80fd\u3002\u901a\u8fc7\u7cbe\u7ec6\u7684\u5185\u5b58\u4f18\u5316\uff0c\u4f7f\u5f97 DeepSeek-V3 \u7684\u8bad\u7ec3<strong>\u65e0\u9700\u4f9d\u8d56\u6210\u672c\u9ad8\u6602\u7684\u5f20\u91cf\u5e76\u884c\u6280\u672f<\/strong>\u3002<\/p>\n\n\n\n<p>\u8fd9\u4e9b\u6280\u672f\u6539\u8fdb\u7684\u7efc\u5408\u8fd0\u7528\u5b9e\u73b0\u4e86\u6781\u9ad8\u7684\u8bad\u7ec3\u6548\u7387\u3002<\/p>\n\n\n\n<p>\u5728<strong>\u9884\u8bad\u7ec3\u9636\u6bb5<\/strong>\uff0cDeepSeek-V3 \u4f7f\u7528\u4e86 14.8T \u9ad8\u8d28\u91cf\u4e14\u591a\u6837\u5316\u7684 token 
\u8fdb\u884c\u8bad\u7ec3\u3002\u6574\u4e2a\u9884\u8bad\u7ec3\u8fc7\u7a0b\u8868\u73b0\u51fa\u4e86\u51fa\u4eba\u610f\u6599\u7684\u7a33\u5b9a\u6027\uff0c\u4e0d\u4ec5\u6ca1\u6709\u51fa\u73b0\u4e0d\u53ef\u6062\u590d\u7684\u635f\u5931\u7a81\u589e\uff0c\u4e5f\u672a\u53d1\u751f\u9700\u8981\u56de\u6eda\u7684\u60c5\u51b5\u3002<\/p>\n\n\n\n<p>\u968f\u540e\uff0c\u6a21\u578b\u8fdb\u884c\u4e86<strong>\u4e24\u4e2a\u9636\u6bb5\u7684\u4e0a\u4e0b\u6587\u957f\u5ea6\u6269\u5c55<\/strong>\uff1a\u7b2c\u4e00\u9636\u6bb5\u5c06\u6700\u5927\u4e0a\u4e0b\u6587\u957f\u5ea6\u63d0\u5347\u81f3 32K\uff0c\u7b2c\u4e8c\u9636\u6bb5\u8fdb\u4e00\u6b65\u6269\u5c55\u81f3 128K\u3002<\/p>\n\n\n\n<p>\u63a5\u7740\uff0c\u7814\u7a76\u56e2\u961f\u5bf9 DeepSeek-V3 \u57fa\u7840\u6a21\u578b\u8fdb\u884c\u4e86<strong>\u540e\u8bad\u7ec3<\/strong>\uff0c\u5305\u62ec SFT \u548c RL\uff0c\u4ee5\u589e\u5f3a\u6a21\u578b\u5bf9\u4eba\u7c7b\u504f\u597d\u7684\u7406\u89e3\u5e76\u8fdb\u4e00\u6b65\u63d0\u5347\u5176\u6027\u80fd\u3002\u5728\u540e\u8bad\u7ec3\u9636\u6bb5\uff0c\u901a\u8fc7\u4ece DeepSeek R1 \u7cfb\u5217\u6a21\u578b\u4e2d\u63d0\u53d6\u63a8\u7406\u80fd\u529b\uff0c\u540c\u65f6\u7cbe\u786e\u63a7\u5236\u6a21\u578b\u7684\u8f93\u51fa\u8d28\u91cf\u548c\u957f\u5ea6\u6bd4\u4f8b\u3002<\/p>\n\n\n\n<p>DeepSeek-V3 \u5728\u5168\u9762\u7684\u57fa\u51c6\u6d4b\u8bd5\u8bc4\u4f30\u4e2d\u8868\u73b0\u7a81\u51fa\u3002\u5c3d\u7ba1\u5176\u8bad\u7ec3\u6210\u672c\u8f83\u4f4e\uff0c\u4f46\u7efc\u5408\u8bc4\u4f30\u7ed3\u679c\u663e\u793a\uff0c<strong>DeepSeek-V3-Base 
\u5df2\u6210\u4e3a\u5f53\u524d\u6027\u80fd\u6700\u5f3a\u7684\u5f00\u6e90\u57fa\u7840\u6a21\u578b<\/strong>\uff0c\u5c24\u5176\u5728<strong>\u4ee3\u7801<\/strong>\u548c<strong>\u6570\u5b66<\/strong>\u9886\u57df\u8868\u73b0\u5353\u8d8a\u3002\u5176\u5bf9\u8bdd\u7248\u672c\u4e0d\u4ec5\u8d85\u8d8a\u4e86\u5176\u4ed6\u5f00\u6e90\u6a21\u578b\uff0c\u8fd8\u5728\u591a\u4e2a\u6807\u51c6\u548c\u5f00\u653e\u5f0f\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5c55\u73b0\u51fa\u4e0e\u9886\u5148\u95ed\u6e90\u6a21\u578b\uff08\u5982 GPT-4o \u548c Claude-3.5-Sonnet\uff09\u76f8\u5339\u654c\u7684\u6027\u80fd\u3002<\/p>\n\n\n\n<p>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0cDeepSeek-V3 \u5b9e\u73b0\u4e86\u6781\u5177\u7ade\u4e89\u529b\u7684\u8bad\u7ec3\u6210\u672c\uff08\u8be6\u89c1\u88681\uff09\uff0c\u8fd9\u5f97\u76ca\u4e8e\u5728\u7b97\u6cd5\u3001\u6846\u67b6\u548c\u786c\u4ef6\u5c42\u9762\u7684\u6574\u4f53\u4f18\u5316\u8bbe\u8ba1\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"1023\" height=\"192\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-2.png\" alt=\"\" class=\"wp-image-23643\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-2.png 1023w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-2-300x56.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-2-768x144.png 768w\" sizes=\"(max-width: 1023px) 100vw, 1023px\" \/><figcaption>\u8868 1\uff1aDeepSeek-V3 \u7684\u8bad\u7ec3\u6210\u672c\uff0c\u5047\u8bbe H800 \u7684\u79df\u8d41\u4ef7\u683c\u4e3a$2\/GPU\u5c0f\u65f6<\/figcaption><\/figure>\n\n\n\n<p>\u5728\u9884\u8bad\u7ec3\u9636\u6bb5\uff0c\u6bcf\u5904\u74061T token \u4ec5\u9700 180K H800 GPU \u5c0f\u65f6\uff0c\u5373\u5728\u914d\u5907 2048 \u4e2a H800 GPU \u7684\u96c6\u7fa4\u4e0a\u4ec5\u9700 3.7 \u5929\u3002\u56e0\u6b64\uff0c\u6574\u4e2a\u9884\u8bad\u7ec3\u9636\u6bb5\u5728<strong>\u4e0d\u5230\u4e24\u4e2a\u6708\u5185<\/strong>\u5b8c\u6210\uff0c\u603b\u8ba1\u4f7f\u7528\u4e86 2664K GPU 
\u5c0f\u65f6\u3002<\/p>\n\n\n\n<p>\u52a0\u4e0a\u4e0a\u4e0b\u6587\u957f\u5ea6\u6269\u5c55\u6240\u9700\u7684 119K GPU \u5c0f\u65f6\u548c\u540e\u8bad\u7ec3\u9636\u6bb5\u7684 5K GPU \u5c0f\u65f6\uff0cDeepSeek-V3 \u7684\u5b8c\u6574\u8bad\u7ec3\u603b\u5171\u6d88\u8017\u4e86 2.788M GPU \u5c0f\u65f6\u3002\u6309\u7167\u6bcf GPU \u5c0f\u65f6 2 \u7f8e\u5143\u7684 H800 GPU \u79df\u7528\u4ef7\u683c\u8ba1\u7b97\uff0c\u603b\u8bad\u7ec3\u6210\u672c\u4ec5\u4e3a&nbsp;<strong>557.6 \u4e07\u7f8e\u5143<\/strong>\u3002\u9700\u8981\u8bf4\u660e\u7684\u662f\uff0c\u8fd9\u4e9b\u6210\u672c\u4ec5\u5305\u542b DeepSeek-V3 \u7684\u6b63\u5f0f\u8bad\u7ec3\u73af\u8282\uff0c\u4e0d\u5305\u62ec\u524d\u671f\u67b6\u6784\u7814\u7a76\u3001\u7b97\u6cd5\u9a8c\u8bc1\u548c\u6570\u636e\u5b9e\u9a8c\u7b49\u76f8\u5173\u652f\u51fa\u3002<\/p>\n\n\n\n<p>\u672c\u7814\u7a76\u7684\u4e3b\u8981\u521b\u65b0\u70b9\u5305\u62ec\uff1a<\/p>\n\n\n\n<p><strong>\u67b6\u6784\u521b\u65b0<\/strong><\/p>\n\n\n\n<p>\u5728 DeepSeek-V2 \u9ad8\u6548\u67b6\u6784\u7684\u57fa\u7840\u4e0a\uff0c\u521b\u65b0\u6027\u5730\u63d0\u51fa\u4e86<strong>\u65e0\u8f85\u52a9\u635f\u5931\u7684\u8d1f\u8f7d\u5747\u8861\u7b56\u7565<\/strong>\uff0c\u6709\u6548\u964d\u4f4e\u4e86\u8d1f\u8f7d\u5747\u8861\u8fc7\u7a0b\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u5f71\u54cd\u3002<\/p>\n\n\n\n<p>\u5f00\u53d1\u5e76\u9a8c\u8bc1\u4e86\u591a token \u9884\u6d4b(MTP)\u8bad\u7ec3\u76ee\u6807\uff0c\u8bc1\u5b9e\u4e86\u5176\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u63d0\u5347\u4f5c\u7528\uff0c\u8be5\u6280\u672f\u8fd8\u53ef\u7528\u4e8e\u63a8\u6d4b\u89e3\u7801\u6765\u52a0\u901f\u63a8\u7406\u8fc7\u7a0b\u3002<\/p>\n\n\n\n<p><strong>\u9ad8\u6548\u9884\u8bad\u7ec3<\/strong><\/p>\n\n\n\n<p>\u5f00\u53d1\u4e86&nbsp;<strong>FP8 \u6df7\u5408\u7cbe\u5ea6\u8bad\u7ec3\u6846\u67b6<\/strong>\uff0c\u9996\u6b21\u5728\u8d85\u5927\u89c4\u6a21\u6a21\u578b\u4e0a\u9a8c\u8bc1\u4e86 FP8 
\u8bad\u7ec3\u7684\u53ef\u884c\u6027\u548c\u6548\u679c\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u7b97\u6cd5\u3001\u6846\u67b6\u548c\u786c\u4ef6\u7684\u7efc\u5408\u4f18\u5316\uff0c\u7a81\u7834\u4e86<strong>\u8de8\u8282\u70b9 MoE \u8bad\u7ec3\u4e2d\u7684\u901a\u4fe1\u74f6\u9888<\/strong>\uff0c\u5b9e\u73b0\u4e86\u8ba1\u7b97\u4e0e\u901a\u4fe1\u7684\u9ad8\u5ea6\u91cd\u53e0\u3002\u8fd9\u79cd\u4f18\u5316\u5927\u5e45\u63d0\u5347\u4e86\u8bad\u7ec3\u6548\u7387\uff0c\u964d\u4f4e\u4e86\u8bad\u7ec3\u6210\u672c\uff0c\u540c\u65f6\u652f\u6301\u4e86\u66f4\u5927\u89c4\u6a21\u6a21\u578b\u7684\u8bad\u7ec3\u800c\u65e0\u9700\u989d\u5916\u5f00\u9500\u3002<\/p>\n\n\n\n<p>\u4ec5\u7528 2.664M H800 GPU \u5c0f\u65f6\u5c31\u5b8c\u6210\u4e86 DeepSeek-V3 \u5728 14.8T token \u4e0a\u7684\u9884\u8bad\u7ec3\uff0c\u6253\u9020\u51fa\u5f53\u524d\u6700\u5f3a\u5927\u7684\u5f00\u6e90\u57fa\u7840\u6a21\u578b\u3002\u9884\u8bad\u7ec3\u540e\u7684\u5176\u4ed6\u8bad\u7ec3\u9636\u6bb5\u4ec5\u9700 0.1M GPU \u5c0f\u65f6\u3002<\/p>\n\n\n\n<p><strong>\u540e\u8bad\u7ec3\u2014\u2014DeepSeek-R1 \u77e5\u8bc6\u84b8\u998f<\/strong><\/p>\n\n\n\n<p>\u8be5\u7814\u7a76\u63d0\u51fa\u4e86\u4e00\u79cd\u521b\u65b0\u7684\u77e5\u8bc6\u84b8\u998f\u65b9\u6cd5\uff0c<strong>\u5c06\u601d\u7ef4\u94fe (CoT) \u6a21\u578b\uff08\u7279\u522b\u662f DeepSeek R1 \u7cfb\u5217\uff09\u7684\u63a8\u7406\u80fd\u529b\u8f6c\u79fb\u5230\u6807\u51c6 LLM \u4e2d<\/strong>\uff0c\u5c24\u5176\u662f DeepSeek-V3\u3002\u8fd9\u4e00\u65b9\u6cd5\u6210\u529f\u5730\u5c06 R1 \u7684\u9a8c\u8bc1\u548c\u53cd\u601d\u673a\u5236\u6574\u5408\u5230 DeepSeek-V3 \u4e2d\uff0c\u663e\u8457\u63d0\u5347\u4e86\u5176\u63a8\u7406\u80fd\u529b\uff0c\u540c\u65f6\u6709\u6548\u63a7\u5236\u4e86\u8f93\u51fa\u7684\u98ce\u683c\u548c\u957f\u5ea6\u3002<\/p>\n\n\n\n<p><strong>\u6838\u5fc3\u8bc4\u4f30\u6210\u679c<\/strong><\/p>\n\n\n\n<p>\u77e5\u8bc6\u9886\u57df\u8bc4\u4f30\uff1a<\/p>\n\n\n\n<ul><li>\u2022&nbsp;<strong>\u5728\u6559\u80b2\u7c7b\u57fa\u51c6\u6d4b\u8bd5\u4e2d<\/strong>\uff0cDeepSeek-V3 
\u7684\u8868\u73b0<strong>\u8d85\u8d8a\u4e86\u6240\u6709\u5f00\u6e90\u6a21\u578b<\/strong>\uff0c\u5728 MMLU\u3001MMLU-Pro \u548c GPQA \u6d4b\u8bd5\u4e2d\u5206\u522b\u83b7\u5f97\u4e86 88.5\u300175.9 \u548c 59.1 \u7684\u4f18\u5f02\u6210\u7ee9\u3002\u8fd9\u4e00\u6027\u80fd\u6c34\u5e73\u5df2\u4e0e\u9886\u5148\u95ed\u6e90\u6a21\u578b GPT-4o \u548c Claude-Sonnet-3.5 \u76f8\u5f53\uff0c\u663e\u8457\u7f29\u5c0f\u4e86\u5f00\u6e90\u4e0e\u95ed\u6e90\u6a21\u578b\u7684\u6027\u80fd\u5dee\u8ddd\u3002<\/li><li>\u2022 \u5728<strong>\u4e8b\u5b9e\u6027\u77e5\u8bc6\u8bc4\u6d4b<\/strong>\u4e2d\uff0cDeepSeek-V3 \u5728 SimpleQA \u548c\u4e2d\u6587 SimpleQA \u6d4b\u8bd5\u4e2d\u90fd\u5c55\u73b0\u51fa\u9886\u5148\u4e8e\u5176\u4ed6\u5f00\u6e90\u6a21\u578b\u7684\u4f18\u52bf\u3002\u7279\u522b\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u867d\u7136\u5176\u82f1\u8bed\u4e8b\u5b9e\u77e5\u8bc6\uff08SimpleQA\uff09\u7565\u900a\u4e8e GPT-4o \u548c Claude-Sonnet-3.5\uff0c\u4f46\u5728\u4e2d\u6587\u4e8b\u5b9e\u77e5\u8bc6\uff08\u4e2d\u6587 SimpleQA\uff09\u65b9\u9762\u5374\u8d85\u8d8a\u4e86\u8fd9\u4e9b\u6a21\u578b\uff0c\u51f8\u663e\u4e86\u5176<strong>\u5728\u4e2d\u6587\u77e5\u8bc6\u9886\u57df\u7684\u7279\u6b8a\u4f18\u52bf<\/strong>\u3002<\/li><\/ul>\n\n\n\n<p>\u6280\u672f\u80fd\u529b\u8bc4\u4f30\uff1a<\/p>\n\n\n\n<ul><li>\u2022 \u5728<strong>\u6570\u5b66<\/strong>\u9886\u57df\uff0cDeepSeek-V3 \u5728\u6240\u6709<strong>\u975e CoT \u6a21\u578b\uff08\u5305\u62ec\u5f00\u6e90\u548c\u95ed\u6e90\uff09\u4e2d\u53d6\u5f97\u4e86\u6700\u4f18\u6027\u80fd<\/strong>\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u5728 MATH-500 \u7b49\u7279\u5b9a\u6d4b\u8bd5\u4e2d\uff0c\u5176\u8868\u73b0\u751a\u81f3\u8d85\u8d8a\u4e86 GPT-4o\uff0c\u5145\u5206\u5c55\u793a\u4e86\u5176\u51fa\u8272\u7684\u6570\u5b66\u63a8\u7406\u80fd\u529b\u3002<\/li><li>\u2022 \u5728<strong>\u7f16\u7a0b<\/strong>\u9886\u57df\uff0cDeepSeek-V3 \u5728&nbsp;<strong>LiveCodeBench 
\u7b49\u7f16\u7a0b\u7ade\u8d5b\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u8868\u73b0\u6700\u4e3a\u7a81\u51fa<\/strong>\uff0c\u786e\u7acb\u4e86\u5176\u5728\u8be5\u9886\u57df\u7684\u9886\u5148\u5730\u4f4d\u3002\u5728\u8f6f\u4ef6\u5de5\u7a0b\u76f8\u5173\u4efb\u52a1\u4e2d\uff0c\u5c3d\u7ba1\u7565\u4f4e\u4e8e Claude-Sonnet-3.5\uff0c\u4f46\u4ecd\u5927\u5e45\u9886\u5148\u4e8e\u5176\u4ed6\u6a21\u578b\uff0c\u5c55\u793a\u4e86\u5176\u5728\u5404\u7c7b\u6280\u672f\u8bc4\u6d4b\u4e2d\u7684\u7efc\u5408\u5b9e\u529b\u3002<\/li><\/ul>\n\n\n\n<h2>\u67b6\u6784<\/h2>\n\n\n\n<p>DeepSeek-V3 \u7684\u57fa\u672c\u67b6\u6784\u5177\u6709\u4e24\u4e2a\u6838\u5fc3\u7279\u5f81\uff1a<\/p>\n\n\n\n<p>1.\u91c7\u7528&nbsp;<strong>MLA<\/strong>&nbsp;\u5b9e\u73b0\u9ad8\u6548\u63a8\u7406<\/p>\n\n\n\n<p>2.\u4f7f\u7528&nbsp;<strong>DeepSeekMoE<\/strong>&nbsp;\u5b9e\u73b0\u7ecf\u6d4e\u9ad8\u6548\u7684\u8bad\u7ec3\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0c\u8be5\u7814\u7a76\u8fd8\u5f00\u53d1\u4e86MTP\u8bad\u7ec3\u76ee\u6807\uff0c\u8fd9\u4e00\u521b\u65b0\u5728\u8bc4\u4f30\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5c55\u73b0\u51fa\u663e\u8457\u7684\u6027\u80fd\u63d0\u5347\u3002<\/p>\n\n\n\n<p>\u5728\u5176\u4ed6\u672a\u7279\u522b\u8bf4\u660e\u7684\u67b6\u6784\u7ec6\u8282\u65b9\u9762\uff0cDeepSeek-V3 \u5ef6\u7eed\u4e86 DeepSeek-V2 \u7684\u8bbe\u8ba1\u65b9\u6848\u3002<\/p>\n\n\n\n<h3>\u57fa\u672c\u67b6\u6784<\/h3>\n\n\n\n<p>DeepSeek-V3 \u7684\u57fa\u7840\u67b6\u6784\u5efa\u7acb\u5728 Transformer \u6846\u67b6\u4e4b\u4e0a\u3002\u4e3a\u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u548c\u964d\u4f4e\u8bad\u7ec3\u6210\u672c\uff0c\u8be5\u6a21\u578b\u91c7\u7528\u4e86\u7ecf DeepSeek-V2 \u9a8c\u8bc1\u7684 MLA \u548c DeepSeekMoE \u6280\u672f\u3002\u76f8\u6bd4 DeepSeek-V2\uff0c\u672c\u7814\u7a76\u5728 DeepSeekMoE 
\u4e2d\u521b\u65b0\u6027\u5730\u5f15\u5165\u4e86\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u7b56\u7565\uff0c\u6709\u6548\u964d\u4f4e\u4e86\u8d1f\u8f7d\u5747\u8861\u8fc7\u7a0b\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u5f71\u54cd\u3002<\/p>\n\n\n\n<p>\u56fe2\u5c55\u793a\u4e86 DeepSeek-V3 \u7684\u57fa\u672c\u67b6\u6784\uff0c\u672c\u8282\u5c06\u7b80\u8981\u4ecb\u7ecd MLA \u548c DeepSeekMoE \u7684\u6280\u672f\u7ec6\u8282\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"769\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-3-1024x769.png\" alt=\"\" class=\"wp-image-23650\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-3-1024x769.png 1024w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-3-300x225.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-3-768x577.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-3.png 1053w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><figcaption>\u56fe2\uff1aDeepSeek-V3 \u57fa\u672c\u67b6\u6784\u793a\u610f\u56fe\u3002\u57fa\u4e8e DeepSeek-V2\uff0c\u56e2\u961f\u91c7\u7528\u4e86\u591a\u5934\u6f5c\u5728\u6ce8\u610f\u529b\uff08MLA\uff09\u548c DeepSeekMoE \u67b6\u6784\uff0c\u4ee5\u5b9e\u73b0\u9ad8\u6548\u63a8\u7406\u548c\u7ecf\u6d4e\u7684\u8bad\u7ec3\u3002<\/figcaption><\/figure>\n\n\n\n<h3>\u591a\u5934\u6f5c\u5728\u6ce8\u610f\u529b\u673a\u5236<\/h3>\n\n\n\n<p>DeepSeek-V3 \u5728\u6ce8\u610f\u529b\u673a\u5236\u65b9\u9762\u91c7\u7528\u4e86 MLA \u67b6\u6784\u3002\u8bbe\u5411\u91cf\u7ef4\u5ea6\u4e3a<em>d<\/em>\uff0c\u6ce8\u610f\u529b\u5934\u6570\u4e3a\ud835\udc5b<sub>\u210e<\/sub>\uff0c\u6bcf\u4e2a\u5934\u7684\u7ef4\u5ea6\u4e3a&nbsp;\ud835\udc51<sub>\u210e<\/sub>\uff0c\u5728\u7279\u5b9a\u6ce8\u610f\u529b\u5c42\u4e2d\u7b2c<em>t<\/em>\u4e2a token \u7684\u6ce8\u610f\u529b\u8f93\u5165\u8868\u793a\u4e3a&nbsp;<em>h<\/em>\ud835\udc61 \u2208<em> R<\/em><sup>d<\/sup>&nbsp;\u3002MLA 
\u7684\u6838\u5fc3\u521b\u65b0\u5728\u4e8e\u5bf9\u6ce8\u610f\u529b\u952e\u548c\u503c\u8fdb\u884c\u4f4e\u79e9\u8054\u5408\u538b\u7f29\uff0c\u4ee5\u964d\u4f4e\u63a8\u7406\u8fc7\u7a0b\u4e2d\u7684\u952e\u503c(KV)\u7f13\u5b58\u5f00\u9500\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"767\" height=\"247\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-4.png\" alt=\"\" class=\"wp-image-23658\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-4.png 767w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-4-300x97.png 300w\" sizes=\"(max-width: 767px) 100vw, 767px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\uff1a<\/p>\n\n\n\n<ul><li><em>c<sub>t<\/sub><sup>KV<\/sup>\u2208R<sup>d<sub>c<\/sub><\/sup><\/em>&nbsp;\u8868\u793a\u952e\u548c\u503c\u7684\u538b\u7f29\u6f5c\u5728\u5411\u91cf<\/li><li><em>d<sub>c<\/sub>(\u226ad<sub>h<\/sub>n<sub>h<\/sub>)<\/em>&nbsp;\u8868\u793a KV \u538b\u7f29\u7ef4\u5ea6<\/li><li><em>W<sup>DKV<\/sup>\u2208R<sup>d<sub>c<\/sub>\u00d7d<\/sup><\/em>&nbsp;\u4e3a\u7ef4\u5ea6\u4e0b\u6295\u5f71\u53d8\u6362\u77e9\u9635<\/li><li><em>W<sup>UK<\/sup>,W<sup>UV<\/sup>\u2208R<sup>d<sub>h<\/sub>n<sub>h<\/sub>\u00d7d<sub>c<\/sub><\/sup><\/em>&nbsp;\u5206\u522b\u4e3a\u952e\u548c\u503c\u7684\u7ef4\u5ea6\u4e0a\u6295\u5f71\u53d8\u6362\u77e9\u9635<\/li><li><em>W<sup>KR<\/sup>\u2208R<sup>d<sub>h<\/sub><sup>R<\/sup>\u00d7d<\/sup><\/em>&nbsp;\u7528\u4e8e\u751f\u6210\u643a\u5e26<strong>\u65cb\u8f6c\u4f4d\u7f6e\u7f16\u7801(Rotary Positional Embedding, RoPE)<\/strong>\u7684\u89e3\u8026\u952e<\/li><li><strong>RoPE(\u00b7)<\/strong>\u8868\u793a\u65cb\u8f6c\u4f4d\u7f6e\u7f16\u7801\u77e9\u9635\u7684\u5e94\u7528\u64cd\u4f5c<\/li><li><strong>[\u00b7;\u00b7]<\/strong>\u8868\u793a\u5411\u91cf\u8fde\u63a5\u64cd\u4f5c<\/li><\/ul>\n\n\n\n<p>\u5728 MLA \u4e2d\uff0c\u751f\u6210\u8fc7\u7a0b\u4ec5\u9700\u7f13\u5b58\u9ad8\u4eae\u6807\u8bb0\u7684\u5411\u91cf<em>c<sub>t<\/sub><sup>KV<\/sup><\/em> \u548c 
<em>k<sub>t<\/sub><sup>R<\/sup><\/em>\uff0c\u8fd9\u79cd\u8bbe\u8ba1\u663e\u8457\u964d\u4f4e\u4e86 KV \u7f13\u5b58\u7a7a\u95f4\uff0c\u540c\u65f6\u4fdd\u6301\u4e86\u4e0e\u6807\u51c6MHA\u76f8\u5f53\u7684\u6027\u80fd\u6c34\u5e73\u3002<\/p>\n\n\n\n<p>\u5bf9\u4e8e\u6ce8\u610f\u529b\u67e5\u8be2(Query)\u90e8\u5206\uff0c\u6a21\u578b\u540c\u6837\u91c7\u7528\u4f4e\u79e9\u538b\u7f29\u6280\u672f\uff0c\u8fd9\u79cd\u8bbe\u8ba1\u6709\u6548\u964d\u4f4e\u4e86\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u7684\u6fc0\u6d3b\u503c\u5185\u5b58\u5360\u7528\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"757\" height=\"172\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-6.png\" alt=\"\" class=\"wp-image-23666\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-6.png 757w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-6-300x68.png 300w\" sizes=\"(max-width: 757px) 100vw, 757px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\uff1a<\/p>\n\n\n\n<ul><li>c<sub>t<\/sub><sup>Q<\/sup>\u2208R<sup>d\u2032c<\/sup>&nbsp;\u8868\u793a\u67e5\u8be2\u7684\u538b\u7f29\u6f5c\u5728\u5411\u91cf<\/li><li>d\u2032c(\u226a<em>d<sub>h<\/sub>n<sub>h<\/sub><\/em>)&nbsp;\u8868\u793a\u67e5\u8be2\u538b\u7f29\u7ef4\u5ea6<\/li><li>W<sup><em>DQ<\/em><\/sup>\u2208R<sup><em>d\u2032c\u00d7d&nbsp;<\/em><\/sup>\u548c&nbsp;W<sup><em>UQ<\/em><\/sup>\u2208R<em><sup>d<sub>h<\/sub>n<sub>h<\/sub>\u00d7d\u2032c<\/sup>&nbsp;<\/em>\u5206\u522b\u4e3a\u67e5\u8be2\u7684\u7ef4\u5ea6\u964d\u7ef4\u548c\u5347\u7ef4\u53d8\u6362\u77e9\u9635<\/li><li>W<sup><em>QR<\/em><\/sup>\u2208R<sup>dR<sub>h<\/sub>n<sub>h<\/sub>\u00d7d\u2032c<\/sup>&nbsp;\u7528\u4e8e\u751f\u6210\u643a\u5e26\u65cb\u8f6c\u4f4d\u7f6e\u7f16\u7801\u7684\u89e3\u8026\u67e5\u8be2\u77e9\u9635<\/li><\/ul>\n\n\n\n<p>\u6700\u7ec8\uff0c\u901a\u8fc7\u7ec4\u5408\u6ce8\u610f\u529b\u67e5\u8be2(<em>q<sub>t,i<\/sub>&nbsp;<\/em>)\u3001\u952e&nbsp;(<em>k<sub>j,i<\/sub><\/em>)\u548c\u503c( v<sup>C<\/sup><em><sub>j,i<\/sub><\/em> 
)\uff0c\u5f97\u5230\u6ce8\u610f\u529b\u673a\u5236\u7684\u6700\u7ec8\u8f93\u51fa&nbsp;<em>U<sub>t<\/sub><\/em>\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"692\" height=\"148\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-9.png\" alt=\"\" class=\"wp-image-23679\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-9.png 692w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-9-300x64.png 300w\" sizes=\"(max-width: 692px) 100vw, 692px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\ud835\udc4a<sup>\ud835\udc42<\/sup> \u2208 R<sup>\ud835\udc51\u00d7\ud835\udc51<sub>\u210e<\/sub>\ud835\udc5b<sub>\u210e<\/sub><\/sup>\u4e3a\u8f93\u51fa\u7ef4\u5ea6\u53d8\u6362\u77e9\u9635\u3002<\/p>\n\n\n\n<h3>DeepSeekMoE \u53ca\u5176\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u673a\u5236<\/h3>\n\n\n\n<p><strong>DeepSeekMoE\u7684\u57fa\u7840\u67b6\u6784\uff1a\u5728\u524d\u9988\u7f51\u7edc(Feed-Forward Networks, FFN)<\/strong>&nbsp;\u90e8\u5206\uff0cDeepSeek-V3 \u91c7\u7528\u4e86 DeepSeekMoE \u67b6\u6784\u3002\u76f8\u6bd4\u4f20\u7edf\u7684 MoE \u67b6\u6784\uff08\u5982 GShard\uff09\uff0cDeepSeekMoE \u91c7\u7528\u4e86\u66f4\u7ec6\u7c92\u5ea6\u7684\u4e13\u5bb6\u5206\u914d\u673a\u5236\uff0c\u5e76\u521b\u65b0\u6027\u5730\u5c06\u90e8\u5206\u4e13\u5bb6\u8bbe\u7f6e\u4e3a\u5171\u4eab\u4e13\u5bb6\u3002\u5047\u8bbe\u7b2c&nbsp;t\u4e2a token \u7684 FFN \u8f93\u5165\u4e3a<em>u<sub>t<\/sub><\/em>\uff0c\u5176\u8f93\u51fa<em>h\u2032<sub>t<\/sub><\/em>\u7684\u8ba1\u7b97\u8fc7\u7a0b\u5982\u4e0b\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-10.png\" alt=\"\" class=\"wp-image-23683\" width=\"607\" height=\"228\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-10.png 805w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-10-300x113.png 300w, 
http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-10-768x289.png 768w\" sizes=\"(max-width: 607px) 100vw, 607px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\uff1a<\/p>\n\n\n\n<ul><li>N<sub>s<\/sub>&nbsp;\u548c&nbsp;N<sub>r<\/sub>&nbsp;\u5206\u522b\u8868\u793a\u5171\u4eab\u4e13\u5bb6\u548c\u8def\u7531\u4e13\u5bb6\u7684\u6570\u91cf<\/li><li>FFN<sub>i<\/sub><sup>(s)<\/sup>(\u00b7)&nbsp;\u548c&nbsp;FFN<sub>i<\/sub><sup>(r)<\/sup>(\u00b7)&nbsp;\u5206\u522b\u4ee3\u8868\u7b2c<em>&nbsp;i&nbsp;<\/em>\u4e2a\u5171\u4eab\u4e13\u5bb6\u548c\u8def\u7531\u4e13\u5bb6\u7684\u5904\u7406\u51fd\u6570<\/li><li>K<sub>r<\/sub>&nbsp;\u8868\u793a\u88ab\u6fc0\u6d3b\u7684\u8def\u7531\u4e13\u5bb6\u6570\u91cf<\/li><li>g<sub>i,t<\/sub>&nbsp;\u4ee3\u8868\u7b2c<em>&nbsp;i&nbsp;<\/em>\u4e2a\u4e13\u5bb6\u7684\u6743\u91cd\u7cfb\u6570\uff08\u95e8\u63a7\u503c\uff09<\/li><li>s<sub>i,t<\/sub>&nbsp;\u8868\u793a token \u4e0e\u4e13\u5bb6\u95f4\u7684\u76f8\u5173\u5ea6<\/li><li>e<sub>i<\/sub>&nbsp;\u4ee3\u8868\u7b2c&nbsp;i&nbsp;\u4e2a\u8def\u7531\u4e13\u5bb6\u7684\u7279\u5f81\u5411\u91cf<\/li><li>Topk(\u00b7,K)&nbsp;\u51fd\u6570\u8fd4\u56de\u7b2c&nbsp;t&nbsp;\u4e2a token \u4e0e\u6240\u6709\u8def\u7531\u4e13\u5bb6\u8ba1\u7b97\u5f97\u5230\u7684\u76f8\u5173\u5ea6\u5206\u6570\u4e2d\u6700\u9ad8\u7684&nbsp;K&nbsp;\u4e2a\u503c\u3002<\/li><\/ul>\n\n\n\n<p><strong>\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861<\/strong>\uff1a\u5bf9\u4e8e MoE 
\u6a21\u578b\uff0c\u4e0d\u5e73\u8861\u7684\u4e13\u5bb6\u8d1f\u8f7d\u5c06\u5bfc\u81f4\u8def\u7531\u5d29\u6e83\uff0c\u5e76\u5728\u4e13\u5bb6\u5e76\u884c\u573a\u666f\u4e2d\u964d\u4f4e\u8ba1\u7b97\u6548\u7387\u3002\u4f20\u7edf\u89e3\u51b3\u65b9\u6848\u901a\u5e38\u4f9d\u8d56\u8f85\u52a9\u635f\u5931\u6765\u907f\u514d\u4e0d\u5e73\u8861\u8d1f\u8f7d\u3002\u7136\u800c\uff0c\u8fc7\u5927\u7684\u8f85\u52a9\u635f\u5931\u4f1a\u635f\u5bb3\u6a21\u578b\u6027\u80fd\u3002\u4e3a\u4e86\u5728\u8d1f\u8f7d\u5e73\u8861\u548c\u6a21\u578b\u6027\u80fd\u4e4b\u95f4\u5b9e\u73b0\u66f4\u597d\u7684\u6743\u8861\uff0c\u7814\u7a76\u56e2\u961f\u5f00\u521b\u4e86\u4e00\u79cd\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u7b56\u7565\u6765\u786e\u4fdd\u8d1f\u8f7d\u5e73\u8861\u3002<\/p>\n\n\n\n<p>\u5177\u4f53\u800c\u8a00\uff0c\u7814\u7a76\u56e2\u961f\u4e3a\u6bcf\u4e2a\u4e13\u5bb6\u5f15\u5165\u4e86\u4e00\u4e2a<strong>\u504f\u7f6e\u9879<em>b<sub>i<\/sub><\/em><\/strong>\uff0c\u5e76\u5c06\u5176\u6dfb\u52a0\u5230\u76f8\u5e94\u7684<strong>\u76f8\u5173\u5ea6\u5206\u6570<\/strong>s<em><sub>i,t<\/sub><\/em>\u4e2d\u4ee5\u786e\u5b9a top-K \u8def\u7531\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"800\" height=\"86\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-12.png\" alt=\"\" class=\"wp-image-23687\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-12.png 800w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-12-300x32.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-12-768x83.png 768w\" sizes=\"(max-width: 800px) 100vw, 800px\" \/><\/figure>\n\n\n\n<p>\u5728\u8fd9\u79cd\u8bbe\u8ba1\u4e2d\uff0c\u504f\u7f6e\u9879\u4ec5\u7528\u4e8e\u8def\u7531\u9009\u62e9\uff0c\u800c\u95e8\u63a7\u503c\uff08\u7528\u4e8e\u4e0e FFN 
\u8f93\u51fa\u76f8\u4e58\uff09\u4ecd\u57fa\u4e8e\u539f\u59cb\u76f8\u5173\u5ea6\u5206\u6570&nbsp;s<em><sub>i,t<\/sub><\/em>&nbsp;\u8ba1\u7b97\u3002\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\uff0c\u7cfb\u7edf\u4f1a\u5b9e\u65f6\u76d1\u63a7\u6bcf\u4e2a\u8bad\u7ec3\u6b65\u9aa4\u4e2d\u6240\u6709\u6279\u6b21\u7684\u4e13\u5bb6\u8d1f\u8f7d\u5206\u5e03\u3002\u5728\u6bcf\u4e2a\u6b65\u9aa4\u7ed3\u675f\u65f6\uff0c\u5bf9\u4e8e\u8d1f\u8f7d\u8fc7\u9ad8\u7684\u4e13\u5bb6\uff0c\u5176\u504f\u7f6e\u9879\u4f1a\u51cf\u5c11&nbsp;\u03b3&nbsp;\uff1b\u5bf9\u4e8e\u8d1f\u8f7d\u4e0d\u8db3\u7684\u4e13\u5bb6\uff0c\u5176\u504f\u7f6e\u9879\u4f1a\u589e\u52a0&nbsp;\u03b3&nbsp;\uff0c\u5176\u4e2d&nbsp;\u03b3&nbsp;\u662f\u63a7\u5236\u504f\u7f6e\u66f4\u65b0\u901f\u7387\u7684\u8d85\u53c2\u6570\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u8fd9\u79cd\u52a8\u6001\u8c03\u6574\u673a\u5236\uff0cDeepSeek-V3 \u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u5b9e\u73b0\u4e86\u4e13\u5bb6\u8d1f\u8f7d\u7684\u5747\u8861\u5206\u5e03\uff0c\u5176\u6027\u80fd\u4f18\u4e8e\u4f20\u7edf\u4ec5\u4f9d\u9760\u8f85\u52a9\u635f\u5931\u6765\u5b9e\u73b0\u8d1f\u8f7d\u5747\u8861\u7684\u6a21\u578b\u3002<\/p>\n\n\n\n<p><strong>\u5e8f\u5217\u7ea7\u8f85\u52a9\u635f\u5931\u8865\u5145\u673a\u5236\uff1a<\/strong>&nbsp;\u867d\u7136 DeepSeek-V3 \u4e3b\u8981\u91c7\u7528\u65e0\u8f85\u52a9\u635f\u5931\u7b56\u7565\u6765\u5b9e\u73b0\u8d1f\u8f7d\u5747\u8861\uff0c\u4f46\u4e3a\u4e86\u9632\u6b62\u5355\u4e2a\u5e8f\u5217\u4e2d\u51fa\u73b0\u663e\u8457\u7684\u8d1f\u8f7d\u4e0d\u5747\u8861\u73b0\u8c61\uff0c\u6a21\u578b\u8fd8\u5f15\u5165\u4e86\u8865\u5145\u6027\u7684\u5e8f\u5217\u7ea7\u5e73\u8861\u635f\u5931\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"830\" height=\"313\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-13.png\" alt=\"\" class=\"wp-image-23697\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-13.png 830w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-13-300x113.png 300w, 
http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-13-768x290.png 768w\" sizes=\"(max-width: 830px) 100vw, 830px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\u5e73\u8861\u56e0\u5b50&nbsp;\u03b1&nbsp;\u662f\u4e00\u4e2a\u8d85\u53c2\u6570\uff0c\u5728 DeepSeek-V3 \u4e2d\u88ab\u8bbe\u7f6e\u4e3a\u6781\u5c0f\u503c\uff1b&nbsp;1(\u00b7)&nbsp;\u8868\u793a\u6307\u793a\u51fd\u6570\uff1b&nbsp;T&nbsp;\u4ee3\u8868\u5e8f\u5217\u4e2d\u7684 token \u603b\u6570\u3002\u8fd9\u79cd\u5e8f\u5217\u7ea7\u5e73\u8861\u635f\u5931\u673a\u5236\u6709\u52a9\u4e8e\u4fdd\u6301\u5355\u4e2a\u5e8f\u5217\u5185\u4e13\u5bb6\u8d1f\u8f7d\u7684\u5747\u8861\u6027\u3002<\/p>\n\n\n\n<p><strong>\u8282\u70b9\u7ea6\u675f\u8def\u7531\u673a\u5236\uff1a<\/strong>&nbsp;\u7c7b\u4f3c\u4e8e DeepSeek-V2 \u7684\u8bbe\u5907\u9650\u5236\u8def\u7531\u7b56\u7565\uff0cDeepSeek-V3 \u91c7\u7528\u4e86\u53d7\u63a7\u8def\u7531\u673a\u5236\u6765\u4f18\u5316\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u7684\u901a\u4fe1\u5f00\u9500\u3002\u5177\u4f53\u800c\u8a00\uff0c\u7cfb\u7edf\u9650\u5236\u6bcf\u4e2a token \u6700\u591a\u53ea\u80fd\u5206\u914d\u7ed9&nbsp;M&nbsp;\u4e2a\u8ba1\u7b97\u8282\u70b9\uff0c\u8fd9\u4e9b\u8282\u70b9\u7684\u9009\u62e9\u57fa\u4e8e\u6bcf\u4e2a\u8282\u70b9\u4e0a\u4e13\u5bb6\u7684\u6700\u9ad8&nbsp;Kr\/M&nbsp;\u76f8\u5173\u5ea6\u5206\u6570\u603b\u548c\u3002<\/p>\n\n\n\n<p>\u5728\u8fd9\u79cd\u7ea6\u675f\u4e0b\uff0c<strong>MoE \u8bad\u7ec3\u6846\u67b6\u80fd\u591f\u5b9e\u73b0\u8ba1\u7b97\u4e0e\u901a\u4fe1\u7684\u8fd1\u4e4e\u5b8c\u5168\u5e76\u884c\u5904\u7406<\/strong>\u3002<\/p>\n\n\n\n<p><strong>\u5b8c\u6574\u7684 Token \u4fdd\u7559\u673a\u5236\uff1a<\/strong>&nbsp;\u5f97\u76ca\u4e8e\u9ad8\u6548\u7684\u8d1f\u8f7d\u5747\u8861\u7b56\u7565\uff0cDeepSeek-V3 \u5728\u6574\u4e2a\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u90fd\u4fdd\u6301\u7740\u826f\u597d\u7684\u8d1f\u8f7d\u5e73\u8861\u72b6\u6001\u3002\u56e0\u6b64\uff0c\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4e0d\u5b58\u5728 token 
\u4e22\u5f03\u73b0\u8c61\u3002\u540c\u65f6\uff0c\u901a\u8fc7\u7279\u5b9a\u7684\u63a8\u7406\u90e8\u7f72\u7b56\u7565\uff0cDeepSeek-V3 \u5728\u63a8\u7406\u9636\u6bb5\u540c\u6837\u5b9e\u73b0\u4e86\u5b8c\u6574\u7684 token \u4fdd\u7559\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_3\">\u591a token \u9884\u6d4b\u673a\u5236 (Multi-Token Prediction, MTP)<\/h3>\n\n\n\n<p>DeepSeek-V3 \u521b\u65b0\u6027\u5730\u91c7\u7528\u4e86 MTP \u76ee\u6807\uff0c\u5c06\u9884\u6d4b\u8303\u56f4<strong>\u6269\u5c55\u5230\u6bcf\u4e2a\u4f4d\u7f6e\u7684\u591a\u4e2a\u540e\u7eed token<\/strong>\u3002<\/p>\n\n\n\n<p>\u8fd9\u79cd\u8bbe\u8ba1\u5177\u6709\u53cc\u91cd\u4f18\u52bf\uff1a<\/p>\n\n\n\n<p>\u9996\u5148\uff0cMTP \u76ee\u6807\u901a\u8fc7\u589e\u52a0\u8bad\u7ec3\u4fe1\u53f7\u7684\u5bc6\u5ea6\u53ef\u80fd\u63d0\u9ad8\u6570\u636e\u5229\u7528\u6548\u7387\uff1b\u5176\u6b21\uff0c\u5b83\u4f7f\u6a21\u578b\u80fd\u591f\u63d0\u524d\u89c4\u5212\u8868\u5f81\uff0c\u4ece\u800c\u66f4\u51c6\u786e\u5730\u9884\u6d4b\u540e\u7eed token\u3002<\/p>\n\n\n\n<p>\u5982\u56fe3\u6240\u793a\uff0c\u8be5\u5b9e\u73b0\u65b9\u6848\u4e0e\u5148\u524d\u7814\u7a76\u7684\u65b9\u6cd5\u6709\u6240\u4e0d\u540c\uff1a\u524d\u8005\u4f7f\u7528\u72ec\u7acb\u8f93\u51fa\u5934\u5e76\u884c\u9884\u6d4b&nbsp;D&nbsp;\u4e2a\u989d\u5916 token\uff0c\u800c DeepSeek-V3 \u91c7\u7528\u987a\u5e8f\u9884\u6d4b\u65b9\u5f0f\uff0c\u5e76\u5728\u6bcf\u4e2a\u9884\u6d4b\u5c42\u7ea7\u4fdd\u6301\u5b8c\u6574\u7684\u56e0\u679c\u5173\u7cfb\u94fe\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"997\" height=\"447\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-14.png\" alt=\"\" class=\"wp-image-23702\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-14.png 997w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-14-300x135.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-14-768x344.png 768w\" sizes=\"(max-width: 997px) 100vw, 997px\" 
\/><figcaption>\u56fe3\uff1aMTP\u5b9e\u73b0\u793a\u610f\u56fe\u3002V3\u5728\u6bcf\u4e2a\u6df1\u5ea6\u4e0a\u4fdd\u6301\u6bcf\u4e2a token \u9884\u6d4b\u8fc7\u7a0b\u4e2d\u7684\u5b8c\u6574\u56e0\u679c\u4f9d\u8d56\u94fe\u3002<\/figcaption><\/figure>\n\n\n\n<p><strong>MTP \u6a21\u5757\u67b6\u6784\uff1a<\/strong>&nbsp;\u5177\u4f53\u5b9e\u73b0\u4e2d\uff0c\u6a21\u578b\u91c7\u7528&nbsp;D&nbsp;\u4e2a\u4e32\u8054\u6a21\u5757\u6765\u9884\u6d4b&nbsp;D&nbsp;\u4e2a\u989d\u5916\u7684 token\u3002\u6bcf\u4e2a MTP \u6a21\u5757\uff08\u7b2ck\u4e2a\uff09\u5305\u542b\u4ee5\u4e0b\u7ec4\u4ef6\uff1a<\/p>\n\n\n\n<ul><li>\u5171\u4eab\u5411\u91cf\u5c42&nbsp;Emb(\u00b7)<\/li><li>\u5171\u4eab\u8f93\u51fa\u5934&nbsp;OutHead(\u00b7)<\/li><li>Transformer \u5904\u7406\u5355\u5143&nbsp;TRM(\u00b7)<\/li><li>\u7ef4\u5ea6\u6620\u5c04\u77e9\u9635&nbsp;M<sub>k<\/sub>\u2208R<em><sup>d\u00d72d<\/sup><\/em><\/li><\/ul>\n\n\n\n<p>\u5bf9\u4e8e\u8f93\u5165\u5e8f\u5217\u4e2d\u7684\u7b2c&nbsp;<em>i<\/em>&nbsp;\u4e2a token<em>&nbsp;t<sub>i<\/sub>&nbsp;<\/em>\uff0c\u5728\u7b2c&nbsp;k&nbsp;\u5c42\u9884\u6d4b\u65f6\uff0c\u6a21\u578b\u9996\u5148\u5c06\u4e24\u4e2a\u5411\u91cf\u8fdb\u884c\u7ec4\u5408\uff1a\u8be5 token \u5728\u7b2c&nbsp;(k\u22121)&nbsp;\u5c42\u7684\u7279\u5f81\u8868\u793a&nbsp;<em>h<sub>i<\/sub><sup>k\u22121<\/sup><\/em>\u2208R<sup>d<\/sup>&nbsp;\u548c\u7b2c&nbsp;(i+k)&nbsp;\u4e2a token \u7684\u5411\u91cf&nbsp;<em>Emb(t<sub>i+k<\/sub>)\u2208R<sup>d<\/sup>&nbsp;<\/em>\uff0c\u901a\u8fc7\u7ebf\u6027\u53d8\u6362\u8fdb\u884c\u878d\u5408\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"786\" height=\"60\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-15.png\" alt=\"\" class=\"wp-image-23708\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-15.png 786w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-15-300x23.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-15-768x59.png 768w\" sizes=\"(max-width: 
786px) 100vw, 786px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d<strong>[\u00b7;\u00b7]<\/strong>\u8868\u793a\u5411\u91cf\u62fc\u63a5\u64cd\u4f5c\u3002\u9700\u8981\u7279\u522b\u8bf4\u660e\u7684\u662f\uff0c\u5728&nbsp;k=1&nbsp;\u65f6\uff0c<em>h<sub>i<\/sub><sup>k\u22121<\/sup><\/em>&nbsp;\u4ee3\u8868\u4e3b\u6a21\u578b\u8f93\u51fa\u7684\u7279\u5f81\u8868\u793a\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u6bcf\u4e2a MTP \u6a21\u5757\u90fd\u4e0e\u4e3b\u6a21\u578b\u5171\u4eab\u540c\u4e00\u4e2a\u5411\u91cf\u5c42\u3002\u7ecf\u8fc7\u7ec4\u5408\u7684\u7279\u5f81\u5411\u91cf<em>&nbsp;h<sub>i<\/sub><sup>\u2032k<\/sup><\/em>&nbsp;\u968f\u540e\u8f93\u5165\u5230\u7b2c&nbsp;k&nbsp;\u5c42\u7684 Transformer \u5904\u7406\u5355\u5143\uff0c\u751f\u6210\u8be5\u5c42\u7684\u8f93\u51fa\u7279\u5f81\u8868\u793a&nbsp;<em>h<sub>i<\/sub><sup>k<\/sup><\/em>&nbsp;\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-16.png\" alt=\"\" class=\"wp-image-23712\" width=\"488\" height=\"49\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-16.png 650w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-16-300x30.png 300w\" sizes=\"(max-width: 488px) 100vw, 488px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d&nbsp;T&nbsp;\u4ee3\u8868\u8f93\u5165\u5e8f\u5217\u7684\u957f\u5ea6\uff0c&nbsp;i:j&nbsp;\u8868\u793a\u5305\u542b\u4e24\u7aef\u7684\u5207\u7247\u64cd\u4f5c\u3002\u63a5\u7740\uff0c\u7cfb\u7edf\u5c06h<em><sub>i<\/sub><\/em><sup><em>k<\/em><\/sup>\u8f93\u5165\u5230\u5171\u4eab\u8f93\u51fa\u5c42\uff0c\u8ba1\u7b97\u7b2c&nbsp;k&nbsp;\u4e2a\u9884\u6d4b token \u7684\u6982\u7387\u5206\u5e03&nbsp;P<sub><em>i+1+k<\/em><\/sub><em><sup>k<\/sup><\/em>\u2208R<sup>V<\/sup>&nbsp;\uff08V \u4e3a\u8bcd\u8868\u5927\u5c0f\uff09\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-17.png\" alt=\"\" 
class=\"wp-image-23715\" width=\"448\" height=\"38\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-17.png 620w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-17-300x25.png 300w\" sizes=\"(max-width: 448px) 100vw, 448px\" \/><\/figure>\n\n\n\n<p>\u8f93\u51fa\u5c42&nbsp;<strong>OutHead(\u00b7)&nbsp;<\/strong>\u9996\u5148\u901a\u8fc7\u7ebf\u6027\u53d8\u6362\u5c06\u7279\u5f81\u8868\u793a\u8f6c\u6362\u4e3a logits\uff0c\u7136\u540e\u4f7f\u7528&nbsp;<strong>Softmax(\u00b7)<\/strong>&nbsp;\u51fd\u6570\u8ba1\u7b97\u7b2c&nbsp;k&nbsp;\u4e2a\u9884\u6d4b token \u7684\u6982\u7387\u5206\u5e03\u3002\u4e0e\u5411\u91cf\u5c42\u7c7b\u4f3c\uff0c\u6bcf\u4e2a MTP \u6a21\u5757\u7684\u8f93\u51fa\u5c42\u4e5f\u4e0e\u4e3b\u6a21\u578b\u5171\u4eab\u3002\u8fd9\u79cd\u4fdd\u6301\u9884\u6d4b\u56e0\u679c\u94fe\u7684\u8bbe\u8ba1\u601d\u8def\u4e0e&nbsp;<strong>EAGLE<\/strong>&nbsp;\u76f8\u8fd1\uff0c\u4f46\u4e24\u8005\u76ee\u6807\u4e0d\u540c\uff1aEAGLE \u4e3b\u8981\u7528\u4e8e\u63a8\u6d4b\u89e3\u7801\uff0c\u800c\u672c\u7814\u7a76\u4e2d\u7684 MTP \u4e3b\u8981\u7528\u4e8e\u4f18\u5316\u8bad\u7ec3\u6548\u679c\u3002<\/p>\n\n\n\n<p><strong>MTP \u8bad\u7ec3\u76ee\u6807\u4f18\u5316\uff1a<\/strong>&nbsp;\u7cfb\u7edf\u4e3a\u6bcf\u4e2a\u9884\u6d4b\u5c42\u7ea7\u8ba1\u7b97\u4ea4\u53c9\u71b5\u635f\u5931&nbsp;L<em><sub>MTP<\/sub><\/em><sup><em>k<\/em><\/sup>&nbsp;\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"831\" height=\"70\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-18.png\" alt=\"\" class=\"wp-image-23717\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-18.png 831w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-18-300x25.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-18-768x65.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-18-825x70.png 825w\" sizes=\"(max-width: 831px) 100vw, 831px\" 
\/><\/figure>\n\n\n\n<p>\u5176\u4e2d\uff0cT&nbsp;\u8868\u793a\u8f93\u5165\u5e8f\u5217\u957f\u5ea6\uff0ct<sub>i<\/sub>&nbsp;\u4ee3\u8868\u7b2c&nbsp;i&nbsp;\u4e2a\u4f4d\u7f6e\u7684\u76ee\u6807 token\uff0c<em>P<sub>i<\/sub><sup>k<\/sup>[t<sub>i<\/sub>]<\/em>&nbsp;\u8868\u793a\u7b2c&nbsp;k&nbsp;\u4e2a MTP \u6a21\u5757\u5bf9<em>&nbsp;t<sub>i<\/sub>&nbsp;<\/em>\u7684\u9884\u6d4b\u6982\u7387\u3002\u6700\u7ec8\uff0c\u901a\u8fc7\u8ba1\u7b97\u6240\u6709\u5c42\u7ea7 MTP \u635f\u5931\u7684\u5e73\u5747\u503c\u5e76\u4e58\u4ee5\u6743\u91cd\u7cfb\u6570&nbsp;\u03bb&nbsp;\uff0c\u5f97\u5230\u603b\u4f53 MTP \u635f\u5931&nbsp;L<em><sub>MTP<\/sub><\/em>&nbsp;\uff0c\u4f5c\u4e3a DeepSeek-V3 \u7684\u8865\u5145\u8bad\u7ec3\u76ee\u6807\uff1a<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-19.png\" alt=\"\" class=\"wp-image-23718\" width=\"166\" height=\"57\"\/><\/figure><\/div>\n\n\n\n<p><strong>\u63a8\u7406\u9636\u6bb5\u7684MTP\uff1a<\/strong>\u5e94\u7528 MTP \u673a\u5236\u7684\u4e3b\u8981\u76ee\u7684\u662f\u63d0\u5347\u57fa\u7840\u6a21\u578b\u7684\u6027\u80fd\uff0c\u56e0\u6b64\u5728\u5b9e\u9645\u63a8\u7406\u9636\u6bb5\u53ef\u4ee5\u4e0d\u4f7f\u7528 MTP \u6a21\u5757\uff0c\u57fa\u7840\u6a21\u578b\u80fd\u591f\u72ec\u7acb\u5b8c\u6210\u6b63\u5e38\u63a8\u7406\u3002\u6b64\u5916\uff0c\u8fd9\u4e9b MTP \u6a21\u5757\u4e5f\u53ef\u4ee5\u88ab\u91cd\u65b0\u914d\u7f6e\u7528\u4e8e\u63a8\u6d4b\u89e3\u7801\uff0c\u4ece\u800c\u964d\u4f4e\u6a21\u578b\u751f\u6210\u7684\u65f6\u95f4\u5ef6\u8fdf\u3002<\/p>\n\n\n\n<h2 id=\"h_14890557782_4\">\u57fa\u7840\u8bbe\u65bd<\/h2>\n\n\n\n<h3 id=\"h_14890557782_5\">\u8ba1\u7b97\u96c6\u7fa4\u67b6\u6784<\/h3>\n\n\n\n<p>DeepSeek-V3 \u7684\u8bad\u7ec3\u73af\u5883\u662f\u4e00\u4e2a\u914d\u5907&nbsp;<strong>2048 \u4e2a&nbsp;NVIDIA H800 
GPU<\/strong>&nbsp;\u7684\u5927\u89c4\u6a21\u8ba1\u7b97\u96c6\u7fa4\u3002<\/p>\n\n\n\n<p>\u8be5\u96c6\u7fa4\u4e2d\u7684\u6bcf\u4e2a\u8ba1\u7b97\u8282\u70b9\u5305\u542b&nbsp;<strong>8 \u4e2a GPU<\/strong>\uff0c\u8fd9\u4e9b GPU \u901a\u8fc7\u8282\u70b9\u5185\u7684&nbsp;<strong>NVLink&nbsp;<\/strong>\u548c&nbsp;<strong>NVSwitch&nbsp;<\/strong>\u5b9e\u73b0\u9ad8\u901f\u4e92\u8fde\u3002\u8282\u70b9\u4e4b\u95f4\u5219\u91c7\u7528&nbsp;<strong>InfiniBand (IB)<\/strong>&nbsp;\u6280\u672f\u8fdb\u884c\u9ad8\u6548\u901a\u4fe1\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_6\">\u8bad\u7ec3\u6846\u67b6\u8bbe\u8ba1<\/h3>\n\n\n\n<p>\u6a21\u578b\u8bad\u7ec3\u57fa\u4e8e\u81ea\u4e3b\u7814\u53d1\u7684&nbsp;<strong>HAI-LLM&nbsp;<\/strong>\u6846\u67b6\uff0c\u8fd9\u662f\u4e00\u4e2a\u7ecf\u8fc7\u4f18\u5316\u7684\u9ad8\u6548\u8f7b\u91cf\u7ea7\u8bad\u7ec3\u7cfb\u7edf\u3002DeepSeek-V3 \u7684\u5e76\u884c\u7b56\u7565\u5305\u542b\u4e09\u4e2a\u5c42\u9762\uff1a16 \u8def<strong>\u6d41\u6c34\u7ebf\u5e76\u884c(Pipeline Parallelism, PP)<\/strong>\u3001\u8de8 8 \u4e2a\u8282\u70b9\u7684 64 \u8def<strong>\u4e13\u5bb6\u5e76\u884c(Expert Parallelism, EP)<\/strong>\uff0c\u4ee5\u53ca ZeRO-1&nbsp;<strong>\u6570\u636e\u5e76\u884c(Data Parallelism, DP)<\/strong>\u3002<\/p>\n\n\n\n<p>\u4e3a\u5b9e\u73b0\u9ad8\u6548\u8bad\u7ec3\uff0c\u8be5\u6846\u67b6\u8fdb\u884c\u4e86\u591a\u65b9\u9762\u7684\u5de5\u7a0b\u4f18\u5316\uff1a<\/p>\n\n\n\n<ol><li>\u5f00\u53d1\u4e86&nbsp;<strong>DualPipe&nbsp;<\/strong>\u6d41\u6c34\u7ebf\u5e76\u884c\u7b97\u6cd5\uff0c\u76f8\u6bd4\u73b0\u6709 PP 
\u65b9\u6cd5\uff0c\u8be5\u7b97\u6cd5<strong>\u663e\u8457\u51cf\u5c11\u4e86\u6d41\u6c34\u7ebf\u505c\u6ede<\/strong>\u73b0\u8c61\u3002\u66f4\u91cd\u8981\u7684\u662f\uff0c\u5b83\u5b9e\u73b0\u4e86<strong>\u524d\u5411\u548c\u540e\u5411\u8fc7\u7a0b\u4e2d\u8ba1\u7b97\u4e0e\u901a\u4fe1\u9636\u6bb5\u7684\u91cd\u53e0<\/strong>\uff0c\u6709\u6548\u89e3\u51b3\u4e86\u8de8\u8282\u70b9\u4e13\u5bb6\u5e76\u884c\u5e26\u6765\u7684\u901a\u4fe1\u8d1f\u8f7d\u95ee\u9898\u3002<\/li><li>\u4f18\u5316\u4e86<strong>\u8de8\u8282\u70b9\u5168\u5bf9\u5168\u901a\u4fe1\u5185\u6838<\/strong>\uff0c\u5145\u5206\u5229\u7528 IB \u548c NVLink \u5e26\u5bbd\uff0c\u540c\u65f6\u51cf\u5c11\u4e86\u901a\u4fe1\u6240\u9700\u7684<strong>\u6d41\u5f0f\u591a\u5904\u7406\u5668(SMs)<\/strong>\u8d44\u6e90\u5360\u7528\u3002<\/li><li>\u901a\u8fc7\u7cbe\u7ec6\u7684\u5185\u5b58\u7ba1\u7406\u4f18\u5316\uff0c\u4f7f\u5f97\u6a21\u578b\u8bad\u7ec3\u65e0\u9700\u4f9d\u8d56\u5f00\u9500\u8f83\u5927\u7684<strong>\u5f20\u91cf\u5e76\u884c(Tensor Parallelism, TP)<\/strong>\u6280\u672f\u3002<\/li><\/ol>\n\n\n\n<p><strong>DualPipe \u6280\u672f\u4e0e\u8ba1\u7b97\u901a\u4fe1\u534f\u540c\u4f18\u5316<\/strong><\/p>\n\n\n\n<p>\u5728 DeepSeek-V3 \u4e2d\uff0c\u8de8\u8282\u70b9\u4e13\u5bb6\u5e76\u884c\u673a\u5236\u5f15\u5165\u4e86\u663e\u8457\u7684\u901a\u4fe1\u5f00\u9500\uff0c\u5bfc\u81f4\u8ba1\u7b97\u4e0e\u901a\u4fe1\u6bd4\u4f8b\u63a5\u8fd1<strong>1:1<\/strong>\uff0c\u5f71\u54cd\u4e86\u8bad\u7ec3\u6548\u7387\u3002<\/p>\n\n\n\n<p>\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0c\u6a21\u578b\u91c7\u7528\u4e86\u521b\u65b0\u6027\u7684 DualPipe \u6d41\u6c34\u7ebf\u5e76\u884c\u7b97\u6cd5\u3002\u8be5\u7b97\u6cd5\u901a\u8fc7\u4e24\u4e2a\u5173\u952e\u4f18\u5316\u5b9e\u73b0\u6027\u80fd\u63d0\u5347\uff1a\u6709\u6548\u878d\u5408\u524d\u5411\u548c\u540e\u5411\u8ba1\u7b97\u7684\u901a\u4fe1\u9636\u6bb5\uff0c\u540c\u65f6\u51cf\u5c11\u6d41\u6c34\u7ebf\u963b\u585e\u3002<\/p>\n\n\n\n<p>DualPipe 
\u7684\u6838\u5fc3\u521b\u65b0\u5728\u4e8e\u5b9e\u73b0\u4e86\u5355\u4e2a\u524d\u5411\u548c\u540e\u5411\u8ba1\u7b97\u5757\u5185\u7684\u8ba1\u7b97\u901a\u4fe1\u91cd\u53e0\u3002\u5177\u4f53\u6765\u8bf4\uff0c\u6bcf\u4e2a\u8ba1\u7b97\u5757\u88ab\u5212\u5206\u4e3a\u56db\u4e2a\u529f\u80fd\u6a21\u5757\uff1a<\/p>\n\n\n\n<ul><li><strong>\u6ce8\u610f\u529b\u673a\u5236<\/strong><\/li><li><strong>\u5168\u8282\u70b9\u6570\u636e\u5206\u53d1<\/strong><\/li><li><strong>MLP \u5904\u7406<\/strong><\/li><li><strong>\u5168\u8282\u70b9\u6570\u636e\u6574\u5408<\/strong><\/li><\/ul>\n\n\n\n<p>\u7279\u522b\u5730\uff0c\u5728\u540e\u5411\u8ba1\u7b97\u5757\u4e2d\uff0c\u6ce8\u610f\u529b\u548c MLP \u6a21\u5757\u90fd\u88ab\u8fdb\u4e00\u6b65\u7ec6\u5206\u4e3a<strong>\u8f93\u5165\u68af\u5ea6\u8ba1\u7b97<\/strong>\u548c<strong>\u6743\u91cd\u68af\u5ea6\u8ba1\u7b97<\/strong>\u4e24\u4e2a\u90e8\u5206\uff0c\u8fd9\u4e00\u8bbe\u8ba1\u501f\u9274\u4e86&nbsp;<strong>ZeroBubble<\/strong>&nbsp;\u7684\u601d\u8def\u3002\u6b64\u5916\uff0c\u8fd8\u5305\u542b\u4e13\u95e8\u7684 PP \u901a\u4fe1\u6a21\u5757\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"996\" height=\"112\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-20.png\" alt=\"\" class=\"wp-image-23722\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-20.png 996w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-20-300x34.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-20-768x86.png 768w\" sizes=\"(max-width: 996px) 100vw, 996px\" \/><figcaption>\u56fe4\uff1a\u4e2a\u4f53\u524d\u5411\u548c\u540e\u5411\u5757\u7684\u91cd\u53e0\u7b56\u7565\uff08Transformer \u5757\u7684\u8fb9\u754c\u672a\u5bf9\u9f50\uff09\u3002\u6a59\u8272\u8868\u793a\u524d\u5411\u8ba1\u7b97\uff0c\u7eff\u8272\u8868\u793a\u201c\u8f93\u5165\u7684\u540e\u5411\u8ba1\u7b97\u201d\uff0c\u84dd\u8272\u8868\u793a\u201c\u6743\u91cd\u7684\u540e\u5411\u8ba1\u7b97\u201d\uff0c\u7d2b\u8272\u8868\u793a PP 
\u901a\u4fe1\uff0c\u7ea2\u8272\u8868\u793a\u5c4f\u969c\u3002\u5168\u5bf9\u5168\uff08all-to-all\uff09\u901a\u4fe1\u548c PP \u901a\u4fe1\u53ef\u4ee5\u5b8c\u5168\u9690\u85cf\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u5982\u56fe4\u6240\u793a\uff0c\u901a\u8fc7\u4f18\u5316\u6392\u5217\u8fd9\u4e9b\u529f\u80fd\u6a21\u5757\uff0c\u5e76\u7cbe\u786e\u8c03\u63a7\u7528\u4e8e\u901a\u4fe1\u548c\u8ba1\u7b97\u7684 GPU \u6d41\u5904\u7406\u5668\u8d44\u6e90\u5206\u914d\u6bd4\u4f8b\uff0c\u7cfb\u7edf\u80fd\u591f\u5728\u8fd0\u884c\u8fc7\u7a0b\u4e2d\u6709\u6548\u9690\u85cf\u5168\u8282\u70b9\u901a\u4fe1\u548c PP \u901a\u4fe1\u5f00\u9500\u3002<\/p>\n\n\n\n<p>\u5b8c\u6574\u7684 DualPipe \u8c03\u5ea6\u673a\u5236\u5982\u56fe5\u6240\u793a\u3002\u5b83\u91c7\u7528\u521b\u65b0\u7684<strong>\u53cc\u5411\u6d41\u6c34\u7ebf\u8c03\u5ea6\u7b56\u7565<\/strong>\uff0c\u5b9e\u73b0\u4e86\u4ece\u6d41\u6c34\u7ebf\u4e24\u7aef\u540c\u65f6\u8f93\u5165\u5fae\u6279\u6b21\u6570\u636e\uff0c\u4f7f\u5f97\u5927\u90e8\u5206\u901a\u4fe1\u8fc7\u7a0b\u80fd\u591f\u4e0e\u8ba1\u7b97\u8fc7\u7a0b\u5b8c\u5168\u91cd\u53e0\u3002\u8fd9\u79cd\u8bbe\u8ba1\u786e\u4fdd\u4e86\u5373\u4f7f\u5728\u6a21\u578b\u89c4\u6a21\u8fdb\u4e00\u6b65\u6269\u5927\u7684\u60c5\u51b5\u4e0b\uff0c\u53ea\u8981\u7ef4\u6301\u9002\u5f53\u7684\u8ba1\u7b97\u901a\u4fe1\u6bd4\u4f8b\uff0c\u5c31\u80fd\u5728\u8282\u70b9\u95f4\u5b9e\u73b0\u7ec6\u7c92\u5ea6\u7684\u4e13\u5bb6\u5206\u914d\uff0c\u540c\u65f6\u5c06\u5168\u8282\u70b9\u901a\u4fe1\u5f00\u9500\u964d\u81f3\u63a5\u8fd1\u4e8e\u96f6\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"1018\" height=\"172\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-21.png\" alt=\"\" class=\"wp-image-23724\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-21.png 1018w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-21-300x51.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-21-768x130.png 768w\" sizes=\"(max-width: 1018px) 
100vw, 1018px\" \/><figcaption>\u56fe5\uff1a8 \u4e2a PP \u6392\u4f4d\u548c 20 \u4e2a\u5fae\u6279\u6b21\u5728\u4e24\u4e2a\u65b9\u5411\u4e0a\u7684\u53cc\u7ba1\u9053\u8c03\u5ea6\u793a\u4f8b\u3002\u53cd\u5411\u65b9\u5411\u7684\u5fae\u6279\u6b21\u4e0e\u524d\u5411\u65b9\u5411\u7684\u5fae\u6279\u6b21\u5bf9\u79f0\uff0c\u56e0\u6b64\u4e3a\u7b80\u5316\u8bf4\u660e\uff0c\u7701\u7565\u4e86\u53cd\u5411\u5fae\u6279\u6b21\u7684\u6279\u6b21 ID\u3002\u4e24\u4e2a\u7531\u5171\u4eab\u9ed1\u8272\u8fb9\u6846\u56f4\u4f4f\u7684\u5355\u5143\u5b58\u5728\u76f8\u4e92\u91cd\u53e0\u7684\u8ba1\u7b97\u548c\u901a\u4fe1\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u5373\u4f7f\u5728\u901a\u4fe1\u8d1f\u8f7d\u76f8\u5bf9\u8f83\u8f7b\u7684\u5e38\u89c4\u5e94\u7528\u573a\u666f\u4e2d\uff0cDualPipe \u4ecd\u7136\u5c55\u73b0\u51fa\u663e\u8457\u7684\u6548\u7387\u4f18\u52bf\u3002\u88682\u5bf9\u6bd4\u4e86\u4e0d\u540c PP \u65b9\u6cd5\u5728\u6d41\u6c34\u7ebf\u963b\u585e\u548c\u5185\u5b58\u4f7f\u7528\u65b9\u9762\u7684\u8868\u73b0\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"945\" height=\"170\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-22.png\" alt=\"\" class=\"wp-image-23725\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-22.png 945w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-22-300x54.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-22-768x138.png 768w\" sizes=\"(max-width: 945px) 100vw, 945px\" \/><\/figure>\n\n\n\n<p>\u6570\u636e\u663e\u793a\uff0c\u76f8\u6bd4&nbsp;<strong>ZB1P&nbsp;<\/strong>\u548c&nbsp;<strong>1F1B<\/strong>\uff0cDualPipe \u5927\u5e45\u51cf\u5c11\u4e86\u6d41\u6c34\u7ebf\u963b\u585e\uff0c\u800c\u5cf0\u503c\u6fc0\u6d3b\u5185\u5b58\u4ec5\u589e\u52a0\u4e86&nbsp;1\/PP&nbsp;\u500d\u3002\u867d\u7136 DualPipe 
\u9700\u8981\u7ef4\u62a4\u4e24\u4efd\u6a21\u578b\u53c2\u6570\u526f\u672c\uff0c\u4f46\u7531\u4e8e\u8bad\u7ec3\u8fc7\u7a0b\u91c7\u7528\u4e86\u5927\u89c4\u6a21 EP\uff0c\u8fd9\u4e00\u5197\u4f59\u5e76\u672a\u5bfc\u81f4\u663e\u8457\u7684\u5185\u5b58\u5f00\u9500\u589e\u52a0\u3002<\/p>\n\n\n\n<p>\u4e0e&nbsp;<strong>Chimera<\/strong>&nbsp;\u76f8\u6bd4\uff0cDualPipe \u7684\u5b9e\u73b0\u8981\u6c42\u66f4\u4e3a\u7075\u6d3b\uff0c\u4ec5\u9700\u8981\u6d41\u6c34\u7ebf\u9636\u6bb5\u6570\u548c\u5fae\u6279\u6b21\u6570\u80fd\u88ab 2 \u6574\u9664\uff0c\u800c\u4e0d\u8981\u6c42\u5fae\u6279\u6b21\u6570\u5fc5\u987b\u80fd\u88ab\u6d41\u6c34\u7ebf\u9636\u6bb5\u6570\u6574\u9664\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0cDualPipe \u7684\u4e00\u4e2a\u91cd\u8981\u7279\u6027\u662f\uff0c<strong>\u968f\u7740\u5fae\u6279\u6b21\u6570\u91cf\u7684\u589e\u52a0\uff0c\u5176\u6d41\u6c34\u7ebf\u963b\u585e\u548c\u6fc0\u6d3b\u5185\u5b58\u5360\u7528\u90fd\u4e0d\u4f1a\u76f8\u5e94\u589e\u52a0<\/strong>\u3002<\/p>\n\n\n\n<p><strong>\u8de8\u8282\u70b9all-to-all\u901a\u4fe1\u7684\u9ad8\u6548\u5b9e\u73b0<\/strong><\/p>\n\n\n\n<p>\u4e3a\u4e86\u786e\u4fdd DualPipe \u5177\u6709\u5145\u8db3\u7684\u8ba1\u7b97\u6027\u80fd\uff0c\u7cfb\u7edf\u91c7\u7528\u4e86\u5b9a\u5236\u5316\u7684\u9ad8\u6548\u8de8\u8282\u70b9\u5168\u5bf9\u5168\u901a\u4fe1\u5185\u6838\uff08\u5305\u62ec\u5206\u53d1\u548c\u7ec4\u5408\u529f\u80fd\uff09\uff0c\u4ee5\u8282\u7701\u7528\u4e8e\u901a\u4fe1\u7684 SMs \u6570\u91cf\u3002<\/p>\n\n\n\n<p>\u8fd9\u4e9b\u5185\u6838\u7684\u5b9e\u73b0\u4e0e MoE \u95e8\u63a7\u7b97\u6cd5\u548c\u96c6\u7fa4\u7684\u7f51\u7edc\u62d3\u6251\u7ed3\u6784\u8fdb\u884c\u4e86\u534f\u540c\u8bbe\u8ba1\u3002\u5177\u4f53\u800c\u8a00\uff0c\u5728\u8be5\u96c6\u7fa4\u4e2d\uff0c\u8de8\u8282\u70b9 GPU \u901a\u8fc7 IB \u5b9e\u73b0\u5168\u8fde\u63a5\uff0c\u8282\u70b9\u5185\u901a\u4fe1\u5219\u901a\u8fc7 NVLink \u5904\u7406\u3002NVLink \u63d0\u4f9b&nbsp;<strong>160GB\/s&nbsp;<\/strong>\u7684\u5e26\u5bbd\uff0c\u7ea6\u4e3a IB\uff0850GB\/s\uff09\u7684 3.2 
\u500d\u3002\u4e3a\u4e86\u6709\u6548\u5229\u7528 IB \u548c NVLink \u7684\u4e0d\u540c\u5e26\u5bbd\u7279\u6027\uff0c\u7cfb\u7edf\u9650\u5236\u6bcf\u4e2a token \u6700\u591a\u5206\u53d1\u5230 4 \u4e2a\u8282\u70b9\uff0c\u4ece\u800c\u51cf\u5c11 IB \u6d41\u91cf\u3002\u5bf9\u4e8e\u6bcf\u4e2a token\uff0c\u5f53\u786e\u5b9a\u5176\u8def\u7531\u51b3\u7b56\u540e\uff0c\u9996\u5148\u901a\u8fc7 IB \u4f20\u8f93\u5230\u76ee\u6807\u8282\u70b9\u4e0a\u5177\u6709\u76f8\u540c\u8282\u70b9\u5185\u7d22\u5f15\u7684 GPU\u3002\u4e00\u65e6\u5230\u8fbe\u76ee\u6807\u8282\u70b9\uff0c\u7cfb\u7edf\u786e\u4fdd\u5176\u901a\u8fc7 NVLink \u5373\u65f6\u8f6c\u53d1\u5230\u627f\u8f7d\u76ee\u6807\u4e13\u5bb6\u7684\u7279\u5b9a GPU\uff0c\u907f\u514d\u88ab\u540e\u7eed\u5230\u8fbe\u7684 token \u963b\u585e\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u8fd9\u79cd\u65b9\u5f0f\uff0cIB \u548c NVLink \u7684\u901a\u4fe1\u5b9e\u73b0\u5b8c\u5168\u91cd\u53e0\uff0c\u6bcf\u4e2a token \u80fd\u591f\u5728\u4e0d\u4ea7\u751f NVLink \u989d\u5916\u5f00\u9500\u7684\u60c5\u51b5\u4e0b\uff0c\u5728\u6bcf\u4e2a\u8282\u70b9\u4e0a\u5e73\u5747\u9ad8\u6548\u9009\u62e9 3.2 \u4e2a\u4e13\u5bb6\u3002\u8fd9\u610f\u5473\u7740\uff0c<strong>\u867d\u7136 DeepSeek-V3 \u5b9e\u9645\u53ea\u9009\u62e9 8 \u4e2a\u8def\u7531\u4e13\u5bb6\uff0c\u4f46\u5b83\u53ef\u4ee5\u5c06\u8fd9\u4e2a\u6570\u5b57\u6269\u5c55\u5230\u6700\u591a 13 \u4e2a\u4e13\u5bb6\uff084 \u4e2a\u8282\u70b9 \u00d7 3.2 \u4e2a\u4e13\u5bb6\/\u8282\u70b9\uff09\uff0c\u540c\u65f6\u4fdd\u6301\u76f8\u540c\u7684\u901a\u4fe1\u6210\u672c<\/strong>\u3002<\/p>\n\n\n\n<p>\u603b\u4f53\u800c\u8a00\uff0c\u5728\u8fd9\u79cd\u901a\u4fe1\u7b56\u7565\u4e0b\uff0c\u4ec5\u9700 20 \u4e2a SMs \u5c31\u8db3\u4ee5\u5145\u5206\u5229\u7528 IB \u548c NVLink \u7684\u5e26\u5bbd\u3002\u5177\u4f53\u800c\u8a00\uff0c\u7cfb\u7edf\u91c7\u7528\u4e86\u7ebf\u7a0b\u4e13\u95e8\u5316\u6280\u672f\uff0c\u5c06 20 \u4e2a SMs \u5212\u5206\u4e3a 10 \u4e2a\u901a\u4fe1\u4fe1\u9053\u3002<\/p>\n\n\n\n<p>\u5728\u5206\u53d1\u8fc7\u7a0b\u4e2d\uff0c(1)IB 
\u53d1\u9001\u3001(2)&nbsp;IB \u5230 NVLink \u8f6c\u53d1\u548c(3)&nbsp;NVLink \u63a5\u6536\u7531\u5404\u81ea\u7684\u7ebf\u7a0b\u7ec4\u5904\u7406\u3002\u5206\u914d\u7ed9\u6bcf\u4e2a\u901a\u4fe1\u4efb\u52a1\u7684\u7ebf\u7a0b\u7ec4\u6570\u91cf\u6839\u636e\u6240\u6709 SMs \u7684\u5b9e\u9645\u5de5\u4f5c\u8d1f\u8f7d\u52a8\u6001\u8c03\u6574\u3002<\/p>\n\n\n\n<p>\u7c7b\u4f3c\u5730\uff0c\u5728\u7ec4\u5408\u8fc7\u7a0b\u4e2d\uff0c(1)&nbsp;NVLink \u53d1\u9001\u3001(2)&nbsp;NVLink \u5230 IB \u8f6c\u53d1\u548c\u7d2f\u79ef\uff0c\u4ee5\u53ca(3)&nbsp;IB \u63a5\u6536\u548c\u7d2f\u79ef\u4e5f\u7531\u52a8\u6001\u8c03\u6574\u7684\u7ebf\u7a0b\u7ec4\u5904\u7406\u3002\u6b64\u5916\uff0c\u5206\u53d1\u548c\u7ec4\u5408\u5185\u6838\u4e0e\u8ba1\u7b97\u6d41\u91cd\u53e0\uff0c\u56e0\u6b64\u8fd8\u8003\u8651\u4e86\u5b83\u4eec\u5bf9\u5176\u4ed6 SM \u8ba1\u7b97\u5185\u6838\u7684\u5f71\u54cd\u3002\u5177\u4f53\u800c\u8a00\uff0c\u7cfb\u7edf\u91c7\u7528\u4e86<strong>\u5b9a\u5236\u7684 PTX \u6307\u4ee4<\/strong>\u5e76<strong>\u81ea\u52a8\u8c03\u6574\u901a\u4fe1\u5757\u5927\u5c0f<\/strong>\uff0c\u8fd9\u663e\u8457\u964d\u4f4e\u4e86 L2 \u7f13\u5b58\u7684\u4f7f\u7528\u548c\u5bf9\u5176\u4ed6 SMs \u7684\u5e72\u6270\u3002<\/p>\n\n\n\n<p><strong>\u6781\u81f4\u7684\u5185\u5b58\u8282\u7701\u4e0e\u6700\u5c0f\u5f00\u9500<\/strong><\/p>\n\n\n\n<p>\u4e3a\u4e86\u51cf\u5c11\u8bad\u7ec3\u671f\u95f4\u7684\u5185\u5b58\u5360\u7528\uff0c\u7cfb\u7edf\u91c7\u7528\u4e86\u4ee5\u4e0b\u6280\u672f\uff1a<\/p>\n\n\n\n<p><strong>RMSNorm&nbsp;\u548c MLA \u4e0a\u6295\u5f71\u7684\u91cd\u8ba1\u7b97<\/strong>\u3002\u5728\u53cd\u5411\u4f20\u64ad\u671f\u95f4\u91cd\u65b0\u8ba1\u7b97\u6240\u6709 RMSNorm \u64cd\u4f5c\u548c MLA 
\u4e0a\u6295\u5f71\uff0c\u4ece\u800c\u907f\u514d\u4e86\u6301\u4e45\u5b58\u50a8\u5176\u8f93\u51fa\u6fc0\u6d3b\u7684\u9700\u6c42\u3002\u8fd9\u79cd\u7b56\u7565\u867d\u5e26\u6765\u5c11\u91cf\u5f00\u9500\uff0c\u4f46\u663e\u8457\u51cf\u5c11\u4e86\u5b58\u50a8\u6fc0\u6d3b\u6240\u9700\u7684\u5185\u5b58\u3002<\/p>\n\n\n\n<p><strong>CPU \u4e2d\u7684\u6307\u6570\u79fb\u52a8\u5e73\u5747(EMA)<\/strong>\u3002\u5728\u8bad\u7ec3\u671f\u95f4\uff0c\u7cfb\u7edf\u5728 CPU \u5185\u5b58\u4e2d\u4fdd\u7559\u6a21\u578b\u53c2\u6570\u7684EMA\uff0c\u7528\u4e8e\u5b66\u4e60\u7387\u8870\u51cf\u540e\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u65e9\u671f\u4f30\u8ba1\u3002EMA \u53c2\u6570\u5b58\u50a8\u5728 CPU \u5185\u5b58\u4e2d\uff0c\u5e76\u5728\u6bcf\u4e2a\u8bad\u7ec3\u6b65\u9aa4\u540e\u5f02\u6b65\u66f4\u65b0\u3002\u8fd9\u79cd\u65b9\u6cd5\u4f7f\u7ef4\u62a4 EMA \u53c2\u6570\u4e0d\u4f1a\u4ea7\u751f\u989d\u5916\u7684\u5185\u5b58\u6216\u65f6\u95f4\u5f00\u9500\u3002<\/p>\n\n\n\n<p><strong>MTP\u7684\u5171\u4eab\u5411\u91cf\u548c\u8f93\u51fa\u5934<\/strong>\u3002\u91c7\u7528 DualPipe \u7b56\u7565\uff0c\u5c06\u6a21\u578b\u7684\u6700\u6d45\u5c42\uff08\u5305\u62ec\u5411\u91cf\u5c42\uff09\u548c\u6700\u6df1\u5c42\uff08\u5305\u62ec\u8f93\u51fa\u5934\uff09\u90e8\u7f72\u5728\u76f8\u540c\u7684PP\u7b49\u7ea7\u4e0a\u3002\u8fd9\u79cd\u5b89\u6392\u4f7f MTP \u6a21\u5757\u548c\u4e3b\u6a21\u578b\u4e4b\u95f4\u80fd\u591f\u7269\u7406\u5171\u4eab\u53c2\u6570\u548c\u68af\u5ea6\uff0c\u5b9e\u73b0\u5171\u4eab\u5411\u91cf\u548c\u8f93\u51fa\u5934\u3002\u8fd9\u79cd\u7269\u7406\u5171\u4eab\u673a\u5236\u8fdb\u4e00\u6b65\u63d0\u9ad8\u4e86\u5185\u5b58\u4f7f\u7528\u6548\u7387\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_7\">FP8 \u8bad\u7ec3<\/h3>\n\n\n\n<p>\u57fa\u4e8e\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u9886\u57df\u7684\u6700\u65b0\u8fdb\u5c55\uff0c\u672c\u7814\u7a76\u5f00\u53d1\u4e86\u4e00\u79cd\u7ec6\u7c92\u5ea6\u6df7\u5408\u7cbe\u5ea6\u6846\u67b6\uff0c\u91c7\u7528 FP8 \u6570\u636e\u683c\u5f0f\u8bad\u7ec3 
DeepSeek-V3\u3002<\/p>\n\n\n\n<p>\u5c3d\u7ba1\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u6280\u672f\u5c55\u73b0\u51fa\u5de8\u5927\u6f5c\u529b\uff0c\u4f46\u5176\u5b9e\u9645\u5e94\u7528\u5e38\u53d7\u5230\u6fc0\u6d3b\u503c\u3001\u6743\u91cd\u548c\u68af\u5ea6\u4e2d\u5f02\u5e38\u503c\u7684\u5236\u7ea6\u3002\u867d\u7136\u63a8\u7406\u91cf\u5316\u6280\u672f\u53d6\u5f97\u91cd\u8981\u7a81\u7834\uff0c\u4f46\u5728\u5927\u89c4\u6a21\u8bed\u8a00\u6a21\u578b\u9884\u8bad\u7ec3\u4e2d\u6210\u529f\u5e94\u7528\u4f4e\u7cbe\u5ea6\u6280\u672f\u7684\u6848\u4f8b\u4ecd\u7136\u6709\u9650\u3002<\/p>\n\n\n\n<p>\u4e3a\u4e86\u5e94\u5bf9\u8fd9\u4e00\u6311\u6218\u5e76\u6709\u6548\u6269\u5c55 FP8 \u683c\u5f0f\u7684\u52a8\u6001\u8303\u56f4\uff0c\u672c\u7814\u7a76\u91c7\u7528\u4e86\u7ec6\u7c92\u5ea6\u91cf\u5316\u7b56\u7565\uff1a<\/p>\n\n\n\n<p>\u91c7\u7528&nbsp;1\u00d7Nc&nbsp;\u5143\u7d20\u7684\u6761\u72b6\u5206\u7ec4\u6216Nc\u00d7Nc\u5143\u7d20\u7684\u5757\u72b6\u5206\u7ec4\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u63d0\u9ad8\u7cbe\u5ea6\u7d2f\u79ef\u8fc7\u7a0b\uff0c\u5927\u5e45\u964d\u4f4e\u4e86\u53cd\u91cf\u5316\u5e26\u6765\u7684\u8ba1\u7b97\u5f00\u9500\uff0c\u8fd9\u5bf9\u5b9e\u73b0\u9ad8\u7cbe\u5ea6 FP8&nbsp;<strong>\u901a\u7528\u77e9\u9635\u4e58\u6cd5(GEMM)<\/strong>\u81f3\u5173\u91cd\u8981\u3002\u6b64\u5916\uff0c\u4e3a\u964d\u4f4e MoE \u8bad\u7ec3\u4e2d\u7684\u5185\u5b58\u548c\u901a\u4fe1\u5f00\u9500\uff0c\u7cfb\u7edf\u91c7\u7528 FP8 \u683c\u5f0f\u8fdb\u884c\u6fc0\u6d3b\u503c\u7684\u7f13\u5b58\u548c\u5206\u53d1\uff0c\u540c\u65f6\u4f7f\u7528&nbsp;BF16&nbsp;\u683c\u5f0f\u5b58\u50a8\u4f4e\u7cbe\u5ea6\u4f18\u5316\u5668\u72b6\u6001\u3002<\/p>\n\n\n\n<p>\u8be5\u6846\u67b6\u5728\u4e0e DeepSeek-V2-Lite \u548c DeepSeek-V2 \u89c4\u6a21\u76f8\u8fd1\u7684\u4e24\u4e2a\u6a21\u578b\u4e0a\u8fdb\u884c\u4e86\u9a8c\u8bc1\uff0c\u8bad\u7ec3\u6570\u636e\u91cf\u7ea6\u4e3a 1T token\uff08\u8be6\u89c1\u539f\u6587\u9644\u5f55B.1\uff09\u3002\u7ed3\u679c\u8868\u660e\uff0c\u4e0e BF16 \u57fa\u51c6\u76f8\u6bd4\uff0c<strong>FP8 
\u8bad\u7ec3\u6a21\u578b\u7684\u76f8\u5bf9\u635f\u5931\u8bef\u5dee\u59cb\u7ec8\u4fdd\u6301\u5728 0.25% \u4ee5\u4e0b<\/strong>\uff0c\u8fd9\u5b8c\u5168\u5728\u8bad\u7ec3\u968f\u673a\u6027\u7684\u53ef\u63a5\u53d7\u8303\u56f4\u5185\u3002<\/p>\n\n\n\n<p><strong>\u6df7\u5408\u7cbe\u5ea6\u6846\u67b6<\/strong><\/p>\n\n\n\n<p>\u672c\u7814\u7a76\u5728\u5df2\u6709\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u6280\u672f\u7684\u57fa\u7840\u4e0a\uff0c\u8bbe\u8ba1\u4e86\u4e13\u95e8\u7684 FP8 \u8bad\u7ec3\u6df7\u5408\u7cbe\u5ea6\u6846\u67b6\u3002\u5728\u8fd9\u4e00\u6846\u67b6\u4e2d\uff0c\u5927\u90e8\u5206\u8ba1\u7b97\u5bc6\u96c6\u578b\u64cd\u4f5c\u91c7\u7528 FP8 \u6267\u884c\uff0c\u800c\u5173\u952e\u64cd\u4f5c\u5219\u4fdd\u6301\u539f\u6709\u6570\u636e\u683c\u5f0f\uff0c\u4ee5\u5b9e\u73b0\u8bad\u7ec3\u6548\u7387\u548c\u6570\u503c\u7a33\u5b9a\u6027\u7684\u6700\u4f18\u5e73\u8861\u3002<\/p>\n\n\n\n<p>\u6574\u4f53\u6846\u67b6\u7ed3\u6784\u5982\u56fe6\u6240\u793a\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"1012\" height=\"377\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-23.png\" alt=\"\" class=\"wp-image-23728\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-23.png 1012w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-23-300x112.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-23-768x286.png 768w\" sizes=\"(max-width: 1012px) 100vw, 1012px\" \/><figcaption>\u56fe6\uff1a\u5e26\u6709 FP8 \u6570\u636e\u683c\u5f0f\u7684\u6574\u4f53\u6df7\u5408\u7cbe\u5ea6\u6846\u67b6\u3002\u4e3a\u6e05\u6670\u8d77\u89c1\uff0c\u4ec5\u5c55\u793a\u4e86\u7ebf\u6027\u7b97\u5b50\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u9996\u5148\uff0c\u4e3a\u63d0\u9ad8\u6a21\u578b\u8bad\u7ec3\u901f\u5ea6\uff0c\u5927\u90e8\u5206\u6838\u5fc3\u8ba1\u7b97\u64cd\u4f5c\uff08\u5c24\u5176\u662f GEMM \u8fd0\u7b97\uff09\uff0c\u5747\u91c7\u7528 FP8 \u7cbe\u5ea6\u5b9e\u73b0\u3002\u8fd9\u4e9b GEMM \u8fd0\u7b97\u63a5\u6536 FP8 
\u683c\u5f0f\u7684\u5f20\u91cf\u8f93\u5165\uff0c\u8f93\u51fa BF16 \u6216 FP32 \u683c\u5f0f\u7684\u7ed3\u679c\u3002\u5982\u56fe6\u6240\u793a\uff0c\u7ebf\u6027\u8fd0\u7b97\u76f8\u5173\u7684\u4e09\u4e2a GEMM \u64cd\u4f5c\uff0c\u5305\u62ec<strong>&nbsp;Fprop\uff08\u524d\u5411\u4f20\u64ad\uff09<\/strong>\u3001<strong>Dgrad\uff08\u6fc0\u6d3b\u503c\u53cd\u5411\u4f20\u64ad\uff09<\/strong>\u548c&nbsp;<strong>Wgrad\uff08\u6743\u91cd\u53cd\u5411\u4f20\u64ad\uff09<\/strong>\uff0c\u5747\u91c7\u7528 FP8 \u6267\u884c\u3002\u8fd9\u79cd\u8bbe\u8ba1\u7b56\u7565<strong>\u7406\u8bba\u4e0a\u5c06\u8ba1\u7b97\u901f\u5ea6\u63d0\u5347\u81f3\u539f\u6709 BF16 \u65b9\u6cd5\u7684\u4e24\u500d<\/strong>\u3002\u540c\u65f6\uff0cFP8 \u683c\u5f0f\u7684 Wgrad GEMM \u4f7f\u5f97\u6fc0\u6d3b\u503c\u80fd\u591f\u4ee5 FP8 \u683c\u5f0f\u5b58\u50a8\u7528\u4e8e\u53cd\u5411\u4f20\u64ad\uff0c<strong>\u663e\u8457\u964d\u4f4e\u4e86\u5185\u5b58\u4f7f\u7528\u91cf<\/strong>\u3002<\/p>\n\n\n\n<p>\u867d\u7136 FP8 \u683c\u5f0f\u5728\u6548\u7387\u65b9\u9762\u5177\u6709\u4f18\u52bf\uff0c\u4f46\u67d0\u4e9b\u8fd0\u7b97\u7531\u4e8e\u5bf9\u8ba1\u7b97\u7cbe\u5ea6\u8f83\u4e3a\u654f\u611f\uff0c\u4ecd\u9700\u8981\u66f4\u9ad8\u7cbe\u5ea6\u7684\u652f\u6301\u3002\u53e6\u5916\uff0c\u90e8\u5206\u8ba1\u7b97\u5f00\u9500\u8f83\u5c0f\u7684\u8fd0\u7b97\u53ef\u4ee5\u91c7\u7528\u66f4\u9ad8\u7cbe\u5ea6\u800c\u4e0d\u4f1a\u663e\u8457\u5f71\u54cd\u6574\u4f53\u8bad\u7ec3\u6548\u7387\u3002<\/p>\n\n\n\n<p>\u56e0\u6b64\uff0c\u7ecf\u8fc7\u8be6\u7ec6\u8bc4\u4f30\uff0c\u7cfb\u7edf\u5bf9\u4ee5\u4e0b\u6a21\u5757\u4fdd\u6301\u539f\u6709\u7cbe\u5ea6\uff08BF16 \u6216 FP32\uff09\uff1a<strong>\u5411\u91cf\u5c42<\/strong>\u3001<strong>\u8f93\u51fa\u5c42<\/strong>\u3001<strong>MoE \u95e8\u63a7\u6a21\u5757<\/strong>\u3001<strong>\u6807\u51c6\u5316\u8fd0\u7b97<\/strong>\u548c<strong>\u6ce8\u610f\u529b\u8fd0\u7b97\u6a21\u5757<\/strong>\u3002\u8fd9\u79cd\u9488\u5bf9\u6027\u7684\u9ad8\u7cbe\u5ea6\u4fdd\u7559\u7b56\u7565\u786e\u4fdd\u4e86 DeepSeek-V3 
\u8bad\u7ec3\u8fc7\u7a0b\u7684\u52a8\u6001\u7a33\u5b9a\u6027\u3002\u4e3a\u8fdb\u4e00\u6b65\u4fdd\u969c\u6570\u503c\u8ba1\u7b97\u7684\u7a33\u5b9a\u6027\uff0c\u4e3b\u8981\u6743\u91cd\u53c2\u6570\u3001\u6743\u91cd\u68af\u5ea6\u548c\u4f18\u5316\u5668\u72b6\u6001\u5747\u91c7\u7528\u66f4\u9ad8\u7cbe\u5ea6\u5b58\u50a8\u3002\u867d\u7136\u8fd9\u4e9b\u9ad8\u7cbe\u5ea6\u7ec4\u4ef6\u4f1a\u5e26\u6765\u4e00\u5b9a\u7684\u5185\u5b58\u5f00\u9500\uff0c\u4f46\u901a\u8fc7\u5728\u5206\u5e03\u5f0f\u8bad\u7ec3\u7cfb\u7edf\u4e2d<strong>\u8de8\u591a\u4e2a DP rank \u8fdb\u884c\u9ad8\u6548\u5206\u7247<\/strong>\uff0c\u8fd9\u4e9b\u989d\u5916\u5f00\u9500\u5f97\u5230\u4e86\u6709\u6548\u63a7\u5236\u3002<\/p>\n\n\n\n<p><strong>\u91cf\u5316\u548c\u4e58\u6cd5\u7cbe\u5ea6\u4f18\u5316<\/strong><\/p>\n\n\n\n<p>\u57fa\u4e8e\u6df7\u5408\u7cbe\u5ea6 FP8 \u6846\u67b6\uff0c\u7814\u7a76\u56e2\u961f\u5f00\u53d1\u4e86\u591a\u79cd\u7b56\u7565\u6765\u63d0\u5347\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u7684\u51c6\u786e\u6027\uff0c\u4e3b\u8981\u4ece\u91cf\u5316\u65b9\u6cd5\u548c\u4e58\u6cd5\u8ba1\u7b97\u4e24\u4e2a\u65b9\u9762\u8fdb\u884c\u4f18\u5316\u3002<\/p>\n\n\n\n<p><strong>\u7ec6\u7c92\u5ea6\u91cf\u5316\u6280\u672f\uff1a<\/strong>&nbsp;\u5728\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u6846\u67b6\u4e2d\uff0c\u7531\u4e8e FP8 \u683c\u5f0f\u7684\u6307\u6570\u4f4d\u8f83\u5c11\u5bfc\u81f4\u5176\u52a8\u6001\u8303\u56f4\u53d7\u9650\uff0c\u7ecf\u5e38\u51fa\u73b0\u6570\u503c\u6ea2\u51fa\u548c\u4e0b\u6ea2\u7684\u95ee\u9898\u3002\u4f20\u7edf\u65b9\u6cd5\u662f\u5c06\u8f93\u5165\u5f20\u91cf\u7684\u6700\u5927\u7edd\u5bf9\u503c\u6620\u5c04\u5230 FP8 
\u683c\u5f0f\u7684\u6700\u5927\u53ef\u8868\u793a\u503c\uff0c\u5c06\u8f93\u5165\u5206\u5e03\u5bf9\u9f50\u5230\u53ef\u8868\u793a\u8303\u56f4\u5185\u3002\u7136\u800c\uff0c\u8fd9\u79cd\u65b9\u6cd5\u4f7f\u5f97\u4f4e\u7cbe\u5ea6\u8bad\u7ec3\u5bf9\u6fc0\u6d3b\u503c\u4e2d\u7684\u6781\u7aef\u503c\u7279\u522b\u654f\u611f\uff0c\u53ef\u80fd\u5bfc\u81f4\u91cf\u5316\u7cbe\u5ea6\u663e\u8457\u4e0b\u964d\u3002<\/p>\n\n\n\n<p>\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0c\u8be5\u7814\u7a76\u63d0\u51fa\u4e86\u4e00\u79cd\u66f4\u7ec6\u7c92\u5ea6\u7684\u91cf\u5316\u65b9\u6cd5\u3002\u5982\u56fe7(a)\u6240\u793a\uff0c\u8be5\u65b9\u6cd5\u91c7\u7528\u4e24\u79cd\u4e0d\u540c\u7684\u5206\u7ec4\u7b56\u7565\uff1a<\/p>\n\n\n\n<ol><li>\u6fc0\u6d3b\u503c\u91c7\u7528 1&#215;128 \u6761\u72b6\u5206\u7ec4\u548c\u7f29\u653e\uff08\u6bcf\u4e2a token \u7684\u6bcf 128 \u4e2a\u901a\u9053\uff09<\/li><li>\u6743\u91cd\u91c7\u7528 128&#215;128 \u5757\u72b6\u5206\u7ec4\u548c\u7f29\u653e\uff08\u6bcf 128 \u4e2a\u8f93\u5165\u901a\u9053\u5bf9\u5e94 128 \u4e2a\u8f93\u51fa\u901a\u9053\uff09<\/li><\/ol>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"957\" height=\"480\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-24.png\" alt=\"\" class=\"wp-image-23729\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-24.png 957w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-24-300x150.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-24-768x385.png 768w\" sizes=\"(max-width: 957px) 100vw, 957px\" \/><figcaption>\u56fe7(a)\uff1a \u7814\u7a76\u63d0\u51fa\u4e86\u4e00\u79cd\u7ec6\u7c92\u5ea6\u91cf\u5316\u65b9\u6cd5\uff0c\u7528\u4e8e\u51cf\u8f7b\u7531\u7279\u5f81\u5f02\u5e38\u503c\u5f15\u8d77\u7684\u91cf\u5316\u8bef\u5dee\uff1b\u4e3a\u7b80\u5316\u8bf4\u660e\uff0c\u4ec5\u5c55\u793a\u4e86\u524d\u5411\u4f20\u64ad\uff08Fprop\uff09\u3002(b)\uff1a \u914d\u5408\u91cf\u5316\u7b56\u7565\uff0c\u56e2\u961f\u901a\u8fc7\u4ee5\u95f4\u9694 NC = 128 
\u5143\u7d20\u7684 MMA \u63d0\u5347\u5230 CUDA \u6838\u5fc3\uff0c\u4ece\u800c\u63d0\u9ad8 FP8 GEMM \u7684\u7cbe\u5ea6\uff0c\u4ee5\u8fdb\u884c\u9ad8\u7cbe\u5ea6\u7d2f\u52a0\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u8fd9\u79cd\u65b9\u6cd5\u901a\u8fc7\u5728\u66f4\u5c0f\u8303\u56f4\u5185\u8c03\u6574<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=%E7%BC%A9%E6%94%BE%E5%9B%A0%E5%AD%90&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">\u7f29\u653e\u56e0\u5b50<\/a>\uff0c\u663e\u8457\u63d0\u9ad8\u4e86\u91cf\u5316\u8fc7\u7a0b\u5bf9\u6781\u7aef\u503c\u7684\u9002\u5e94\u80fd\u529b\u3002\u539f\u6587\u9644\u5f55B.2&nbsp;\u4e2d\u8be6\u7ec6\u5206\u6790\u4e86\u5728\u5757\u72b6\u57fa\u7840\u4e0a\u5bf9\u6fc0\u6d3b\u503c\u8fdb\u884c\u5206\u7ec4\u548c\u7f29\u653e\u65f6\u53ef\u80fd\u51fa\u73b0\u7684\u8bad\u7ec3\u4e0d\u7a33\u5b9a\u73b0\u8c61\u3002<\/p>\n\n\n\n<p>\u8be5\u65b9\u6cd5\u7684\u4e00\u9879\u91cd\u8981\u521b\u65b0\u662f<strong>\u5728 GEMM \u64cd\u4f5c\u7684\u5185\u90e8\u7ef4\u5ea6\u5f15\u5165\u7ec4\u7ea7\u7f29\u653e\u56e0\u5b50<\/strong>\u3002\u867d\u7136\u6807\u51c6 FP8 GEMM \u4e0d\u76f4\u63a5\u652f\u6301\u8fd9\u4e00\u529f\u80fd\uff0c\u4f46\u901a\u8fc7\u4e0e\u7cbe\u786e FP32 \u7d2f\u79ef\u7b56\u7565\u7684\u7ed3\u5408\uff0c\u5b9e\u73b0\u4e86\u9ad8\u6548\u7684\u6267\u884c\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u79cd\u7ec6\u7c92\u5ea6\u91cf\u5316\u7b56\u7565\u4e0e\u5fae\u7f29\u653e\u683c\u5f0f\u7684\u7406\u5ff5\u9ad8\u5ea6\u5951\u5408\uff0c\u800c NVIDIA \u65b0\u4e00\u4ee3 GPU\uff08Blackwell \u7cfb\u5217\uff09\u7684 Tensor Cores \u5df2\u5ba3\u5e03\u5c06\u652f\u6301\u66f4\u7ec6\u7c92\u5ea6\u7684\u5fae\u7f29\u653e\u683c\u5f0f\u3002\u8fd9\u4e00\u8bbe\u8ba1\u4e3a\u9002\u914d\u6700\u65b0 GPU \u67b6\u6784\u7684\u672a\u6765\u7814\u7a76\u63d0\u4f9b\u4e86\u91cd\u8981\u53c2\u8003\u3002<\/p>\n\n\n\n<p><strong>\u7d2f\u79ef\u7cbe\u5ea6\u4f18\u5316\uff1a<\/strong>&nbsp;\u4f4e\u7cbe\u5ea6 GEMM 
\u8fd0\u7b97\u5e38\u89c1\u7684\u4e0b\u6ea2\u95ee\u9898\u4e3b\u8981\u901a\u8fc7\u9ad8\u7cbe\u5ea6\u7d2f\u79ef\u6765\u89e3\u51b3\uff0c\u901a\u5e38\u91c7\u7528 FP32 \u7cbe\u5ea6\u3002\u7136\u800c\uff0c\u5728 NVIDIA H800 GPU \u4e0a\uff0cFP8 GEMM \u7684\u7d2f\u79ef\u7cbe\u5ea6\u4ec5\u80fd\u4fdd\u6301\u7ea6 14 \u4f4d\uff08bit\uff09\uff0c\u8fdc\u4f4e\u4e8e FP32 \u7684\u7d2f\u79ef\u7cbe\u5ea6\u3002\u8fd9\u4e2a\u95ee\u9898\u5728\u5185\u90e8\u7ef4\u5ea6&nbsp;K&nbsp;\u8f83\u5927\u65f6\u5c24\u4e3a\u663e\u8457\uff0c\u8fd9\u6b63\u662f\u5927\u89c4\u6a21\u6a21\u578b\u8bad\u7ec3\u4e2d\u7684\u5e38\u89c1\u60c5\u51b5\uff0c\u56e0\u4e3a\u6279\u91cf\u89c4\u6a21\u548c\u6a21\u578b\u5bbd\u5ea6\u90fd\u6709\u6240\u589e\u52a0\u3002\u4f8b\u5982\uff0c\u5728&nbsp;K=4096&nbsp;\u7684\u4e24\u4e2a\u968f\u673a\u77e9\u9635 GEMM \u8fd0\u7b97\u6d4b\u8bd5\u4e2d\uff0cTensor Cores \u7684\u6709\u9650\u7d2f\u79ef\u7cbe\u5ea6\u5bfc\u81f4\u6700\u5927\u76f8\u5bf9\u8bef\u5dee\u63a5\u8fd1 2%\u3002\u5c3d\u7ba1\u5b58\u5728\u8fd9\u4e9b\u95ee\u9898\uff0c\u90e8\u5206 FP8 \u6846\u67b6\u4ecd\u5c06\u6709\u9650\u7d2f\u79ef\u7cbe\u5ea6\u4f5c\u4e3a\u9ed8\u8ba4\u9009\u9879\uff0c\u8fd9\u4e25\u91cd\u5236\u7ea6\u4e86\u8bad\u7ec3\u7cbe\u5ea6\u7684\u63d0\u5347\u3002<\/p>\n\n\n\n<p>\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0c\u7cfb\u7edf\u91c7\u7528\u4e86 CUDA Cores \u63d0\u5347\u7b56\u7565\u6765\u83b7\u5f97\u66f4\u9ad8\u7684\u8ba1\u7b97\u7cbe\u5ea6\u3002\u5982\u56fe7(b)\u6240\u793a\uff0c\u5728 Tensor Cores \u6267\u884c&nbsp;<strong>MMA\uff08\u77e9\u9635\u4e58\u6cd5\u7d2f\u52a0\uff09<\/strong>\u65f6\uff0c\u4e2d\u95f4\u7ed3\u679c\u5148\u4f7f\u7528\u6709\u9650\u4f4d\u5bbd\u7d2f\u52a0\u3002\u5f53\u8fbe\u5230&nbsp;NC&nbsp;\u95f4\u9694\u65f6\uff0c\u8fd9\u4e9b\u90e8\u5206\u7ed3\u679c\u4f1a\u8f6c\u79fb\u5230 CUDA Cores \u7684 FP32 \u5bc4\u5b58\u5668\u4e2d\u8fdb\u884c\u5168\u7cbe\u5ea6\u7d2f\u52a0\u3002\u7ed3\u5408\u7ec6\u7c92\u5ea6\u91cf\u5316\u5728\u5185\u90e8\u7ef4\u5ea6 K 
\u4e0a\u7684\u7ec4\u7ea7\u7f29\u653e\u56e0\u5b50\uff0c\u7cfb\u7edf\u80fd\u591f\u5728 CUDA Cores \u4e0a\u9ad8\u6548\u5b8c\u6210\u53cd\u91cf\u5316\u8fc7\u7a0b\uff0c\u4ec5\u5e26\u6765\u6781\u5c11\u7684\u989d\u5916\u8ba1\u7b97\u5f00\u9500\u3002<\/p>\n\n\n\n<p>\u8fd9\u79cd\u8bbe\u8ba1\u867d\u7136\u964d\u4f4e\u4e86\u5355\u4e2a\u7ebf\u7a0b\u7ec4\u7684&nbsp;<strong>WGMMA&nbsp;<\/strong>\u6307\u4ee4\u53d1\u51fa\u7387\uff0c\u4f46\u5728 H800 \u67b6\u6784\u4e0a\u901a\u8fc7\u5e76\u53d1\u6267\u884c\u4e24\u4e2a WGMMA \u5f97\u5230\u4e86\u4f18\u5316\uff1a\u4e00\u4e2a\u7ebf\u7a0b\u7ec4\u6267\u884c\u63d0\u5347\u64cd\u4f5c\u7684\u540c\u65f6\uff0c\u53e6\u4e00\u4e2a\u53ef\u4ee5\u6267\u884c MMA \u64cd\u4f5c\u3002\u8fd9\u79cd\u91cd\u53e0\u6267\u884c\u673a\u5236\u4fdd\u8bc1\u4e86 Tensor Cores \u7684\u9ad8\u6548\u5229\u7528\u3002\u5b9e\u9a8c\u8bc1\u660e\uff0c\u5c06&nbsp;NC&nbsp;\u8bbe\u4e3a 128 \u4e2a\u5143\u7d20\uff08\u5373 4 \u4e2a WGMMA\uff09\u662f\u5728\u4e0d\u5f15\u5165\u663e\u8457\u5f00\u9500\u7684\u524d\u63d0\u4e0b\uff0c\u80fd\u591f\u6709\u6548\u63d0\u5347\u7cbe\u5ea6\u7684\u6700\u5c0f\u7d2f\u79ef\u95f4\u9694\u3002<\/p>\n\n\n\n<p>\u5728\u6570\u503c\u8868\u793a\u65b9\u9762\uff0c\u4e0d\u540c\u4e8e\u5148\u524d\u5de5\u4f5c\u91c7\u7528\u7684\u6df7\u5408 FP8 \u683c\u5f0f\uff08Fprop \u4f7f\u7528 E4M3\uff0cDgrad \u548c Wgrad \u4f7f\u7528 E5M2\uff09\uff0c\u672c\u7814\u7a76\u5728<strong>\u6240\u6709\u5f20\u91cf\u8ba1\u7b97\u4e2d\u7edf\u4e00\u91c7\u7528<\/strong>&nbsp;<strong>E4M3 
\u683c\u5f0f<\/strong>\u4ee5\u63d0\u9ad8\u7cbe\u5ea6\u3002\u8fd9\u79cd\u8bbe\u8ba1\u7684\u53ef\u884c\u6027\u6e90\u4e8e\u7ec6\u7c92\u5ea6\u91cf\u5316\u7b56\u7565\uff08\u5e73\u94fa\u548c\u5757\u72b6\u7f29\u653e\uff09\uff0c\u901a\u8fc7\u5728\u8f83\u5c0f\u5143\u7d20\u7ec4\u5185\u5171\u4eab\u6307\u6570\u4f4d\u6765\u6709\u6548\u7f13\u89e3\u6709\u9650\u52a8\u6001\u8303\u56f4\u7684\u5f71\u54cd\u3002<\/p>\n\n\n\n<p>\u4e3a\u786e\u4fdd\u91cf\u5316\u7cbe\u5ea6\u5e76\u7b80\u5316\u6846\u67b6\u8bbe\u8ba1\uff0c\u7cfb\u7edf\u91c7\u7528<strong>\u5728\u7ebf\u91cf\u5316<\/strong>\u65b9\u6cd5\uff0c\u800c\u4e0d\u662f\u50cf\u5176\u4ed6\u5f20\u91cf\u7ea7\u91cf\u5316\u6846\u67b6\u90a3\u6837\u4f7f\u7528\u57fa\u4e8e\u5386\u53f2\u8bb0\u5f55\u7684\u5ef6\u8fdf\u91cf\u5316\u3002\u7cfb\u7edf\u5bf9\u6bcf\u4e2a1\u00d7128\u6fc0\u6d3b\u5e73\u94fa\u6216&nbsp;128\u00d7128&nbsp;\u6743\u91cd\u5757\u5b9e\u65f6\u8ba1\u7b97\u6700\u5927\u7edd\u5bf9\u503c\uff0c\u636e\u6b64\u786e\u5b9a\u7f29\u653e\u56e0\u5b50\u5e76\u5b8c\u6210 FP8 \u683c\u5f0f\u7684\u5728\u7ebf\u91cf\u5316\u3002<\/p>\n\n\n\n<p><strong>\u4f4e\u7cbe\u5ea6\u5b58\u50a8\u4e0e\u901a\u4fe1\u4f18\u5316<\/strong><\/p>\n\n\n\n<p>\u5728 FP8 \u8bad\u7ec3\u6846\u67b6\u7684\u57fa\u7840\u4e0a\uff0c\u901a\u8fc7\u5c06\u7f13\u5b58\u7684\u6fc0\u6d3b\u503c\u548c\u4f18\u5316\u5668\u72b6\u6001\u8f6c\u6362\u4e3a\u66f4\u4f4e\u7cbe\u5ea6\u683c\u5f0f\uff0c\u7cfb\u7edf\u8fdb\u4e00\u6b65\u4f18\u5316\u4e86\u5185\u5b58\u5360\u7528\u548c\u901a\u4fe1\u5f00\u9500\u3002<\/p>\n\n\n\n<p><strong>\u4f18\u5316\u5668\u72b6\u6001\u7684\u7cbe\u5ea6\u4f18\u5316\uff1a&nbsp;<\/strong>\u7cfb\u7edf\u5728&nbsp;<strong>AdamW<\/strong>&nbsp;\u4f18\u5316\u5668\u4e2d\u4f7f\u7528 BF16 \u4ee3\u66ff FP32 
\u683c\u5f0f\u6765\u8bb0\u5f55\u4e00\u9636\u548c\u4e8c\u9636\u52a8\u91cf\uff0c\u8fd9\u79cd\u6539\u53d8\u5e76\u672a\u5e26\u6765\u660e\u663e\u7684\u6027\u80fd\u635f\u5931\u3002\u540c\u65f6\uff0c\u4e3a\u786e\u4fdd\u8bad\u7ec3\u8fc7\u7a0b\u7684\u6570\u503c\u7a33\u5b9a\u6027\uff0c\u4e3b\u8981\u6743\u91cd\u53c2\u6570\uff08\u4f18\u5316\u5668\u5b58\u50a8\uff09\u548c\u68af\u5ea6\u503c\uff08\u7528\u4e8e\u6279\u91cf\u7d2f\u79ef\uff09\u4ecd\u4fdd\u6301 FP32 \u683c\u5f0f\u3002<\/p>\n\n\n\n<p><strong>\u6fc0\u6d3b\u503c\u7cbe\u5ea6\u4f18\u5316\uff1a<\/strong>&nbsp;\u5982\u56fe6\u6240\u793a\uff0cWgrad \u8fd0\u7b97\u91c7\u7528 FP8 \u6267\u884c\u3002\u4e3a\u964d\u4f4e\u5185\u5b58\u5360\u7528\uff0c\u7cfb\u7edf\u5728\u7ebf\u6027\u8fd0\u7b97\u7684\u53cd\u5411\u4f20\u64ad\u4e2d\u4f7f\u7528 FP8 \u683c\u5f0f\u7f13\u5b58\u6fc0\u6d3b\u503c\u3002\u4f46\u5728\u5b9e\u73b0\u4f4e\u6210\u672c\u9ad8\u7cbe\u5ea6\u8bad\u7ec3\u65f6\uff0c\u4ee5\u4e0b\u8fd0\u7b97\u9700\u8981\u7279\u6b8a\u5904\u7406\uff1a<\/p>\n\n\n\n<ol><li><strong>\u6ce8\u610f\u529b\u5c42\u540e\u7684\u7ebf\u6027\u5c42\u8f93\u5165<\/strong>\uff1a\u8fd9\u4e9b\u6fc0\u6d3b\u503c\u540c\u65f6\u7528\u4e8e\u6ce8\u610f\u529b\u8fd0\u7b97\u7684\u53cd\u5411\u4f20\u64ad\uff0c\u56e0\u6b64\u5bf9\u7cbe\u5ea6\u7279\u522b\u654f\u611f\u3002\u7cfb\u7edf\u4e3a\u8fd9\u4e9b\u6fc0\u6d3b\u503c\u4e13\u95e8\u8bbe\u8ba1\u4e86 E5M6 \u6570\u636e\u683c\u5f0f\u3002\u5728\u53cd\u5411\u4f20\u64ad\u65f6\uff0c\u8fd9\u4e9b\u6fc0\u6d3b\u503c\u7684\u91cf\u5316\u6a21\u5f0f\u4ece&nbsp;1\u00d7128&nbsp;\u8f6c\u6362\u4e3a&nbsp;128\u00d71&nbsp;\u3002\u4e3a\u907f\u514d\u5f15\u5165\u989d\u5916\u7684\u91cf\u5316\u8bef\u5dee\uff0c\u6240\u6709\u7f29\u653e\u56e0\u5b50\u90fd\u91c7\u7528 2 \u7684\u6574\u6570\u6b21\u5e42\u3002<\/li><li><strong>MoE \u4e2d SwiGLU \u8fd0\u7b97\u7684\u8f93\u5165<\/strong>\uff1a\u4e3a\u4f18\u5316\u5185\u5b58\u4f7f\u7528\uff0c\u7cfb\u7edf\u4ec5\u7f13\u5b58 SwiGLU 
\u8fd0\u7b97\u7684\u8f93\u5165\uff0c\u5728\u53cd\u5411\u4f20\u64ad\u65f6\u91cd\u65b0\u8ba1\u7b97\u8f93\u51fa\u3002\u8fd9\u4e9b\u6fc0\u6d3b\u503c\u91c7\u7528 FP8 \u683c\u5f0f\u5b58\u50a8\uff0c\u5e76\u901a\u8fc7\u7ec6\u7c92\u5ea6\u91cf\u5316\u65b9\u6cd5\u5b9e\u73b0\u5185\u5b58\u6548\u7387\u548c\u8ba1\u7b97\u7cbe\u5ea6\u7684\u6700\u4f18\u5e73\u8861\u3002<\/li><\/ol>\n\n\n\n<p><strong>\u4f4e\u7cbe\u5ea6\u901a\u4fe1\u4f18\u5316\uff1a<\/strong>&nbsp;\u901a\u4fe1\u5e26\u5bbd\u9650\u5236\u662f MoE \u6a21\u578b\u8bad\u7ec3\u4e2d\u7684\u4e3b\u8981\u6027\u80fd\u74f6\u9888\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0c\u7cfb\u7edf\u5728\u6267\u884c MoE \u4e0a\u6295\u5f71\u524d\u5c06\u6fc0\u6d3b\u503c\u8f6c\u6362\u4e3a FP8 \u683c\u5f0f\uff0c\u518d\u8fdb\u884c\u6570\u636e\u5206\u53d1\uff0c\u8fd9\u79cd\u65b9\u5f0f\u4e0e MoE \u4e0a\u6295\u5f71\u4e2d\u7684 FP8 \u524d\u5411\u4f20\u64ad\u4fdd\u6301\u517c\u5bb9\u3002\u4e0e\u6ce8\u610f\u529b\u5c42\u540e\u7684\u7ebf\u6027\u5c42\u8f93\u5165\u5904\u7406\u65b9\u5f0f\u76f8\u540c\uff0c\u8fd9\u91cc\u7684\u6fc0\u6d3b\u503c\u7f29\u653e\u56e0\u5b50\u4e5f\u91c7\u7528 2 \u7684\u6574\u6570\u6b21\u5e42\u3002\u540c\u6837\u7684\u5904\u7406\u65b9\u5f0f\u4e5f\u5e94\u7528\u4e8e MoE \u4e0b\u6295\u5f71\u524d\u7684\u6fc0\u6d3b\u503c\u68af\u5ea6\u8ba1\u7b97\u3002\u8003\u8651\u5230\u8bad\u7ec3\u7cbe\u5ea6\u7684\u91cd\u8981\u6027\uff0c\u524d\u5411\u548c\u53cd\u5411\u4f20\u64ad\u4e2d\u7684\u7ec4\u5408\u8fd0\u7b97\u73af\u8282\u90fd\u4fdd\u6301 BF16 \u683c\u5f0f\uff0c\u4ee5\u786e\u4fdd\u8bad\u7ec3\u7ba1\u9053\u5173\u952e\u73af\u8282\u7684\u8ba1\u7b97\u7cbe\u5ea6\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_8\">\u63a8\u7406\u548c\u90e8\u7f72<\/h3>\n\n\n\n<p>DeepSeek-V3 \u90e8\u7f72\u5728 H800 \u96c6\u7fa4\u4e0a\uff0c\u96c6\u7fa4\u4e2d\u6bcf\u4e2a\u8282\u70b9\u5185\u7684 GPU \u901a\u8fc7 NVLink \u4e92\u8fde\uff0c\u96c6\u7fa4\u5185\u6240\u6709 GPU \u901a\u8fc7 IB 
\u5b9e\u73b0\u5168\u8fde\u63a5\u3002\u4e3a\u540c\u65f6\u786e\u4fdd\u5728\u7ebf\u670d\u52a1\u8d28\u91cf(SLO)\u548c\u9ad8\u541e\u5410\u91cf\uff0c\u8be5\u7cfb\u7edf\u91c7\u7528\u4e86\u5c06\u9884\u586b\u5145\u548c\u89e3\u7801\u9636\u6bb5\u5206\u79bb\u7684\u90e8\u7f72\u7b56\u7565\u3002<\/p>\n\n\n\n<p><strong>\u9884\u586b\u5145<\/strong><\/p>\n\n\n\n<p>\u9884\u586b\u5145\u9636\u6bb5\u7684\u6700\u5c0f\u90e8\u7f72\u5355\u5143\u914d\u7f6e\u4e3a 4 \u4e2a\u8282\u70b9 32 \u4e2a GPU\u3002<\/p>\n\n\n\n<p>\u6ce8\u610f\u529b\u673a\u5236\u90e8\u5206\u91c7\u7528&nbsp;<strong>4 \u8def\u5f20\u91cf\u5e76\u884c(TP4)<\/strong>\u914d\u5408<strong>\u5e8f\u5217\u5e76\u884c(SP)<\/strong>\uff0c\u7ed3\u5408&nbsp;<strong>8 \u8def\u6570\u636e\u5e76\u884c(DP8)<\/strong>\u3002\u8f83\u5c0f\u7684 TP \u89c4\u6a21\u6709\u6548\u63a7\u5236\u4e86\u901a\u4fe1\u5f00\u9500\u3002<\/p>\n\n\n\n<p>MoE \u90e8\u5206\u91c7\u7528&nbsp;<strong>32 \u8def\u4e13\u5bb6\u5e76\u884c(EP32)<\/strong>\uff0c\u786e\u4fdd\u6bcf\u4e2a\u4e13\u5bb6\u80fd\u5904\u7406\u8db3\u591f\u89c4\u6a21\u7684\u6279\u91cf\u6570\u636e\uff0c\u63d0\u5347\u8ba1\u7b97\u6548\u7387\u3002MoE \u7684\u5168\u8282\u70b9\u901a\u4fe1\u91c7\u7528\u4e0e\u8bad\u7ec3\u9636\u6bb5\u76f8\u540c\u7684\u65b9\u5f0f\uff1a\u5148\u901a\u8fc7 IB \u5728\u8282\u70b9\u95f4\u4f20\u8f93 token\uff0c\u518d\u901a\u8fc7 NVLink \u5728\u8282\u70b9\u5185 GPU \u95f4\u4f20\u9012\u3002\u7279\u522b\u5730\uff0c\u6d45\u5c42\u7684\u5bc6\u96c6 MLP \u91c7\u7528\u5355\u8def\u5f20\u91cf\u5e76\u884c\u4ee5\u964d\u4f4e TP \u901a\u4fe1\u5f00\u9500\u3002<\/p>\n\n\n\n<p>\u4e3a\u5b9e\u73b0 MoE \u90e8\u5206\u5404\u4e13\u5bb6\u95f4\u7684\u8d1f\u8f7d\u5e73\u8861\uff0c\u7cfb\u7edf\u9700\u8981\u786e\u4fdd\u6bcf\u4e2a GPU \u5904\u7406\u76f8\u8fd1\u6570\u91cf\u7684 
token\u3002\u4e3a\u6b64\uff0c\u91c7\u7528\u4e86<strong>\u5197\u4f59\u4e13\u5bb6\u90e8\u7f72\u7b56\u7565<\/strong>\uff0c\u5bf9\u9ad8\u8d1f\u8f7d\u4e13\u5bb6\u8fdb\u884c\u590d\u5236\u548c\u5197\u4f59\u90e8\u7f72\u3002<\/p>\n\n\n\n<p>\u7cfb\u7edf\u57fa\u4e8e\u5728\u7ebf\u90e8\u7f72\u65f6\u6536\u96c6\u7684\u7edf\u8ba1\u6570\u636e\u8bc6\u522b\u9ad8\u8d1f\u8f7d\u4e13\u5bb6\uff0c\u5e76\u5b9a\u671f\u8c03\u6574\uff08\u5982\u6bcf 10 \u5206\u949f\uff09\u3002\u786e\u5b9a\u5197\u4f59\u4e13\u5bb6\u540e\uff0c\u57fa\u4e8e\u8d1f\u8f7d\u89c2\u6d4b\u6570\u636e\u5728\u8282\u70b9\u5185 GPU \u95f4\u91cd\u65b0\u5206\u914d\u4e13\u5bb6\uff0c\u5728\u4e0d\u589e\u52a0\u8de8\u8282\u70b9\u901a\u4fe1\u5f00\u9500\u7684\u524d\u63d0\u4e0b\uff0c\u5c3d\u53ef\u80fd\u5b9e\u73b0 GPU \u95f4\u7684\u8d1f\u8f7d\u5747\u8861\u3002<\/p>\n\n\n\n<p>DeepSeek-V3 \u5728\u9884\u586b\u5145\u9636\u6bb5\u914d\u7f6e\u4e86 32 \u4e2a\u5197\u4f59\u4e13\u5bb6\uff0c\u6bcf\u4e2a GPU \u9664\u539f\u6709\u7684 8 \u4e2a\u4e13\u5bb6\u5916\uff0c\u8fd8\u5206\u914d\u4e00\u4e2a\u989d\u5916\u7684\u5197\u4f59\u4e13\u5bb6\u3002\u6b64\u5916\uff0c\u4e3a\u63d0\u5347\u541e\u5410\u91cf\u5e76\u964d\u4f4e\u5168\u5bf9\u5168\u548c TP \u901a\u4fe1\u5f00\u9500\uff0c\u7cfb\u7edf\u540c\u65f6\u5904\u7406\u4e24\u4e2a\u8ba1\u7b97\u8d1f\u8f7d\u76f8\u8fd1\u7684\u5fae\u6279\u6b21\uff0c\u5c06\u4e00\u4e2a\u6279\u6b21\u7684\u6ce8\u610f\u529b\u548c MoE \u8ba1\u7b97\u4e0e\u53e6\u4e00\u4e2a\u6279\u6b21\u7684\u6570\u636e\u5206\u53d1\u548c\u805a\u5408\u91cd\u53e0\u3002<\/p>\n\n\n\n<p>\u76ee\u524d\u6b63\u5728\u63a2\u7d22<strong>\u4e13\u5bb6\u52a8\u6001\u5197\u4f59\u673a\u5236<\/strong>\uff0c\u4f7f\u6bcf\u4e2a GPU \u5206\u914d\u66f4\u591a\u4e13\u5bb6\uff08\u5982 16 \u4e2a\uff09\uff0c\u4f46\u6bcf\u6b21\u63a8\u7406\u4ec5\u6fc0\u6d3b\u5176\u4e2d 9 
\u4e2a\u3002\u5728\u6bcf\u5c42\u5168\u5bf9\u5168\u64cd\u4f5c\u5f00\u59cb\u524d\uff0c\u7cfb\u7edf\u5b9e\u65f6\u8ba1\u7b97\u5168\u5c40\u6700\u4f18\u8def\u7531\u65b9\u6848\u3002\u7531\u4e8e\u9884\u586b\u5145\u9636\u6bb5\u672c\u8eab\u8ba1\u7b97\u91cf\u8f83\u5927\uff0c\u8ba1\u7b97\u8def\u7531\u65b9\u6848\u7684\u989d\u5916\u5f00\u9500\u51e0\u4e4e\u53ef\u4ee5\u5ffd\u7565\u3002<\/p>\n\n\n\n<p><strong>\u89e3\u7801<\/strong><\/p>\n\n\n\n<p>\u5728\u89e3\u7801\u9636\u6bb5\uff0c\u7cfb\u7edf\u5c06\u5171\u4eab\u4e13\u5bb6\u4f5c\u4e3a\u4e00\u79cd\u8def\u7531\u4e13\u5bb6\u5904\u7406\u3002\u8fd9\u610f\u5473\u7740\u6bcf\u4e2a token \u5728\u8def\u7531\u65f6\u4f1a\u9009\u62e9 9 \u4e2a\u4e13\u5bb6\uff0c\u5176\u4e2d\u5171\u4eab\u4e13\u5bb6\u88ab\u89c6\u4e3a\u4e00\u4e2a\u5fc5\u7136\u9009\u62e9\u7684\u9ad8\u8d1f\u8f7d\u4e13\u5bb6\u3002<\/p>\n\n\n\n<p>\u89e3\u7801\u9636\u6bb5\u7684\u6700\u5c0f\u90e8\u7f72\u5355\u5143\u7531 40 \u4e2a\u8282\u70b9 320 \u4e2a GPU \u6784\u6210\u3002<strong>\u6ce8\u610f\u529b\u90e8\u5206<\/strong>\u91c7\u7528 TP4 \u914d\u5408 SP\uff0c\u7ed3\u5408 DP80\uff0c\u800c MoE \u90e8\u5206\u4f7f\u7528 EP320\u3002<strong>MoE \u90e8\u5206<\/strong>\uff0c\u6bcf\u4e2a GPU \u4ec5\u5206\u914d\u4e00\u4e2a\u4e13\u5bb6\uff0c\u5176\u4e2d 64 \u4e2a GPU \u4e13\u95e8\u8d1f\u8d23\u5197\u4f59\u4e13\u5bb6\u548c\u5171\u4eab\u4e13\u5bb6\u3002\u5206\u53d1\u548c\u805a\u5408\u73af\u8282\u7684\u5168\u8282\u70b9\u901a\u4fe1\u901a\u8fc7 IB \u76f4\u63a5\u70b9\u5bf9\u70b9\u4f20\u8f93\u5b9e\u73b0\u4f4e\u5ef6\u8fdf\u3002\u540c\u65f6\uff0c\u7cfb\u7edf\u5f15\u5165&nbsp;<strong>IBGDA<\/strong>&nbsp;\u6280\u672f\u8fdb\u4e00\u6b65\u964d\u4f4e\u5ef6\u8fdf\u5e76\u63d0\u5347\u901a\u4fe1\u6548\u7387\u3002<\/p>\n\n\n\n<p>\u4e0e\u9884\u586b\u5145\u9636\u6bb5\u7c7b\u4f3c\uff0c\u7cfb\u7edf\u57fa\u4e8e\u5728\u7ebf\u670d\u52a1\u7684\u4e13\u5bb6\u8d1f\u8f7d\u7edf\u8ba1\u6570\u636e\uff0c\u5b9a\u671f\u786e\u5b9a\u5197\u4f59\u4e13\u5bb6\u914d\u7f6e\u3002\u7531\u4e8e\u6bcf\u4e2a GPU 
\u4ec5\u5206\u914d\u4e00\u4e2a\u4e13\u5bb6\uff0c\u65e0\u9700\u8fdb\u884c\u4e13\u5bb6\u91cd\u65b0\u5206\u914d\u3002\u7cfb\u7edf\u4e5f\u5728\u7814\u7a76\u89e3\u7801\u9636\u6bb5\u7684\u52a8\u6001\u5197\u4f59\u7b56\u7565\uff0c\u4f46\u8fd9\u9700\u8981\u5bf9\u5168\u5c40\u6700\u4f18\u8def\u7531\u65b9\u6848\u7684\u8ba1\u7b97\u7b97\u6cd5\u8fdb\u884c\u66f4\u7ec6\u81f4\u7684\u4f18\u5316\uff0c\u5e76\u4e0e\u5206\u53d1\u5185\u6838\u8fdb\u884c\u878d\u5408\u4ee5\u51cf\u5c11\u5f00\u9500\u3002<\/p>\n\n\n\n<p>\u6b64\u5916\uff0c\u4e3a\u63d0\u5347\u541e\u5410\u91cf\u5e76\u964d\u4f4e\u5168\u8282\u70b9\u901a\u4fe1\u5f00\u9500\uff0c\u7cfb\u7edf\u6b63\u5728\u63a2\u7d22\u5728\u89e3\u7801\u9636\u6bb5\u540c\u65f6\u5904\u7406\u4e24\u4e2a\u8ba1\u7b97\u8d1f\u8f7d\u76f8\u8fd1\u7684\u5fae\u6279\u6b21\u3002\u4e0e\u9884\u586b\u5145\u4e0d\u540c\u7684\u662f\uff0c\u5728\u89e3\u7801\u9636\u6bb5\u6ce8\u610f\u529b\u673a\u5236\u5360\u7528\u66f4\u591a\u65f6\u95f4\uff0c\u56e0\u6b64\u7cfb\u7edf\u5c06\u4e00\u4e2a\u6279\u6b21\u7684\u6ce8\u610f\u529b\u8ba1\u7b97\u4e0e\u53e6\u4e00\u4e2a\u6279\u6b21\u7684\u5206\u53d1\u3001MoE \u5904\u7406\u548c\u6570\u636e\u805a\u5408\u8fdb\u884c\u91cd\u53e0\u3002<\/p>\n\n\n\n<p>\u5728\u89e3\u7801\u9636\u6bb5\uff0c\u6bcf\u4e2a\u4e13\u5bb6\u5904\u7406\u7684\u6279\u91cf\u89c4\u6a21\u76f8\u5bf9\u8f83\u5c0f\uff08\u901a\u5e38\u4e0d\u8d85\u8fc7 256 \u4e2a token\uff09\uff0c<strong>\u7cfb\u7edf\u74f6\u9888\u5728\u4e8e\u5185\u5b58\u8bbf\u95ee\u800c\u975e\u8ba1\u7b97\u80fd\u529b<\/strong>\u3002\u7531\u4e8e MoE \u90e8\u5206\u53ea\u9700\u52a0\u8f7d\u5355\u4e2a\u4e13\u5bb6\u7684\u53c2\u6570\uff0c\u5185\u5b58\u8bbf\u95ee\u5f00\u9500\u8f83\u5c0f\uff0c\u56e0\u6b64\u5373\u4f7f\u5206\u914d\u8f83\u5c11\u7684 SMs \u4e5f\u4e0d\u4f1a\u663e\u8457\u5f71\u54cd\u6574\u4f53\u6027\u80fd\u3002\u57fa\u4e8e\u8fd9\u4e00\u7279\u70b9\uff0c\u7cfb\u7edf\u53ea\u9700\u5206\u914d\u5c11\u91cf SMs \u7528\u4e8e\u5206\u53d1\u3001MoE 
\u5904\u7406\u548c\u6570\u636e\u805a\u5408\uff0c\u907f\u514d\u5f71\u54cd\u6ce8\u610f\u529b\u90e8\u5206\u7684\u8ba1\u7b97\u901f\u5ea6\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_9\">\u786c\u4ef6\u8bbe\u8ba1\u5efa\u8bae<\/h3>\n\n\n\n<p>\u57fa\u4e8e\u5168\u5bf9\u5168\u901a\u4fe1\u548c FP8 \u8bad\u7ec3\u65b9\u6848\u7684\u5b9e\u8df5\u7ecf\u9a8c\uff0c\u7814\u7a76\u56e2\u961f\u5bf9 AI \u786c\u4ef6\u5382\u5546\u63d0\u51fa\u4ee5\u4e0b\u82af\u7247\u8bbe\u8ba1\u5efa\u8bae\u3002<\/p>\n\n\n\n<p><strong>\u901a\u4fe1\u786c\u4ef6<\/strong><\/p>\n\n\n\n<p>DeepSeek-V3 \u901a\u8fc7\u5b9e\u73b0\u8ba1\u7b97\u4e0e\u901a\u4fe1\u7684\u5e76\u884c\u5904\u7406\uff0c\u5728\u8ba1\u7b97\u8fc7\u7a0b\u4e2d\u6709\u6548\u9690\u85cf\u4e86\u901a\u4fe1\u5ef6\u8fdf\u3002\u8fd9\u79cd\u8bbe\u8ba1\u76f8\u6bd4\u4e32\u884c\u8ba1\u7b97\u548c\u901a\u4fe1\u65b9\u5f0f\uff0c\u663e\u8457\u964d\u4f4e\u4e86\u5bf9\u901a\u4fe1\u5e26\u5bbd\u7684\u8981\u6c42\u3002\u7136\u800c\uff0c\u76ee\u524d\u7684\u901a\u4fe1\u5b9e\u73b0\u9700\u8981\u5360\u7528\u5927\u91cf\u5b9d\u8d35\u7684 SMs \u8d44\u6e90\uff08\u5982\u5728 H800 GPU \u7684 132 \u4e2a SMs \u4e2d\u5360\u7528 20 \u4e2a\uff09\uff0c\u8fd9\u9650\u5236\u4e86\u8ba1\u7b97\u541e\u5410\u80fd\u529b\u3002<\/p>\n\n\n\n<p>\u53e6\u5916\uff0c\u5c06 SMs \u7528\u4e8e\u901a\u4fe1\u5bfc\u81f4\u5f20\u91cf\u6838\u5fc3\u8d44\u6e90\u7684\u4e25\u91cd\u6d6a\u8d39\u3002\u76ee\u524d\uff0cSMs \u5728\u5168\u5bf9\u5168\u901a\u4fe1\u4e2d\u4e3b\u8981\u627f\u62c5\u4ee5\u4e0b\u4efb\u52a1\uff1a<\/p>\n\n\n\n<ul><li>\u5728 IB \u548c NVLink \u7f51\u7edc\u95f4\u8f6c\u53d1\u6570\u636e\uff0c\u540c\u65f6\u6c47\u805a\u6765\u81ea\u5355\u4e2a GPU \u53d1\u5f80\u540c\u4e00\u8282\u70b9\u5185\u591a\u4e2a GPU \u7684 IB \u6570\u636e\u6d41\u3002<\/li><li>\u5728&nbsp;RDMA&nbsp;\u7f13\u51b2\u533a\uff08\u6ce8\u518c\u7684 GPU \u5185\u5b58\u533a\u57df\uff09\u4e0e\u8f93\u5165\/\u8f93\u51fa\u7f13\u51b2\u533a\u95f4\u4f20\u8f93\u6570\u636e\u3002<\/li><li>\u6267\u884c\u5168\u5bf9\u5168\u7ec4\u5408\u7684\u5f52\u7ea6\u8fd0\u7b97\u3002<\/li><li>\u5728\u8de8 IB \u548c 
NVLink \u7f51\u7edc\u5411\u591a\u4e2a\u4e13\u5bb6\u4f20\u8f93\u5206\u5757\u6570\u636e\u65f6\u7ba1\u7406\u7ec6\u7c92\u5ea6\u5185\u5b58\u5e03\u5c40\u3002<\/li><\/ul>\n\n\n\n<p>\u671f\u671b\u672a\u6765\u786c\u4ef6\u5382\u5546\u80fd\u5f00\u53d1\u4e13\u95e8\u7684\u786c\u4ef6\uff0c\u5c06\u8fd9\u4e9b\u901a\u4fe1\u4efb\u52a1\u4ece\u8ba1\u7b97\u6838\u5fc3 SM \u4e2d\u5206\u79bb\u51fa\u6765\uff0c\u8bbe\u8ba1\u6210\u7c7b\u4f3c NVIDIA SHARP \u7684 GPU \u534f\u5904\u7406\u5668\u6216\u7f51\u7edc\u534f\u5904\u7406\u5668\u3002\u540c\u65f6\uff0c\u4e3a\u964d\u4f4e\u5e94\u7528\u5f00\u53d1\u96be\u5ea6\uff0c\u5e0c\u671b\u8fd9\u79cd\u786c\u4ef6\u80fd\u4ece\u8ba1\u7b97\u5355\u5143\u7684\u89d2\u5ea6\u7edf\u4e00\u7ba1\u7406 IB\uff08\u6a2a\u5411\u6269\u5c55\uff09\u548c NVLink\uff08\u7eb5\u5411\u6269\u5c55\uff09\u7f51\u7edc\u3002\u901a\u8fc7\u8fd9\u79cd\u7edf\u4e00\u63a5\u53e3\uff0c\u8ba1\u7b97\u5355\u5143\u53ea\u9700\u63d0\u4ea4\u7b80\u5355\u7684\u901a\u4fe1\u8bf7\u6c42\uff0c\u5c31\u80fd\u5728\u6574\u4e2a IB-NVLink \u7edf\u4e00\u7f51\u7edc\u4e2d\u8f7b\u677e\u5b9e\u73b0\u8bfb\u53d6\u3001\u5199\u5165\u3001\u591a\u64ad\u548c\u5f52\u7ea6\u7b49\u64cd\u4f5c\u3002<\/p>\n\n\n\n<p><strong>\u8ba1\u7b97\u786c\u4ef6<\/strong><\/p>\n\n\n\n<p><strong>\u5f20\u91cf\u6838\u5fc3\u4e2d\u7684 FP8 GEMM \u7d2f\u79ef\u7cbe\u5ea6\u63d0\u5347\uff1a<\/strong>&nbsp;\u5f53\u524d NVIDIA Hopper \u67b6\u6784\u7684\u5f20\u91cf\u6838\u5fc3\u5728\u5b9e\u73b0 FP8 GEMM \u65f6\u91c7\u7528\u5b9a\u70b9\u7d2f\u79ef\u65b9\u5f0f\uff0c\u901a\u8fc7\u57fa\u4e8e\u6700\u5927\u6307\u6570\u7684\u53f3\u79fb\u64cd\u4f5c\u5bf9\u5c3e\u6570\u79ef\u8fdb\u884c\u5bf9\u9f50\u540e\u518d\u76f8\u52a0\u3002\u5b9e\u9a8c\u663e\u793a\uff0c\u8be5\u8bbe\u8ba1\u5728\u7b26\u53f7\u586b\u5145\u53f3\u79fb\u540e\u4ec5\u4f7f\u7528\u6bcf\u4e2a\u5c3e\u6570\u79ef\u7684\u6700\u9ad8 14 \u4f4d\uff0c\u5e76\u820d\u5f03\u8d85\u51fa\u8303\u56f4\u7684\u4f4d\u3002\u7136\u800c\uff0c\u4f8b\u5982\u8981\u4ece 32 \u4e2a FP8&nbsp;\u00d7&nbsp;FP8 
\u4e58\u6cd5\u7684\u7d2f\u79ef\u4e2d\u83b7\u5f97\u7cbe\u786e\u7684 FP32 \u7ed3\u679c\uff0c\u81f3\u5c11\u9700\u8981 34 \u4f4d\u7cbe\u5ea6\u3002\u56e0\u6b64\uff0c\u5efa\u8bae\u672a\u6765\u82af\u7247\u8bbe\u8ba1\u63d0\u9ad8\u5f20\u91cf\u6838\u5fc3\u7684\u7d2f\u79ef\u7cbe\u5ea6\u4ee5\u652f\u6301\u5168\u7cbe\u5ea6\u7d2f\u79ef\uff0c\u6216\u6839\u636e\u5177\u4f53\u8bad\u7ec3\u548c\u63a8\u7406\u7b97\u6cd5\u7684\u7cbe\u5ea6\u9700\u6c42\u9009\u62e9\u5408\u9002\u7684\u7d2f\u79ef\u4f4d\u5bbd\uff0c\u4ee5\u5728\u4fdd\u8bc1\u8ba1\u7b97\u6548\u7387\u7684\u540c\u65f6\u5c06\u8bef\u5dee\u63a7\u5236\u5728\u53ef\u63a5\u53d7\u8303\u56f4\u5185\u3002<\/p>\n\n\n\n<p><strong>\u652f\u6301\u5e73\u94fa\u548c\u5757\u72b6\u91cf\u5316\uff1a<\/strong>\u73b0\u6709 GPU \u4ec5\u652f\u6301\u6574\u4f53\u5f20\u91cf\u91cf\u5316\uff0c\u7f3a\u4e4f\u5bf9\u5e73\u94fa\u548c\u5757\u72b6\u7b49\u7ec6\u7c92\u5ea6\u91cf\u5316\u7684\u786c\u4ef6\u652f\u6301\u3002\u5f53\u524d\u5b9e\u73b0\u4e2d\uff0c\u8fbe\u5230NC\u95f4\u9694\u65f6\u9700\u8981\u5c06\u90e8\u5206\u7ed3\u679c\u4ece\u5f20\u91cf\u6838\u5fc3\u590d\u5236\u5230 CUDA \u6838\u5fc3\uff0c\u8fdb\u884c\u7f29\u653e\u56e0\u5b50\u4e58\u6cd5\u8fd0\u7b97\uff0c\u518d\u6dfb\u52a0\u5230 CUDA \u6838\u5fc3\u7684 FP32 \u5bc4\u5b58\u5668\u4e2d\u3002\u867d\u7136\u7ed3\u5408\u7cbe\u786e FP32 \u7d2f\u79ef\u7b56\u7565\u663e\u8457\u964d\u4f4e\u4e86\u53cd\u91cf\u5316\u5f00\u9500\uff0c\u4f46\u5f20\u91cf\u6838\u5fc3\u548c CUDA \u6838\u5fc3\u95f4\u9891\u7e41\u7684\u6570\u636e\u79fb\u52a8\u4ecd\u7136\u5236\u7ea6\u4e86\u8ba1\u7b97\u6548\u7387\u3002\u56e0\u6b64\uff0c\u5efa\u8bae\u672a\u6765\u82af\u7247\u652f\u6301\u7ec6\u7c92\u5ea6\u91cf\u5316\uff0c\u4f7f\u5f20\u91cf\u6838\u5fc3\u80fd\u591f\u76f4\u63a5\u63a5\u6536\u7f29\u653e\u56e0\u5b50\u5e76\u5b9e\u73b0\u7ec4\u7ea7\u7f29\u653e\u7684 MMA 
\u64cd\u4f5c\u3002\u8fd9\u6837\u53ef\u4ee5\u76f4\u63a5\u5728\u5f20\u91cf\u6838\u5fc3\u5185\u5b8c\u6210\u5168\u90e8\u7684\u90e8\u5206\u548c\u7d2f\u79ef\u4e0e\u53cd\u91cf\u5316\u8ba1\u7b97\uff0c\u76f4\u5230\u751f\u6210\u6700\u7ec8\u7ed3\u679c\uff0c\u907f\u514d\u9891\u7e41\u7684\u6570\u636e\u8fc1\u79fb\u3002<\/p>\n\n\n\n<p><strong>\u652f\u6301\u5728\u7ebf\u91cf\u5316\uff1a<\/strong>\u5c3d\u7ba1\u7814\u7a76\u8bc1\u5b9e\u4e86\u5728\u7ebf\u91cf\u5316\u7684\u6709\u6548\u6027\uff0c\u4f46\u5f53\u524d\u786c\u4ef6\u96be\u4ee5\u6709\u6548\u652f\u6301\u8fd9\u4e00\u6280\u672f\u3002\u73b0\u6709\u6d41\u7a0b\u4e2d\u9700\u8981\u4ece&nbsp;HBM&nbsp;\u8bfb\u53d6 128 \u4e2a BF16 \u6fc0\u6d3b\u503c\uff08\u4e0a\u4e00\u6b65\u7684\u8ba1\u7b97\u7ed3\u679c\uff09\u8fdb\u884c\u91cf\u5316\uff0c\u5c06\u91cf\u5316\u540e\u7684 FP8 \u503c\u5199\u56de HBM\uff0c\u7136\u540e\u518d\u6b21\u8bfb\u53d6\u7528\u4e8e MMA \u64cd\u4f5c\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u4f4e\u6548\u95ee\u9898\uff0c\u5efa\u8bae\u672a\u6765\u82af\u7247\u5c06 FP8 \u683c\u5f0f\u8f6c\u6362\u4e0e&nbsp;TMA&nbsp;\u8bbf\u95ee\u96c6\u6210\u4e3a\u5355\u4e00\u878d\u5408\u64cd\u4f5c\uff0c\u5b9e\u73b0\u5728\u6fc0\u6d3b\u503c\u4ece\u5168\u5c40\u5185\u5b58\u4f20\u8f93\u5230\u5171\u4eab\u5185\u5b58\u8fc7\u7a0b\u4e2d\u5b8c\u6210\u91cf\u5316\uff0c\u907f\u514d\u9891\u7e41\u7684\u5185\u5b58\u8bfb\u5199\u3002\u540c\u65f6\u5efa\u8bae\u652f\u6301\u7ebf\u7a0b\u675f\u7ea7\u683c\u5f0f\u8f6c\u6362\u6307\u4ee4\u4ee5\u63d0\u5347\u6027\u80fd\uff0c\u4fc3\u8fdb\u5c42\u6807\u51c6\u5316\u4e0e FP8 \u8f6c\u6362\u7684\u66f4\u597d\u878d\u5408\u3002\u53e6\u4e00\u79cd\u65b9\u6848\u662f\u91c7\u7528\u8fd1\u5185\u5b58\u8ba1\u7b97\u65b9\u6cd5\uff0c\u5c06\u8ba1\u7b97\u903b\u8f91\u653e\u7f6e\u5728 HBM \u9644\u8fd1\uff0c\u4f7f BF16 \u5143\u7d20\u5728\u4ece HBM \u8bfb\u5165 GPU \u65f6\u76f4\u63a5\u8f6c\u6362\u4e3a FP8\uff0c\u4ece\u800c\u5c06\u7247\u5916\u5185\u5b58\u8bbf\u95ee\u51cf\u5c11\u7ea6 
50%\u3002<\/p>\n\n\n\n<p><strong>\u652f\u6301\u8f6c\u7f6eGEMM\u64cd\u4f5c\uff1a<\/strong>&nbsp;\u73b0\u6709\u67b6\u6784\u96be\u4ee5\u5b9e\u73b0\u77e9\u9635\u8f6c\u7f6e\u4e0e GEMM \u64cd\u4f5c\u7684\u6709\u6548\u878d\u5408\u3002\u76ee\u524d\u7684\u5de5\u4f5c\u6d41\u4e2d\uff0c\u524d\u5411\u4f20\u64ad\u9636\u6bb5\u7684\u6fc0\u6d3b\u503c\u9700\u8981\u5148\u91cf\u5316\u4e3a 1&#215;128 FP8 \u5e73\u94fa\u683c\u5f0f\u5e76\u5b58\u50a8\u3002\u5728\u53cd\u5411\u4f20\u64ad\u65f6\uff0c\u7cfb\u7edf\u5fc5\u987b\u8bfb\u53d6\u77e9\u9635\uff0c\u6267\u884c\u53cd\u91cf\u5316\uff0c\u8fdb\u884c\u8f6c\u7f6e\u64cd\u4f5c\uff0c\u518d\u91cd\u65b0\u91cf\u5316\u4e3a 128&#215;1 \u5e73\u94fa\u683c\u5f0f\uff0c\u6700\u540e\u5b58\u5165 HBM\u3002\u4e3a\u4f18\u5316\u5185\u5b58\u64cd\u4f5c\u6548\u7387\uff0c\u5efa\u8bae\u672a\u6765\u82af\u7247\u8bbe\u8ba1\u4e2d\uff0c\u5bf9\u8bad\u7ec3\u548c\u63a8\u7406\u4e2d\u5e38\u7528\u7684\u7cbe\u5ea6\u683c\u5f0f\uff0c\u652f\u6301\u5728 MMA \u64cd\u4f5c\u524d\u76f4\u63a5\u4ece\u5171\u4eab\u5185\u5b58\u8fdb\u884c\u8f6c\u7f6e\u8bfb\u53d6\u3002\u8fd9\u4e00\u6539\u8fdb\u914d\u5408 FP8 \u683c\u5f0f\u8f6c\u6362\u548c TMA \u8bbf\u95ee\u7684\u878d\u5408\u673a\u5236\uff0c\u5c06\u5927\u5e45\u4f18\u5316\u91cf\u5316\u5904\u7406\u6d41\u7a0b\u3002<\/p>\n\n\n\n<h2 id=\"h_14890557782_10\">\u9884\u8bad\u7ec3<\/h2>\n\n\n\n<h3 id=\"h_14890557782_11\">\u6570\u636e\u6784\u5efa<\/h3>\n\n\n\n<p>\u76f8\u6bd4 DeepSeek-V2\uff0c\u672c\u6b21\u9884\u8bad\u7ec3<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=%E8%AF%AD%E6%96%99%E5%BA%93&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer 
noopener\">\u8bed\u6599\u5e93<\/a>\u5728\u63d0\u5347<strong>\u6570\u5b66<\/strong>\u548c<strong>\u7f16\u7a0b<\/strong>\u6837\u672c\u5360\u6bd4\u7684\u540c\u65f6\uff0c\u6269\u5927\u4e86\u82f1\u8bed\u548c\u4e2d\u6587\u4e4b\u5916\u7684<strong>\u591a\u8bed\u8a00<\/strong>\u8986\u76d6\u8303\u56f4\u3002<\/p>\n\n\n\n<p>\u6570\u636e\u5904\u7406\u6d41\u7a0b\u4e5f\u7ecf\u8fc7\u6539\u8fdb\uff0c\u5728\u4fdd\u6301\u8bed\u6599\u591a\u6837\u6027\u7684\u540c\u65f6\u964d\u4f4e\u4e86\u6570\u636e\u5197\u4f59\u3002\u7cfb\u7edf\u91c7\u7528\u6587\u6863\u6253\u5305\u65b9\u6cd5\u7ef4\u6301\u6570\u636e\u5b8c\u6574\u6027\uff0c\u4f46\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4e0d\u4f7f\u7528\u8de8\u6837\u672c\u6ce8\u610f\u529b\u63a9\u7801\u3002\u6700\u7ec8\u8bad\u7ec3\u8bed\u6599\u5e93\u5305\u542b&nbsp;<strong>14.8T<\/strong>&nbsp;\u7ecf&nbsp;<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=tokenizer&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">tokenizer<\/a>&nbsp;\u5904\u7406\u7684\u9ad8\u8d28\u91cf\u591a\u6837\u5316 token\u3002<\/p>\n\n\n\n<p>\u5728 DeepSeekCoder-V2 \u7684\u8bad\u7ec3\u4e2d\u53d1\u73b0\uff0c<strong>\u586b\u5145\u4e2d\u95f4\uff08FIM\uff09<\/strong>\u7b56\u7565\u5728\u4fdd\u6301\u4e0b\u4e00\u4e2a token \u9884\u6d4b\u80fd\u529b\u7684\u540c\u65f6\uff0c\u8fd8\u80fd\u8ba9\u6a21\u578b\u57fa\u4e8e\u4e0a\u4e0b\u6587\u51c6\u786e\u9884\u6d4b\u4e2d\u95f4\u6587\u672c\u3002\u56e0\u6b64 DeepSeek-V3 \u7684\u9884\u8bad\u7ec3\u4e5f\u91c7\u7528\u4e86\u8fd9\u4e00\u7b56\u7565\u3002\u5177\u4f53\u5b9e\u73b0\u4e0a\uff0c\u4f7f\u7528<strong>\u524d\u7f00-\u540e\u7f00-\u4e2d\u95f4\uff08PSM\uff09<\/strong>\u6846\u67b6\u6784\u5efa\u5982\u4e0b\u6570\u636e\u7ed3\u6784\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&lt;|fim_begin|&gt; pre&lt;|fim_hole|&gt; suf&lt;|fim_end|&gt; 
middle&lt;|eos_token|&gt;\u3002<\/code><\/pre>\n\n\n\n<p>\u8be5\u7ed3\u6784\u5728\u9884\u6253\u5305\u9636\u6bb5\u5e94\u7528\u4e8e\u6587\u6863\u7ea7\u522b\uff0cFIM \u7b56\u7565\u7684\u5e94\u7528\u6bd4\u7387\u4e3a 0.1\uff0c\u4e0e PSM \u6846\u67b6\u4fdd\u6301\u4e00\u81f4\u3002<\/p>\n\n\n\n<p>DeepSeek-V3 \u91c7\u7528\u8bcd\u8868\u5927\u5c0f\u4e3a 128K \u7684\u5b57\u8282\u7ea7&nbsp;<strong>BPE tokenizer&nbsp;<\/strong>\u3002\u4e3a\u63d0\u9ad8\u591a\u8bed\u8a00\u538b\u7f29\u6548\u7387\uff0c\u5bf9\u9884\u5206\u8bcd\u5668\u548c\u8bad\u7ec3\u6570\u636e\u8fdb\u884c\u4e86\u76f8\u5e94\u8c03\u6574\u3002\u4e0e DeepSeek-V2 \u76f8\u6bd4\uff0c\u65b0\u7684\u9884\u5206\u8bcd\u5668<strong>\u5f15\u5165\u4e86\u6807\u70b9\u7b26\u53f7\u548c\u6362\u884c\u7b26\u7684\u7ec4\u5408 token<\/strong>\u3002\u7136\u800c\u8fd9\u79cd\u8bbe\u8ba1\u5728\u5904\u7406\u65e0\u7ec8\u7aef\u6362\u884c\u7b26\u7684\u591a\u884c\u63d0\u793a\u8bcd\u65f6\u53ef\u80fd\u4ea7\u751f token \u8fb9\u754c\u504f\u5dee\uff0c\u5c24\u5176\u662f\u5728\u5c11\u6837\u672c\u8bc4\u4f30\u573a\u666f\u3002\u4e3a\u6b64\uff0c\u8bad\u7ec3\u65f6\u5bf9\u4e00\u5b9a\u6bd4\u4f8b\u7684\u7ec4\u5408 token \u8fdb\u884c\u968f\u673a\u5206\u5272\uff0c\u4f7f\u6a21\u578b\u63a5\u89e6\u66f4\u591a\u7279\u6b8a\u60c5\u51b5\u6765\u51cf\u8f7b\u8fd9\u79cd\u504f\u5dee\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_12\">\u8d85\u53c2\u6570\u8bbe\u7f6e<\/h3>\n\n\n\n<p><strong>\u6a21\u578b\u67b6\u6784\u53c2\u6570<\/strong><\/p>\n\n\n\n<p>\u7cfb\u7edf\u91c7\u7528 61 \u5c42 Transformer \u7ed3\u6784\uff0c\u9690\u85cf\u7ef4\u5ea6\u4e3a 7168\u3002\u6240\u6709\u53ef\u5b66\u4e60\u53c2\u6570\u91c7\u7528\u6807\u51c6\u5dee 0.006 \u7684\u968f\u673a\u521d\u59cb\u5316\u3002<\/p>\n\n\n\n<p>\u5728 MLA \u7ed3\u6784\u4e2d\uff0c\u6ce8\u610f\u529b\u5934\u6570\u91cf&nbsp;<em>n<sub>h<\/sub><\/em>&nbsp;\u8bbe\u4e3a 128\uff0c\u6bcf\u4e2a\u5934\u7684\u7ef4\u5ea6&nbsp;<em>d<sub>h<\/sub><\/em>&nbsp;\u4e3a 128\u3002KV \u538b\u7f29\u7ef4\u5ea6&nbsp;<em>dc<\/em>&nbsp;\u4e3a 
512\uff0c\u67e5\u8be2\u538b\u7f29\u7ef4\u5ea6&nbsp;d\u2032c&nbsp;\u4e3a 1536\u3002\u89e3\u8026\u7684\u67e5\u8be2\u548c\u952e\u90e8\u5206\uff0c\u6bcf\u4e2a\u5934\u7684\u7ef4\u5ea6&nbsp;<em>d<sub>h<\/sub><sup>R<\/sup><\/em>&nbsp;\u8bbe\u4e3a 64\u3002<\/p>\n\n\n\n<p>\u9664\u524d\u4e09\u5c42\u5916\uff0c\u6240\u6709 FFN \u5c42\u90fd\u66ff\u6362\u4e3a MoE \u5c42\uff0c\u6bcf\u4e2a MoE \u5c42\u914d\u7f6e 1 \u4e2a\u5171\u4eab\u4e13\u5bb6\u548c 256 \u4e2a\u8def\u7531\u4e13\u5bb6\uff0c\u4e13\u5bb6\u7684\u4e2d\u95f4\u9690\u85cf\u7ef4\u5ea6\u4e3a 2048\u3002<\/p>\n\n\n\n<p>\u5728\u8def\u7531\u4e13\u5bb6\u4e2d\uff0c\u6bcf\u4e2a token \u6fc0\u6d3b 8 \u4e2a\u4e13\u5bb6\uff0c\u4e14\u6700\u591a\u5206\u914d\u5230 4 \u4e2a\u8282\u70b9\u3002\u591a token \u9884\u6d4b\u6df1\u5ea6&nbsp;D&nbsp;\u8bbe\u4e3a 1\uff0c\u5373\u6bcf\u4e2a token \u9664\u9884\u6d4b\u4e0b\u4e00\u4e2a\u7cbe\u786e token \u5916\uff0c\u8fd8\u9700\u9884\u6d4b\u4e00\u4e2a\u989d\u5916 token\u3002<\/p>\n\n\n\n<p>\u4e0e DeepSeek-V2 \u7c7b\u4f3c\uff0cDeepSeek-V3 \u5728\u538b\u7f29\u6f5c\u5728\u5411\u91cf\u540e\u6dfb\u52a0\u4e86 RMSNorm \u5c42\uff0c\u5e76\u5728\u5bbd\u5ea6\u74f6\u9888\u5904\u5f15\u5165\u989d\u5916\u7f29\u653e\u56e0\u5b50\u3002\u5728\u6b64\u914d\u7f6e\u4e0b\uff0c<strong>\u6a21\u578b\u603b\u53c2\u6570\u91cf\u8fbe\u5230 671B\uff0c\u5176\u4e2d\u6bcf\u4e2a token \u6fc0\u6d3b 37B \u53c2\u6570<\/strong>\u3002<\/p>\n\n\n\n<p><strong>\u8bad\u7ec3\u53c2\u6570<\/strong><\/p>\n\n\n\n<p>\u6a21\u578b\u91c7\u7528&nbsp;<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=AdamW+%E4%BC%98%E5%8C%96%E5%99%A8&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">AdamW \u4f18\u5316\u5668<\/a>\uff0c\u53c2\u6570\u8bbe\u7f6e\u4e3a\uff1a&nbsp;\u03b21=0.9&nbsp;\uff0c&nbsp;\u03b22=0.95&nbsp;\uff0c\u6743\u91cd\u8870\u51cf\u4e3a 0.1\u3002\u9884\u8bad\u7ec3\u9636\u6bb5\u6700\u5927\u5e8f\u5217\u957f\u5ea6\u4e3a 4K\uff0c\u5728 14.8T token 
\u4e0a\u8fdb\u884c\u8bad\u7ec3\u3002<\/p>\n\n\n\n<p>\u5b66\u4e60\u7387\u8c03\u5ea6\u91c7\u7528\u4ee5\u4e0b\u7b56\u7565\uff1a\u9996\u5148\u5728\u524d 2K \u6b65\u5185\u4ece&nbsp;0&nbsp;\u7ebf\u6027\u589e\u52a0\u81f3&nbsp;2.2\u00d710<sup>\u22124<\/sup>&nbsp;\uff1b\u4fdd\u6301\u8be5\u5b66\u4e60\u7387\u76f4\u81f3\u5904\u7406\u5b8c 10T \u8bad\u7ec3 token\uff1b\u968f\u540e\u5728 4.3T token \u533a\u95f4\u5185\u6309\u4f59\u5f26\u8870\u51cf\u66f2\u7ebf\u964d\u81f3&nbsp;2.2\u00d710<sup>\u22125<\/sup>&nbsp;\u3002\u5728\u6700\u540e 500B token \u7684\u8bad\u7ec3\u4e2d\uff0c\u5148\u7528&nbsp;2.2\u00d710<sup>\u22125<\/sup>&nbsp;\u7684\u56fa\u5b9a\u5b66\u4e60\u7387\u8bad\u7ec3 333B token\uff0c\u518d\u4ee5&nbsp;7.3\u00d710<sup>\u22126<\/sup>&nbsp;\u7684\u5b66\u4e60\u7387\u5b8c\u6210\u5269\u4f59 167B token\u3002<\/p>\n\n\n\n<p>\u68af\u5ea6\u88c1\u526a\u8303\u6570\u8bbe\u4e3a 1.0\u3002\u6279\u91cf\u5927\u5c0f\u91c7\u7528\u52a8\u6001\u8c03\u6574\u7b56\u7565\uff0c\u5728\u524d 469B token \u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4ece 3072 \u9010\u6b65\u589e\u52a0\u81f3 15360\uff0c\u6b64\u540e\u4fdd\u6301\u4e0d\u53d8\u3002\u6a21\u578b\u91c7\u7528\u6d41\u6c34\u7ebf\u5e76\u884c\u5c06\u4e0d\u540c\u5c42\u5206\u914d\u5230\u4e0d\u540c GPU\uff0c\u6bcf\u5c42\u7684\u8def\u7531\u4e13\u5bb6\u5747\u5300\u5206\u5e03\u5728 8 \u4e2a\u8282\u70b9\u7684 64 \u4e2a GPU \u4e0a\u3002\u8282\u70b9\u9650\u5236\u8def\u7531\u4e2d\uff0c\u6bcf\u4e2a token \u6700\u591a\u5206\u914d\u81f3 4 \u4e2a\u8282\u70b9\uff08&nbsp;M=4&nbsp;\uff09\u3002<\/p>\n\n\n\n<p>\u5728\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u65b9\u9762\uff0c\u524d 14.3T token \u7684\u504f\u7f6e\u66f4\u65b0\u901f\u5ea6&nbsp;\u03b3&nbsp;\u8bbe\u4e3a 0.001\uff0c\u5269\u4f59 500B token \u8bbe\u4e3a 0\u3002\u5e73\u8861\u635f\u5931\u53c2\u6570&nbsp;\u03b1&nbsp;\u8bbe\u4e3a 0.0001\uff0c\u4ec5\u7528\u4e8e\u9632\u6b62\u5355\u4e2a\u5e8f\u5217\u5185\u51fa\u73b0\u6781\u7aef\u4e0d\u5e73\u8861\u3002MTP \u635f\u5931\u6743\u91cd&nbsp;\u03bb&nbsp;\u5728\u524d 10T token \u4e2d\u4e3a 0.3\uff0c\u5269\u4f59 4.8T token 
\u4e2d\u964d\u81f3 0.1\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_13\">\u957f\u4e0a\u4e0b\u6587\u6269\u5c55<\/h3>\n\n\n\n<p>DeepSeek-V3 \u91c7\u7528\u4e0e DeepSeek-V2 \u76f8\u4f3c\u7684\u65b9\u6cd5\u5b9e\u73b0\u957f\u4e0a\u4e0b\u6587\u5904\u7406\u80fd\u529b\u3002\u9884\u8bad\u7ec3\u5b8c\u6210\u540e\uff0c\u7cfb\u7edf\u4f7f\u7528&nbsp;<strong>YaRN<\/strong>&nbsp;\u8fdb\u884c\u4e0a\u4e0b\u6587\u6269\u5c55\uff0c\u901a\u8fc7\u4e24\u4e2a\u5404\u5305\u542b 1000 \u6b65\u7684\u989d\u5916\u8bad\u7ec3\u9636\u6bb5\uff0c\u5c06\u4e0a\u4e0b\u6587\u7a97\u53e3\u4ece 4K \u4f9d\u6b21\u6269\u5c55\u81f3 32K \u548c 128K\u3002\u7cfb\u7edf\u6cbf\u7528\u4e86 DeepSeek-V2 \u7684 YaRN \u914d\u7f6e\uff0c\u4ec5\u5c06\u5176\u5e94\u7528\u4e8e\u89e3\u8026\u7684\u5171\u4eab\u952e&nbsp;<em>k<sub>t<\/sub><sup>R<\/sup><\/em>&nbsp;\u3002\u4e24\u4e2a\u9636\u6bb5\u91c7\u7528\u76f8\u540c\u7684\u8d85\u53c2\u6570\u8bbe\u7f6e\uff1a\u5c3a\u5ea6&nbsp;s=40&nbsp;\uff0c&nbsp;\u03b1=1&nbsp;\uff0c&nbsp;\u03b2=32&nbsp;\uff0c\u7f29\u653e\u56e0\u5b50&nbsp;\u221at=0.1&nbsp;ln&nbsp;s+1&nbsp;\u3002<\/p>\n\n\n\n<p>\u7b2c\u4e00\u9636\u6bb5\u5c06\u5e8f\u5217\u957f\u5ea6\u8bbe\u4e3a 32K\uff0c\u6279\u91cf\u5927\u5c0f\u4e3a 1920\u3002\u7b2c\u4e8c\u9636\u6bb5\u5c06\u5e8f\u5217\u957f\u5ea6\u63d0\u5347\u81f3 128K\uff0c\u76f8\u5e94\u5730\u5c06\u6279\u91cf\u5927\u5c0f\u8c03\u6574\u4e3a 480\u3002\u4e24\u4e2a\u9636\u6bb5\u5747\u91c7\u7528\u4e0e\u9884\u8bad\u7ec3\u672b\u671f\u76f8\u540c\u7684\u5b66\u4e60\u7387&nbsp;7.3\u00d710<sup>\u22126<\/sup>&nbsp;\u3002<\/p>\n\n\n\n<p>\u7ecf\u8fc7\u8fd9\u4e24\u9636\u6bb5\u7684\u6269\u5c55\u8bad\u7ec3\uff0cDeepSeek-V3 \u6210\u529f\u5b9e\u73b0\u4e86\u5bf9\u6700\u957f 128K \u8f93\u5165\u5e8f\u5217\u7684\u9ad8\u6548\u5904\u7406\u3002\u5982\u56fe8\u6240\u793a\uff0c\u5728\u5b8c\u6210\u76d1\u7763\u5fae\u8c03\u540e\uff0c\u6a21\u578b\u5728<strong>&#8220;\u5927\u6d77\u635e\u9488&#8221;(NIAH)<\/strong>\u6d4b\u8bd5\u4e2d\u8868\u73b0\u51fa\u8272\uff0c\u5728\u6574\u4e2a 128K 
\u7684\u4e0a\u4e0b\u6587\u8303\u56f4\u5185\u5747\u4fdd\u6301\u7a33\u5b9a\u7684\u6027\u80fd\u8868\u73b0\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"995\" height=\"488\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-25.png\" alt=\"\" class=\"wp-image-23732\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-25.png 995w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-25-300x147.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-25-768x377.png 768w\" sizes=\"(max-width: 995px) 100vw, 995px\" \/><figcaption>\u56fe 8\uff1a\u5728 NIAH \u4e2d\u7684\u8bc4\u4f30\u7ed3\u679c\u663e\u793a\uff0cDeepSeek-V3 \u5728\u6240\u6709\u4e0a\u4e0b\u6587\u7a97\u53e3\u957f\u5ea6\uff08\u6700\u957f\u53ef\u8fbe 128K\uff09\u4e0a\u5747\u8868\u73b0\u4f18\u5f02\u3002<\/figcaption><\/figure>\n\n\n\n<h3 id=\"h_14890557782_14\">\u8bc4\u4f30<\/h3>\n\n\n\n<p><strong>\u8bc4\u4f30\u57fa\u51c6<\/strong><\/p>\n\n\n\n<p>DeepSeek-V3 \u57fa\u5ea7\u6a21\u578b\u5728\u4ee5\u82f1\u8bed\u548c\u4e2d\u6587\u4e3a\u4e3b\u7684\u591a\u8bed\u8a00\u8bed\u6599\u5e93\u4e0a\u5b8c\u6210\u9884\u8bad\u7ec3\uff0c\u56e0\u6b64\u8bc4\u4f30\u5de5\u4f5c\u4e3b\u8981\u9488\u5bf9\u82f1\u8bed\u3001\u4e2d\u6587\u53ca\u591a\u8bed\u8a00\u57fa\u51c6\u3002<\/p>\n\n\n\n<p>\u8bc4\u4f30\u91c7\u7528\u96c6\u6210\u4e8e<strong>&nbsp;HAI-LLM&nbsp;<\/strong>\u6846\u67b6\u7684\u5185\u90e8\u8bc4\u4f30\u7cfb\u7edf\uff0c\u6db5\u76d6\u4ee5\u4e0b\u7c7b\u522b\uff08\u4e0b\u5212\u7ebf\u8868\u793a\u4e2d\u6587\u57fa\u51c6\uff0c\u53cc\u4e0b\u5212\u7ebf\u8868\u793a\u591a\u8bed\u8a00\u57fa\u51c6\uff09\uff1a<\/p>\n\n\n\n<ul><li><strong>\u591a\u5b66\u79d1\u9009\u62e9\u9898\u8bc4\u4f30<\/strong>\uff1aMMLU\u3001MMLU Redux\u3001MMLU-Pro\u3001MMMLU\u3001<a rel=\"noreferrer noopener\" href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=C-Eval&amp;zhida_source=entity\" 
target=\"_blank\">C-Eval<\/a>&nbsp;\u548c CMMLU<\/li><li><strong>\u8bed\u8a00\u7406\u89e3\u4e0e\u63a8\u7406\u80fd\u529b<\/strong>\uff1aHellaSwag\u3001PIQA\u3001ARC \u548c BigBench Hard (BBH)<\/li><li><strong>\u77e5\u8bc6\u95ee\u7b54\u8bc4\u4f30<\/strong>\uff1aTriviaQA&nbsp;\u548c NaturalQuestions<\/li><li><strong>\u9605\u8bfb\u7406\u89e3\u6d4b\u8bd5<\/strong>\uff1aRACE\u3001DROP\u3001C3 \u548c CMRC<\/li><li><strong>\u6307\u4ee3\u6d88\u6b67\u4efb\u52a1<\/strong>\uff1aCLUEWSC \u548c WinoGrande<\/li><li><strong>\u8bed\u8a00\u5efa\u6a21\u8bc4\u4f30<\/strong>\uff1aPile<\/li><li><strong>\u4e2d\u6587\u7406\u89e3\u4e0e\u6587\u5316\u8ba4\u77e5<\/strong>\uff1aCCPM<\/li><li><strong>\u6570\u5b66\u80fd\u529b\u6d4b\u8bd5<\/strong>\uff1aGSM8K\u3001MATH\u3001MGSM \u548c CMath<\/li><li><strong>\u7f16\u7a0b\u80fd\u529b\u8bc4\u4f30<\/strong>\uff1aHumanEval\u3001LiveCodeBench-Base(0801-1101)\u3001MBPP \u548c CRUXEval<\/li><li><strong>\u7efc\u5408\u80fd\u529b\u6d4b\u8bd5<\/strong>\uff1aAGIEval\uff08\u5305\u542b\u82f1\u8bed\u548c\u4e2d\u6587\u4e24\u4e2a\u5b50\u96c6\uff09<\/li><\/ul>\n\n\n\n<p>\u4f5c\u4e3a\u524d\u671f\u5de5\u4f5c\u7684\u5ef6\u7eed\uff0c\u8bc4\u4f30\u91c7\u7528\u591a\u79cd\u65b9\u6cd5\uff1a\u90e8\u5206\u6570\u636e\u96c6\u4f7f\u7528<strong>\u56f0\u60d1\u5ea6\u6307\u6807<\/strong>\uff0c\u5305\u62ec HellaSwag\u3001PIQA\u3001WinoGrande \u7b49\uff1b\u90e8\u5206\u91c7\u7528<strong>\u751f\u6210\u5f0f\u8bc4\u4f30<\/strong>\uff0c\u5982 TriviaQA\u3001NaturalQuestions\u3001DROP \u7b49\u3002\u5bf9 Pile-test \u91c7\u7528\u8bed\u8a00\u5efa\u6a21\u8bc4\u4f30\u65b9\u6cd5\uff0c\u4f7f\u7528<strong>\u6bcf\u5b57\u8282\u6bd4\u7279\u6570\uff08BPB\uff09<\/strong>\u4f5c\u4e3a\u7edf\u4e00\u5ea6\u91cf\u6807\u51c6\uff0c\u786e\u4fdd\u4e0d\u540c\u5206\u8bcd\u5668\u6a21\u578b\u95f4\u7684\u516c\u5e73\u6bd4\u8f83\u3002<\/p>\n\n\n\n<p><strong>\u8bc4\u4f30\u7ed3\u679c<\/strong><\/p>\n\n\n\n<p>\u88683\u5c55\u793a\u4e86 DeepSeek-V3 
\u57fa\u5ea7\u6a21\u578b\u4e0e\u4e3b\u6d41\u5f00\u6e90\u57fa\u5ea7\u6a21\u578b\u7684\u6027\u80fd\u5bf9\u6bd4\uff0c\u5305\u62ec&nbsp;<strong>DeepSeek-V2-Base\u3001Qwen2.5-72B-Base&nbsp;<\/strong>\u548c<strong>&nbsp;LLaMA-3.1-405B-Base<\/strong>\u3002\u6240\u6709\u6a21\u578b\u5747\u4f7f\u7528\u7edf\u4e00\u7684\u5185\u90e8\u8bc4\u4f30\u6846\u67b6\u548c\u8bc4\u4f30\u6807\u51c6\u3002\u9700\u8981\u8bf4\u660e\u7684\u662f\uff0c\u7531\u4e8e\u8fd1\u51e0\u4e2a\u6708\u8bc4\u4f30\u6846\u67b6\u7684\u66f4\u65b0\uff0cDeepSeek-V2-Base \u7684\u90e8\u5206\u6027\u80fd\u6307\u6807\u4e0e\u6b64\u524d\u62a5\u544a\u7565\u6709\u5dee\u5f02\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"981\" height=\"987\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26.png\" alt=\"\" class=\"wp-image-23733\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26.png 981w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26-298x300.png 298w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26-150x150.png 150w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26-768x773.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-26-120x120.png 120w\" sizes=\"(max-width: 981px) 100vw, 981px\" \/><figcaption>\u88683\uff1aDeepSeek-V3-Base \u4e0e\u5176\u4ed6\u5177\u6709\u4ee3\u8868\u6027\u7684\u5f00\u6e90\u57fa\u7840\u6a21\u578b\u7684\u6027\u80fd\u5bf9\u6bd4\u3002\u6240\u6709\u6a21\u578b\u5747\u5728\u5185\u90e8\u8bc4\u4f30\u6846\u67b6\u4e0b\u8fdb\u884c\u4e86\u6d4b\u8bd5\uff0c\u5e76\u91c7\u7528\u4e86\u7edf\u4e00\u7684\u8bc4\u4f30\u8bbe\u7f6e\u3002\u5f97\u5206\u5dee\u8ddd\u5728 0.3 \u4ee5\u5185\u7684\u6a21\u578b\u88ab\u89c6\u4e3a\u8868\u73b0\u76f8\u5f53\u3002\u8bc4\u4f30\u7ed3\u679c\u8868\u660e\uff0cDeepSeek-V3-Base 
\u5728\u5927\u591a\u6570\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u8868\u73b0\u51fa\u8272\uff0c\u5c24\u5176\u662f\u5728\u6570\u5b66\u548c\u4ee3\u7801\u4efb\u52a1\u4e0a\u8868\u73b0\u5c24\u4e3a\u7a81\u51fa\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u7efc\u5408\u8bc4\u4f30\u663e\u793a\uff0cDeepSeek-V3-Base \u5168\u9762\u8d85\u8d8a DeepSeek-V2-Base \u548c Qwen2.5-72B-Base\uff0c\u5e76\u5728\u7edd\u5927\u591a\u6570\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u9886\u5148 LLaMA-3.1-405B-Base\uff0c<strong>\u6210\u4e3a\u5f53\u524d\u6027\u80fd\u6700\u5f3a\u7684\u5f00\u6e90\u57fa\u5ea7\u6a21\u578b<\/strong>\u3002<\/p>\n\n\n\n<p>\u5177\u4f53\u6027\u80fd\u5bf9\u6bd4\u5982\u4e0b\uff1a<\/p>\n\n\n\n<ol><li>\u76f8\u6bd4 DeepSeek-V2-Base\uff1a\u901a\u8fc7\u6a21\u578b\u67b6\u6784\u4f18\u5316\u3001\u89c4\u6a21\u6269\u5c55\u548c\u6570\u636e\u8d28\u91cf\u63d0\u5347\uff0cDeepSeek-V3-Base \u5b9e\u73b0\u4e86\u663e\u8457\u6027\u80fd\u63d0\u5347\u3002<\/li><li>\u76f8\u6bd4 Qwen2.5-72B-Base\uff1a\u5c3d\u7ba1\u4ec5\u4f7f\u7528\u4e00\u534a\u7684\u6fc0\u6d3b\u53c2\u6570\uff0cDeepSeek-V3-Base \u5728\u82f1\u8bed\u3001\u591a\u8bed\u8a00\u3001\u4ee3\u7801\u548c\u6570\u5b66\u9886\u57df\u5747\u5c55\u73b0\u51fa\u660e\u663e\u4f18\u52bf\u3002\u5728\u4e2d\u6587\u8bc4\u6d4b\u4e2d\uff0c\u9664 CMMLU \u5916\u7684\u5176\u4ed6\u6d4b\u8bd5\u4e5f\u4f18\u4e8e Qwen-2.5-72B\u3002<\/li><li>\u76f8\u6bd4 LLaMA-3.1-405B-Base\uff1a\u5373\u4fbf\u5bf9\u65b9\u62e5\u6709 11 \u500d\u7684\u6fc0\u6d3b\u53c2\u6570\u91cf\uff0cDeepSeek-V3-Base \u5728\u591a\u8bed\u8a00\u3001\u4ee3\u7801\u548c\u6570\u5b66\u9886\u57df\u4ecd\u8868\u73b0\u66f4\u4f18\u3002\u5728\u82f1\u8bed\u548c\u4e2d\u6587\u8bed\u8a00\u80fd\u529b\u8bc4\u6d4b\u4e2d\u8868\u73b0\u76f8\u5f53\u6216\u66f4\u4f73\uff0c\u7279\u522b\u662f\u5728 BBH\u3001MMLU \u7cfb\u5217\u3001DROP\u3001C-Eval\u3001CMMLU \u548c CCPM 
\u7b49\u6d4b\u8bd5\u4e2d\u8868\u73b0\u7a81\u51fa\u3002<\/li><\/ol>\n\n\n\n<p>\u5f97\u76ca\u4e8e\u9ad8\u6548\u7684\u67b6\u6784\u8bbe\u8ba1\u548c\u5168\u9762\u7684\u5de5\u7a0b\u4f18\u5316\uff0cDeepSeek-V3 \u5b9e\u73b0\u4e86\u6781\u9ad8\u7684\u8bad\u7ec3\u6548\u7387\u3002\u5728\u73b0\u6709\u8bad\u7ec3\u6846\u67b6\u548c\u57fa\u7840\u8bbe\u65bd\u4e0b\uff0c\u6bcf\u5904\u74061T token \u4ec5\u9700 180K H800 GPU \u5c0f\u65f6\uff0c\u8fdc\u4f4e\u4e8e 72B \u6216 405B \u5bc6\u96c6\u6a21\u578b\u7684\u8bad\u7ec3\u6210\u672c\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_15\">\u8ba8\u8bba<\/h3>\n\n\n\n<p><strong>MTP\u7b56\u7565\u7684\u6548\u679c\u5206\u6790<\/strong><\/p>\n\n\n\n<p>\u88684\u663e\u793a\u4e86 MTP \u7b56\u7565\u7684\u8be6\u7ec6\u5206\u6790\u7ed3\u679c\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"947\" height=\"405\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-27.png\" alt=\"\" class=\"wp-image-23734\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-27.png 947w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-27-300x128.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-27-768x328.png 768w\" sizes=\"(max-width: 947px) 100vw, 947px\" \/><figcaption>\u8868 4\uff1aMTP \u7b56\u7565\u7684\u6d88\u878d\u5b9e\u9a8c\u7ed3\u679c\u8868\u660e\uff0c\u8be5\u7b56\u7565\u5728\u5927\u591a\u6570\u8bc4\u4f30\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5747\u80fd\u663e\u8457\u63d0\u5347\u6a21\u578b\u6027\u80fd\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u7814\u7a76\u5206\u522b\u5728\u4e24\u79cd\u89c4\u6a21\u7684\u57fa\u51c6\u6a21\u578b\u4e0a\u9a8c\u8bc1\u4e86\u8be5\u7b56\u7565\u7684\u6548\u679c\u3002\u5c0f\u89c4\u6a21\u5b9e\u9a8c\u91c7\u7528\u4e86\u603b\u53c2\u6570\u91cf\u4e3a 15.7B \u7684\u57fa\u7ebf MoE \u6a21\u578b\uff0c\u4f7f\u7528 1.33T token \u8fdb\u884c\u8bad\u7ec3\uff1b\u5927\u89c4\u6a21\u5b9e\u9a8c\u5219\u91c7\u7528\u603b\u53c2\u6570\u91cf\u4e3a 228.7B \u7684\u57fa\u7ebf MoE 
\u6a21\u578b\uff0c\u4f7f\u7528 540B token \u8bad\u7ec3\u3002\u5728\u4fdd\u6301\u8bad\u7ec3\u6570\u636e\u548c\u5176\u4ed6\u67b6\u6784\u4e0d\u53d8\u7684\u60c5\u51b5\u4e0b\uff0c\u4e3a\u57fa\u51c6\u6a21\u578b\u589e\u52a0\u6df1\u5ea6\u4e3a 1 \u7684 MTP \u6a21\u5757\u8fdb\u884c\u5bf9\u6bd4\u5b9e\u9a8c\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u7531\u4e8e\u5728\u63a8\u7406\u9636\u6bb5\u4f1a\u79fb\u9664 MTP \u6a21\u5757\uff0c\u56e0\u6b64\u6bd4\u8f83\u6a21\u578b\u7684\u63a8\u7406\u5f00\u9500\u5b8c\u5168\u76f8\u540c\u3002<\/p>\n\n\n\n<p>\u7ed3\u679c\u8868\u660e\uff0cMTP \u7b56\u7565\u5728\u7edd\u5927\u591a\u6570\u8bc4\u4f30\u6307\u6807\u4e0a\u90fd\u5e26\u6765\u4e86\u6301\u7eed\u7684\u6027\u80fd\u63d0\u5347\u3002<\/p>\n\n\n\n<p><strong>\u65e0\u8f85\u52a9\u635f\u5931\u5e73\u8861\u7b56\u7565\u7684\u6548\u679c\u5206\u6790<\/strong><\/p>\n\n\n\n<p>\u88685\u5c55\u793a\u4e86\u65e0\u8f85\u52a9\u635f\u5931\u5e73\u8861\u7b56\u7565\u7684\u5206\u6790\u7ed3\u679c\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"940\" height=\"413\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-29.png\" alt=\"\" class=\"wp-image-23736\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-29.png 940w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-29-300x132.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-29-768x337.png 768w\" sizes=\"(max-width: 940px) 100vw, 940px\" 
\/><figcaption>\u88685\uff1a\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u7b56\u7565\u7684\u6d88\u878d\u5b9e\u9a8c\u7ed3\u679c\u663e\u793a\uff0c\u4e0e\u5b8c\u5168\u57fa\u4e8e\u8f85\u52a9\u635f\u5931\u7684\u65b9\u6cd5\u76f8\u6bd4\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u7b56\u7565\u5728\u5927\u591a\u6570\u8bc4\u4f30\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u8868\u73b0\u51fa\u66f4\u4f18\u7684\u6a21\u578b\u6027\u80fd\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u7814\u7a76\u540c\u6837\u5728\u4e24\u79cd\u89c4\u6a21\u7684\u57fa\u7ebf\u6a21\u578b\u4e0a\u8fdb\u884c\u4e86\u9a8c\u8bc1\u3002\u5c0f\u89c4\u6a21\u6a21\u578b\u603b\u53c2\u6570\u91cf\u4e3a 15.7B\uff0c\u4f7f\u7528 1.33T token \u8bad\u7ec3\uff1b\u5927\u89c4\u6a21\u6a21\u578b\u603b\u53c2\u6570\u91cf\u4e3a 228.7B\uff0c\u4f7f\u7528 578B token \u8bad\u7ec3\u3002\u8fd9\u4e24\u4e2a\u57fa\u51c6\u6a21\u578b\u90fd\u91c7\u7528\u7eaf\u8f85\u52a9\u635f\u5931\u6765\u5b9e\u73b0\u8d1f\u8f7d\u5e73\u8861\uff0c\u4f7f\u7528\u5e26\u6709 top-K \u76f8\u5173\u5ea6\u5f52\u4e00\u5316\u7684 sigmoid \u95e8\u63a7\u51fd\u6570\uff0c\u5176\u8f85\u52a9\u635f\u5931\u5f3a\u5ea6\u7684\u8d85\u53c2\u6570\u5206\u522b\u4e0e DeepSeek-V2-Lite \u548c DeepSeek-V2 
\u4fdd\u6301\u4e00\u81f4\u3002<\/p>\n\n\n\n<p>\u7814\u7a76\u5728\u4fdd\u6301\u5176\u4ed6\u6761\u4ef6\u4e0d\u53d8\u7684\u60c5\u51b5\u4e0b\uff0c\u79fb\u9664\u6240\u6709\u8f85\u52a9\u635f\u5931\u5e76\u5f15\u5165\u65e0\u8f85\u52a9\u635f\u5931\u5e73\u8861\u7b56\u7565\u8fdb\u884c\u5bf9\u6bd4\u3002\u7ed3\u679c\u663e\u793a\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u7b56\u7565\u5728\u5927\u591a\u6570\u8bc4\u4f30\u6307\u6807\u4e0a\u90fd\u5b9e\u73b0\u4e86\u66f4\u597d\u7684\u6027\u80fd\u8868\u73b0\u3002<\/p>\n\n\n\n<p><strong>\u6279\u6b21\u7ea7\u4e0e\u5e8f\u5217\u7ea7\u8d1f\u8f7d\u5e73\u8861\u5bf9\u6bd4<\/strong><\/p>\n\n\n\n<p>\u65e0\u8f85\u52a9\u635f\u5931\u5e73\u8861\u4e0e\u5e8f\u5217\u7ea7\u8f85\u52a9\u635f\u5931\u7684\u6838\u5fc3\u533a\u522b\u5728\u4e8e\u5e73\u8861\u8303\u56f4\uff1a\u524d\u8005\u662f\u6279\u6b21\u7ea7\uff0c\u540e\u8005\u662f\u5e8f\u5217\u7ea7\u3002<\/p>\n\n\n\n<p><strong>\u6279\u6b21\u7ea7\u5e73\u8861<\/strong>\u63d0\u4f9b\u4e86\u66f4\u7075\u6d3b\u7684\u7ea6\u675f\u6761\u4ef6\uff0c\u4e0d\u8981\u6c42\u6bcf\u4e2a\u5e8f\u5217\u5185\u90e8\u5b9e\u73b0\u9886\u57df\u5e73\u8861\uff0c\u8fd9\u79cd\u7075\u6d3b\u6027\u4f7f\u4e13\u5bb6\u80fd\u591f\u66f4\u597d\u5730\u9002\u5e94\u4e0d\u540c\u9886\u57df\u7684\u7279\u70b9\u3002\u4e3a\u9a8c\u8bc1\u8fd9\u4e00\u89c2\u70b9\uff0c\u7814\u7a76\u5206\u522b\u8bb0\u5f55\u548c\u5206\u6790\u4e86\u4e00\u4e2a 16B \u7684\u57fa\u4e8e\u8f85\u52a9\u635f\u5931\u6a21\u578b\u548c\u4e00\u4e2a 16B \u7684\u65e0\u8f85\u52a9\u635f\u5931\u6a21\u578b\u5728 Pile \u6d4b\u8bd5\u96c6\u5404\u9886\u57df\u7684\u4e13\u5bb6\u8d1f\u8f7d\u60c5\u51b5\u3002\u5982\u56fe9\u6240\u793a\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u6a21\u578b\u786e\u5b9e\u5c55\u73b0\u51fa\u66f4\u660e\u663e\u7684\u4e13\u5bb6\u4e13\u4e1a\u5316\u7279\u5f81\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"470\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-28-1024x470.png\" alt=\"\" class=\"wp-image-23735\" 
srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-28-1024x470.png 1024w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-28-300x138.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-28-768x353.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-28.png 1082w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><figcaption>\u56fe9\uff1aPile \u6d4b\u8bd5\u96c6\u4e09\u4e2a\u9886\u57df\u4e2d\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u6a21\u578b\u4e0e\u57fa\u4e8e\u8f85\u52a9\u635f\u5931\u6a21\u578b\u7684\u4e13\u5bb6\u8d1f\u8f7d\u5206\u5e03\u5bf9\u6bd4\u3002\u7ed3\u679c\u663e\u793a\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u6a21\u578b\u5c55\u73b0\u51fa\u66f4\u5f3a\u7684\u4e13\u5bb6\u7279\u5316\u80fd\u529b\u3002\u76f8\u5bf9\u4e13\u5bb6\u8d1f\u8f7d\u5b9a\u4e49\u4e3a\u5b9e\u9645\u4e13\u5bb6\u8d1f\u8f7d\u4e0e\u7406\u8bba\u5e73\u8861\u8d1f\u8f7d\u7684\u6bd4\u503c\u3002\u7531\u4e8e\u7bc7\u5e45\u9650\u5236\uff0c\u4ec5\u5c55\u793a\u4e24\u4e2a\u5c42\u7684\u7ed3\u679c\uff0c\u5b8c\u6574\u6570\u636e\u53ef\u53c2\u89c1\u539f\u6587\u9644\u5f55C\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u4e3a\u6df1\u5165\u63a2\u7a76\u8fd9\u79cd\u7075\u6d3b\u6027\u4e0e\u6027\u80fd\u63d0\u5347\u4e4b\u95f4\u7684\u5173\u8054\uff0c\u7814\u7a76\u8fd8\u8bbe\u8ba1\u5e76\u9a8c\u8bc1\u4e86\u4e00\u79cd\u6279\u6b21\u7ea7\u8f85\u52a9\u635f\u5931\u65b9\u6cd5\uff0c\u8be5\u65b9\u6cd5\u5728\u8bad\u7ec3\u6279\u6b21\u800c\u975e\u5e8f\u5217\u5c42\u9762\u5b9e\u73b0\u8d1f\u8f7d\u5e73\u8861\u3002\u5b9e\u9a8c\u8868\u660e\uff0c\u5728\u8fbe\u5230\u76f8\u4f3c\u7684\u6279\u6b21\u7ea7\u8d1f\u8f7d\u5e73\u8861\u7a0b\u5ea6\u65f6\uff0c\u6279\u6b21\u7ea7\u8f85\u52a9\u635f\u5931\u80fd\u591f\u5b9e\u73b0\u4e0e\u65e0\u8f85\u52a9\u635f\u5931\u65b9\u6cd5\u76f8\u5f53\u7684\u6027\u80fd\u3002<\/p>\n\n\n\n<p>\u5177\u4f53\u800c\u8a00\uff0c\u5728 1B MoE \u6a21\u578b\u4e0a\u7684\u9a8c\u8bc1\u635f\u5931\u5206\u522b\u4e3a\uff1a\u5e8f\u5217\u7ea7\u8f85\u52a9\u635f\u5931 
2.258\uff0c\u65e0\u8f85\u52a9\u635f\u5931\u65b9\u6cd5 2.253\uff0c\u6279\u6b21\u7ea7\u8f85\u52a9\u635f\u5931 2.253\u30023B MoE \u6a21\u578b\u7684\u5b9e\u9a8c\u4e5f\u663e\u793a\u7c7b\u4f3c\u7ed3\u679c\uff1a\u5e8f\u5217\u7ea7\u8f85\u52a9\u635f\u5931\u6a21\u578b\u7684\u9a8c\u8bc1\u635f\u5931\u4e3a 2.085\uff0c\u800c\u65e0\u8f85\u52a9\u635f\u5931\u65b9\u6cd5\u548c\u6279\u6b21\u7ea7\u8f85\u52a9\u635f\u5931\u65b9\u6cd5\u5747\u8fbe\u5230 2.080\u3002<\/p>\n\n\n\n<p>\u5c3d\u7ba1\u6279\u6b21\u7ea7\u8d1f\u8f7d\u5e73\u8861\u65b9\u6cd5\u5c55\u73b0\u51fa\u7a33\u5b9a\u7684\u6027\u80fd\u4f18\u52bf\uff0c\u4f46\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\u4ecd\u9762\u4e34\u4e24\u4e2a\u6548\u7387\u6311\u6218\uff1a<\/p>\n\n\n\n<ol><li>\u4e2a\u522b\u5e8f\u5217\u6216\u5c0f\u6279\u91cf\u6570\u636e\u53ef\u80fd\u51fa\u73b0\u8d1f\u8f7d\u4e0d\u5747\u8861\uff1b<\/li><li>\u63a8\u7406\u9636\u6bb5\u53ef\u80fd\u56e0\u9886\u57df\u8fc1\u79fb\u5bfc\u81f4\u8d1f\u8f7d\u5931\u8861\u3002<\/li><\/ol>\n\n\n\n<p>\u5bf9\u4e8e\u7b2c\u4e00\u4e2a\u6311\u6218\uff0c\u901a\u8fc7\u91c7\u7528<strong>\u5927\u89c4\u6a21\u4e13\u5bb6\u5e76\u884c\u548c\u6570\u636e\u5e76\u884c\u7684\u8bad\u7ec3\u6846\u67b6<\/strong>\u5f97\u5230\u4e86\u6709\u6548\u89e3\u51b3\uff0c\u8fd9\u786e\u4fdd\u4e86\u6bcf\u4e2a\u5fae\u6279\u91cf\u90fd\u5177\u6709\u8db3\u591f\u89c4\u6a21\u3002\u5bf9\u4e8e\u7b2c\u4e8c\u4e2a\u6311\u6218\uff0c\u7814\u7a76\u8bbe\u8ba1\u4e86<strong>\u5e26\u6709\u5197\u4f59\u4e13\u5bb6\u90e8\u7f72\u7684\u9ad8\u6548\u63a8\u7406\u6846\u67b6<\/strong>\u6765\u5e94\u5bf9\u3002<\/p>\n\n\n\n<h2 id=\"h_14890557782_16\">\u540e\u8bad\u7ec3<\/h2>\n\n\n\n<h3 id=\"h_14890557782_17\">\u76d1\u7763\u5fae\u8c03\uff08SFT\uff09<\/h3>\n\n\n\n<p>\u7814\u7a76\u6784\u5efa\u4e86\u5305\u542b 150 
\u4e07\u4e2a\u591a\u9886\u57df\u5b9e\u4f8b\u7684\u6307\u4ee4\u8c03\u4f18\u6570\u636e\u96c6\uff0c\u9488\u5bf9\u4e0d\u540c\u9886\u57df\u7279\u70b9\u91c7\u7528\u4e86\u76f8\u5e94\u7684\u6570\u636e\u521b\u5efa\u65b9\u6cd5\u3002<\/p>\n\n\n\n<p><strong>\u63a8\u7406\u6570\u636e\u5904\u7406\uff1a<\/strong>&nbsp;\u5728\u6570\u5b66\u3001\u4ee3\u7801\u7ade\u8d5b\u548c\u903b\u8f91\u8c1c\u9898\u7b49\u63a8\u7406\u7c7b\u4efb\u52a1\u4e2d\uff0c\u7cfb\u7edf\u91c7\u7528\u5185\u90e8 DeepSeek-R1 \u6a21\u578b\u751f\u6210\u6570\u636e\u3002\u867d\u7136 R1 \u751f\u6210\u7684\u6570\u636e\u5177\u6709\u8f83\u9ad8\u7684\u51c6\u786e\u6027\uff0c\u4f46\u540c\u65f6\u5b58\u5728\u63a8\u7406\u5197\u957f\u3001\u683c\u5f0f\u4e0d\u89c4\u8303\u548c\u8f93\u51fa\u8fc7\u957f\u7b49\u95ee\u9898\u3002\u56e0\u6b64\uff0c\u7814\u7a76\u7684\u6838\u5fc3\u76ee\u6807\u662f\u5728\u4fdd\u6301 R1 \u6a21\u578b\u9ad8\u51c6\u786e\u6027\u7684\u540c\u65f6\uff0c\u5b9e\u73b0\u8f93\u51fa\u7684\u6e05\u6670\u7b80\u6d01\u3002<\/p>\n\n\n\n<p>\u5177\u4f53\u5b9e\u65bd\u65b9\u6cd5\u5982\u4e0b\uff1a\u9996\u5148\u9488\u5bf9\u7279\u5b9a\u9886\u57df\uff08\u5982\u4ee3\u7801\u3001\u6570\u5b66\u6216\u901a\u7528\u63a8\u7406\uff09\u5f00\u53d1\u4e13\u5bb6\u6a21\u578b\uff0c\u91c7\u7528 SFT \u548c RL \u76f8\u7ed3\u5408\u7684\u8bad\u7ec3\u6d41\u7a0b\u3002\u8be5\u4e13\u5bb6\u6a21\u578b\u968f\u540e\u4f5c\u4e3a\u6700\u7ec8\u6a21\u578b\u7684\u6570\u636e\u751f\u6210\u5668\u3002<\/p>\n\n\n\n<p>\u5bf9\u6bcf\u4e2a\u8bad\u7ec3\u5b9e\u4f8b\uff0c\u7cfb\u7edf\u751f\u6210\u4e24\u7c7b SFT \u6837\u672c\uff1a\u4e00\u7c7b\u662f\u95ee\u9898\u4e0e\u539f\u59cb\u7b54\u6848\u7684\u76f4\u63a5\u914d\u5bf9\uff0c\u53e6\u4e00\u7c7b\u5219\u5f15\u5165\u7cfb\u7edf\u63d0\u793a\u8bcd\uff0c\u5c06\u5176\u4e0e\u95ee\u9898\u548c R1 
\u7b54\u6848\u7ec4\u5408\u3002\u7cfb\u7edf\u63d0\u793a\u7ecf\u8fc7\u4f18\u5316\u8bbe\u8ba1\uff0c\u5305\u542b\u4e86\u5f15\u5bfc\u6a21\u578b\u751f\u6210\u5177\u6709\u81ea\u6211\u53cd\u601d\u548c\u9a8c\u8bc1\u673a\u5236\u54cd\u5e94\u7684\u6307\u4ee4\u3002<\/p>\n\n\n\n<p>\u5728RL\u9636\u6bb5\uff0c\u6a21\u578b\u901a\u8fc7\u9ad8\u6e29\u91c7\u6837\u751f\u6210\u54cd\u5e94\uff0c\u5373\u4f7f\u5728\u6ca1\u6709\u660e\u786e\u7cfb\u7edf\u63d0\u793a\u7684\u60c5\u51b5\u4e0b\uff0c\u4e5f\u80fd\u6709\u6548\u878d\u5408 R1 \u751f\u6210\u6570\u636e\u548c\u539f\u59cb\u6570\u636e\u7684\u7279\u5f81\u3002\u7ecf\u8fc7\u6570\u767e\u8f6eRL\u8fed\u4ee3\uff0c\u4e2d\u95f4\u6a21\u578b\u6210\u529f\u6574\u5408\u4e86 R1 \u7684\u54cd\u5e94\u6a21\u5f0f\uff0c\u663e\u8457\u63d0\u5347\u4e86\u6574\u4f53\u6027\u80fd\u3002\u968f\u540e\uff0c\u7814\u7a76\u91c7\u7528\u62d2\u7edd\u91c7\u6837\u65b9\u6cd5\uff0c\u5229\u7528\u4e13\u5bb6\u6a21\u578b\u4f5c\u4e3a\u6570\u636e\u6e90\uff0c\u4e3a\u6700\u7ec8\u6a21\u578b\u7b5b\u9009\u9ad8\u8d28\u91cf\u7684 SFT \u6570\u636e\u3002\u8fd9\u79cd\u65b9\u6cd5\u65e2\u4fdd\u6301\u4e86 DeepSeek-R1 \u7684\u9ad8\u51c6\u786e\u6027\uff0c\u53c8\u786e\u4fdd\u4e86\u8f93\u51fa\u7684\u7b80\u6d01\u6027\u548c\u6709\u6548\u6027\u3002<\/p>\n\n\n\n<p><strong>\u975e\u63a8\u7406\u6570\u636e\u5904\u7406\uff1a&nbsp;<\/strong>\u5bf9\u4e8e\u521b\u610f\u5199\u4f5c\u3001\u89d2\u8272\u626e\u6f14\u548c\u57fa\u7840\u95ee\u7b54\u7b49\u975e\u63a8\u7406\u4efb\u52a1\uff0c\u7cfb\u7edf\u5229\u7528 DeepSeek-V2.5 \u751f\u6210\u54cd\u5e94\uff0c\u5e76\u901a\u8fc7\u4eba\u5de5\u6807\u6ce8\u786e\u4fdd\u6570\u636e\u8d28\u91cf\u3002<\/p>\n\n\n\n<p><strong>SFT \u8bad\u7ec3\u914d\u7f6e\uff1a<\/strong>&nbsp;\u7814\u7a76\u5bf9 DeepSeek-V3-Base \u8fdb\u884c\u4e86\u4e24\u8f6e SFT 
\u6570\u636e\u96c6\u8bad\u7ec3\uff0c\u91c7\u7528\u4f59\u5f26\u8870\u51cf\u7684\u5b66\u4e60\u7387\u8c03\u5ea6\u7b56\u7565\uff0c\u521d\u59cb\u5b66\u4e60\u7387\u4e3a&nbsp;5\u00d710<sup>\u22126<\/sup>\uff0c\u9010\u6b65\u964d\u4f4e\u81f3&nbsp;1\u00d710<sup>\u22126<\/sup>\u3002\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u91c7\u7528\u591a\u6837\u672c\u5e8f\u5217\u6253\u5305\u6280\u672f\uff0c\u540c\u65f6\u901a\u8fc7\u6837\u672c\u63a9\u7801\u673a\u5236\u786e\u4fdd\u5404\u6837\u672c\u95f4\u7684\u72ec\u7acb\u6027\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_18\">\u5f3a\u5316\u5b66\u4e60\uff08RL\uff09<\/h3>\n\n\n\n<p><strong>\u5956\u52b1\u6a21\u578b\u8bbe\u8ba1<\/strong><\/p>\n\n\n\n<p>\u5728\u5f3a\u5316\u5b66\u4e60\u8fc7\u7a0b\u4e2d\uff0c\u7cfb\u7edf\u540c\u65f6\u91c7\u7528<strong>\u89c4\u5219\u578b<\/strong>\u548c<strong>\u6a21\u578b\u578b<\/strong>\u4e24\u79cd<strong>\u5956\u52b1\u6a21\u578b(Reward Model, RM)<\/strong>\u3002<\/p>\n\n\n\n<p><strong>\u89c4\u5219\u578b\u5956\u52b1\u6a21\u578b\uff1a<\/strong>\u5bf9\u4e8e\u53ef\u901a\u8fc7\u660e\u786e\u89c4\u5219\u9a8c\u8bc1\u7684\u4efb\u52a1\uff0c\u7cfb\u7edf\u91c7\u7528\u89c4\u5219\u578b\u5956\u52b1\u673a\u5236\u8fdb\u884c\u53cd\u9988\u8bc4\u4f30\u3002\u4f8b\u5982\uff0c\u5728\u5904\u7406\u5177\u6709\u786e\u5b9a\u6027\u7b54\u6848\u7684\u6570\u5b66\u95ee\u9898\u65f6\uff0c\u8981\u6c42\u6a21\u578b\u5728\u7279\u5b9a\u683c\u5f0f\uff08\u5982\u65b9\u6846\u5185\uff09\u7ed9\u51fa\u6700\u7ec8\u7b54\u6848\uff0c\u4ece\u800c\u53ef\u4ee5\u901a\u8fc7\u89c4\u5219\u8fdb\u884c\u81ea\u52a8\u9a8c\u8bc1\u3002\u540c\u6837\uff0c\u5728\u5904\u7406&nbsp;LeetCode&nbsp;\u7f16\u7a0b\u9898\u65f6\uff0c\u7cfb\u7edf\u53ef\u901a\u8fc7\u7f16\u8bd1\u5668\u6267\u884c\u6d4b\u8bd5\u7528\u4f8b\u751f\u6210\u5ba2\u89c2\u53cd\u9988\u3002\u8fd9\u79cd\u57fa\u4e8e\u89c4\u5219\u7684\u9a8c\u8bc1\u65b9\u6cd5\u5177\u6709\u8f83\u9ad8\u7684\u53ef\u9760\u6027\uff0c\u80fd\u6709\u6548\u9632\u6b62\u6a21\u578b\u7684\u6295\u673a\u884c\u4e3a\u3002<\/p>\n\n\n\n<p><strong>\u6a21\u578b\u578b\u5956\u52b1\u6a21\u578b\uff1a<\/stron
g>\u5bf9\u4e8e\u5177\u6709\u6807\u51c6\u7b54\u6848\u4f46\u5f62\u5f0f\u7075\u6d3b\u7684\u95ee\u9898\uff0c\u7cfb\u7edf\u4f7f\u7528\u5956\u52b1\u6a21\u578b\u8bc4\u4f30\u8f93\u51fa\u4e0e\u6807\u51c6\u7b54\u6848\u7684\u5339\u914d\u7a0b\u5ea6\u3002\u800c\u5bf9\u4e8e\u521b\u610f\u5199\u4f5c\u7b49\u7f3a\u4e4f\u6807\u51c6\u7b54\u6848\u7684\u4efb\u52a1\uff0c\u5956\u52b1\u6a21\u578b\u5219\u57fa\u4e8e\u95ee\u9898\u548c\u56de\u7b54\u7684\u6574\u4f53\u6027\u7ed9\u51fa\u8bc4\u4f30\u53cd\u9988\u3002\u8be5\u5956\u52b1\u6a21\u578b\u57fa\u4e8e DeepSeek-V3 \u7684 SFT checkpoint \u8fdb\u884c\u8bad\u7ec3\u3002\u4e3a\u589e\u5f3a\u6a21\u578b\u53ef\u9760\u6027\uff0c\u7cfb\u7edf\u6784\u5efa\u7684\u504f\u597d\u6570\u636e\u4e0d\u4ec5\u5305\u542b\u6700\u7ec8\u8bc4\u5206\uff0c\u8fd8\u5305\u542b\u63a8\u5bfc\u8bc4\u5206\u7684\u5b8c\u6574\u63a8\u7406\u8fc7\u7a0b\uff0c\u8fd9\u79cd\u8bbe\u8ba1\u6709\u6548\u964d\u4f4e\u4e86\u7279\u5b9a\u4efb\u52a1\u4e2d\u7684\u5956\u52b1\u626d\u66f2\u98ce\u9669\u3002<\/p>\n\n\n\n<p><strong>\u7fa4\u7ec4\u76f8\u5bf9\u7b56\u7565\u4f18\u5316(Group Relative Policy Optimization, GRPO)<\/strong><\/p>\n\n\n\n<p>\u7cfb\u7edf\u91c7\u7528\u4e0e DeepSeek-V2 \u76f8\u4f3c\u7684GRPO\u65b9\u6cd5\u3002\u8fd9\u79cd\u65b9\u6cd5\u4e0d\u9700\u8981\u4e0e\u7b56\u7565\u6a21\u578b\u89c4\u6a21\u76f8\u5f53\u7684\u8bc4\u8bba\u5bb6\u6a21\u578b\uff0c\u800c\u662f\u901a\u8fc7\u7fa4\u7ec4\u8bc4\u5206\u4f30\u8ba1\u57fa\u7ebf\u3002\u5177\u4f53\u5b9e\u73b0\u4e2d\uff0c\u5bf9\u6bcf\u4e2a\u95ee\u9898&nbsp;q&nbsp;\uff0cGRPO \u4ece\u539f\u7b56\u7565\u6a21\u578b&nbsp;\u03c0<sub>\u03b8old<\/sub>&nbsp;\u91c7\u6837\u4e00\u7ec4\u8f93\u51fa{o1,o2,\u00b7\u00b7\u00b7,oG} \uff0c\u5e76\u901a\u8fc7\u6700\u5927\u5316\u4ee5\u4e0b\u76ee\u6807\u51fd\u6570\u4f18\u5316\u7b56\u7565\u6a21\u578b&nbsp;\u03c0\u03b8\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"187\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-30-1024x187.png\" alt=\"\" 
class=\"wp-image-23737\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-30-1024x187.png 1024w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-30-300x55.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-30-768x140.png 768w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-30.png 1047w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p>\u5176\u4e2d\u03b5&nbsp;\u548c&nbsp;\u03b2&nbsp;\u8868\u793a\u8d85\u53c2\u6570\uff1b\u03c0<sub>ref<\/sub>&nbsp;\u4ee3\u8868\u53c2\u8003\u6a21\u578b\uff1bA<sub>i<\/sub>&nbsp;\u8868\u793a\u4f18\u52bf\u51fd\u6570\uff0c\u5176\u8ba1\u7b97\u57fa\u4e8e\u6bcf\u7ec4\u5185\u8f93\u51fa\u6240\u5bf9\u5e94\u7684\u5956\u52b1\u5e8f\u5217 {r<sub>1<\/sub>,r<sub>2<\/sub>,&#8230;,r<sub>G<\/sub>}\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"712\" height=\"83\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-31.png\" alt=\"\" class=\"wp-image-23738\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-31.png 712w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-31-300x35.png 300w\" sizes=\"(max-width: 712px) 100vw, 712px\" \/><\/figure>\n\n\n\n<p>\u5728RL\u8fc7\u7a0b\u4e2d\uff0c\u7cfb\u7edf\u878d\u5408\u4e86\u7f16\u7a0b\u3001\u6570\u5b66\u3001\u5199\u4f5c\u3001\u89d2\u8272\u626e\u6f14\u548c\u95ee\u7b54\u7b49\u591a\u9886\u57df\u7684\u63d0\u793a\u8bcd\u4efb\u52a1\u3002\u8fd9\u79cd\u591a\u6837\u5316\u7684\u8bad\u7ec3\u7b56\u7565\u4e0d\u4ec5\u63d0\u9ad8\u4e86\u6a21\u578b\u5bf9\u4eba\u7c7b\u504f\u597d\u7684\u9002\u5e94\u6027\uff0c\u8fd8\u5728\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u53d6\u5f97\u4e86\u663e\u8457\u63d0\u5347\uff0c\u7279\u522b\u662f\u5728\u76d1\u7763\u5fae\u8c03\u6570\u636e\u6709\u9650\u7684\u573a\u666f\u4e0b\u8868\u73b0\u51fa\u8272\u3002<\/p>\n\n\n\n<h3 
id=\"h_14890557782_19\">\u8bc4\u4f30<\/h3>\n\n\n\n<p><strong>\u8bc4\u4f30\u65b9\u6cd5\u8bbe\u7f6e<\/strong><\/p>\n\n\n\n<p><strong>\u8bc4\u4f30\u57fa\u51c6<\/strong>\uff1a\u9664\u57fa\u7840\u6a21\u578b\u6d4b\u8bd5\u6240\u7528\u7684\u57fa\u51c6\u5916\uff0c\u7cfb\u7edf\u8fd8\u5728\u4e0b\u5217\u57fa\u51c6\u4e0a\u8bc4\u4f30\u4e86\u6307\u4ee4\u8c03\u4f18\u6a21\u578b\u7684\u8868\u73b0\uff1a<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=IFEval&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">IFEval<\/a>\u3001FRAMES\u3001LongBench v2\u3001GPQA\u3001SimpleQA\u3001C-SimpleQA\u3001SWE-Bench Verified\u3001Aider\u3001LiveCodeBench\uff08\u9009\u53d6 2024 \u5e74 8-11 \u6708\u9898\u76ee\uff09\u3001<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=Codeforces&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">Codeforces<\/a>\u30012024 \u5e74\u4e2d\u56fd\u9ad8\u4e2d\u6570\u5b66\u5965\u6797\u5339\u514b\uff08CNMO\uff09\u548c 2024 \u5e74\u7f8e\u56fd\u6570\u5b66\u9080\u8bf7\u8d5b\uff08<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=AIME&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">AIME<\/a>\uff09\u3002<\/p>\n\n\n\n<p><strong>\u57fa\u51c6\u6a21\u578b\u5bf9\u6bd4\uff1a<\/strong>\u7cfb\u7edf\u9009\u53d6\u4e86\u591a\u4e2a\u4ee3\u8868\u6027\u6a21\u578b\u4f5c\u4e3a\u6027\u80fd\u5bf9\u7167\u57fa\u51c6\uff0c\u5305\u62ec DeepSeek-V2-0506\u3001DeepSeek-V2.5-0905\u3001Qwen2.5 72B Instruct\u3001LLaMA-3.1 405B Instruct\u3001Claude-Sonnet-3.5-1022 \u548c GPT-4o-0513\u3002\u5176\u4e2d DeepSeek-V2 \u7cfb\u5217\u9009\u53d6\u6700\u5177\u4ee3\u8868\u6027\u7684\u7248\u672c\uff0c\u95ed\u6e90\u6a21\u578b\u5219\u901a\u8fc7\u5176\u5b98\u65b9 API 
\u8fdb\u884c\u8bc4\u4f30\u3002<\/p>\n\n\n\n<p><strong>\u8bc4\u4f30\u5177\u4f53\u914d\u7f6e\uff1a<\/strong>\u5728\u6807\u51c6\u57fa\u51c6\u8bc4\u4f30\u4e2d\uff0cMMLU\u3001DROP\u3001GPQA \u548c SimpleQA \u91c7\u7528 simple-evals \u6846\u67b6\u7684\u6807\u51c6\u63d0\u793a\u8bcd\u6a21\u677f\u3002MMLU-Redux \u7684\u96f6\u6837\u672c\u6d4b\u8bd5\u91c7\u7528 Zero-Eval \u63d0\u793a\u8bcd\u683c\u5f0f\u3002\u5176\u4ed6\u6570\u636e\u96c6\u5219\u9075\u5faa\u539f\u59cb\u8bc4\u4f30\u65b9\u6848\uff0c\u4f7f\u7528\u6570\u636e\u96c6\u5f00\u53d1\u8005\u63d0\u4f9b\u7684\u9ed8\u8ba4\u63d0\u793a\u8bcd\u6a21\u677f\u3002<\/p>\n\n\n\n<p>\u5728\u4ee3\u7801\u548c\u6570\u5b66\u80fd\u529b\u8bc4\u4f30\u65b9\u9762\uff1a<\/p>\n\n\n\n<ul><li>HumanEval-Mul \u6570\u636e\u96c6\u8986\u76d6 Python\u3001Java\u3001<a href=\"https:\/\/zhida.zhihu.com\/search?content_id=252001026&amp;content_type=Article&amp;match_order=1&amp;q=Cpp&amp;zhida_source=entity\" target=\"_blank\" rel=\"noreferrer noopener\">Cpp<\/a>\u3001C#\u3001JavaScript\u3001TypeScript\u3001PHP \u548c Bash \u5171 8 \u79cd\u4e3b\u6d41\u7f16\u7a0b\u8bed\u8a00\u3002<\/li><li>LiveCodeBench\uff08\u4f7f\u7528 2024 \u5e74 8-11 \u6708\u6570\u636e\uff09\u7684\u8bc4\u4f30\u540c\u65f6\u91c7\u7528CoT\u548c\u76f4\u63a5\u8f93\u51fa\u4e24\u79cd\u65b9\u5f0f\u3002<\/li><li>Codeforces \u8bc4\u4f30\u91c7\u7528\u53c2\u8d5b\u8005\u767e\u5206\u4f4d\u6570\u4f5c\u4e3a\u8861\u91cf\u6807\u51c6\u3002<\/li><li>SWE-Bench Verified \u91c7\u7528\u65e0\u4ee3\u7406\u6846\u67b6\u8fdb\u884c\u8bc4\u4f30\u3002<\/li><li>Aider \u76f8\u5173\u6d4b\u8bd5\u91c7\u7528&#8220;diff&#8221;\u683c\u5f0f\u8bc4\u4f30\u3002<\/li><\/ul>\n\n\n\n<p>\u5728\u6570\u5b66\u80fd\u529b\u6d4b\u8bd5\u4e2d\uff0cAIME \u548c CNMO 2024 \u4f7f\u7528\u91c7\u6837\u6e29\u5ea6 0.7\uff0c\u7ed3\u679c\u53d6 16 \u6b21\u8fd0\u884c\u7684\u5e73\u5747\u503c\uff0c\u800c MATH-500 
\u5219\u91c7\u7528\u8d2a\u5a6a\u89e3\u7801\u7b56\u7565\u3002<\/p>\n\n\n\n<p>\u6240\u6709\u8bc4\u4f30\u4e2d\uff0c\u6a21\u578b\u7684\u6700\u5927\u8f93\u51fa\u957f\u5ea6\u9650\u5236\u4e3a 8192 \u4e2a token\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_20\">\u6807\u51c6\u8bc4\u4f30<\/h3>\n\n\n\n<p>\u88686\u7684\u8bc4\u4f30\u7ed3\u679c\u663e\u793a\uff0cDeepSeek-V3 \u5728\u5f00\u6e90\u6a21\u578b\u4e2d\u8868\u73b0\u6700\u4f73\uff0c\u4e14\u4e0e GPT-4o \u548c Claude-3.5-Sonnet \u7b49\u9876\u7ea7\u95ed\u6e90\u6a21\u578b\u76f8\u6bd4\u5177\u6709\u7ade\u4e89\u529b\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"1006\" height=\"717\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-32.png\" alt=\"\" class=\"wp-image-23739\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-32.png 1006w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-32-300x214.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-32-768x547.png 768w\" sizes=\"(max-width: 1006px) 100vw, 1006px\" \/><figcaption>\u8868 6 | DeepSeek-V3 \u4e0e\u5176\u4ed6\u5177\u6709\u4ee3\u8868\u6027\u7684\u804a\u5929\u6a21\u578b\u7684\u6bd4\u8f83\u3002\u6240\u6709\u6a21\u578b\u5747\u5728\u9650\u5236\u8f93\u51fa\u957f\u5ea6\u4e3a 8K \u7684\u914d\u7f6e\u4e0b\u8fdb\u884c\u8bc4\u4f30\u3002\u5305\u542b\u5c11\u4e8e 1000 \u4e2a\u6837\u672c\u7684\u57fa\u51c6\u6d4b\u8bd5\u4f1a\u901a\u8fc7\u591a\u6b21\u4e0d\u540c\u6e29\u5ea6\u8bbe\u7f6e\u7684\u6d4b\u8bd5\u6765\u5f97\u51fa\u7a33\u5065\u7684\u6700\u7ec8\u7ed3\u679c\u3002DeepSeek-V3 \u662f\u8868\u73b0\u6700\u4f73\u7684\u5f00\u6e90\u6a21\u578b\uff0c\u540c\u65f6\u5728\u4e0e\u524d\u6cbf\u95ed\u6e90\u6a21\u578b\u7684\u5bf9\u6bd4\u4e2d\u4e5f\u5c55\u73b0\u51fa\u5f3a\u5927\u7684\u7ade\u4e89\u529b\u3002<\/figcaption><\/figure>\n\n\n\n<p><strong>\u82f1\u8bed\u80fd\u529b\u8bc4\u4f30<\/strong>\uff1a\u5728 
MMLU\uff08\u8bc4\u4f30\u5927\u8bed\u8a00\u6a21\u578b\u591a\u9886\u57df\u77e5\u8bc6\u548c\u4efb\u52a1\u80fd\u529b\u7684\u6807\u51c6\u57fa\u51c6\uff09\u4e2d\uff0cDeepSeek-V3 \u4e0e LLaMA 3.1-405B\u3001GPT-4o \u548c Claude-Sonnet 3.5 \u7b49\u9876\u7ea7\u6a21\u578b\u8868\u73b0\u76f8\u5f53\uff0c\u660e\u663e\u8d85\u8d8a Qwen2.5-72B\u3002<\/p>\n\n\n\n<p>\u5728\u66f4\u5177\u6311\u6218\u6027\u7684 MMLU-Pro \u6559\u80b2\u77e5\u8bc6\u8bc4\u6d4b\u4e2d\uff0cDeepSeek-V3 \u7684\u8868\u73b0\u4ec5\u6b21\u4e8e Claude-Sonnet 3.5\u3002<\/p>\n\n\n\n<p>\u5728\u7ecf\u8fc7\u6807\u7b7e\u4fee\u6b63\u7684 MMLU-Redux \u6d4b\u8bd5\u4e2d\uff0cDeepSeek-V3 \u7684\u8868\u73b0\u9886\u5148\u5176\u4ed6\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u5728\u535a\u58eb\u7ea7\u8bc4\u6d4b GPQA-Diamond \u4e2d\uff0cDeepSeek-V3 \u4ec5\u843d\u540e\u4e8e Claude 3.5 Sonnet\uff0c\u4f46\u5927\u5e45\u9886\u5148\u5176\u4ed6\u7ade\u4e89\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u5728\u957f\u6587\u672c\u7406\u89e3\u65b9\u9762\uff0cDeepSeek-V3 \u7ee7\u7eed\u4fdd\u6301\u9876\u7ea7\u6c34\u5e73\u3002\u5728 DROP \u7684\u5c11\u6837\u672c\u6d4b\u8bd5\u4e2d\u8fbe\u5230 91.6 \u7684 F1 \u5206\u6570\uff0c\u9886\u5148\u6240\u6709\u5bf9\u6bd4\u6a21\u578b\u3002\u5728\u9700\u8981\u5904\u7406 10 \u4e07 token \u4e0a\u4e0b\u6587\u7684 FRAMES \u95ee\u7b54\u6d4b\u8bd5\u4e2d\uff0c\u4ec5\u6b21\u4e8e GPT-4o \u4f46\u663e\u8457\u4f18\u4e8e\u5176\u4ed6\u6a21\u578b\uff0c\u5145\u5206\u5c55\u793a\u4e86\u5176\u5904\u7406\u8d85\u957f\u6587\u672c\u7684\u80fd\u529b\u3002\u5728\u6700\u65b0\u53d1\u5e03\u7684 LongBench v2 \u6d4b\u8bd5\u4e2d\u7684\u6700\u4f18\u8868\u73b0\uff0c\u8fdb\u4e00\u6b65\u8bc1\u5b9e\u4e86\u8fd9\u4e00\u80fd\u529b\u3002<\/p>\n\n\n\n<p>\u5728 SimpleQA \u4e8b\u5b9e\u6027\u77e5\u8bc6\u6d4b\u8bd5\u4e2d\uff0cDeepSeek-V3 \u867d\u7136\u843d\u540e\u4e8e GPT-4o \u548c 
Claude-Sonnet\uff0c\u4f46\u8fd9\u4e3b\u8981\u6e90\u4e8e\u5176\u8d44\u6e90\u5206\u914d\u7b56\u7565\u2014\u2014\u66f4\u591a\u8bad\u7ec3\u8d44\u6e90\u7528\u4e8e\u4e2d\u6587\u77e5\u8bc6\u5b66\u4e60\uff0c\u56e0\u6b64\u5728 C-SimpleQA \u4e2d\u8868\u73b0\u4f18\u5f02\u3002\u5728\u6307\u4ee4\u9075\u5faa\u80fd\u529b\u8bc4\u4f30\u4e2d\uff0c\u76f8\u6bd4\u524d\u4ee3 DeepSeek-V2 \u7cfb\u5217\u6709\u663e\u8457\u63d0\u5347\uff0c\u7279\u522b\u662f\u5728\u7406\u89e3\u548c\u6267\u884c\u7279\u5b9a\u683c\u5f0f\u8981\u6c42\u65b9\u9762\u3002<\/p>\n\n\n\n<p><strong>\u4ee3\u7801\u4e0e\u6570\u5b66\u80fd\u529b\u8bc4\u4f30\uff1a<\/strong>\u5728\u7f16\u7a0b\u9886\u57df\uff0cDeepSeek-V3 \u7684\u8bc4\u4f30\u6db5\u76d6<strong>\u5de5\u7a0b\u5b9e\u8df5\uff08SWE-Bench-Verified\uff09<\/strong>\u548c<strong>\u7b97\u6cd5\u7f16\u7a0b\uff08HumanEval\u3001LiveCodeBench\uff09<\/strong>\u4e24\u4e2a\u7ef4\u5ea6\u3002<\/p>\n\n\n\n<p>\u5728\u5de5\u7a0b\u7c7b\u4efb\u52a1\u4e2d\uff0c\u867d\u7136\u672a\u80fd\u8d85\u8d8a Claude-Sonnet-3.5-1022\uff0c\u4f46\u660e\u663e\u4f18\u4e8e\u5176\u4ed6\u5f00\u6e90\u6a21\u578b\u3002\u4f5c\u4e3a\u5f00\u6e90\u6a21\u578b\uff0cDeepSeek-V3 \u7684\u5f3a\u5927\u80fd\u529b\u5c06\u63a8\u52a8\u8f6f\u4ef6\u5de5\u7a0b\u548c\u7b97\u6cd5\u5f00\u53d1\u9886\u57df\u7684\u521b\u65b0\uff0c\u5e2e\u52a9\u5f00\u53d1\u8005\u548c\u7814\u7a76\u4eba\u5458\u62d3\u5c55\u5f00\u6e90\u6a21\u578b\u5728\u7f16\u7a0b\u9886\u57df\u7684\u5e94\u7528\u8fb9\u754c\u3002<\/p>\n\n\n\n<p>\u5728\u7b97\u6cd5\u7f16\u7a0b\u4efb\u52a1\u4e0a\uff0c\u501f\u52a9\u5148\u8fdb\u7684\u77e5\u8bc6\u84b8\u998f\u6280\u672f\uff0cDeepSeek-V3 \u5728 HumanEval-Mul \u548c LiveCodeBench \u7b49\u6d4b\u8bd5\u4e2d\u8d85\u8d8a\u6240\u6709\u57fa\u7ebf\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u5728\u6570\u5b66\u80fd\u529b\u6d4b\u8bd5\u4e2d\uff0cDeepSeek-V3 \u4e3a\u975e o1 \u7c7b\u6a21\u578b\u6811\u7acb\u4e86\u65b0\u6807\u51c6\u3002\u5728 AIME\u3001MATH-500 \u548c CNMO 2024 
\u7b49\u5177\u6709\u6311\u6218\u6027\u7684\u6d4b\u8bd5\u4e2d\uff0c\u5176\u5f97\u5206\u6bd4\u7b2c\u4e8c\u540d Qwen2.5 72B \u9ad8\u51fa\u7ea6 10 \u4e2a\u767e\u5206\u70b9\uff0c\u8fd9\u79cd\u663e\u8457\u4f18\u52bf\u5145\u5206\u9a8c\u8bc1\u4e86 DeepSeek-R1 \u77e5\u8bc6\u84b8\u998f\u6280\u672f\u7684\u6709\u6548\u6027\u3002<\/p>\n\n\n\n<p><strong>\u4e2d\u6587\u80fd\u529b\u8bc4\u4f30\uff1a<\/strong>\u5728\u4e2d\u82f1\u53cc\u8bed\u652f\u6301\u65b9\u9762\uff0cQwen \u548c DeepSeek \u662f\u4e24\u4e2a\u4ee3\u8868\u6027\u7684\u6a21\u578b\u7cfb\u5217\u3002<\/p>\n\n\n\n<p>\u5728\u4e2d\u6587 SimpleQA \u4e8b\u5b9e\u6027\u77e5\u8bc6\u6d4b\u8bd5\u4e2d\uff0c\u5c3d\u7ba1 Qwen2.5 \u7684\u8bad\u7ec3\u6570\u636e\u91cf\u66f4\u5927\uff0818T token\uff0c\u8d85\u51fa DeepSeek-V3 \u7684 14.8T token \u7ea6 20%\uff09\uff0cDeepSeek-V3 \u4ecd\u9886\u5148 16.4 \u4e2a\u767e\u5206\u70b9\u3002<\/p>\n\n\n\n<p>\u5728&nbsp;<strong>C-Eval\uff08\u4e2d\u6587\u6559\u80b2\u77e5\u8bc6\u8bc4\u4f30\uff09<\/strong>\u548c<strong>&nbsp;CLUEWSC\uff08\u4e2d\u6587\u6307\u4ee3\u6d88\u6b67\u6311\u6218\uff09<\/strong>\u7b49\u6d4b\u8bd5\u4e2d\uff0c\u4e24\u4e2a\u6a21\u578b\u8868\u73b0\u76f8\u5f53\uff0c\u8868\u660e\u5b83\u4eec\u5728\u4e2d\u6587\u63a8\u7406\u548c\u6559\u80b2\u4efb\u52a1\u65b9\u9762\u90fd\u8fbe\u5230\u4e86\u8f83\u9ad8\u6c34\u5e73\u3002<\/p>\n\n\n\n<p><strong>\u5f00\u653e\u5f0f\u8bc4\u4f30<\/strong><\/p>\n\n\n\n<p>\u9664\u6807\u51c6\u57fa\u51c6\u6d4b\u8bd5\u5916\uff0c\u7cfb\u7edf\u8fd8\u91c7\u7528 LLM \u4f5c\u4e3a\u8bc4\u4f30\u8005\u5bf9\u6a21\u578b\u7684\u5f00\u653e\u5f0f\u751f\u6210\u80fd\u529b\u8fdb\u884c\u8bc4\u4f30\uff0c\u7ed3\u679c\u89c1\u88687\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"705\" height=\"253\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-33.png\" alt=\"\" class=\"wp-image-23740\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-33.png 705w, 
http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-33-300x108.png 300w\" sizes=\"(max-width: 705px) 100vw, 705px\" \/><figcaption>\u8868 7\uff1a\u82f1\u6587\u5f00\u653e\u5f0f\u5bf9\u8bdd\u8bc4\u4f30\u3002\u5728 AlpacaEval 2.0 \u4e2d\uff0cV3\u4f7f\u7528\u201c\u957f\u5ea6\u63a7\u5236\u80dc\u7387\u201d\u4f5c\u4e3a\u6838\u5fc3\u8bc4\u4f30\u6307\u6807\uff0c\u4ee5\u8861\u91cf\u6a21\u578b\u5728\u5bf9\u8bdd\u751f\u6210\u4e2d\u7684\u8868\u73b0\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u8bc4\u4f30\u4e25\u683c\u9075\u5faa AlpacaEval 2.0 \u548c Arena-Hard \u7684\u6807\u51c6\u89c4\u8303\uff0c\u4f7f\u7528 GPT-4-Turbo-1106 \u8fdb\u884c\u914d\u5bf9\u8bc4\u4f30\u3002<\/p>\n\n\n\n<p>\u5728 Arena-Hard \u6d4b\u8bd5\u4e2d\uff0cDeepSeek-V3 \u76f8\u5bf9\u4e8e GPT-4-0314 \u57fa\u51c6\u53d6\u5f97\u4e86 86% \u4ee5\u4e0a\u7684\u4f18\u80dc\u7387\uff0c\u4e0e Claude-Sonnet-3.5-1022 \u7b49\u9876\u7ea7\u6a21\u578b\u8868\u73b0\u76f8\u5f53\uff0c\u5145\u5206\u5c55\u793a\u4e86\u5176\u5728\u5904\u7406\u590d\u6742\u4efb\u52a1\uff08\u5305\u62ec\u7f16\u7a0b\u548c\u8c03\u8bd5\uff09\u65b9\u9762\u7684\u5353\u8d8a\u80fd\u529b\u3002<strong>\u4f5c\u4e3a\u9996\u4e2a\u5728 Arena-Hard \u6d4b\u8bd5\u4e2d\u7a81\u7834 85% \u7684\u5f00\u6e90\u6a21\u578b<\/strong>\uff0cDeepSeek-V3 \u663e\u8457\u7f29\u5c0f\u4e86\u4e0e\u95ed\u6e90\u6a21\u578b\u7684\u5dee\u8ddd\uff0c\u4e3a\u5f00\u6e90\u6a21\u578b\u5728\u9ad8\u96be\u5ea6\u4efb\u52a1\u9886\u57df\u6811\u7acb\u4e86\u65b0\u6807\u51c6\u3002<\/p>\n\n\n\n<p>\u5728 AlpacaEval 2.0 \u8bc4\u6d4b\u4e2d\uff0cDeepSeek-V3 \u540c\u6837\u8868\u73b0\u51fa\u8272\uff0c\u8d85\u8d8a\u4e86\u6240\u6709\u53c2\u8bc4\u7684\u5f00\u6e90\u548c\u95ed\u6e90\u6a21\u578b\uff0c\u5c55\u793a\u4e86\u5176\u5728\u5199\u4f5c\u548c\u95ee\u7b54\u65b9\u9762\u7684\u4f18\u79c0\u80fd\u529b\u3002\u7279\u522b\u662f\u76f8\u6bd4 DeepSeek-V2.5-0905 \u63d0\u5347\u4e86 
20%\uff0c\u8bc1\u660e\u4e86\u6a21\u578b\u5728\u57fa\u7840\u4efb\u52a1\u5904\u7406\u80fd\u529b\u4e0a\u7684\u663e\u8457\u8fdb\u6b65\u3002<\/p>\n\n\n\n<p><strong>\u751f\u6210\u5f0f\u5956\u52b1\u6a21\u578b\u6027\u80fd<\/strong><\/p>\n\n\n\n<p>\u7814\u7a76\u5c06 DeepSeek-V3 \u7684\u8bc4\u5224\u80fd\u529b\u4e0e\u9886\u5148\u6a21\u578b GPT-4o \u548c Claude-3.5 \u8fdb\u884c\u5bf9\u6bd4\u3002\u5982\u88688\u6240\u793a\uff0c\u5728 RewardBench \u8bc4\u6d4b\u4e2d\uff0cDeepSeek-V3 \u8fbe\u5230\u4e86 GPT-4o-0806 \u548c Claude-3.5-Sonnet-1022 \u6700\u4f18\u7248\u672c\u7684\u6c34\u5e73\uff0c\u5e76\u8d85\u8d8a\u4e86\u5176\u4ed6\u7248\u672c\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"955\" height=\"310\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-34.png\" alt=\"\" class=\"wp-image-23741\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-34.png 955w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-34-300x97.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-34-768x249.png 768w\" sizes=\"(max-width: 955px) 100vw, 955px\" \/><figcaption>\u8868 8\uff1aGPT-4o\u3001Claude-3.5-sonnet \u548c DeepSeek-V3 \u5728 RewardBench \u57fa\u51c6\u6d4b\u8bd5\u4e2d\u7684\u6027\u80fd\u5bf9\u6bd4\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u901a\u8fc7\u5f15\u5165\u6295\u7968\u673a\u5236\uff0cDeepSeek-V3 \u7684\u8bc4\u5224\u80fd\u529b\u5f97\u5230\u8fdb\u4e00\u6b65\u63d0\u5347\u3002\u57fa\u4e8e\u6b64\uff0c\u7cfb\u7edf\u91c7\u7528 DeepSeek-V3 \u914d\u5408\u6295\u7968\u673a\u5236\u5bf9\u5f00\u653e\u5f0f\u95ee\u9898\u8fdb\u884c\u8bc4\u4f30\u53cd\u9988\uff0c\u6709\u6548\u63d0\u9ad8\u4e86\u6a21\u578b\u5bf9\u9f50\u8fc7\u7a0b\u7684\u53ef\u9760\u6027\u548c\u7a33\u5b9a\u6027\u3002<\/p>\n\n\n\n<h2 id=\"h_14890557782_21\">\u8ba8\u8bba<\/h2>\n\n\n\n<h3 id=\"h_14890557782_22\">DeepSeek-R1 \u77e5\u8bc6\u84b8\u998f\u5206\u6790<\/h3>\n\n\n\n<p>\u7814\u7a76\u57fa\u4e8e DeepSeek-V2.5 \u6a21\u578b\u8bc4\u4f30\u4e86 
DeepSeek-R1 \u77e5\u8bc6\u84b8\u998f\u7684\u6548\u679c\u3002\u5bf9\u6bd4\u5b9e\u9a8c\u4e2d\uff0c\u57fa\u51c6\u6a21\u578b\u4f7f\u7528\u77ed\u94fe\u5f0f\u601d\u7ef4\u6570\u636e\u8bad\u7ec3\uff0c\u800c\u5bf9\u7167\u7ec4\u4f7f\u7528\u4e13\u5bb6\u68c0\u67e5\u70b9\u751f\u6210\u7684\u6570\u636e\u3002<\/p>\n\n\n\n<p>\u88689\u7684\u7ed3\u679c\u663e\u793a\uff0c\u84b8\u998f\u6570\u636e\u5728 LiveCodeBench \u548c MATH-500 \u57fa\u51c6\u6d4b\u8bd5\u4e2d\u90fd\u5e26\u6765\u4e86\u660e\u663e\u63d0\u5347\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-35.png\" alt=\"\" class=\"wp-image-23742\" width=\"670\" height=\"143\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-35.png 796w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-35-300x64.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/01\/image-35-768x164.png 768w\" sizes=\"(max-width: 670px) 100vw, 670px\" \/><figcaption>\u88689\uff1aDeepSeek-R1 \u84b8\u998f\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u8d21\u732e\u5206\u6790\u3002\u5728 LiveCodeBench \u548c MATH-500 \u57fa\u51c6\u6d4b\u8bd5\u4e2d\u7684\u8bc4\u4f30\u8bbe\u7f6e\u4e0e\u88686\u76f8\u540c\uff0c\u65e8\u5728\u786e\u4fdd\u7ed3\u679c\u7684\u53ef\u6bd4\u6027\u3002<\/figcaption><\/figure>\n\n\n\n<p>\u7814\u7a76\u53d1\u73b0\u4e86\u4e00\u4e2a\u91cd\u8981\u7684\u5e73\u8861\u70b9\uff1a<strong>\u77e5\u8bc6\u84b8\u998f\u80fd\u63d0\u9ad8\u6027\u80fd\uff0c\u4f46\u540c\u65f6\u4f1a\u663e\u8457\u589e\u52a0\u8f93\u51fa\u957f\u5ea6<\/strong>\u3002\u4e3a\u6b64\uff0cDeepSeek-V3 
\u5728\u84b8\u998f\u8fc7\u7a0b\u4e2d\u91c7\u7528\u4e86\u7ecf\u8fc7\u4f18\u5316\u7684\u53c2\u6570\u914d\u7f6e\uff0c\u4ee5\u5e73\u8861\u6a21\u578b\u51c6\u786e\u6027\u548c\u8ba1\u7b97\u6548\u7387\u3002<\/p>\n\n\n\n<p>\u7814\u7a76\u8868\u660e\uff0c\u4ece\u63a8\u7406\u6a21\u578b\u8fdb\u884c\u77e5\u8bc6\u84b8\u998f\u662f\u63d0\u5347\u6a21\u578b\u540e\u671f\u6027\u80fd\u7684\u6709\u6548\u65b9\u6cd5\u3002\u5f53\u524d\u7814\u7a76\u867d\u7136\u4e3b\u8981\u5173\u6ce8\u6570\u5b66\u548c\u7f16\u7a0b\u9886\u57df\u7684\u77e5\u8bc6\u84b8\u998f\uff0c\u4f46\u8fd9\u79cd\u65b9\u6cd5\u5728\u5176\u4ed6\u9886\u57df\u4e5f\u5c55\u73b0\u51fa\u5e7f\u9614\u524d\u666f\u3002\u5176\u5728\u7279\u5b9a\u9886\u57df\u7684\u6210\u529f\u8868\u660e\uff0c\u957f\u94fe\u5f0f\u601d\u7ef4\u84b8\u998f\u6280\u672f\u6709\u671b\u63d0\u5347\u6a21\u578b\u5728\u5176\u4ed6\u9700\u8981\u590d\u6742\u63a8\u7406\u7684\u8ba4\u77e5\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\u3002\u672a\u6765\u7814\u7a76\u5c06\u7ee7\u7eed\u63a2\u7d22\u8be5\u65b9\u6cd5\u5728\u4e0d\u540c\u9886\u57df\u7684\u5e94\u7528\u3002<\/p>\n\n\n\n<h3 id=\"h_14890557782_23\">\u81ea\u6211\u5956\u52b1\u673a\u5236<\/h3>\n\n\n\n<p>\u5956\u52b1\u673a\u5236\u662f\u5f3a\u5316\u5b66\u4e60\u4e2d\u7684\u6838\u5fc3\u8981\u7d20\uff0c\u51b3\u5b9a\u7740\u4f18\u5316\u65b9\u5411\u3002\u5728\u7f16\u7a0b\u6216\u6570\u5b66\u7b49\u53ef\u901a\u8fc7\u5916\u90e8\u5de5\u5177\u76f4\u63a5\u9a8c\u8bc1\u7684\u9886\u57df\uff0c\u5f3a\u5316\u5b66\u4e60\u5c55\u73b0\u51fa\u663e\u8457\u6548\u679c\u3002\u4f46\u5728\u66f4\u4e00\u822c\u573a\u666f\u4e2d\uff0c\u76f4\u63a5\u901a\u8fc7\u89c4\u5219\u7f16\u7801\u6784\u5efa\u53cd\u9988\u673a\u5236\u5e76\u4e0d\u53ef\u884c\u3002\u56e0\u6b64\uff0c\u5728\u5f00\u53d1 DeepSeek-V3 \u65f6\uff0c\u9488\u5bf9\u8fd9\u7c7b\u5e7f\u6cdb\u573a\u666f\uff0c\u91c7\u7528\u4e86<strong>\u5baa\u5236 AI 
\u65b9\u6cd5<\/strong>\uff0c\u4f7f\u7528\u6a21\u578b\u81ea\u8eab\u7684\u6295\u7968\u8bc4\u4f30\u7ed3\u679c\u4f5c\u4e3a\u53cd\u9988\u3002\u8fd9\u79cd\u65b9\u6cd5\u5728\u5bf9\u9f50\u6548\u679c\u4e0a\u53d6\u5f97\u663e\u8457\u6210\u6548\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u6a21\u578b\u5728\u4e3b\u89c2\u8bc4\u4f30\u4e2d\u7684\u8868\u73b0\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u5f15\u5165\u989d\u5916\u7684\u5baa\u5236\u89c4\u5219\uff0cDeepSeek-V3 \u80fd\u591f\u5411\u9884\u671f\u65b9\u5411\u4f18\u5316\u3002\u7814\u7a76\u8ba4\u4e3a\uff0c\u5c06\u8865\u5145\u4fe1\u606f\u4e0e\u8bed\u8a00\u6a21\u578b\u7ed3\u5408\u4f5c\u4e3a\u53cd\u9988\u6765\u6e90\u7684\u8303\u5f0f\u5177\u6709\u91cd\u8981\u610f\u4e49\u3002\u5927\u8bed\u8a00\u6a21\u578b\u80fd\u591f\u5c06\u5404\u7c7b\u573a\u666f\u4e2d\u7684\u975e\u7ed3\u6784\u5316\u4fe1\u606f\u8f6c\u5316\u4e3a\u6709\u6548\u5956\u52b1\u4fe1\u53f7\uff0c\u4fc3\u8fdb\u6a21\u578b\u7684\u6301\u7eed\u4f18\u5316\u3002\u9664\u81ea\u6211\u5956\u52b1\u5916\uff0c\u7814\u7a76\u56e2\u961f\u4e5f\u5728\u63a2\u7d22\u5176\u4ed6\u901a\u7528\u4e14\u53ef\u6269\u5c55\u7684\u5956\u52b1\u65b9\u6cd5\uff0c\u4ee5\u6301\u7eed\u63d0\u5347\u6a21\u578b\u5728\u901a\u7528\u573a\u666f\u4e2d\u7684\u80fd\u529b\u3002<\/p>\n\n\n\n<p><strong>MTP\u6027\u80fd<\/strong><\/p>\n\n\n\n<p>DeepSeek-V3 \u901a\u8fc7 MTP \u6280\u672f\u5b9e\u73b0\u540c\u65f6\u9884\u6d4b\u4e24\u4e2a token\uff0c\u7ed3\u5408\u63a8\u6d4b\u89e3\u7801\u6846\u67b6\u663e\u8457\u63d0\u5347\u4e86\u89e3\u7801\u6548\u7387\u3002\u5173\u952e\u95ee\u9898\u662f\u7b2c\u4e8c\u4e2a\u9884\u6d4b token \u7684\u53ef\u7528\u6027\uff0c\u8bc4\u4f30\u663e\u793a\u5728\u4e0d\u540c\u751f\u6210\u4efb\u52a1\u4e2d\uff0c\u7b2c\u4e8c\u4e2a token \u7684<strong>\u63a5\u53d7\u7387\u7a33\u5b9a\u4fdd\u6301\u5728 85%-90%<\/strong>\uff0c\u8868\u73b0\u51fa\u8f83\u9ad8\u7684\u53ef\u9760\u6027\u3002<\/p>\n\n\n\n<p>\u8fd9\u79cd\u9ad8\u63a5\u53d7\u7387\u4f7f DeepSeek-V3 \u7684<strong>\u89e3\u7801\u901f\u5ea6\u63d0\u5347\u81f3\u539f\u6765\u7684 1.8 
\u500d<\/strong>\uff08\u4ee5\u6bcf\u79d2\u751f\u6210 token \u6570\u8861\u91cf\uff09\u3002<\/p>\n\n\n\n<h2 id=\"h_14890557782_24\">\u7ed3\u8bba\u3001\u5c40\u9650\u6027\u548c\u672a\u6765\u53d1\u5c55\u65b9\u5411<\/h2>\n\n\n\n<p>\u672c\u7814\u7a76\u4ecb\u7ecd\u4e86 DeepSeek-V3 \u5927\u89c4\u6a21\u6df7\u5408\u4e13\u5bb6\u8bed\u8a00\u6a21\u578b\uff0c\u8be5\u6a21\u578b\u603b\u53c2\u6570\u91cf\u8fbe\u5230 671B\uff0c\u6bcf\u6b21\u5904\u7406\u6fc0\u6d3b 37B \u53c2\u6570\uff0c\u8bad\u7ec3\u6570\u636e\u89c4\u6a21\u8fbe 14.8T token\u3002<\/p>\n\n\n\n<p>\u6a21\u578b\u5728\u5ef6\u7eed MLA \u548c DeepSeekMoE \u67b6\u6784\u4f18\u52bf\u7684\u57fa\u7840\u4e0a\uff0c\u521b\u65b0\u6027\u5730\u63d0\u51fa\u4e86\u65e0\u8f85\u52a9\u635f\u5931\u8d1f\u8f7d\u5747\u8861\u7b56\u7565\uff0c\u5e76\u5f15\u5165\u591a token \u9884\u6d4b\u8bad\u7ec3\u76ee\u6807\u4ee5\u63d0\u5347\u6027\u80fd\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u91c7\u7528 FP8 \u8bad\u7ec3\u6280\u672f\u548c\u7cbe\u7ec6\u7684\u5de5\u7a0b\u4f18\u5316\uff0c\u6a21\u578b\u5b9e\u73b0\u4e86\u9ad8\u6548\u7684\u8bad\u7ec3\u8fc7\u7a0b\u3002\u5728\u540e\u8bad\u7ec3\u9636\u6bb5\uff0c\u6210\u529f\u5c06 DeepSeek-R1 \u7cfb\u5217\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\u8fc1\u79fb\u81f3\u65b0\u6a21\u578b\u3002<\/p>\n\n\n\n<p>\u7efc\u5408\u8bc4\u4f30\u663e\u793a\uff0cDeepSeek-V3 \u4e0d\u4ec5\u6210\u4e3a\u5f53\u524d\u6027\u80fd\u6700\u5f3a\u7684\u5f00\u6e90\u6a21\u578b\uff0c\u8fd8\u8fbe\u5230\u4e86\u4e0e GPT-4o \u548c Claude-3.5-Sonnet \u7b49\u9876\u7ea7\u95ed\u6e90\u6a21\u578b\u76f8\u5f53\u7684\u6c34\u5e73\u3002\u540c\u65f6\uff0c\u6a21\u578b\u7ef4\u6301\u4e86\u6781\u5177\u7ade\u4e89\u529b\u7684\u8bad\u7ec3\u6210\u672c\uff0c\u5b8c\u6574\u8bad\u7ec3\u8fc7\u7a0b\uff08\u5305\u62ec\u9884\u8bad\u7ec3\u3001\u4e0a\u4e0b\u6587\u957f\u5ea6\u6269\u5c55\u548c\u540e\u8bad\u7ec3\uff09\u4ec5\u9700 2.788M H800 GPU 
\u5c0f\u65f6\u3002<\/p>\n\n\n\n<p>\u5c3d\u7ba1\u6a21\u578b\u5728\u6027\u80fd\u548c\u8bad\u7ec3\u6548\u7387\u4e0a\u8868\u73b0\u51fa\u8272\uff0c\u4f46\u4ecd\u5b58\u5728\u4e00\u4e9b\u5c40\u9650\u6027\uff0c\u7279\u522b\u662f\u5728\u90e8\u7f72\u65b9\u9762\uff1a\u9996\u5148\uff0c\u4e3a\u4fdd\u8bc1\u63a8\u7406\u6548\u7387\uff0c\u6a21\u578b\u7684\u6700\u5c0f\u90e8\u7f72\u5355\u5143\u89c4\u6a21\u8f83\u5927\uff0c\u53ef\u80fd\u8d85\u51fa\u5c0f\u578b\u56e2\u961f\u7684\u8d44\u6e90\u80fd\u529b\uff1b\u5176\u6b21\uff0c\u867d\u7136\u5f53\u524d\u90e8\u7f72\u65b9\u6848\u4f7f\u6a21\u578b\u7684\u7aef\u5230\u7aef\u751f\u6210\u901f\u5ea6\u6bd4\u4e0a\u4e00\u4ee3\u63d0\u5347\u4e86\u4e24\u500d\u4ee5\u4e0a\uff0c\u4f46\u4ecd\u6709\u4f18\u5316\u7a7a\u95f4\u3002\u8fd9\u4e9b\u5c40\u9650\u6027\u6709\u671b\u968f\u7740\u786c\u4ef6\u6280\u672f\u7684\u8fdb\u6b65\u5f97\u5230\u81ea\u7136\u89e3\u51b3\u3002<\/p>\n\n\n\n<p>\u79c9\u6301\u957f\u671f\u53d1\u5c55\u7406\u5ff5\uff0cDeepSeek \u5c06\u7ee7\u7eed\u575a\u6301\u5f00\u6e90\u8def\u7ebf\uff0c\u7a33\u6b65\u63a8\u8fdb\u901a\u7528\u4eba\u5de5\u667a\u80fd\u7684\u7814\u7a76\u3002\u672a\u6765\u7814\u7a76\u5c06\u91cd\u70b9\u5173\u6ce8\u4ee5\u4e0b\u65b9\u5411\uff1a<\/p>\n\n\n\n<ul><li>\u6301\u7eed\u4f18\u5316\u6a21\u578b\u67b6\u6784\uff0c\u63d0\u5347\u8bad\u7ec3\u548c\u63a8\u7406\u6548\u7387\uff0c\u63a2\u7d22\u652f\u6301\u65e0\u9650\u4e0a\u4e0b\u6587\u957f\u5ea6\u7684\u9ad8\u6548\u65b9\u6848\u3002\u540c\u65f6\u7a81\u7834 Transformer 
\u67b6\u6784\u7684\u56fa\u6709\u5c40\u9650\uff0c\u62d3\u5c55\u6a21\u578b\u7684\u5efa\u6a21\u80fd\u529b\u8fb9\u754c\u3002<\/li><li>\u6df1\u5316\u8bad\u7ec3\u6570\u636e\u7684\u8d28\u91cf\u63d0\u5347\u548c\u89c4\u6a21\u6269\u5c55\uff0c\u63a2\u7d22\u65b0\u7684\u8bad\u7ec3\u4fe1\u53f7\u6765\u6e90\uff0c\u5b9e\u73b0\u6570\u636e\u5728\u591a\u4e2a\u7ef4\u5ea6\u7684\u5168\u9762\u6269\u5c55\u3002<\/li><li>\u52a0\u5f3a\u6a21\u578b\u7684\u6df1\u5c42\u63a8\u7406\u80fd\u529b\uff0c\u901a\u8fc7\u6269\u5c55\u63a8\u7406\u7684\u5e7f\u5ea6\u548c\u6df1\u5ea6\uff0c\u63d0\u5347\u6a21\u578b\u7684\u667a\u80fd\u6c34\u5e73\u548c\u95ee\u9898\u89e3\u51b3\u80fd\u529b\u3002<\/li><li>\u5efa\u7acb\u66f4\u5168\u9762\u7684\u591a\u7ef4\u5ea6\u8bc4\u4f30\u4f53\u7cfb\uff0c\u907f\u514d\u8fc7\u5ea6\u4f18\u5316\u7279\u5b9a\u57fa\u51c6\u6d4b\u8bd5\u96c6\u800c\u4ea7\u751f\u7684\u80fd\u529b\u8bef\u5224\uff0c\u786e\u4fdd\u6a21\u578b\u8bc4\u4f30\u7684\u79d1\u5b66\u6027\u548c\u5168\u9762\u6027\u3002<\/li><\/ul>\n","protected":false},"excerpt":{"rendered":"<p>https:\/\/github.com\/deepseek-ai\/DeepSeek-V3\/blob\/main\/De &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2025\/01\/04\/deepseek-v3\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">DeepSeek-V3 
\u6280\u672f\u62a5\u544a<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[21,4,9],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/23534"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=23534"}],"version-history":[{"count":86,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/23534\/revisions"}],"predecessor-version":[{"id":24560,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/23534\/revisions\/24560"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=23534"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=23534"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=23534"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}