{"id":27224,"date":"2025-06-28T16:08:00","date_gmt":"2025-06-28T08:08:00","guid":{"rendered":"http:\/\/139.9.1.231\/?p=27224"},"modified":"2025-06-27T21:55:04","modified_gmt":"2025-06-27T13:55:04","slug":"llm-padding","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2025\/06\/28\/llm-padding\/","title":{"rendered":"\u5173\u4e8eLLM \u8bad\u7ec3\u548c\u63a8\u7406\u7684 padding"},"content":{"rendered":"\n<ul><li><strong><em>1\u3001<a rel=\"noreferrer noopener\" href=\"https:\/\/zhuanlan.zhihu.com\/p\/675273498\" target=\"_blank\">LLM padding \u7ec6\u8282<\/a><\/em><\/strong><\/li><li><strong><em>2\u3001<a rel=\"noreferrer noopener\" href=\"https:\/\/zhuanlan.zhihu.com\/p\/713813412\" target=\"_blank\">Prepacking-\u6d88\u9664attention padding\u5197\u4f59\u8ba1\u7b97<\/a><\/em><\/strong><\/li><\/ul>\n\n\n\n<p>\u8bad\u7ec3\u65f6\u5019\u53ef\u4ee5\u8fdb\u884c \u5de6pad \u6216\u8005 \u53f3pad\uff0c\u6216\u8005\u5bf9 prompt \u8fdb\u884c\u5de6pad\uff0c\u5bf9 label \u8fdb\u884c\u53f3pad\u3002\u73b0\u5728\u5176\u5b9e\u4e00\u822c\u9884\u8bad\u7ec3\u6216\u8005\u5fae\u8c03\u7684\u65f6\u5019\u90fd\u4e0dpad\uff0c\u5426\u5219\u4f1a\u5f71\u54cd\u8bad\u7ec3\u6548\u7387\uff0c\u5927\u6982\u7684\u601d\u8def\uff1a\u5047\u8bbe batch size = 2\uff0cmax_seq_len = 16\uff0csequence 1\u30012\u30013\u30014 \u5206\u522b\u6709 7\u30019\u30016\u300110 \u4e2a token\uff0c\u90a3\u4e48\u5c31\u53ef\u4ee5\u7ec4\u6210[[s1+s2], [s3+s4]] \u8fdb\u884c\u8bad\u7ec3\uff0c\u8fd9\u4e2a\u65f6\u5019\u9700\u8981\u6784\u9020\u4e00\u4e2a\u6b63\u786e\u7684 causal attention mask\u3002\u4f7f\u7528 <a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/Dao-AILab\/flash-attention\/blob\/6c9e60de566800538fedad2ad5e6b7b55ca7f0c5\/flash_attn\/flash_attn_interface.py#L839C5-L839C37\" target=\"_blank\">flash_attn_varlen_qkvpacked_func<\/a>&nbsp;\u63a5\u53e3\uff0c\u5c31\u53ef\u4ee5\u5b9e\u73b0\u8fd9\u6837\u7684\u8ba1\u7b97\u800c\u65e0\u9700 padding\u3002<\/p>\n\n\n\n<p><strong>batch 
\u63a8\u7406\u7684\u65f6\u5019\u4e00\u822c\u53ea\u7528 \u5de6pad\u3002\u63a8\u7406\u65f6\u4e5f\u53ea\u6709batch\u63a8\u7406\u4f1a\u6709\u5f71\u54cd\uff0c\u53e6\u5916\u5de6\u5bf9\u9f50\u65b9\u4fbf\u6240\u6709\u884c\u540c\u65f6\u4ea7\u751fnext token\u3002<\/strong>\u5728\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3PPO\/DPO\/GRPO \u7684\u65f6\u5019\u9700\u8981\u7528\u5230\u63a8\u7406\uff0c<strong><em>\u6240\u4ee5\u4e5f\u9700\u8981\u505a\u5de6pad\uff01\uff01<\/em><\/strong><\/p>\n\n\n\n<h2 id=\"h_675273498_2\">padding_side \u7684\u5f71\u54cd<\/h2>\n\n\n\n<p>\u8c08\u5230 padding\uff0c\u6211\u4eec\u81ea\u7136\u8981\u8003\u8651 attention_mask\uff0c\u501f\u52a9 attention_mask \u53ef\u4ee5\u5728\u8ba1\u7b97 attention weight \u65f6\u5c06 padding \u5e26\u6765\u7684\u5f71\u54cd\u5c4f\u853d\u6389\u3002\u4e0b\u9762\u662f\u8bbe\u7f6e\u4e0d\u540c\u7684 padding_side\uff0ctokenizer \u7684\u8f93\u51fa\uff1a<\/p>\n\n\n\n<p>\u6ca1\u6709\u8bbe\u7f6e padding_side \u6216\u8005 padding_side=&#8217;right&#8217;\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&gt;&gt;&gt; from transformers import LlamaForCausalLM, LlamaTokenizer\n&gt;&gt;&gt; tokenizer = LlamaTokenizer.from_pretrained(\"meta-llama\/Llama-2-7b-hf\")\n&gt;&gt;&gt; tokenizer.pad_token = tokenizer.eos_token\n&gt;&gt;&gt; prompts = &#91;\"hello llama\", \"who are you?\"]\n&gt;&gt;&gt; tokenizer(prompts, return_tensors=\"pt\", padding=True)\n{\n    'input_ids': tensor(&#91;&#91;    1, 22172, 11148,  3304,     2],\n                         &#91;    1,  1058,   526,   366, 29973]]),\n    'attention_mask': tensor(&#91;&#91;1, 1, 1, 1, 0],  &#91;1, 1, 1, 1, 1]])\n}<\/code><\/pre>\n\n\n\n<p>\u8bbe\u7f6e 
padding_side=&#8217;left&#8217;\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&gt;&gt;&gt; from transformers import LlamaForCausalLM, LlamaTokenizer\n&gt;&gt;&gt; tokenizer = LlamaTokenizer.from_pretrained(\"meta-llama\/Llama-2-7b-hf\", padding_side=\"left\")\n&gt;&gt;&gt; tokenizer.pad_token = tokenizer.eos_token\n&gt;&gt;&gt; prompts = &#91;\"hello llama\", \"who are you?\"]\n&gt;&gt;&gt; tokenizer(prompts, return_tensors=\"pt\", padding=True)\n{\n    'input_ids': tensor(&#91;&#91;    2,     1, 22172, 11148,  3304],\n                         &#91;    1,  1058,   526,   366, 29973]]),\n    'attention_mask': tensor(&#91;&#91;0, 1, 1, 1, 1],  &#91;1, 1, 1, 1, 1]])\n}<\/code><\/pre>\n\n\n\n<p>\u8981\u7406\u89e3 padding_side=&#8217;right&#8217; \u4e3a\u4ec0\u4e48\u4f1a\u5bfc\u81f4\u7ed3\u679c\u4e0d\u6b63\u786e\uff0c\u5173\u952e\u7684\u70b9\u662f&nbsp;<strong>next token \u7684\u9884\u6d4b<\/strong>\u662f\u4f7f\u7528\u53e5\u5b50\u7684\u6700\u540e\u4e00\u4e2a token \u7ecf\u8fc7 transformer \u5c42\u4e4b\u540e\u8f93\u51fa\u7684 logit \u6765\u5f97\u5230 next token \u7684\u3002\u4e0b\u9762\u662f&nbsp;<code>model.generate<\/code>\u901a\u8fc7\u591a\u6b21\u8df3\u8f6c\u540e\u6765\u5230 next token \u7684\u5904\u7406\u903b\u8f91\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code><em># https:\/\/github.com\/huggingface\/transformers\/blob\/a7cab3c283312b8d4de5df3bbe719971e24f4281\/src\/transformers\/generation\/utils.py#L2411<\/em>\n        \nmodel_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)\n\n<em># forward pass to get next token<\/em>\noutputs = self(\n    **model_inputs,\n    return_dict=True,\n    output_attentions=output_attentions,\n    
output_hidden_states=output_hidden_states,\n)\n\nnext_token_logits = outputs.logits&#91;:, -1, :]\n<em># pre-process distribution<\/em>\nnext_tokens_scores = logits_processor(input_ids, next_token_logits)\n<em># argmax<\/em>\nnext_tokens = torch.argmax(next_tokens_scores, dim=-1)<\/code><\/pre>\n\n\n\n<p>\u4ece\u4e0a\u9762\u7684\u4ee3\u7801\u53ef\u4ee5\u770b\u5230\uff0c<strong>\u53e5\u5b50\u6700\u540e\u4e00\u4e2a token \u6240\u5bf9\u5e94\u7684 logit \u4f1a\u88ab\u7528\u6765\u8ba1\u7b97 next token\uff0c\u56e0\u6b64\uff0c\u6700\u540e\u4e00\u4e2a token logit \u7684\u8ba1\u7b97\u662f\u5426\u6b63\u786e\u51b3\u5b9a\u4e86\u63a8\u7406\u7684\u7ed3\u679c\u662f\u5426\u6b63\u786e\u3002<br>\u63a5\u4e0b\u6765\uff0c\u6211\u4eec\u6765\u770b\u4e00\u4e0b padding_side=&#8217;left&#8217; \u548c padding_side=&#8217;right&#8217;\uff0c\u6700\u540e\u4e00\u4e2a token \u6240\u5bf9\u5e94\u7684 logit \u662f\u5426\u662f\u6b63\u786e\u8ba1\u7b97\u7684\u3002<\/strong><\/p>\n\n\n\n<p><br>\u6211\u4eec\u5148\u6765\u770b padding_side=&#8217;left&#8217; \u7684\u6700\u540e\u4e00\u4e2a logit \u7684\u8ba1\u7b97\u8fc7\u7a0b\uff08\u7701\u7565\u4e2d\u95f4\u7684\u5177\u4f53\u7ec6\u8282\uff0c\u53ea\u7ed9\u51fa\u5173\u952e\u7684\u8fc7\u7a0b\uff09\uff0c\u8fd9\u91cc\u53ea\u5173\u6ce8\u53e5\u5b50 &#8220;hello llama&#8221;\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img src=\"https:\/\/pic1.zhimg.com\/v2-a238eb00b76bbf3fbdef50c96548727c_r.jpg\" alt=\"\"\/><\/figure>\n\n\n\n<p>\u4ece\u56fe 4 \u7684\u8ba1\u7b97\u8fc7\u7a0b\u53ef\u4ee5\u770b\u5230\uff0c\u4f7f\u7528 padding_side=&#8217;left&#8217; \u7684\u65b9\u5f0f\uff0cattention score after masked \u77e9\u9635\u7684\u6700\u540e\u4e00\u884c\u548c V \u7684\u7b2c\u4e00\u5217\u8fdb\u884c\u5185\u79ef\u540e\u5f97\u5230\u7684\u503c\u4e3a\u6b63\u786e\u4e14\u7b26\u5408\u671f\u671b\u7684\u503c\uff0c\u5373\u6700\u540e\u4e00\u4e2a token \u6240\u5bf9\u5e94\u7684 logit \u7684\u8ba1\u7b97\u6ca1\u6709\u53d7 padding \u7684\u5f71\u54cd\uff0c\u8be5 logit \u7684\u8ba1\u7b97\u8fc7\u7a0b\u6b63\u786e\u3002<\/p>\n\n\n\n<p class=\"has-light-pink-background-color 
has-background\"><em>\u56e0\u4e3a\u6700\u540e\u4e00\u5217\u8ba1\u7b97\u5f97\u5206\u65f6\u5019\uff0cV\u7b2c\u4e00\u884c\uff1apad token \u7684\u6743\u91cd\u3010 <em><em>attention <\/em><\/em> <\/em>score<em>\u3011\u90fd\u662f 0,\u4e14attention score \u5de6\u4e0b\u89d2\u6743\u91cd\u4e3a0<\/em>\uff0c<em>\u90a3\u4e48\u8ba1\u7b97\u7ed3\u679c\u6700\u540e\u4e00\u5217\u7684\u7ed3\u679c \u53ea\u8ddf\u975epad\u7684V\u6709\u5173<\/em><\/p>\n\n\n\n<p>\u6211\u4eec\u63a5\u4e0b\u6765\u770b\u4e00\u4e0b padding_side=&#8217;right&#8217; \u7684\u6700\u540e\u4e00\u4e2a logit \u7684\u8ba1\u7b97\u8fc7\u7a0b\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img src=\"https:\/\/picx.zhimg.com\/v2-3536dbc0395808948ce64397d197cc73_r.jpg\" alt=\"\"\/><\/figure>\n\n\n\n<p>\u4ece\u56fe 5 \u7684\u8ba1\u7b97\u8fc7\u7a0b\u53ef\u4ee5\u770b\u5230\uff0cattention score after masked \u77e9\u9635\u7684\u6700\u540e\u4e00\u884c\u548c V \u7684\u7b2c\u4e00\u5217\u8fdb\u884c\u5185\u79ef\u540e\u5f97\u5230\u7684\u503c\u662f\u4e0d\u7b26\u5408\u671f\u671b\u7684\uff0c\u5373\u6700\u540e\u4e00\u4e2a token\uff08pad token\uff09\u6240\u5bf9\u5e94\u7684 logit \u7684\u8ba1\u7b97\u4e0d\u6b63\u786e\uff0c\u56e0\u4e3a pad token \u4e5f\u53c2\u4e0e\u4e86\u8ba1\u7b97\uff0c\u800c\u6b63\u786e\u9884\u6d4b next token \u7684\u65f6\u5019 pad token \u662f\u4e0d\u5e94\u8be5\u53c2\u4e0e\u8ba1\u7b97\u7684\u3002<\/p>\n\n\n\n<p class=\"has-light-pink-background-color has-background\"> <em>\u56e0\u4e3a\u6700\u540e\u4e00\u5217\u8ba1\u7b97\u5f97\u5206\u65f6\u5019\uff0cV\u6700\u540e\u4e00\u884c\uff1apad token\u5bf9\u5e94\u7684\u6743\u91cd\u3010  <em>attention score <\/em>\u6700\u540e\u4e00\u884c\u3011<\/em>\u4e0d\u90fd\u662f0\uff0c<em>\u90a3\u4e48\u8ba1\u7b97\u7ed3\u679c\u6700\u540e\u4e00\u5217\u7684\u7ed3\u679c \u8ddf\u975epad\u7684V\u6709\u5173\u3002<\/em> <\/p>\n\n\n\n<p>\u81f3\u6b64\uff0c\u6211\u4eec\u5f04\u6e05\u695a\u4e86\u4e3a\u4ec0\u4e48 padding_side=&#8217;right&#8217; 
\u4f1a\u4ea7\u751f\u4e0d\u6b63\u786e\u7684\u7ed3\u679c\u3002<\/p>\n\n\n\n<h2>Prepacking-\u6d88\u9664attention padding\u5197\u4f59\u8ba1\u7b97<\/h2>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"871\" height=\"682\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-85.png\" alt=\"\" class=\"wp-image-27241\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-85.png 871w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-85-300x235.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-85-768x601.png 768w\" sizes=\"(max-width: 871px) 100vw, 871px\" \/><\/figure>\n\n\n\n<p>\u4e00\u4e2abatch\u91cc\uff0c\u4e0d\u540c\u7684request\uff0c\u5176prompt\u957f\u5ea6\u4e0d\u4e00\u6837\uff0c\u8fd9\u6837\u8ba1\u7b97attention\u65f6\u4f1a\u505apadding\uff0c\u786e\u4fdd\u6240\u6709\u7684request\u957f\u5ea6\u76f8\u540c\u3002\u5982\u4e0b\u56fe\u6240\u793a\uff0c1\u4e2abatch\u603b\u51714\u4e2a\u53e5\u5b50\uff0c\u540e\u97623\u4e2a\u53e5\u5b50\u505a\u4e86padding\uff0c\u8fd9\u6837\u505a\u7684\u4e00\u4e2a\u95ee\u9898\u5c31\u662f\uff0c\u4f1a\u6d6a\u8d39\u8ba1\u7b97\u3002<\/p>\n\n\n\n<p>\u4e00\u4e2a\u89e3\u51b3\u65b9\u6cd5\u662f\u53bb\u9664padding\uff0c\u628a\u8fd9\u4e9b\u53e5\u5b50\u653e\u5728\u4e00\u4e2a\u53e5\u5b50\u91cc\u8ba1\u7b97\u3002\u5982\u4e0b\u56fe\u6240\u793a\uff0c\u53bb\u9664\u4e86padding\u4e4b\u540e\uff0c\u6240\u6709\u7684request\u653e\u5728\u4e00\u4e2a\u53e5\u5b50\u91cc\u3002\u4f46\u662f\u5e26\u6765\u7684\u95ee\u9898\u662f\uff0cattention\u8ba1\u7b97\u53ea\u6709\u53e5\u5b50\u4e4b\u5185\u7684\u4e0d\u540ctoken\u9700\u8981\u8fdb\u884cattention\u8ba1\u7b97\uff08\u7ea2\u8272\u8ba1\u7b97attention\u3001\u84dd\u8272\u8ba1\u7b97attention\u7b49\u7b49\uff09\uff0c\u53e5\u5b50\u4e4b\u95f4\u662f\u72ec\u7acb\u7684\u3002\u6240\u4ee5\u8fd9\u79cd\u505a\u6cd5\u5fc5\u987b\u8981\u8fdb\u884c\u9002\u5f53\u7684\u6570\u636e\u7ec4\u7ec7\uff0c\u8ba9\u6211\u4eec\u7684attention\u7b97\u5b50\u80fd\u77e5\u9053\u81ea\u5d
f1\u8be5\u628a\u54ea\u4e9btoken\u653e\u5728\u4e00\u4e2a\u53e5\u5b50\u91cc\u8ba1\u7b97\u3002\u5982\u4e0b\u56fe\u6240\u793a\uff0c\u867d\u7136\u8f93\u5165\u5230attention\u7b97\u5b50\u7684\u662f\u4e00\u4e2a\u53e5\u5b50\uff0c\u5305\u542b10\u4e2atoken\uff0c\u4f46\u662f\u9700\u8981\u4e00\u4e9b\u989d\u5916\u6570\u636e\uff0c\u8ba9attention\u7b97\u5b50\u53bb\u628a\u7ea2\u8272\u90e8\u5206\u5f53\u4e00\u4e2arequest\u8ba1\u7b97attention\u3001\u84dd\u8272\u90e8\u5206\u5f53\u4e00\u4e2arequest\u8ba1\u7b97attention\u3001\u7eff\u8272\u90e8\u5206\u5f53\u4e00\u4e2arequest\u8ba1\u7b97attention\u3001\u9ec4\u8272\u90e8\u5206\u5f53\u4e00\u4e2arequest\u8ba1\u7b97attention\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"847\" height=\"622\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-86.png\" alt=\"\" class=\"wp-image-27242\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-86.png 847w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-86-300x220.png 300w, http:\/\/139.9.1.231\/wp-content\/uploads\/2025\/06\/image-86-768x564.png 768w\" sizes=\"(max-width: 847px) 100vw, 847px\" \/><\/figure>\n\n\n\n<p><strong>\u9884\u6253\u5305\u5728\u6982\u5ff5\u4e0a\u5f88\u7b80\u5355\uff1b\u6211\u4eec\u4e0d\u662f\u5c06\u6bcf\u4e2a\u5e8f\u5217\u586b\u5145\u5230\u76f8\u540c\u7684\u957f\u5ea6\uff0c\u800c\u662f\u4f7f\u7528\u73b0\u6210\u7684\u88c5\u7bb1\u7b97\u6cd5\u5c06\u591a\u4e2a\u63d0\u793a\u6253\u5305\u5728\u4e00\u8d77\uff0c\u4ee5\u4ee3\u66ff\u586b\u5145\u6807\u8bb0<\/strong>\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>1\u3001LLM padding \u7ec6\u8282 2\u3001Prepacking-\u6d88\u9664attention padding\u5197\u4f59\u8ba1\u7b97 \u8bad &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2025\/06\/28\/llm-padding\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u5173\u4e8eLLM \u8bad\u7ec3\u548c\u63a8\u7406\u7684 
padding<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[21,4,38],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27224"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=27224"}],"version-history":[{"count":20,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27224\/revisions"}],"predecessor-version":[{"id":27254,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/27224\/revisions\/27254"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=27224"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=27224"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=27224"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}