{"id":914,"date":"2021-12-28T21:57:37","date_gmt":"2021-12-28T13:57:37","guid":{"rendered":"http:\/\/139.9.1.231\/?p=914"},"modified":"2022-03-31T15:06:42","modified_gmt":"2022-03-31T07:06:42","slug":"pytorchcode","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2021\/12\/28\/pytorchcode\/","title":{"rendered":"PyTorch\u5e38\u7528\u4ee3\u7801\u6bb5\u5408\u96c6"},"content":{"rendered":"\n<div class=\"wp-block-image is-style-default\"><figure class=\"aligncenter size-large\"><img src=\"https:\/\/cdn.pixabay.com\/photo\/2016\/10\/20\/18\/35\/earth-1756274_960_720.jpg\" alt=\"\"\/><\/figure><\/div>\n\n\n\n<p>\u633a\u6709\u7528\u7684\uff0c\u4fdd\u7559\u4e0b\u6765\uff0c\u64b8\u4ee3\u7801\u7684\u65f6\u5019\u53c2\u8003\u5907\u7528\u3002<\/p>\n\n\n\n<p>\u4f5c\u8005\u4e28Jack Stark@\u77e5\u4e4e<\/p>\n\n\n\n<p>\u6765\u6e90\u4e28https:\/\/zhuanlan.zhihu.com\/p\/104019160<\/p>\n\n\n\n<p>\u672c\u6587\u662fPyTorch\u5e38\u7528\u4ee3\u7801\u6bb5\u5408\u96c6\uff0c\u6db5\u76d6\u57fa\u672c\u914d\u7f6e\u3001\u5f20\u91cf\u5904\u7406\u3001\u6a21\u578b\u5b9a\u4e49\u4e0e\u64cd\u4f5c\u3001\u6570\u636e\u5904\u7406\u3001\u6a21\u578b\u8bad\u7ec3\u4e0e\u6d4b\u8bd5\u7b495\u4e2a\u65b9\u9762\uff0c\u8fd8\u7ed9\u51fa\u4e86\u591a\u4e2a\u503c\u5f97\u6ce8\u610f\u7684Tips\uff0c\u5185\u5bb9\u975e\u5e38\u5168\u9762\u3002<\/p>\n\n\n\n<p>PyTorch\u6700\u597d\u7684\u8d44\u6599\u662f\u5b98\u65b9\u6587\u6863\u3002\u672c\u6587\u662fPyTorch\u5e38\u7528\u4ee3\u7801\u6bb5\uff0c\u5728\u53c2\u8003\u8d44\u6599[1](\u5f20\u7693\uff1aPyTorch Cookbook)\u7684\u57fa\u7840\u4e0a\u505a\u4e86\u4e00\u4e9b\u4fee\u8865\uff0c\u65b9\u4fbf\u4f7f\u7528\u65f6\u67e5\u9605\u3002<\/p>\n\n\n\n<h2>1. 
\u57fa\u672c\u914d\u7f6e<\/h2>\n\n\n\n<h3>\u5bfc\u5165\u5305\u548c\u7248\u672c\u67e5\u8be2<\/h3>\n\n\n\n<pre class=\"wp-block-preformatted\"><code>import torch<\/code>\n<code>import torch.nn as nn<\/code>\n<code>import torchvision<\/code>\n<code>print(torch.__version__)<\/code>\n<code>print(torch.version.cuda)<\/code>\n<code>print(torch.backends.cudnn.version())<\/code>\n<code>print(torch.cuda.get_device_name(0))<\/code><\/pre>\n\n\n\n<h3>\u53ef\u590d\u73b0\u6027<\/h3>\n\n\n\n<p>\u5728\u786c\u4ef6\u8bbe\u5907\uff08CPU\u3001GPU\uff09\u4e0d\u540c\u65f6\uff0c\u5b8c\u5168\u7684\u53ef\u590d\u73b0\u6027\u65e0\u6cd5\u4fdd\u8bc1\uff0c\u5373\u4f7f\u968f\u673a\u79cd\u5b50\u76f8\u540c\u3002\u4f46\u662f\uff0c\u5728\u540c\u4e00\u4e2a\u8bbe\u5907\u4e0a\uff0c\u5e94\u8be5\u4fdd\u8bc1\u53ef\u590d\u73b0\u6027\u3002\u5177\u4f53\u505a\u6cd5\u662f\uff0c\u5728\u7a0b\u5e8f\u5f00\u59cb\u7684\u65f6\u5019\u56fa\u5b9atorch\u7684\u968f\u673a\u79cd\u5b50\uff0c\u540c\u65f6\u4e5f\u628anumpy\u7684\u968f\u673a\u79cd\u5b50\u56fa\u5b9a\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"><code>np.random.seed(0)<\/code>\n<code>torch.manual_seed(0)<\/code>\n<code>torch.cuda.manual_seed_all(0)\ntorch.backends.cudnn.deterministic = True<\/code>\n<code>torch.backends.cudnn.benchmark = False<\/code><\/pre>\n\n\n\n<h3>\u663e\u5361\u8bbe\u7f6e<\/h3>\n\n\n\n<p>\u5982\u679c\u53ea\u9700\u8981\u4e00\u5f20\u663e\u5361<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"><code># Device configuration<\/code>\n<code>device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')<\/code><\/pre>\n\n\n\n<p>\u5982\u679c\u9700\u8981\u6307\u5b9a\u591a\u5f20\u663e\u5361\uff0c\u6bd4\u59820\uff0c1\u53f7\u663e\u5361\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"><code>import os<\/code>\n<code>os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'<\/code><\/pre>\n\n\n\n<p>\u4e5f\u53ef\u4ee5\u5728\u547d\u4ee4\u884c\u8fd0\u884c\u4ee3\u7801\u65f6\u8bbe\u7f6e\u663e\u5361\uff1a<\/p>\n\n\n\n<pre 
class=\"wp-block-code\"><code>CUDA_VISIBLE_DEVICES=0,1 python train.py<\/code><\/pre>\n\n\n\n<p class=\"has-dark-gray-color has-text-color\"><strong>\u6e05\u9664\u663e\u5b58<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>torch.cuda.empty_cache()<\/code><\/pre>\n\n\n\n<p>\u4e5f\u53ef\u4ee5\u4f7f\u7528\u5728\u547d\u4ee4\u884c\u91cd\u7f6eGPU\u7684\u6307\u4ee4<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>nvidia-smi --gpu-reset -i &#91;gpu_id]\n<\/code><\/pre>\n\n\n\n<h2>2. \u5f20\u91cf(Tensor)\u5904\u7406<\/h2>\n\n\n\n<h3>\u5f20\u91cf\u7684\u6570\u636e\u7c7b\u578b<\/h3>\n\n\n\n<p>PyTorch\u67099\u79cdCPU\u5f20\u91cf\u7c7b\u578b\u548c9\u79cdGPU\u5f20\u91cf\u7c7b\u578b\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image is-style-default\"><img src=\"https:\/\/pic1.zhimg.com\/v2-e49fdbb7cee05950a4108e607875bdd4_r.jpg\" alt=\"\"\/><\/figure>\n\n\n\n<h3>\u5f20\u91cf\u57fa\u672c\u4fe1\u606f<\/h3>\n\n\n\n<pre class=\"wp-block-preformatted\"><code>tensor = torch.randn(3,4,5)<\/code>\n<code>print(tensor.type())  # \u6570\u636e\u7c7b\u578b<\/code>\n<code>print(tensor.size())  # \u5f20\u91cf\u7684shape\uff0c\u662f\u4e2a\u5143\u7ec4<\/code>\n<code>print(tensor.dim())   # \u7ef4\u5ea6\u7684\u6570\u91cf<\/code><\/pre>\n\n\n\n<h3>\u547d\u540d\u5f20\u91cf<\/h3>\n\n\n\n<p>\u5f20\u91cf\u547d\u540d\u662f\u4e00\u4e2a\u975e\u5e38\u6709\u7528\u7684\u65b9\u6cd5\uff0c\u8fd9\u6837\u53ef\u4ee5\u65b9\u4fbf\u5730\u4f7f\u7528\u7ef4\u5ea6\u7684\u540d\u5b57\u6765\u505a\u7d22\u5f15\u6216\u5176\u4ed6\u64cd\u4f5c\uff0c\u5927\u5927\u63d0\u9ad8\u4e86\u53ef\u8bfb\u6027\u3001\u6613\u7528\u6027\uff0c\u9632\u6b62\u51fa\u9519\u3002\u3001<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5728PyTorch 1.3\u4e4b\u524d\uff0c\u9700\u8981\u4f7f\u7528\u6ce8\u91ca\n# Tensor&#91;N, C, H, W]\nimages = torch.randn(32, 3, 56, 56)\nimages.sum(dim=1)\nimages.select(dim=1, index=0)\n\n# PyTorch 1.3\u4e4b\u540e\nNCHW = &#91;\u2018N\u2019, \u2018C\u2019, \u2018H\u2019, \u2018W\u2019]\nimages = torch.randn(32, 3, 56, 
56, names=NCHW)\nimages.sum('C')\nimages.select('C', index=0)\n# \u4e5f\u53ef\u4ee5\u8fd9\u4e48\u8bbe\u7f6e\ntensor = torch.rand(3,4,1,2,names=('C', 'N', 'H', 'W'))\n# \u4f7f\u7528align_to\u53ef\u4ee5\u5bf9\u7ef4\u5ea6\u65b9\u4fbf\u5730\u6392\u5e8f\ntensor = tensor.align_to('N', 'C', 'H', 'W')<\/code><\/pre>\n\n\n\n<p>\u6570\u636e\u7c7b\u578b\u8f6c\u6362<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\n# \u8bbe\u7f6e\u9ed8\u8ba4\u7c7b\u578b\uff0cpytorch\u4e2d\u7684FloatTensor\u8fdc\u8fdc\u5feb\u4e8eDoubleTensor\ntorch.set_default_tensor_type(torch.FloatTensor)\n\n# \u7c7b\u578b\u8f6c\u6362\ntensor = tensor.cuda()\ntensor = tensor.cpu()\ntensor = tensor.float()\ntensor = tensor.long()<\/code><\/pre>\n\n\n\n<h3><strong>torch.Tensor\u4e0enp.ndarray\u8f6c\u6362<\/strong><\/h3>\n\n\n\n<p>\u9664\u4e86CharTensor\uff0c\u5176\u4ed6\u6240\u6709CPU\u4e0a\u7684\u5f20\u91cf\u90fd\u652f\u6301\u8f6c\u6362\u4e3anumpy\u683c\u5f0f\u7136\u540e\u518d\u8f6c\u6362\u56de\u6765\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>ndarray = tensor.cpu().numpy()\ntensor = torch.from_numpy(ndarray).float()\ntensor = torch.from_numpy(ndarray.copy()).float() # If ndarray has negative stride.<\/code><\/pre>\n\n\n\n<p><strong>Torch.tensor\u4e0ePIL.Image\u8f6c\u6362<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># pytorch\u4e2d\u7684\u5f20\u91cf\u9ed8\u8ba4\u91c7\u7528&#91;N, C, H, W]\u7684\u987a\u5e8f\uff0c\u5e76\u4e14\u6570\u636e\u8303\u56f4\u5728&#91;0,1]\uff0c\u9700\u8981\u8fdb\u884c\u8f6c\u7f6e\u548c\u89c4\u8303\u5316\n# torch.Tensor -&gt; PIL.Image\nimage = PIL.Image.fromarray(torch.clamp(tensor*255, min=0, max=255).byte().permute(1,2,0).cpu().numpy())\nimage = torchvision.transforms.functional.to_pil_image(tensor)  # Equivalently way\n\n# PIL.Image -&gt; torch.Tensor\npath = r'.\/figure.jpg'\ntensor = torch.from_numpy(np.asarray(PIL.Image.open(path))).permute(2,0,1).float() \/ 255\ntensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path)) # Equivalently 
way<\/code><\/pre>\n\n\n\n<h3><strong>np.ndarray\u4e0ePIL.Image\u7684\u8f6c\u6362<\/strong><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>image = PIL.Image.fromarray(ndarray.astype(np.uint8))\n\nndarray = np.asarray(PIL.Image.open(path))<\/code><\/pre>\n\n\n\n<p><strong>\u4ece\u53ea\u5305\u542b\u4e00\u4e2a\u5143\u7d20\u7684\u5f20\u91cf\u4e2d\u63d0\u53d6\u503c<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\nvalue = torch.rand(1).item()\n\nitem() \u2192 number  \u5c06\u5355\u4e2a\u5143\u7d20\u7684tensor\u6570\u503c\u8f6c\u6362\u4e3a\u666e\u901a\u7684python\u6570\u503c\u3002\nReturns the value of this tensor as a standard Python number. This only works for tensors with one element. For other cases, see tolist().\n\nThis operation is not differentiable.\n\nExample:\n\n&gt;&gt;&gt; x = torch.tensor(&#91;1.0])\n&gt;&gt;&gt; x.item()\n1.0<\/code><\/pre>\n\n\n\n<p><strong>torch\u5411\u91cf\u8f6cpython list   \uff1a<code>tolist<\/code>()<\/strong> <a href=\"https:\/\/pytorch.org\/docs\/1.2.0\/tensors.html#torch.Tensor.tolist\"><\/a><\/p>\n\n\n\n<p> tolist() -&gt; list or number<\/p>\n\n\n\n<p>Returns the tensor as a (nested) list. For scalars, a standard Python number is returned, just like with&nbsp;<a href=\"https:\/\/pytorch.org\/docs\/1.2.0\/tensors.html#torch.Tensor.item\"><code>item()<\/code><\/a>. 
Tensors are automatically moved to the CPU first if necessary.<\/p>\n\n\n\n<p>This operation is not differentiable.<\/p>\n\n\n\n<p>Examples:<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">&gt;&gt;&gt; a <strong>=<\/strong> torch<strong>.<\/strong>randn(2, 2)\n&gt;&gt;&gt; a<strong>.<\/strong>tolist()\n[[0.012766935862600803, 0.5415473580360413],\n [-0.08909505605697632, 0.7729271650314331]]\n&gt;&gt;&gt; a[0,0]<strong>.<\/strong>tolist()\n0.012766935862600803<\/pre>\n\n\n\n<p><strong>\u5f20\u91cf\u5f62\u53d8<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5728\u5c06\u5377\u79ef\u5c42\u8f93\u5165\u5168\u8fde\u63a5\u5c42\u7684\u60c5\u51b5\u4e0b\u901a\u5e38\u9700\u8981\u5bf9\u5f20\u91cf\u505a\u5f62\u53d8\u5904\u7406\uff0c\n# \u76f8\u6bd4torch.view\uff0ctorch.reshape\u53ef\u4ee5\u81ea\u52a8\u5904\u7406\u8f93\u5165\u5f20\u91cf\u4e0d\u8fde\u7eed\u7684\u60c5\u51b5\u3002\ntensor = torch.rand(2,3,4)\nshape = (6, 4)\ntensor = torch.reshape(tensor, shape)\n<\/code><\/pre>\n\n\n\n<h3><strong>\u6253\u4e71\u987a\u5e8f<\/strong><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>tensor = tensor&#91;torch.randperm(tensor.size(0))]  # \u6253\u4e71\u7b2c\u4e00\u4e2a\u7ef4\u5ea6<\/code><\/pre>\n\n\n\n<h3><strong>\u6c34\u5e73\u7ffb\u8f6c<\/strong><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># pytorch\u4e0d\u652f\u6301tensor&#91;::-1]\u8fd9\u6837\u7684\u8d1f\u6b65\u957f\u64cd\u4f5c\uff0c\u6c34\u5e73\u7ffb\u8f6c\u53ef\u4ee5\u901a\u8fc7\u5f20\u91cf\u7d22\u5f15\u5b9e\u73b0\n# \u5047\u8bbe\u5f20\u91cf\u7684\u7ef4\u5ea6\u4e3a&#91;N, D, H, W].\ntensor = tensor&#91;:,:,:,torch.arange(tensor.size(3) - 1, -1, -1).long()]<\/code><\/pre>\n\n\n\n<h3><strong>\u590d\u5236\u5f20\u91cf<\/strong><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code># Operation                 |  New\/Shared memory | Still in computation graph |\ntensor.clone()            # |        New         |          Yes               |\ntensor.detach()           # |      Shared        |          No                
|\ntensor.detach.clone()()   # |        New         |          No                |<\/code><\/pre>\n\n\n\n<h3><strong>\u5f20\u91cf\u62fc\u63a5<\/strong><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\n'''\n\u6ce8\u610ftorch.cat\u548ctorch.stack\u7684\u533a\u522b\u5728\u4e8etorch.cat\u6cbf\u7740\u7ed9\u5b9a\u7684\u7ef4\u5ea6\u62fc\u63a5\uff0c\n\u800ctorch.stack\u4f1a\u65b0\u589e\u4e00\u7ef4\u3002\u4f8b\u5982\u5f53\u53c2\u6570\u662f3\u4e2a10x5\u7684\u5f20\u91cf\uff0ctorch.cat\u7684\u7ed3\u679c\u662f30x5\u7684\u5f20\u91cf\uff0c\n\u800ctorch.stack\u7684\u7ed3\u679c\u662f3x10x5\u7684\u5f20\u91cf\u3002\n'''\ntensor = torch.cat(list_of_tensors, dim=0)\ntensor = torch.stack(list_of_tensors, dim=0)<\/code><\/pre>\n\n\n\n<p><strong>\u5c06\u6574\u6570\u6807\u7b7e\u8f6c\u4e3aone-hot\u7f16\u7801<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\n# pytorch\u7684\u6807\u8bb0\u9ed8\u8ba4\u4ece0\u5f00\u59cb\ntensor = torch.tensor(&#91;0, 2, 1, 3])\nN = tensor.size(0)\nnum_classes = 4\none_hot = torch.zeros(N, num_classes).long()\none_hot.scatter_(dim=1, index=torch.unsqueeze(tensor, dim=1), src=torch.ones(N, num_classes).long())<\/code><\/pre>\n\n\n\n<p><strong>\u5f97\u5230\u975e\u96f6\u5143\u7d20<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>torch.nonzero(tensor)               # index of non-zero elements\ntorch.nonzero(tensor==0)            # index of zero elements\ntorch.nonzero(tensor).size(0)       # number of non-zero elements\ntorch.nonzero(tensor == 0).size(0)  # number of zero elements\n\n<strong>\u5224\u65ad\u4e24\u4e2a\u5f20\u91cf\u76f8\u7b49<\/strong>\ntorch.allclose(tensor1, tensor2)  # float tensor\ntorch.equal(tensor1, tensor2)     # int tensor\n<strong>\u5f20\u91cf\u6269\u5c55<\/strong>\n# Expand tensor of shape 64*512 to shape 64*512*7*7.\ntensor = torch.rand(64,512)\ntorch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)\n<strong>\u77e9\u9635\u4e58\u6cd5<\/strong>\n# Matrix multiplcation: (m*n) * (n*p) * -&gt; (m*p).\nresult = 
torch.mm(tensor1, tensor2)\n\n# Batch matrix multiplication: (b*m*n) * (b*n*p) -&gt; (b*m*p)\nresult = torch.bmm(tensor1, tensor2)\n\n# Element-wise multiplication.\nresult = tensor1 * tensor2\n\n<strong>\u8ba1\u7b97\u4e24\u7ec4\u6570\u636e\u4e4b\u95f4\u7684\u4e24\u4e24\u6b27\u5f0f\u8ddd\u79bb<\/strong>\n\u5229\u7528broadcast\u673a\u5236\ndist = torch.sqrt(torch.sum((X1&#91;:,None,:] - X2) ** 2, dim=2))<\/code><\/pre>\n\n\n\n<h2>3. \u6a21\u578b\u5b9a\u4e49\u548c\u64cd\u4f5c<\/h2>\n\n\n\n<p>\u4e00\u4e2a\u7b80\u5355\u4e24\u5c42\u5377\u79ef\u7f51\u7edc\u7684\u793a\u4f8b<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\n# convolutional neural network (2 convolutional layers)\nclass ConvNet(nn.Module):\n    def __init__(self, num_classes=10):\n        super(ConvNet, self).__init__()\n        self.layer1 = nn.Sequential(\n            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),\n            nn.BatchNorm2d(16),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2, stride=2))\n        self.layer2 = nn.Sequential(\n            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),\n            nn.BatchNorm2d(32),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2, stride=2))\n        self.fc = nn.Linear(7*7*32, num_classes)\n\n    def forward(self, x):\n        out = self.layer1(x)\n        out = self.layer2(out)\n        out = out.reshape(out.size(0), -1)\n        out = self.fc(out)\n        return out\n\n\nmodel = ConvNet(num_classes).to(device)\n<\/code><\/pre>\n\n\n\n<p class=\"has-light-pink-background-color has-background\">\u5377\u79ef\u5c42\u7684\u8ba1\u7b97\u548c\u5c55\u793a\u53ef\u4ee5\u7528\u8fd9\u4e2a<a href=\"https:\/\/ezyang.github.io\/convolution-visualizer\/index.html\">\u7f51\u7ad9<\/a>\u8f85\u52a9\u3002<\/p>\n\n\n\n<p><strong>\u53cc\u7ebf\u6027\u6c47\u5408\uff08bilinear pooling\uff09<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>X = torch.reshape(N, D, H * W)                        # Assume X has shape N*D*H*W\nX 
= torch.bmm(X, torch.transpose(X, 1, 2)) \/ (H * W)  # Bilinear pooling\nassert X.size() == (N, D, D)\nX = torch.reshape(X, (N, D * D))\nX = torch.sign(X) * torch.sqrt(torch.abs(X) + 1e-5)   # Signed-sqrt normalization\nX = torch.nn.functional.normalize(X)                  # L2 normalization\n<\/code><\/pre>\n\n\n\n<p><strong>\u591a\u5361\u540c\u6b65 BN\uff08Batch normalization\uff09<\/strong><\/p>\n\n\n\n<p>\u5f53\u4f7f\u7528 torch.nn.DataParallel \u5c06\u4ee3\u7801\u8fd0\u884c\u5728\u591a\u5f20 GPU \u5361\u4e0a\u65f6\uff0cPyTorch \u7684 BN \u5c42\u9ed8\u8ba4\u64cd\u4f5c\u662f\u5404\u5361\u4e0a\u6570\u636e\u72ec\u7acb\u5730\u8ba1\u7b97\u5747\u503c\u548c\u6807\u51c6\u5dee\uff0c\u540c\u6b65 BN \u4f7f\u7528\u6240\u6709\u5361\u4e0a\u7684\u6570\u636e\u4e00\u8d77\u8ba1\u7b97 BN \u5c42\u7684\u5747\u503c\u548c\u6807\u51c6\u5dee\uff0c\u7f13\u89e3\u4e86\u5f53\u6279\u91cf\u5927\u5c0f\uff08batch size\uff09\u6bd4\u8f83\u5c0f\u65f6\u5bf9\u5747\u503c\u548c\u6807\u51c6\u5dee\u4f30\u8ba1\u4e0d\u51c6\u7684\u60c5\u51b5\uff0c\u662f\u5728\u76ee\u6807\u68c0\u6d4b\u7b49\u4efb\u52a1\u4e2d\u4e00\u4e2a\u6709\u6548\u7684\u63d0\u5347\u6027\u80fd\u7684\u6280\u5de7\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>sync_bn = torch.nn.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, \n                                 track_running_stats=True)<\/code><\/pre>\n\n\n\n<p>\u5c06\u5df2\u6709\u7f51\u7edc\u7684\u6240\u6709BN\u5c42\u6539\u4e3a\u540c\u6b65BN\u5c42<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def convertBNtoSyncBN(module, process_group=None):\n    '''Recursively replace all BN layers to SyncBN layer.\n\n    Args:\n        module&#91;torch.nn.Module]. 
Network\n    '''\n    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):\n        sync_bn = torch.nn.SyncBatchNorm(module.num_features, module.eps, module.momentum, \n                                         module.affine, module.track_running_stats, process_group)\n        sync_bn.running_mean = module.running_mean\n        sync_bn.running_var = module.running_var\n        if module.affine:\n            sync_bn.weight = module.weight.clone().detach()\n            sync_bn.bias = module.bias.clone().detach()\n        return sync_bn\n    else:\n        for name, child_module in module.named_children():\n            setattr(module, name) = convert_syncbn_model(child_module, process_group=process_group))\n        return module\n<\/code><\/pre>\n\n\n\n<h3><strong>\u7c7b\u4f3c BN \u6ed1\u52a8\u5e73\u5747<\/strong><\/h3>\n\n\n\n<p>\u5982\u679c\u8981\u5b9e\u73b0\u7c7b\u4f3c BN \u6ed1\u52a8\u5e73\u5747\u7684\u64cd\u4f5c\uff0c\u5728 forward \u51fd\u6570\u4e2d\u8981\u4f7f\u7528\u539f\u5730\uff08inplace\uff09\u64cd\u4f5c\u7ed9\u6ed1\u52a8\u5e73\u5747\u8d4b\u503c\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>class BN(torch.nn.Module)\n    def __init__(self):\n        ...\n        self.register_buffer('running_mean', torch.zeros(num_features))\n\n    def forward(self, X):\n        ...\n        self.running_mean += momentum * (current - self.running_mean)<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code><strong>\u8ba1\u7b97\u6a21\u578b\u6574\u4f53\u53c2\u6570\u91cf<\/strong>\n\nnum_parameters = sum(torch.numel(parameter) for parameter in model.parameters())\n\n<strong>\u67e5\u770b\u7f51\u7edc\u4e2d\u7684\u53c2\u6570<\/strong>\n\n\u53ef\u4ee5\u901a\u8fc7model.state_dict()\u6216\u8005model.named_parameters()\u51fd\u6570\u67e5\u770b\u73b0\u5728\u7684\u5168\u90e8\u53ef\u8bad\u7ec3\u53c2\u6570\uff08\u5305\u62ec\u901a\u8fc7\u7ee7\u627f\u5f97\u5230\u7684\u7236\u7c7b\u4e2d\u7684\u53c2\u6570\uff09\nparams = list(model.named_parameters())\n(name, param) = 
params&#91;28]\nprint(name)\nprint(param.grad)\nprint('-------------------------------------------------')\n(name2, param2) = params&#91;29]\nprint(name2)\nprint(param2.grad)\nprint('----------------------------------------------------')\n(name1, param1) = params&#91;30]\nprint(name1)\nprint(param1.grad)\n\n<strong>\u6a21\u578b\u53ef\u89c6\u5316\uff08\u4f7f\u7528pytorchviz\uff09<\/strong>\n\n<a href=\"http:\/\/szagoruyko\/pytorchvizgithub.com\">szagoruyko\/pytorchvizgithub.com<\/a>\n\u7c7b\u4f3c Keras \u7684 model.summary() \u8f93\u51fa\u6a21\u578b\u4fe1\u606f\uff0c\u4f7f\u7528pytorch-summary\nsksq96\/pytorch-summarygithub.com\n\n<strong>\u6a21\u578b\u6743\u91cd\u521d\u59cb\u5316<\/strong>\n\u6ce8\u610f model.modules() \u548c model.children() \u7684\u533a\u522b\uff1amodel.modules() \u4f1a\u8fed\u4ee3\u5730\u904d\u5386\u6a21\u578b\u7684\u6240\u6709\u5b50\u5c42\uff0c\u800c model.children() \u53ea\u4f1a\u904d\u5386\u6a21\u578b\u4e0b\u7684\u4e00\u5c42\u3002\n\n# Common practise for initialization.\nfor layer in model.modules():\n    if isinstance(layer, torch.nn.Conv2d):\n        torch.nn.init.kaiming_normal_(layer.weight, mode='fan_out',\n                                      nonlinearity='relu')\n        if layer.bias is not None:\n            torch.nn.init.constant_(layer.bias, val=0.0)\n    elif isinstance(layer, torch.nn.BatchNorm2d):\n        torch.nn.init.constant_(layer.weight, val=1.0)\n        torch.nn.init.constant_(layer.bias, val=0.0)\n    elif isinstance(layer, torch.nn.Linear):\n        torch.nn.init.xavier_normal_(layer.weight)\n        if layer.bias is not None:\n            torch.nn.init.constant_(layer.bias, val=0.0)\n\n# Initialization with given tensor.\nlayer.weight = 
torch.nn.Parameter(tensor)\n\n<strong>\u63d0\u53d6\u6a21\u578b\u4e2d\u7684\u67d0\u4e00\u5c42<\/strong>\nmodules()\u4f1a\u8fd4\u56de\u6a21\u578b\u4e2d\u6240\u6709\u6a21\u5757\u7684\u8fed\u4ee3\u5668\uff0c\u5b83\u80fd\u591f\u8bbf\u95ee\u5230\u6700\u5185\u5c42\uff0c\u6bd4\u5982self.layer1.conv1\u8fd9\u4e2a\u6a21\u5757\uff0c\u8fd8\u6709\u4e00\u4e2a\u4e0e\u5b83\u4eec\u76f8\u5bf9\u5e94\u7684\u662fname_children()\u5c5e\u6027\u4ee5\u53canamed_modules(),\u8fd9\u4e24\u4e2a\u4e0d\u4ec5\u4f1a\u8fd4\u56de\u6a21\u5757\u7684\u8fed\u4ee3\u5668\uff0c\u8fd8\u4f1a\u8fd4\u56de\u7f51\u7edc\u5c42\u7684\u540d\u5b57\u3002\n# \u53d6\u6a21\u578b\u4e2d\u7684\u524d\u4e24\u5c42\nnew_model = nn.Sequential(*list(model.children())&#91;:2] \n# \u5982\u679c\u5e0c\u671b\u63d0\u53d6\u51fa\u6a21\u578b\u4e2d\u7684\u6240\u6709\u5377\u79ef\u5c42\uff0c\u53ef\u4ee5\u50cf\u4e0b\u9762\u8fd9\u6837\u64cd\u4f5c\uff1a\nfor layer in model.named_modules():\n    if isinstance(layer&#91;1],nn.Conv2d):\n         conv_model.add_module(layer&#91;0],layer&#91;1])\n\n\n<strong>\u90e8\u5206\u5c42\u4f7f\u7528\u9884\u8bad\u7ec3\u6a21\u578b<\/strong>\n\u6ce8\u610f\u5982\u679c\u4fdd\u5b58\u7684\u6a21\u578b\u662f torch.nn.DataParallel\uff0c\u5219\u5f53\u524d\u7684\u6a21\u578b\u4e5f\u9700\u8981\u662f\nmodel.load_state_dict(torch.load('model.pth'), strict=False)\n\u5c06\u5728 GPU \u4fdd\u5b58\u7684\u6a21\u578b\u52a0\u8f7d\u5230 CPU\nmodel.load_state_dict(torch.load('model.pth', map_location='cpu'))\n\n<strong>\u5bfc\u5165\u53e6\u4e00\u4e2a\u6a21\u578b\u7684\u76f8\u540c\u90e8\u5206\u5230\u65b0\u7684\u6a21\u578b<\/strong>\n\u6a21\u578b\u5bfc\u5165\u53c2\u6570\u65f6\uff0c\u5982\u679c\u4e24\u4e2a\u6a21\u578b\u7ed3\u6784\u4e0d\u4e00\u81f4\uff0c\u5219\u76f4\u63a5\u5bfc\u5165\u53c2\u6570\u4f1a\u62a5\u9519\u3002\u7528\u4e0b\u9762\u65b9\u6cd5\u53ef\u4ee5\u628a\u53e6\u4e00\u4e2a\u6a21\u578b\u7684\u76f8\u540c\u7684\u90e8\u5206\u5bfc\u5165\u5230\u65b0\u7684\u6a21\u578b\u4e2d\u3002\n# model_new\u4ee3\u8868\u65b0\u7684\u6a21\u578b\n# 
model_saved\u4ee3\u8868\u5176\u4ed6\u6a21\u578b\uff0c\u6bd4\u5982\u7528torch.load\u5bfc\u5165\u7684\u5df2\u4fdd\u5b58\u7684\u6a21\u578b\nmodel_new_dict = model_new.state_dict()\nmodel_common_dict = {k:v for k, v in model_saved.items() if k in model_new_dict.keys()}\nmodel_new_dict.update(model_common_dict)\nmodel_new.load_state_dict(model_new_dict)<\/code><\/pre>\n\n\n\n<p><strong>4. \u6570\u636e\u5904\u7406<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code><strong>\u8ba1\u7b97\u6570\u636e\u96c6\u7684\u5747\u503c\u548c\u6807\u51c6\u5dee<\/strong>\nimport os\nimport cv2\nimport numpy as np\nfrom torch.utils.data import Dataset\nfrom PIL import Image\n\n\ndef compute_mean_and_std(dataset):\n    # \u8f93\u5165PyTorch\u7684dataset\uff0c\u8f93\u51fa\u5747\u503c\u548c\u6807\u51c6\u5dee\n    mean_r = 0\n    mean_g = 0\n    mean_b = 0\n\n    for img, _ in dataset:\n        img = np.asarray(img) # change PIL Image to numpy array\n        mean_b += np.mean(img&#91;:, :, 0])\n        mean_g += np.mean(img&#91;:, :, 1])\n        mean_r += np.mean(img&#91;:, :, 2])\n\n    mean_b \/= len(dataset)\n    mean_g \/= len(dataset)\n    mean_r \/= len(dataset)\n\n    diff_r = 0\n    diff_g = 0\n    diff_b = 0\n\n    N = 0\n\n    for img, _ in dataset:\n        img = np.asarray(img)\n\n        diff_b += np.sum(np.power(img&#91;:, :, 0] - mean_b, 2))\n        diff_g += np.sum(np.power(img&#91;:, :, 1] - mean_g, 2))\n        diff_r += np.sum(np.power(img&#91;:, :, 2] - mean_r, 2))\n\n        N += np.prod(img&#91;:, :, 0].shape)\n\n    std_b = np.sqrt(diff_b \/ N)\n    std_g = np.sqrt(diff_g \/ N)\n    std_r = np.sqrt(diff_r \/ N)\n\n    mean = (mean_b.item() \/ 255.0, mean_g.item() \/ 255.0, mean_r.item() \/ 255.0)\n    std = (std_b.item() \/ 255.0, std_g.item() \/ 255.0, std_r.item() \/ 255.0)\n    return mean, std\n\n\n<strong>\u5f97\u5230\u89c6\u9891\u6570\u636e\u57fa\u672c\u4fe1\u606f<\/strong>\nimport cv2\nvideo = cv2.VideoCapture(mp4_path)\nheight = 
int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\nwidth = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\nnum_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))\nfps = int(video.get(cv2.CAP_PROP_FPS))\nvideo.release()\nTSN \u6bcf\u6bb5\uff08segment\uff09\u91c7\u6837\u4e00\u5e27\u89c6\u9891\nK = self._num_segments\nif is_train:\n    if num_frames &gt; K:\n        # Random index for each segment.\n        frame_indices = torch.randint(\n            high=num_frames \/\/ K, size=(K,), dtype=torch.long)\n        frame_indices += num_frames \/\/ K * torch.arange(K)\n    else:\n        frame_indices = torch.randint(\n            high=num_frames, size=(K - num_frames,), dtype=torch.long)\n        frame_indices = torch.sort(torch.cat((\n            torch.arange(num_frames), frame_indices)))&#91;0]\nelse:\n    if num_frames &gt; K:\n        # Middle index for each segment.\n        frame_indices = num_frames \/ K \/\/ 2\n        frame_indices += num_frames \/\/ K * torch.arange(K)\n    else:\n        frame_indices = torch.sort(torch.cat((                              \n            torch.arange(num_frames), torch.arange(K - num_frames))))&#91;0]\nassert frame_indices.size() == (K,)\nreturn &#91;frame_indices&#91;i] for i in range(K)]\n\n\n\n<strong>\u5e38\u7528\u8bad\u7ec3\u548c\u9a8c\u8bc1\u6570\u636e\u9884\u5904\u7406<\/strong>\n\u5176\u4e2d ToTensor \u64cd\u4f5c\u4f1a\u5c06 PIL.Image \u6216\u5f62\u72b6\u4e3a H\u00d7W\u00d7D\uff0c\u6570\u503c\u8303\u56f4\u4e3a &#91;0, 255] \u7684 np.ndarray \u8f6c\u6362\u4e3a\u5f62\u72b6\u4e3a D\u00d7H\u00d7W\uff0c\u6570\u503c\u8303\u56f4\u4e3a &#91;0.0, 1.0] \u7684 torch.Tensor\u3002\ntrain_transform = torchvision.transforms.Compose(&#91;\n    torchvision.transforms.RandomResizedCrop(size=224,\n                                             scale=(0.08, 1.0)),\n    torchvision.transforms.RandomHorizontalFlip(),\n    torchvision.transforms.ToTensor(),\n    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                                     
std=(0.229, 0.224, 0.225)),\n ])\n val_transform = torchvision.transforms.Compose(&#91;\n    torchvision.transforms.Resize(256),\n    torchvision.transforms.CenterCrop(224),\n    torchvision.transforms.ToTensor(),\n    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                                     std=(0.229, 0.224, 0.225)),\n])<\/code><\/pre>\n\n\n\n<h2>5. \u6a21\u578b\u8bad\u7ec3\u548c\u6d4b\u8bd5<\/h2>\n\n\n\n<pre class=\"wp-block-code\"><code><strong>\u5206\u7c7b\u6a21\u578b\u8bad\u7ec3\u4ee3\u7801<\/strong>\n# Loss and optimizer\ncriterion = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n\n# Train the model\ntotal_step = len(train_loader)\nfor epoch in range(num_epochs):\n    for i ,(images, labels) in enumerate(train_loader):\n        images = images.to(device)\n        labels = labels.to(device)\n\n        # Forward pass\n        outputs = model(images)\n        loss = criterion(outputs, labels)\n\n        # Backward and optimizer\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if (i+1) % 100 == 0:\n            print('Epoch: &#91;{}\/{}], Step: &#91;{}\/{}], Loss: {}'\n                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n\n\n<strong>\u5206\u7c7b\u6a21\u578b\u6d4b\u8bd5\u4ee3\u7801<\/strong>\n# Test the model\nmodel.eval()  # eval mode(batch norm uses moving mean\/variance \n              #instead of mini-batch mean\/variance)\nwith torch.no_grad():\n    correct = 0\n    total = 0\n    for images, labels in test_loader:\n        images = images.to(device)\n        labels = labels.to(device)\n        outputs = model(images)\n        _, predicted = torch.max(outputs.data, 1)\n        total += labels.size(0)\n        correct += (predicted == labels).sum().item()\n\n    print('Test accuracy of the model on the 10000 test images: {} %'\n          .format(100 * correct \/ 
total))\n\n\n\n<strong>\u81ea\u5b9a\u4e49loss<\/strong>\n\u7ee7\u627ftorch.nn.Module\u7c7b\u5199\u81ea\u5df1\u7684loss\u3002\nclass MyLoss(torch.nn.Moudle):\n    def __init__(self):\n        super(MyLoss, self).__init__()\n\n    def forward(self, x, y):\n        loss = torch.mean((x - y) ** 2)\n        return loss\n\n\n\n<strong>\u6807\u7b7e\u5e73\u6ed1\uff08label smoothing\uff09<\/strong>\n\u5199\u4e00\u4e2alabel_smoothing.py\u7684\u6587\u4ef6\uff0c\u7136\u540e\u5728\u8bad\u7ec3\u4ee3\u7801\u91cc\u5f15\u7528\uff0c\u7528LSR\u4ee3\u66ff\u4ea4\u53c9\u71b5\u635f\u5931\u5373\u53ef\u3002label_smoothing.py\u5185\u5bb9\u5982\u4e0b\uff1a\nimport torch\nimport torch.nn as nn\n\n\nclass LSR(nn.Module):\n\n    def __init__(self, e=0.1, reduction='mean'):\n        super().__init__()\n\n        self.log_softmax = nn.LogSoftmax(dim=1)\n        self.e = e\n        self.reduction = reduction\n\n    def _one_hot(self, labels, classes, value=1):\n        \"\"\"\n            Convert labels to one hot vectors\n\n        Args:\n            labels: torch tensor in format &#91;label1, label2, label3, ...]\n            classes: int, number of classes\n            value: label value in one hot vector, default to 1\n\n        Returns:\n            return one hot format labels in shape &#91;batchsize, classes]\n        \"\"\"\n\n        one_hot = torch.zeros(labels.size(0), classes)\n\n        #labels and value_added  size must match\n        labels = labels.view(labels.size(0), -1)\n        value_added = torch.Tensor(labels.size(0), 1).fill_(value)\n\n        value_added = value_added.to(labels.device)\n        one_hot = one_hot.to(labels.device)\n\n        one_hot.scatter_add_(1, labels, value_added)\n\n        return one_hot\n\n    def _smooth_label(self, target, length, smooth_factor):\n        \"\"\"convert targets to one-hot format, and smooth\n        them.\n        Args:\n            target: target in form with &#91;label1, label2, label_batchsize]\n            length: length of 
one-hot format(number of classes)\n            smooth_factor: smooth factor for label smooth\n\n        Returns:\n            smoothed labels in one hot format\n        \"\"\"\n        one_hot = self._one_hot(target, length, value=1 - smooth_factor)\n        one_hot += smooth_factor \/ (length - 1)\n\n        return one_hot.to(target.device)\n\n    def forward(self, x, target):\n\n        if x.size(0) != target.size(0):\n            raise ValueError('Expected input batchsize ({}) to match target batch_size({})'\n                    .format(x.size(0), target.size(0)))\n\n        if x.dim() &lt; 2:\n            raise ValueError('Expected input tensor to have least 2 dimensions(got {})'\n                    .format(x.size(0)))\n\n        if x.dim() != 2:\n            raise ValueError('Only 2 dimension tensor are implemented, (got {})'\n                    .format(x.size()))\n\n\n        smoothed_target = self._smooth_label(target, x.size(1), self.e)\n        x = self.log_softmax(x)\n        loss = torch.sum(- x * smoothed_target, dim=1)\n\n        if self.reduction == 'none':\n            return loss\n\n        elif self.reduction == 'sum':\n            return torch.sum(loss)\n\n        elif self.reduction == 'mean':\n            return torch.mean(loss)\n\n        else:\n            raise ValueError('unrecognized option, expect reduction to be one of none, mean, sum')\n\u6216\u8005\u76f4\u63a5\u5728\u8bad\u7ec3\u6587\u4ef6\u91cc\u505alabel smoothing\nfor images, labels in train_loader:\n    images, labels = images.cuda(), labels.cuda()\n    N = labels.size(0)\n    # C is the number of classes.\n    smoothed_labels = torch.full(size=(N, C), fill_value=0.1 \/ (C - 1)).cuda()\n    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)\n\n    score = model(images)\n    log_prob = torch.nn.functional.log_softmax(score, dim=1)\n    loss = -torch.sum(log_prob * smoothed_labels) \/ N\n    optimizer.zero_grad()\n    loss.backward()\n    
optimizer.step()\n<strong>\n\n<\/strong>\n<strong>Mixup\u8bad\u7ec3<\/strong>\nbeta_distribution = torch.distributions.beta.Beta(alpha, alpha)\nfor images, labels in train_loader:\n    images, labels = images.cuda(), labels.cuda()\n\n    # Mixup images and labels.\n    lambda_ = beta_distribution.sample(&#91;]).item()\n    index = torch.randperm(images.size(0)).cuda()\n    mixed_images = lambda_ * images + (1 - lambda_) * images&#91;index, :]\n    label_a, label_b = labels, labels&#91;index]\n\n    # Mixup loss.\n    scores = model(mixed_images)\n    loss = (lambda_ * loss_function(scores, label_a)\n            + (1 - lambda_) * loss_function(scores, label_b))\n    optimizer.zero_grad()\n    loss.backward()\n    optimizer.step()\n\n\n<strong>L1 \u6b63\u5219\u5316<\/strong>\nl1_regularization = torch.nn.L1Loss(reduction='sum')\nloss = ...  # Standard cross-entropy loss\nfor param in model.parameters():\n    loss += torch.sum(torch.abs(param))\nloss.backward()\n\n\n\n<strong>\u4e0d\u5bf9\u504f\u7f6e\u9879\u8fdb\u884c\u6743\u91cd\u8870\u51cf\uff08weight decay\uff09<\/strong>\npytorch\u91cc\u7684weight decay\u76f8\u5f53\u4e8el2\u6b63\u5219\nbias_list = (param for name, param in model.named_parameters() if name&#91;-4:] == 'bias')\nothers_list = (param for name, param in model.named_parameters() if name&#91;-4:] != 'bias')\nparameters = &#91;{'params': bias_list, 'weight_decay': 0},                \n              {'params': others_list}]\noptimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)\n\n\n\n<strong>\u68af\u5ea6\u88c1\u526a\uff08gradient clipping\uff09<\/strong>\ntorch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=20)\n\n\n\n<strong>\u5f97\u5230\u5f53\u524d\u5b66\u4e60\u7387<\/strong>\n# If there is one global learning rate (which is the common case).\nlr = next(iter(optimizer.param_groups))&#91;'lr']\n\n# If there are multiple learning rates for different layers.\nall_lr = &#91;]\nfor param_group in 
optimizer.param_groups:\n    all_lr.append(param_group&#91;'lr'])\n\u53e6\u4e00\u79cd\u65b9\u6cd5\uff0c\u5728\u4e00\u4e2abatch\u8bad\u7ec3\u4ee3\u7801\u91cc\uff0c\u5f53\u524d\u7684lr\u662foptimizer.param_groups&#91;0]&#91;'lr']\n\n\n\n<strong>\u5b66\u4e60\u7387\u8870\u51cf<\/strong>\n# Reduce learning rate when validation accuracy plateau.\nscheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)\nfor t in range(0, 80):\n    train(...)\n    val(...)\n    scheduler.step(val_acc)\n\n# Cosine annealing learning rate.\nscheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=80)\n# Reduce learning rate by 10 at given epochs.\nscheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=&#91;50, 70], gamma=0.1)\nfor t in range(0, 80):\n    scheduler.step()    \n    train(...)\n    val(...)\n\n# Learning rate warmup by 10 epochs.\nscheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda t: t \/ 10)\nfor t in range(0, 10):\n    scheduler.step()\n    train(...)\n    val(...)\n\n\n\n<strong>\u4f18\u5316\u5668\u94fe\u5f0f\u66f4\u65b0<\/strong>\n\u4ece1.4\u7248\u672c\u5f00\u59cb\uff0ctorch.optim.lr_scheduler \u652f\u6301\u94fe\u5f0f\u66f4\u65b0\uff08chaining\uff09\uff0c\u5373\u7528\u6237\u53ef\u4ee5\u5b9a\u4e49\u4e24\u4e2a schedulers\uff0c\u5e76\u4ea4\u66ff\u5728\u8bad\u7ec3\u4e2d\u4f7f\u7528\u3002\nimport torch\nfrom torch.optim import SGD\nfrom torch.optim.lr_scheduler import ExponentialLR, StepLR\nmodel = &#91;torch.nn.Parameter(torch.randn(2, 2, requires_grad=True))]\noptimizer = SGD(model, 0.1)\nscheduler1 = ExponentialLR(optimizer, gamma=0.9)\nscheduler2 = StepLR(optimizer, step_size=3, gamma=0.1)\nfor epoch in range(4):\n    print(epoch, scheduler2.get_last_lr()&#91;0])\n    optimizer.step()\n    scheduler1.step()\n    
scheduler2.step()\n\n\n\n<strong>\u6a21\u578b\u8bad\u7ec3\u53ef\u89c6\u5316<\/strong>\nPyTorch\u53ef\u4ee5\u4f7f\u7528tensorboard\u6765\u53ef\u89c6\u5316\u8bad\u7ec3\u8fc7\u7a0b\u3002\n\u5b89\u88c5\u548c\u8fd0\u884cTensorBoard\u3002\npip install tensorboard\ntensorboard --logdir=runs\n\u4f7f\u7528SummaryWriter\u7c7b\u6765\u6536\u96c6\u548c\u53ef\u89c6\u5316\u76f8\u5e94\u7684\u6570\u636e\uff0c\u653e\u4e86\u65b9\u4fbf\u67e5\u770b\uff0c\u53ef\u4ee5\u4f7f\u7528\u4e0d\u540c\u7684\u6587\u4ef6\u5939\uff0c\u6bd4\u5982'Loss\/train'\u548c'Loss\/test'\u3002\nfrom torch.utils.tensorboard import SummaryWriter\nimport numpy as np\n\nwriter = SummaryWriter()\n\nfor n_iter in range(100):\n    writer.add_scalar('Loss\/train', np.random.random(), n_iter)\n    writer.add_scalar('Loss\/test', np.random.random(), n_iter)\n    writer.add_scalar('Accuracy\/train', np.random.random(), n_iter)\n    writer.add_scalar('Accuracy\/test', np.random.random(), n_iter)\n\n\n\n<strong>\u4fdd\u5b58\u4e0e\u52a0\u8f7d\u65ad\u70b9<\/strong>\n\u6ce8\u610f\u4e3a\u4e86\u80fd\u591f\u6062\u590d\u8bad\u7ec3\uff0c\u6211\u4eec\u9700\u8981\u540c\u65f6\u4fdd\u5b58\u6a21\u578b\u548c\u4f18\u5316\u5668\u7684\u72b6\u6001\uff0c\u4ee5\u53ca\u5f53\u524d\u7684\u8bad\u7ec3\u8f6e\u6570\u3002\nstart_epoch = 0\n# Load checkpoint.\nif resume: # resume\u4e3a\u53c2\u6570\uff0c\u7b2c\u4e00\u6b21\u8bad\u7ec3\u65f6\u8bbe\u4e3a0\uff0c\u4e2d\u65ad\u518d\u8bad\u7ec3\u65f6\u8bbe\u4e3a1\n    model_path = os.path.join('model', 'best_checkpoint.pth.tar')\n    assert os.path.isfile(model_path)\n    checkpoint = torch.load(model_path)\n    best_acc = checkpoint&#91;'best_acc']\n    start_epoch = checkpoint&#91;'epoch']\n    model.load_state_dict(checkpoint&#91;'model'])\n    optimizer.load_state_dict(checkpoint&#91;'optimizer'])\n    print('Load checkpoint at epoch {}.'.format(start_epoch))\n    print('Best accuracy so far {}.'.format(best_acc))\n\n# Train the model\nfor epoch in range(start_epoch, num_epochs): \n    ... 
\n\n    # Test the model\n    ...\n\n    # save checkpoint\n    is_best = current_acc &gt; best_acc\n    best_acc = max(current_acc, best_acc)\n    checkpoint = {\n        'best_acc': best_acc,\n        'epoch': epoch + 1,\n        'model': model.state_dict(),\n        'optimizer': optimizer.state_dict(),\n    }\n    model_path = os.path.join('model', 'checkpoint.pth.tar')\n    best_model_path = os.path.join('model', 'best_checkpoint.pth.tar')\n    torch.save(checkpoint, model_path)\n    if is_best:\n        shutil.copy(model_path, best_model_path)\n\n\n<strong>\u63d0\u53d6 ImageNet \u9884\u8bad\u7ec3\u6a21\u578b\u67d0\u5c42\u7684\u5377\u79ef\u7279\u5f81<\/strong>\n# VGG-16 relu5-3 feature.\nmodel = torchvision.models.vgg16(pretrained=True).features&#91;:-1]\n# VGG-16 pool5 feature.\nmodel = torchvision.models.vgg16(pretrained=True).features\n# VGG-16 fc7 feature.\nmodel = torchvision.models.vgg16(pretrained=True)\nmodel.classifier = torch.nn.Sequential(*list(model.classifier.children())&#91;:-3])\n# ResNet GAP feature.\nmodel = torchvision.models.resnet18(pretrained=True)\nmodel = torch.nn.Sequential(collections.OrderedDict(\n    list(model.named_children())&#91;:-1]))\n\nwith torch.no_grad():\n    model.eval()\n    conv_representation = model(image)\n\n\n\n\n<strong>\u63d0\u53d6 ImageNet \u9884\u8bad\u7ec3\u6a21\u578b\u591a\u5c42\u7684\u5377\u79ef\u7279\u5f81<\/strong>\nclass FeatureExtractor(torch.nn.Module):\n    \"\"\"Helper class to extract several convolution features from the given\n    pre-trained model.\n\n    Attributes:\n        _model, torch.nn.Module.\n        _layers_to_extract, list&lt;str&gt; or set&lt;str&gt;\n\n    Example:\n        &gt;&gt;&gt; model = torchvision.models.resnet152(pretrained=True)\n        &gt;&gt;&gt; model = torch.nn.Sequential(collections.OrderedDict(\n                list(model.named_children())&#91;:-1]))\n        &gt;&gt;&gt; conv_representation = FeatureExtractor(\n                pretrained_model=model,\n                
layers_to_extract={'layer1', 'layer2', 'layer3', 'layer4'})(image)\n    \"\"\"\n    def __init__(self, pretrained_model, layers_to_extract):\n        torch.nn.Module.__init__(self)\n        self._model = pretrained_model\n        self._model.eval()\n        self._layers_to_extract = set(layers_to_extract)\n\n    def forward(self, x):\n        with torch.no_grad():\n            conv_representation = &#91;]\n            for name, layer in self._model.named_children():\n                x = layer(x)\n                if name in self._layers_to_extract:\n                    conv_representation.append(x)\n            return conv_representation\n\n\n\n<strong>\u5fae\u8c03\u5168\u8fde\u63a5\u5c42<\/strong>\nmodel = torchvision.models.resnet18(pretrained=True)\nfor param in model.parameters():\n    param.requires_grad = False\nmodel.fc = nn.Linear(512, 100)  # Replace the last fc layer\noptimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)\n\n\n<strong>\u4ee5\u8f83\u5927\u5b66\u4e60\u7387\u5fae\u8c03\u5168\u8fde\u63a5\u5c42\uff0c\u8f83\u5c0f\u5b66\u4e60\u7387\u5fae\u8c03\u5377\u79ef\u5c42<\/strong>\nmodel = torchvision.models.resnet18(pretrained=True)\nfinetuned_parameters = list(map(id, model.fc.parameters()))\nconv_parameters = (p for p in model.parameters() if id(p) not in finetuned_parameters)\nparameters = &#91;{'params': conv_parameters, 'lr': 1e-3}, \n              {'params': model.fc.parameters()}]\noptimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)<\/code><\/pre>\n\n\n\n<p><strong>6. 
\u5176\u4ed6\u6ce8\u610f\u4e8b\u9879<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\n\u4e0d\u8981\u4f7f\u7528\u592a\u5927\u7684\u7ebf\u6027\u5c42\u3002\u56e0\u4e3ann.Linear(m,n)\u4f7f\u7528\u7684\u662f\u7684\u5185\u5b58\uff0c\u7ebf\u6027\u5c42\u592a\u5927\u5f88\u5bb9\u6613\u8d85\u51fa\u73b0\u6709\u663e\u5b58\u3002\n\n\u4e0d\u8981\u5728\u592a\u957f\u7684\u5e8f\u5217\u4e0a\u4f7f\u7528RNN\u3002\u56e0\u4e3aRNN\u53cd\u5411\u4f20\u64ad\u4f7f\u7528\u7684\u662fBPTT\u7b97\u6cd5\uff0c\u5176\u9700\u8981\u7684\u5185\u5b58\u548c\u8f93\u5165\u5e8f\u5217\u7684\u957f\u5ea6\u5448\u7ebf\u6027\u5173\u7cfb\u3002\n\nmodel(x) \u524d\u7528 model.train() \u548c model.eval() \u5207\u6362\u7f51\u7edc\u72b6\u6001\u3002\n\u4e0d\u9700\u8981\u8ba1\u7b97\u68af\u5ea6\u7684\u4ee3\u7801\u5757\u7528 with torch.no_grad() \u5305\u542b\u8d77\u6765\u3002\n\nmodel.eval() \u548c torch.no_grad() \u7684\u533a\u522b\u5728\u4e8e\uff0cmodel.eval() \u662f\u5c06\u7f51\u7edc\u5207\u6362\u4e3a\u6d4b\u8bd5\u72b6\u6001\uff0c\u4f8b\u5982 BN \u548cdropout\u5728\u8bad\u7ec3\u548c\u6d4b\u8bd5\u9636\u6bb5\u4f7f\u7528\u4e0d\u540c\u7684\u8ba1\u7b97\u65b9\u6cd5\u3002torch.no_grad() \u662f\u5173\u95ed PyTorch \u5f20\u91cf\u7684\u81ea\u52a8\u6c42\u5bfc\u673a\u5236\uff0c\u4ee5\u51cf\u5c11\u5b58\u50a8\u4f7f\u7528\u548c\u52a0\u901f\u8ba1\u7b97\uff0c\u5f97\u5230\u7684\u7ed3\u679c\u65e0\u6cd5\u8fdb\u884c loss.backward()\u3002\n\nmodel.zero_grad()\u4f1a\u628a\u6574\u4e2a\u6a21\u578b\u7684\u53c2\u6570\u7684\u68af\u5ea6\u90fd\u5f52\u96f6, \u800coptimizer.zero_grad()\u53ea\u4f1a\u628a\u4f20\u5165\u5176\u4e2d\u7684\u53c2\u6570\u7684\u68af\u5ea6\u5f52\u96f6.\n\ntorch.nn.CrossEntropyLoss \u7684\u8f93\u5165\u4e0d\u9700\u8981\u7ecf\u8fc7 Softmax\u3002torch.nn.CrossEntropyLoss \u7b49\u4ef7\u4e8e torch.nn.functional.log_softmax + torch.nn.NLLLoss\u3002\n\nloss.backward() \u524d\u7528 optimizer.zero_grad() \u6e05\u9664\u7d2f\u79ef\u68af\u5ea6\u3002\n\ntorch.utils.data.DataLoader \u4e2d\u5c3d\u91cf\u8bbe\u7f6e 
pin_memory=True\uff0c\u5bf9\u7279\u522b\u5c0f\u7684\u6570\u636e\u96c6\u5982 MNIST \u8bbe\u7f6e pin_memory=False \u53cd\u800c\u66f4\u5feb\u4e00\u4e9b\u3002num_workers \u7684\u8bbe\u7f6e\u9700\u8981\u5728\u5b9e\u9a8c\u4e2d\u627e\u5230\u6700\u5feb\u7684\u53d6\u503c\u3002\n\n\u7528 del \u53ca\u65f6\u5220\u9664\u4e0d\u7528\u7684\u4e2d\u95f4\u53d8\u91cf\uff0c\u8282\u7ea6 GPU \u5b58\u50a8\u3002\n\u4f7f\u7528 inplace \u64cd\u4f5c\u53ef\u8282\u7ea6 GPU \u5b58\u50a8\uff0c\u5982x = torch.nn.functional.relu(x, inplace=True)\n\u51cf\u5c11 CPU \u548c GPU \u4e4b\u95f4\u7684\u6570\u636e\u4f20\u8f93\u3002\u4f8b\u5982\u5982\u679c\u4f60\u60f3\u77e5\u9053\u4e00\u4e2a epoch \u4e2d\u6bcf\u4e2a mini-batch \u7684 loss \u548c\u51c6\u786e\u7387\uff0c\u5148\u5c06\u5b83\u4eec\u7d2f\u79ef\u5728 GPU \u4e2d\u7b49\u4e00\u4e2a epoch \u7ed3\u675f\u4e4b\u540e\u4e00\u8d77\u4f20\u8f93\u56de CPU \u4f1a\u6bd4\u6bcf\u4e2a mini-batch \u90fd\u8fdb\u884c\u4e00\u6b21 GPU \u5230 CPU \u7684\u4f20\u8f93\u66f4\u5feb\u3002\n\n\u4f7f\u7528\u534a\u7cbe\u5ea6\u6d6e\u70b9\u6570 half() \u4f1a\u6709\u4e00\u5b9a\u7684\u901f\u5ea6\u63d0\u5347\uff0c\u5177\u4f53\u6548\u7387\u4f9d\u8d56\u4e8e GPU \u578b\u53f7\u3002\u9700\u8981\u5c0f\u5fc3\u6570\u503c\u7cbe\u5ea6\u8fc7\u4f4e\u5e26\u6765\u7684\u7a33\u5b9a\u6027\u95ee\u9898\u3002\n\u65f6\u5e38\u4f7f\u7528 assert tensor.size() == (N, D, H, W) \u4f5c\u4e3a\u8c03\u8bd5\u624b\u6bb5\uff0c\u786e\u4fdd\u5f20\u91cf\u7ef4\u5ea6\u548c\u4f60\u8bbe\u60f3\u4e2d\u4e00\u81f4\u3002\n\n\u9664\u4e86\u6807\u8bb0 y \u5916\uff0c\u5c3d\u91cf\u5c11\u4f7f\u7528\u4e00\u7ef4\u5f20\u91cf\uff0c\u4f7f\u7528 n*1 \u7684\u4e8c\u7ef4\u5f20\u91cf\u4ee3\u66ff\uff0c\u53ef\u4ee5\u907f\u514d\u4e00\u4e9b\u610f\u60f3\u4e0d\u5230\u7684\u4e00\u7ef4\u5f20\u91cf\u8ba1\u7b97\u7ed3\u679c\u3002\n\n\u7edf\u8ba1\u4ee3\u7801\u5404\u90e8\u5206\u8017\u65f6\n\nwith torch.autograd.profiler.profile(enabled=True, use_cuda=False) as profile:    ...print(profile)# \u6216\u8005\u5728\u547d\u4ee4\u884c\u8fd0\u884cpython -m 
torch.utils.bottleneck main.py\n\n\u4f7f\u7528TorchSnooper\u6765\u8c03\u8bd5PyTorch\u4ee3\u7801\uff0c\u7a0b\u5e8f\u5728\u6267\u884c\u7684\u65f6\u5019\uff0c\u5c31\u4f1a\u81ea\u52a8 print \u51fa\u6765\u6bcf\u4e00\u884c\u7684\u6267\u884c\u7ed3\u679c\u7684 tensor \u7684\u5f62\u72b6\u3001\u6570\u636e\u7c7b\u578b\u3001\u8bbe\u5907\u3001\u662f\u5426\u9700\u8981\u68af\u5ea6\u7684\u4fe1\u606f\u3002\n# pip install torchsnooperimport torchsnooper# \u5bf9\u4e8e\u51fd\u6570\uff0c\u4f7f\u7528\u4fee\u9970\u5668@torchsnooper.snoop()# \u5982\u679c\u4e0d\u662f\u51fd\u6570\uff0c\u4f7f\u7528 with \u8bed\u53e5\u6765\u6fc0\u6d3b TorchSnooper\uff0c\u628a\u8bad\u7ec3\u7684\u90a3\u4e2a\u5faa\u73af\u88c5\u8fdb with \u8bed\u53e5\u4e2d\u53bb\u3002with torchsnooper.snoop():    \u539f\u672c\u7684\u4ee3\u7801\n\n<\/code><\/pre>\n\n\n\n<p>https:\/\/github.com\/zasdfgbnm\/TorchSnoopergithub.com<br>\u6a21\u578b\u53ef\u89e3\u91ca\u6027\uff0c\u4f7f\u7528captum\u5e93\uff1ahttps:\/\/captum.ai\/captum.ai<\/p>\n","protected":false},"excerpt":{"rendered":"<p>PyTorch\u5e38\u7528\u4ee3\u7801\u6bb5\u5408\u96c6\uff0c\u6db5\u76d6\u57fa\u672c\u914d\u7f6e\u3001\u5f20\u91cf\u5904\u7406\u3001\u6a21\u578b\u5b9a\u4e49\u4e0e\u64cd\u4f5c\u3001\u6570\u636e\u5904\u7406\u3001\u6a21\u578b\u8bad\u7ec3\u4e0e\u6d4b\u8bd5\u7b495\u4e2a\u65b9\u9762\uff0c\u8fd8\u7ed9\u51fa\u4e86\u591a\u4e2a\u503c\u5f97\u6ce8\u610f\u7684Tips\uff0c\u5185\u5bb9\u975e\u5e38\u5168\u9762\u3002<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[11],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/914"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":
true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=914"}],"version-history":[{"count":17,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/914\/revisions"}],"predecessor-version":[{"id":3322,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/914\/revisions\/3322"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=914"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=914"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=914"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}