{"id":11565,"date":"2023-01-03T14:53:40","date_gmt":"2023-01-03T06:53:40","guid":{"rendered":"http:\/\/139.9.1.231\/?p=11565"},"modified":"2023-01-03T14:53:42","modified_gmt":"2023-01-03T06:53:42","slug":"nerf-code","status":"publish","type":"post","link":"http:\/\/139.9.1.231\/index.php\/2023\/01\/03\/nerf-code\/","title":{"rendered":"\u795e\u7ecf\u8f90\u5c04\u573a(NeRF)-\u4ee3\u7801\u89e3\u6790"},"content":{"rendered":"\n<p>\u53c2\u8003\uff1a<a rel=\"noreferrer noopener\" href=\"https:\/\/www.zhihu.com\/people\/dasuda\" target=\"_blank\">Dasuda<\/a>\u200b and <a href=\"https:\/\/liwen.site\/archives\/2302\">Liwen.site<\/a><\/p>\n\n\n\n<p class=\"has-bright-blue-background-color has-background\">\u53c2\u8003\u4ee3\u7801\uff1aNerf-pl:\u00a0<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/kwea123\/nerf_pl\" target=\"_blank\">https:\/\/github.com\/kwea123\/nerf_pl<\/a><\/p>\n\n\n\n<ul><li>\u7f51\u7ad9\uff1a&nbsp;<a href=\"https:\/\/www.matthewtancik.com\/nerf\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/www.matthewtancik.com\/nerf<\/a><\/li><li>\u8bba\u6587\uff1a<a href=\"https:\/\/arxiv.org\/pdf\/2003.08934.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/arxiv.org\/pdf\/2003.08934.pdf<\/a><\/li><li>\u4ee3\u7801\uff1a<a href=\"https:\/\/github.com\/bmild\/nerf\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/github.com\/bmild\/nerf<\/a><\/li><\/ul>\n\n\n\n\n\n<h2>\u4f4d\u7f6e\u7f16\u7801<\/h2>\n\n\n\n<p>       NeRF \u7684\u8f93\u5165\u662f\u4e00\u4e2a\u4e94\u7ef4\u5411\u91cf: \uff08\u7269\u4f53\uff09\u7a7a\u95f4\u70b9\u7684\u4f4d\u7f6e<strong>x<\/strong>=(<em>x<\/em>,<em>y<\/em>,<em>z<\/em>)\u00a0\u548c \uff08\u76f8\u673a\uff09\u89c2\u6d4b\u65b9\u5411<strong>d<\/strong>=(<em>\u03b8<\/em>,<em>\u03d5<\/em>)\u3002NeRF \u4f7f\u7528\u4e86\u4f4d\u7f6e\u7f16\u7801\uff08positional 
encoding\uff09\u628a\u4e00\u7ef4\u7684\u4f4d\u7f6e\u5750\u6807\uff0c\u8f6c\u6362\u4e3a\u9ad8\u7ef4\u7684\u8868\u5f81\u3002\u4f8b\u5982\u00a0<em>p<\/em>\u2208RL, \u901a\u8fc7\u51fd\u6570<em>\u03b3<\/em>(\u22c5)\u00a0\u6620\u5c04\u5230R2L\u00a0\u7a7a\u95f4\u4e2d\uff0c\u8fd9\u91cc<em>L<\/em>\u00a0\u6307\u7684\u662f\u7f16\u7801\u7684\u6570\u91cf\uff0c\u5bf9\u4e8e\u4f4d\u7f6e\u5750\u6807\uff0cL=10\uff1b\u5bf9\u4e8e\u89c2\u6d4b\u89d2\u5ea6\uff0cL=4\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"678\" height=\"56\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-4.png\" alt=\"\" class=\"wp-image-11575\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-4.png 678w, http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-4-300x25.png 300w\" sizes=\"(max-width: 678px) 100vw, 678px\" \/><\/figure>\n\n\n\n<h3 id=\"header-id-3\">\u4ee3\u7801\u5b9e\u73b0<\/h3>\n\n\n\n<pre id=\"vzlx3XUOp8l9L2dq4faBfS672NTLfc34\" class=\"wp-block-code\"><code> # \u7c7b\u7684\u5b9a\u4e49\nclass Embedding(nn.Module):\n    def __init__(self, in_channels, N_freqs, logscale=True):\n        \"\"\"\n        Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)\n        in_channels: number of input channels (3 for both xyz and direction)\n        \"\"\"\n        super(Embedding, self).__init__()\n        self.N_freqs = N_freqs\n        self.in_channels = in_channels\n        self.funcs = &#091;torch.sin, torch.cos]\n        self.out_channels = in_channels*(len(self.funcs)*N_freqs+1)\n \n        if logscale:\n            self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs)\n        else:\n            self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs)\n \n    def forward(self, x):\n        \"\"\"\n        Embeds x to (x, sin(2^k x), cos(2^k x), ...) 
\n        Different from the paper, \"x\" is also in the output\n        See https:\/\/github.com\/bmild\/nerf\/issues\/12\n \n        Inputs:\n            x: (B, self.in_channels)\n \n        Outputs:\n            out: (B, self.out_channels)\n        \"\"\"\n        out = &#091;x]\n        for freq in self.freq_bands:\n            for func in self.funcs:\n                out += &#091;func(freq*x)]\n \n        return torch.cat(out, -1)\n \n# \u4f7f\u7528\n \nclass NeRFSystem(LightningModule):\n    def __init__(self, hparams):\n        ...\n        self.embedding_xyz = Embedding(3, 10) # 10 is the default number\n        self.embedding_dir = Embedding(3, 4) # 4 is the default number\n        self.embeddings = &#091;self.embedding_xyz, self.embedding_dir]\n        ...   <\/code><\/pre>\n\n\n\n<h3 id=\"header-id-4\">\u89e3\u91ca<\/h3>\n\n\n\n<ul><li>\u5bf9\u4e8e\u4f4d\u7f6e\u5750\u6807\u00a0(<em>x<\/em>,<em>y<\/em>,<em>z<\/em>)\uff0c \u6bcf\u4e00\u4e2a\u503c\u90fd\u4f7f\u7528 10 \u4e2a\u00a0<em>sin<\/em>\u00a0\u548c 10 \u4e2a<em>cos<\/em>\u00a0\u9891\u7387\u8fdb\u884c\u62d3\u5c55\u3002\u4f8b\u5982 Embeds x to (x, sin (2^k x), cos (2^k x), \u2026) \u3002\u518d\u8fde\u63a5\u4e00\u4e2a\u672c\u8eab\u3002\u56e0\u6b64\u6bcf\u4e00\u4e2a\u503c\u90fd\u62d3\u5c55\u4e3a\u00a010+10+1=21\u7ef4\u3002\u5bf9\u4e8e\u4f4d\u7f6e\u5750\u6807\u7684\u4e09\u4e2a\u503c\uff0c\u603b\u5171\u6709\u00a03\u00d721=63\u00a0\u7ef4\u3002<\/li><li>\u5bf9\u4e8e\u76f8\u673a\u89d2\u5ea6\u00a0(<em>\u03b8<\/em>,<em>\u03d5<\/em>)\uff0c\u4e5f\u662f\u7c7b\u4f3c\uff0c\u4f7f\u7528 4 \u4e2a<em>sin<\/em>\u00a0\u548c 4 
\u4e2a\u00a0<em>cos<\/em>\u00a0\u9891\u7387\u8fdb\u884c\u62d3\u5c55\u3002\u8fd9\u91cc\u8f93\u5165\u4fdd\u7559\u4e86\u4e00\u4f4d\uff0c\u5b9e\u9645\u8f93\u5165\u662f(<em>\u03b8<\/em>,<em>\u03d5<\/em>,1)\u3002\u518d\u8fde\u63a5\u4e00\u4e2a\u672c\u8eab\u3002\u56e0\u6b64\u6bcf\u4e00\u4e2a\u503c\u90fd\u62d3\u5c55\u4e3a4+4+1=9\u00a0\u7ef4\u3002\u5bf9\u4e8e\u76f8\u673a\u89d2\u5ea6\u7684\u4e09\u4e2a\u503c\uff0c\u603b\u5171\u6709\u00a03\u00d79=27\u00a0\u7ef4\u3002<\/li><\/ul>\n\n\n\n<h2 id=\"header-id-5\">NeRF \u7f51\u7edc<\/h2>\n\n\n\n<p>NeRF \u7f51\u7edc\u9ed8\u8ba4\u662f\u4e00\u4e2a\u591a\u5c42\u7684 MLP\u3002\u4e2d\u95f4\u7b2c\u56db\u5c42\u6709 skip connection\uff0c\u6784\u6210\u4e86\u4e00\u4e2a ResNet \u7684\u7ed3\u6784\u3002\u7f51\u7edc\u7684\u5bbd\u5ea6\u9ed8\u8ba4\u4e3a 256\u3002<\/p>\n\n\n\n<p><strong>\u8f93\u5165<\/strong>\uff1a<\/p>\n\n\n\n<ol><li>\u4f4d\u7f6e\u5750\u6807\u7684\u8868\u5f81\uff08in_channels_xyz\uff09\uff1a63d<\/li><\/ol>\n\n\n\n<p><strong>\u8f93\u51fa<\/strong>\uff1a<\/p>\n\n\n\n<ol><li>\u4f53\u5bc6\u5ea6<em>\u03c3<\/em>\uff1a1d<\/li><li>RGB \u8272\u5f69\u503c<strong>C<\/strong>: 3d<\/li><\/ol>\n\n\n\n<p><strong>\u7f51\u7edc\u7ed3\u6784<\/strong>\uff1a<br>FC \u6307\u7684\u662f\u5e26 ReLU \u7684\u5168\u8fde\u63a5\u5c42\u3002Linear \u5c42\u6307\u7684\u662f\u5355\u7eaf\u7684\u7ebf\u6027\u65b9\u7a0b\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img src=\"https:\/\/liwen.site\/wp-content\/uploads\/2022\/05\/post-2302-62735ca03e7cb.png\" alt=\"quicker_40ae8453-64cb-4645-8a27-e62001a85aa2.png\"\/><\/figure>\n\n\n\n<h3 id=\"header-id-6\">\u4ee3\u7801\u5b9e\u73b0<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>class NeRF(nn.Module):\n    def __init__(self,\n                 D=8, W=256,\n                 in_channels_xyz=63, in_channels_dir=27, \n                 skips=&#091;4]):\n        \"\"\"\n        D: number of layers for density (sigma) encoder\n        W: number of hidden units in each layer\n        in_channels_xyz: number of input channels for 
xyz (3+3*10*2=63 by default)\n        in_channels_dir: number of input channels for direction (3+3*4*2=27 by default)\n        skips: add skip connection in the Dth layer\n        \"\"\"\n        super(NeRF, self).__init__()\n        self.D = D\n        self.W = W\n        self.in_channels_xyz = in_channels_xyz\n        self.in_channels_dir = in_channels_dir\n        self.skips = skips\n \n        # xyz encoding layers\n        for i in range(D):\n            if i == 0:\n                layer = nn.Linear(in_channels_xyz, W)\n            elif i in skips:\n                layer = nn.Linear(W+in_channels_xyz, W)\n            else:\n                layer = nn.Linear(W, W)\n            layer = nn.Sequential(layer, nn.ReLU(True))\n            setattr(self, f\"xyz_encoding_{i+1}\", layer)\n        self.xyz_encoding_final = nn.Linear(W, W)\n \n        # direction encoding layers\n        self.dir_encoding = nn.Sequential(\n                                nn.Linear(W+in_channels_dir, W\/\/2),\n                                nn.ReLU(True))\n \n        # output layers\n        self.sigma = nn.Linear(W, 1)\n        self.rgb = nn.Sequential(\n                        nn.Linear(W\/\/2, 3),\n                        nn.Sigmoid())\n \n    def forward(self, x, sigma_only=False):\n        \"\"\"\n        Encodes input (xyz+dir) to rgb+sigma (not ready to render yet).\n        For rendering this ray, please see rendering.py\n \n        Inputs:\n            x: (B, self.in_channels_xyz(+self.in_channels_dir))\n               the embedded vector of position and direction\n            sigma_only: whether to infer sigma only. 
If True,\n                        x is of shape (B, self.in_channels_xyz)\n \n        Outputs:\n            if sigma_only:\n                sigma: (B, 1) sigma\n            else:\n                out: (B, 4), rgb and sigma\n        \"\"\"\n        if not sigma_only:\n            input_xyz, input_dir = \\\n                torch.split(x, &#091;self.in_channels_xyz, self.in_channels_dir], dim=-1)\n        else:\n            input_xyz = x\n \n        xyz_ = input_xyz\n        for i in range(self.D):\n            if i in self.skips:\n                xyz_ = torch.cat(&#091;input_xyz, xyz_], -1)\n            xyz_ = getattr(self, f\"xyz_encoding_{i+1}\")(xyz_)\n \n        sigma = self.sigma(xyz_)\n        if sigma_only:\n            return sigma\n \n        xyz_encoding_final = self.xyz_encoding_final(xyz_)\n \n        dir_encoding_input = torch.cat(&#091;xyz_encoding_final, input_dir], -1)\n        dir_encoding = self.dir_encoding(dir_encoding_input)\n        rgb = self.rgb(dir_encoding)\n \n        out = torch.cat(&#091;rgb, sigma], -1)\n \n        return out<\/code><\/pre>\n\n\n\n<h3 id=\"header-id-7\">  \u4f53\u7d20\u6e32\u67d3<\/h3>\n\n\n\n<p>       \u5047\u8bbe\u6211\u4eec\u5df2\u7ecf\u5f97\u5230\u4e86\u4e00\u675f\u5149\u7ebf\u4e0a\u6240\u6709\u7684\u4f4d\u7f6e\u5bf9\u5e94\u7684\u8272\u5f69\u548c\u4f53\u5bc6\u5ea6\u3002\u6211\u4eec\u9700\u8981\u5bf9\u8fd9\u675f\u5149\u7ebf\u8fdb\u884c\u540e\u5904\u7406\uff08\u4f53\u7d20\u6e32\u67d3\uff09\uff0c\u5f97\u5230\u6700\u7ec8\u5728\u56fe\u7247\u4e0a\u7684\u50cf\u7d20\u503c\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># z_vals: (N_rays, N_samples_) depths of the sampled positions\n# noise_std: factor to perturb the model's prediction of sigma\uff08\u63d0\u5347\u6a21\u578b\u9c81\u68d2\u6027\uff1f\uff1f\uff09\n \n# Convert these values using volume rendering (Section 4)\ndeltas = z_vals&#091;:, 1:] - z_vals&#091;:, :-1] # (N_rays, N_samples_-1)\ndelta_inf = 1e10 * torch.ones_like(deltas&#091;:, :1]) # (N_rays, 1) the last 
delta is infinity\ndeltas = torch.cat(&#091;deltas, delta_inf], -1)  # (N_rays, N_samples_)\n \n# Multiply each distance by the norm of its corresponding direction ray\n# to convert to real world distance (accounts for non-unit directions).\ndeltas = deltas * torch.norm(dir_.unsqueeze(1), dim=-1)\n \nnoise = torch.randn(sigmas.shape, device=sigmas.device) * noise_std\n \n# compute alpha by the formula (3)\nalphas = 1-torch.exp(-deltas*torch.relu(sigmas+noise)) # (N_rays, N_samples_)\nalphas_shifted = \\\n    torch.cat(&#091;torch.ones_like(alphas&#091;:, :1]), 1-alphas+1e-10], -1) # &#091;1, a1, a2, ...]\nweights = \\\n    alphas * torch.cumprod(alphas_shifted, -1)&#091;:, :-1] # (N_rays, N_samples_)\nweights_sum = weights.sum(1) # (N_rays), the accumulated opacity along the rays\n                                # equals \"1 - (1-a1)(1-a2)...(1-an)\" mathematically\nif weights_only:\n    return weights\n \n# compute final weighted outputs\nrgb_final = torch.sum(weights.unsqueeze(-1)*rgbs, -2) # (N_rays, 3)\ndepth_final = torch.sum(weights*z_vals, -1) # (N_rays)<\/code><\/pre>\n\n\n\n<h3 id=\"header-id-8\">\u7b2c\u4e8c\u8f6e\u6e32\u67d3<\/h3>\n\n\n\n<p>\u5bf9\u4e8e\u6e32\u67d3\u7684\u7ed3\u679c\uff0c\u4f1a\u6839\u636e \u5bf9\u5e94\u7684\u6743\u91cd\uff0c\u4f7f\u7528 pdf \u62bd\u6837\uff0c\u5f97\u5230\u65b0\u7684\u6e32\u67d3\u70b9\u3002\u4f8b\u5982\u9ed8\u8ba4\u7b2c\u4e00\u8f6e\u7c97\u6e32\u67d3\u6bcf\u675f\u5149\u7ebf\u662f 64 \u4e2a\u6837\u672c\u70b9\uff0c\u7b2c\u4e8c\u8f6e\u518d\u589e\u52a0 128 \u4e2a\u62bd\u6837\u70b9\u3002<\/p>\n\n\n\n<p>\u7136\u540e\u4f7f\u7528 finemodel \u8fdb\u884c\u9884\u6d4b\uff0c\u540e\u5bf9\u6240\u6709\u7684\u6837\u672c\u70b9\uff0864+128\uff09\u8fdb\u884c\u4f53\u7d20\u6e32\u67d3\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def sample_pdf(bins, weights, N_importance, det=False, eps=1e-5):\n    \"\"\"\n    Sample @N_importance samples from @bins with distribution defined by @weights.\n \n    Inputs:\n        bins: (N_rays, 
N_samples_+1) where N_samples_ is \"the number of coarse samples per ray - 2\"\n        weights: (N_rays, N_samples_)\n        N_importance: the number of samples to draw from the distribution\n        det: deterministic or not\n        eps: a small number to prevent division by zero\n \n    Outputs:\n        samples: the sampled samples\n    \"\"\"\n    N_rays, N_samples_ = weights.shape\n    weights = weights + eps # prevent division by zero (don't do inplace op!)\n    pdf = weights \/ torch.sum(weights, -1, keepdim=True) # (N_rays, N_samples_)\n    cdf = torch.cumsum(pdf, -1) # (N_rays, N_samples), cumulative distribution function\n    cdf = torch.cat(&#091;torch.zeros_like(cdf&#091;: ,:1]), cdf], -1)  # (N_rays, N_samples_+1) \n                                                               # padded to 0~1 inclusive\n \n    if det:\n        u = torch.linspace(0, 1, N_importance, device=bins.device)\n        u = u.expand(N_rays, N_importance)\n    else:\n        u = torch.rand(N_rays, N_importance, device=bins.device)\n    u = u.contiguous()\n \n    inds = searchsorted(cdf, u, side='right')\n    below = torch.clamp_min(inds-1, 0)\n    above = torch.clamp_max(inds, N_samples_)\n \n    inds_sampled = torch.stack(&#091;below, above], -1).view(N_rays, 2*N_importance)\n    cdf_g = torch.gather(cdf, 1, inds_sampled).view(N_rays, N_importance, 2)\n    bins_g = torch.gather(bins, 1, inds_sampled).view(N_rays, N_importance, 2)\n \n    denom = cdf_g&#091;...,1]-cdf_g&#091;...,0]\n    denom&#091;denom&lt;eps] = 1 # denom equals 0 means a bin has weight 0, in which case it will not be sampled\n                         # anyway, therefore any value for it is fine (set to 1 here)\n \n    samples = bins_g&#091;...,0] + (u-cdf_g&#091;...,0])\/denom * (bins_g&#091;...,1]-bins_g&#091;...,0])\n    return samples<\/code><\/pre>\n\n\n\n<h2 id=\"header-id-9\">Loss<\/h2>\n\n\n\n<p>\u8fd9\u91cc\u76f4\u63a5\u4f7f\u7528\u7684 MSE loss\uff0c\u5bf9\u8f93\u51fa\u7684\u50cf\u7d20\u503c\u548c 
ground truth \u8ba1\u7b97 L2-norm loss.<\/p>\n\n\n\n<h2 id=\"header-id-10\">\u8bad\u7ec3\u6570\u636e<\/h2>\n\n\n\n<h3 id=\"header-id-11\">\u8bad\u7ec3\u6570\u636e<\/h3>\n\n\n\n<figure class=\"wp-block-image\"><img src=\"https:\/\/liwen.site\/wp-content\/uploads\/2022\/05\/post-2302-62735ca1e226a.png\" alt=\"quicker_a23cfc59-ae27-4cb6-83fa-5149a4b91f19.png\"\/><\/figure>\n\n\n\n<p>       \u6839\u636e\u524d\u9762\u7684\u4ecb\u7ecd\uff0cNeRF \u5b9e\u73b0\u7684\uff0c\u662f\u4ece \u3010\u4f4d\u7f6e\u5750\u6807\u00a0<a rel=\"noreferrer noopener\" href=\"https:\/\/liwen.site\/archives\/x,y,z\" target=\"_blank\">katex<\/a> \u548c \u62cd\u6444\u89d2\u5ea6(<em>\u03b8<\/em>,<em>\u03d5<\/em>)\u3011 \u5230 \u3010\u4f53\u5bc6\u5ea6 (<em>\u03c3<\/em>) \u548c RGB \u8272\u5f69\u503c (<strong>C<\/strong>)\u3011\u7684\u6620\u5c04\u3002\u6839\u636e\u4f53\u7d20\u6e32\u67d3\u7406\u8bba\uff0c\u56fe\u7247\u4e2d\u7684\u6bcf\u4e00\u4e2a\u50cf\u7d20\uff0c\u5b9e\u8d28\u4e0a\u90fd\u662f\u4ece\u76f8\u673a\u53d1\u5c04\u51fa\u7684\u4e00\u6761\u5149\u7ebf\u6e32\u67d3\u5f97\u5230\u7684\u3002 \u56e0\u6b64\uff0c\u6211\u4eec\u9996\u5148\uff0c\u9700\u8981\u5f97\u5230\u6bcf\u4e00\u4e2a\u50cf\u7d20\u5bf9\u5e94\u7684\u5149\u7ebf\uff08ray\uff09, \u7136\u540e\uff0c\u8ba1\u7b97\u5149\u7ebf\u4e0a\u6bcf\u4e00\u4e2a\u70b9\u7684\u3010\u4f53\u5bc6\u5ea6<em>\u03c3<\/em>) \u548c RGB \u8272\u5f69\u503c (<strong>C<\/strong>)\u3011\uff0c\u6700\u540e\u518d\u6e32\u67d3\u5f97\u5230\u5bf9\u5e94\u7684\u50cf\u7d20\u503c\u3002<\/p>\n\n\n\n<p>       \u5bf9\u4e8e\u8bad\u7ec3\u6570\u636e\uff0c\u6211\u4eec\u9700\u8981\u62cd\u6444\u4e00\u7cfb\u5217\u7684\u56fe\u7247\uff08\u5982 100 \u5f20\uff09\u56fe\u7247\u548c\u4ed6\u4eec\u7684\u62cd\u6444\u76f8\u673a\u89d2\u5ea6\u3001\u5185\u53c2\u3001\u573a\u666f\u8fb9\u754c\uff08\u53ef\u4ee5\u4f7f\u7528 COLMAP 
\u83b7\u5f97\uff09\u3002\u6211\u4eec\u9700\u8981\u51c6\u5907\u6bcf\u4e00\u4e2a\u50cf\u7d20\u5bf9\u5e94\u7684\u5149\u7ebf\uff08ray\uff09\u4fe1\u606f\uff0c\u8fd9\u6837\u53ef\u4ee5\u7ec4\u6210\u6210\u5bf9\u7684\u8bad\u7ec3\u6570\u636e\u3010\u5149\u7ebf\u4fe1\u606f &lt;==> \u50cf\u7d20\u503c\u3011\u3002<\/p>\n\n\n\n<p>\u4e0b\u9762\u4ee5 LLFFDataset \uff08&#8221;datasets\/llff.py&#8221;\uff09 \u4e3a\u4f8b\uff0c\u8fdb\u884c\u5206\u6790\uff1a<\/p>\n\n\n\n<p><strong>\u8bfb\u53d6\u7684\u6570\u636e\uff08\u4ee5\u4e00\u5f20\u56fe\u7247\u4e3a\u4f8b\uff09<\/strong>\uff1a<\/p>\n\n\n\n<ul><li>\u56fe\u7247\uff1a\u5c3a\u5bf8\u662f\u00a0N<em>_img<\/em>\u200b\u00d7<em>C<\/em>\u00d7<em>H<\/em>\u00d7<em>W<\/em>\u3002 \u5176\u4e2d\u00a0<em>C<\/em>=3\u00a0\u4ee3\u8868\u4e86\u8fd9\u662f RGB \u4e09\u901a\u9053\u56fe\u7247<\/li><li>\u62cd\u6444\u89d2\u5ea6\u4fe1\u606f\uff08\u4ece COLMAP \u751f\u6210\uff09\uff1a<em>Nimg<\/em>\u200b\u00d717\u3002\u524d 15 \u7ef4\u53ef\u4ee5\u53d8\u5f62\u4e3a\u00a03\u00d75\uff0c\u4ee3\u8868\u4e86\u76f8\u673a\u7684 pose\uff0c\u540e 2 \u7ef4\u662f\u6700\u8fd1\u548c\u6700\u8fdc\u7684\u6df1\u5ea6\u3002\u89e3\u91ca\uff1a 3\u00d75 pose matrices and 2 depth bounds for each image. Each pose has [R T] as the left 3\u00d74 matrix and [H W F] as the right 3\u00d71 matrix. R matrix is in the form [down right back] instead of [right up back] . 
\uff08<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/bmild\/nerf\/issues\/34\" target=\"_blank\">https:\/\/github.com\/bmild\/nerf\/issues\/34<\/a>\uff09<\/li><\/ul>\n\n\n\n<h3 id=\"header-id-12\">\u62cd\u6444\u89d2\u5ea6\u9884\u5904\u7406<\/h3>\n\n\n\n<p>\u7b2c\u4e00\u6b65\uff1a\u6839\u636e\u62cd\u6444\u7684\u5c3a\u5bf8\u548c\u5904\u7406\u5c3a\u5bf8\u7684\u5173\u7cfb\uff0c\u7f29\u653e\u76f8\u673a\u7684\u7126\u8ddd\u3002\u4f8b\u5982\uff1a<em>Himg<\/em>\u200b=3024,<em>Wimg<\/em>\u200b=4032,<em>Fimg<\/em>\u200b=3260, \u5982\u679c\u6211\u4eec\u60f3\u5904\u7406\u7684\u5c3a\u5bf8\u662f<em>H<\/em>=378,<em>W<\/em>=504\u00a0(\u4e3a\u4e86\u63d0\u5347\u8bad\u7ec3\u7684\u901f\u5ea6)\uff0c\u6211\u4eec\u9700\u8981\u7f29\u653e\u7126\u8ddd F:<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-full\"><img loading=\"lazy\" width=\"452\" height=\"88\" src=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-5.png\" alt=\"\" class=\"wp-image-11604\" srcset=\"http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-5.png 452w, http:\/\/139.9.1.231\/wp-content\/uploads\/2023\/01\/image-5-300x58.png 300w\" sizes=\"(max-width: 452px) 100vw, 452px\" \/><\/figure><\/div>\n\n\n\n<pre class=\"wp-block-code\"><code># \"datasets\/llff.py\", line\uff1a188\n    # Step 1: rescale focal length according to training resolution\n    H, W, self.focal = poses&#091;0, :, -1] # original intrinsics, same for all images\n    assert H*self.img_wh&#091;0] == W*self.img_wh&#091;1], \\\n        f'You must set @img_wh to have the same aspect ratio as ({W}, {H}) !'\n \n    self.focal *= self.img_wh&#091;0]\/W<\/code><\/pre>\n\n\n\n<p>\u7b2c\u4e8c\u6b65\uff1a\u8c03\u6574 pose \u7684\u65b9\u5411\u3002\u5728 &#8220;poses_bounds.npy&#8221; \u4e2d\uff0cpose \u7684\u65b9\u5411\u662f \u201c\u4e0b\u53f3\u540e\u201d\uff0c\u6211\u4eec\u8c03\u6574\u5230 \u201c\u53f3\u4e0a\u540e\u201d\u3002\u540c\u65f6\u4f7f\u7528 \u201c<em>center_poses(poses)<\/em>\u201d 
\u51fd\u6570\uff0c\u5bf9\u6574\u4e2a dataset \u7684\u5750\u6807\u8f74\u8fdb\u884c\u6807\u51c6\u5316\uff08\uff1f\uff1f\uff09\u3002<br>\u89e3\u91ca\uff1a\u201cposes_avg computes a &#8220;central&#8221; pose for the dataset, based on using the mean translation, the mean z axis, and adopting the mean y axis as an &#8220;up&#8221; direction (so that Up x Z = X and then Z x X = Y). recenter_poses very simply applies the inverse of this average pose to the dataset (a rigid rotation\/translation) so that the identity extrinsic matrix is looking at the scene, which is nice because normalizes the orientation of the scene for later rendering from the learned NeRF. This is also important for using NDC (Normalized device coordinates) coordinates, since we assume the scene is centered there too.\u201d\uff08<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/bmild\/nerf\/issues\/34\" target=\"_blank\">https:\/\/github.com\/bmild\/nerf\/issues\/34<\/a>\uff09<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># \"datasets\/llff.py\", line\uff1a195\r\n    # Step 2: correct poses\r\n    # Original poses has rotation in form \"down right back\", change to \"right up back\"\r\n    # See https:\/\/github.com\/bmild\/nerf\/issues\/34\r\n    poses = np.concatenate(&#091;poses&#091;..., 1:2], -poses&#091;..., :1], poses&#091;..., 2:4]], -1)\r\n            # (N_images, 3, 4) exclude H, W, focal\r\n    self.poses, self.pose_avg = center_poses(poses)<\/code><\/pre>\n\n\n\n<p>\u7b2c\u4e09\u6b65\uff1a\u4ee4\u6700\u8fd1\u7684\u8ddd\u79bb\u7ea6\u4e3a 1\u3002 \u89e3\u91ca\uff1a\u201cThe NDC code takes in a &#8220;near&#8221; bound and assumes the far bound is infinity (this doesn\u2019t matter too much since NDC space samples in 1\/depth so moving from &#8220;far&#8221; to infinity is only slightly less sample-efficient). 
You can see here that the &#8220;near&#8221; bound is hardcoded to 1\u201d\u3002For more details on how to use NDC space see&nbsp;<a href=\"https:\/\/github.com\/bmild\/nerf\/files\/4451808\/ndc_derivation.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">https:\/\/github.com\/bmild\/nerf\/files\/4451808\/ndc_derivation.pdf<\/a><\/p>\n\n\n\n<pre id=\"8JcgNNbdvXcxDEK4C0vc6Sf5JFdyCAhk\" class=\"wp-block-code\"><code><em># \"datasets\/llff.py\", line\uff1a205<\/em>    <em># Step 3: correct scale so that the nearest depth is at a little more than 1.0<\/em>    <em># See https:\/\/github.com\/bmild\/nerf\/issues\/34<\/em>    near_original = self.bounds.min()    scale_factor = near_original*0.75 <em># 0.75 is the default parameter<\/em>                                        <em># the nearest depth is at 1\/0.75=1.33<\/em>    self.bounds \/= scale_factor    self.poses&#091;..., 3] \/= scale_factor<\/code><\/pre>\n\n\n\n<h3>\u8ba1\u7b97\u5149\u7ebf\u89d2\u5ea6<\/h3>\n\n\n\n<p><br>\u63a5\u4e0b\u6765\u5c31\u662f\u5bf9\u6bcf\u4e00\u4e2a\u50cf\u7d20\uff0c\u4f7f\u7528 \u201cget_ray_directions()\u201d \u51fd\u6570\u8ba1\u7b97\u6240\u5bf9\u5e94\u7684\u5149\u7ebf\u3002\u8fd9\u91cc\u53ea\u9700\u8981\u4f7f\u7528\u56fe\u50cf\u7684\u957f\u5bbd\u548c\u7126\u8ddd\u5373\u53ef\u8ba1\u7b97<\/p>\n\n\n\n<p>self.directions = get_ray_directions(self.img_wh[1], self.img_wh[0], self.focal) # (H, W, 3)<\/p>\n\n\n\n<p>\u8c03\u7528\u51fd\u6570\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def get_ray_directions(H, W, focal):\n    \"\"\"\n    Get ray directions for all pixels in camera coordinate.\n    Reference: https:\/\/www.scratchapixel.com\/lessons\/3d-basic-rendering\/\n               ray-tracing-generating-camera-rays\/standard-coordinate-systems\n \n    Inputs:\n        H, W, focal: image height, width and focal length\n \n    Outputs:\n        directions: (H, W, 3), the direction of the rays in camera coordinate\n    \"\"\"\n    grid = create_meshgrid(H, W, 
normalized_coordinates=False)&#091;0]\n    i, j = grid.unbind(-1)\n    # the direction here is without +0.5 pixel centering as calibration is not so accurate\n    # see https:\/\/github.com\/bmild\/nerf\/issues\/24\n    directions = \\\n        torch.stack(&#091;(i-W\/2)\/focal, -(j-H\/2)\/focal, -torch.ones_like(i)], -1) # (H, W, 3)\n \n    return directions<\/code><\/pre>\n\n\n\n<p>\u4e16\u754c\u5750\u6807\u7cfb\u4e0b\u7684\u5149\u7ebf<br>\u5728\u62ff\u5230\u6bcf\u4e00\u4e2a\u50cf\u7d20\u5bf9\u5e94\u7684\u5149\u7ebf\u89d2\u5ea6\u540e\uff0c\u6211\u4eec\u9700\u8981\u5f97\u5230\u5177\u4f53\u7684\u5149\u7ebf\u4fe1\u606f\u3002\u9996\u5148\uff0c\u5148\u8ba1\u7b97\u5728\u4e16\u754c\u5750\u6807\u7cfb\u4e0b\u7684\u5149\u7ebf\u4fe1\u606f\u3002\u4e3b\u8981\u662f\u4e00\u4e2a\u5f52\u4e00\u5316\u7684\u64cd\u4f5c\u3002<\/p>\n\n\n\n<p>Get ray origin and normalized directions in world coordinate for all pixels in one image. Reference: https:\/\/www.scratchapixel.com\/lessons\/3d-basic-rendering\/ray-tracing-generating-camera-rays\/standard-coordinate-systems<\/p>\n\n\n\n<p>\u8f93\u5165\uff1a<\/p>\n\n\n\n<p>\u56fe\u50cf\u4e0a\u6bcf\u4e00\u70b9\u6240\u5bf9\u5e94\u7684\u5149\u7ebf\u89d2\u5ea6\uff1a(H, W, 3) precomputed ray directions in camera coordinate\u3002<br>\u76f8\u673a\u6620\u5c04\u77e9\u9635 c2w\uff1a(3, 4) transformation matrix from camera coordinate to world coordinate<br>\u8f93\u51fa\uff1a<\/p>\n\n\n\n<p>\u5149\u7ebf\u539f\u70b9\u5728\u4e16\u754c\u5750\u6807\u7cfb\u4e2d\u7684\u5750\u6807\uff1a(H<em>W, 3), the origin of the rays in world coordinate \u5728\u4e16\u754c\u5750\u6807\u7cfb\u4e2d\uff0c\u5f52\u4e00\u5316\u7684\u5149\u7ebf\u89d2\u5ea6\uff1a(H<\/em>W, 3), the normalized direction of the rays in world <\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\ndef get_rays(directions, c2w):\n    \"\"\"\n    Get ray origin and normalized directions in world coordinate for all pixels in one image.\n    Reference: https:\/\/www.scratchapixel.com\/lessons\/3d-basic-rendering\/\n  
             ray-tracing-generating-camera-rays\/standard-coordinate-systems\n \n    Inputs:\n        directions: (H, W, 3) precomputed ray directions in camera coordinate\n        c2w: (3, 4) transformation matrix from camera coordinate to world coordinate\n \n    Outputs:\n        rays_o: (H*W, 3), the origin of the rays in world coordinate\n        rays_d: (H*W, 3), the normalized direction of the rays in world coordinate\n    \"\"\"\n    # Rotate ray directions from camera coordinate to the world coordinate\n    rays_d = directions @ c2w&#091;:, :3].T # (H, W, 3)\n    rays_d = rays_d \/ torch.norm(rays_d, dim=-1, keepdim=True)\n    # The origin of all rays is the camera origin in world coordinate\n    rays_o = c2w&#091;:, 3].expand(rays_d.shape) # (H, W, 3)\n \n    rays_d = rays_d.view(-1, 3)\n    rays_o = rays_o.view(-1, 3)\n \n    return rays_o, rays_d<\/code><\/pre>\n\n\n\n<h2 id=\"header-id-15\">NDC \u4e0b\u7684\u5149\u7ebf<\/h2>\n\n\n\n<p>NDC (Normalized device coordinates) \u5f52\u4e00\u5316\u7684\u8bbe\u5907\u5750\u6807\u7cfb\u3002<\/p>\n\n\n\n<p>\u9996\u5148\u5bf9\u5149\u7ebf\u7684\u8fb9\u754c\u8fdb\u884c\u9650\u5b9a\uff1a<\/p>\n\n\n\n<pre id=\"xnFijfeRY3pBAyIwTlTFcTPRHpf1Py9w\" class=\"wp-block-code\"><code>near, far = 0, 1<\/code><\/pre>\n\n\n\n<p>\u7136\u540e\u5bf9\u5750\u6807\u8fdb\u884c\u5e73\u79fb\u548c\u6620\u5c04\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def get_ndc_rays(H, W, focal, near, rays_o, rays_d):\n    \"\"\"\n    Transform rays from world coordinate to NDC.\n    NDC: Space such that the canvas is a cube with sides &#091;-1, 1] in each axis.\n    For detailed derivation, please see:\n    http:&#47;&#47;www.songho.ca\/opengl\/gl_projectionmatrix.html\n    https:\/\/github.com\/bmild\/nerf\/files\/4451808\/ndc_derivation.pdf\n \n    In practice, use NDC \"if and only if\" the scene is unbounded (has a large depth).\n    See https:\/\/github.com\/bmild\/nerf\/issues\/18\n \n    Inputs:\n        H, W, focal: image height, width 
and focal length\n        near: (N_rays) or float, the depths of the near plane\n        rays_o: (N_rays, 3), the origin of the rays in world coordinate\n        rays_d: (N_rays, 3), the direction of the rays in world coordinate\n \n    Outputs:\n        rays_o: (N_rays, 3), the origin of the rays in NDC\n        rays_d: (N_rays, 3), the direction of the rays in NDC\n    \"\"\"\n    # Shift ray origins to near plane\n    t = -(near + rays_o&#091;...,2]) \/ rays_d&#091;...,2]\n    rays_o = rays_o + t&#091;...,None] * rays_d\n \n    # Store some intermediate homogeneous results\n    ox_oz = rays_o&#091;...,0] \/ rays_o&#091;...,2]\n    oy_oz = rays_o&#091;...,1] \/ rays_o&#091;...,2]\n \n    # Projection\n    o0 = -1.\/(W\/(2.*focal)) * ox_oz\n    o1 = -1.\/(H\/(2.*focal)) * oy_oz\n    o2 = 1. + 2. * near \/ rays_o&#091;...,2]\n \n    d0 = -1.\/(W\/(2.*focal)) * (rays_d&#091;...,0]\/rays_d&#091;...,2] - ox_oz)\n    d1 = -1.\/(H\/(2.*focal)) * (rays_d&#091;...,1]\/rays_d&#091;...,2] - oy_oz)\n    d2 = 1 - o2\n \n    rays_o = torch.stack(&#091;o0, o1, o2], -1) # (B, 3)\n    rays_d = torch.stack(&#091;d0, d1, d2], -1) # (B, 3)\n \n    return rays_o, rays_d<\/code><\/pre>\n\n\n\n<h2 id=\"header-id-16\">\u8bad\u7ec3\u6570\u636e\u7684\u751f\u6210<\/h2>\n\n\n\n<p>\u8f93\u51fa\u5206\u4e3a\u4e24\u90e8\u5206\uff1a\u5149\u7ebf\u7684\u4fe1\u606f\uff0c\u548c\u5bf9\u5e94\u7684\u56fe\u7247\u50cf\u7d20\u503c<\/p>\n\n\n\n<ul><li>\u5bf9\u4e8e\u6bcf\u4e00\u675f\u5149\u7ebf\uff0c\u6309\u7167 \u3010\u5149\u7ebf\u539f\u70b9 (3d), \u5149\u7ebf\u89d2\u5ea6 (3d), \u6700\u8fd1\u7684\u8fb9\u754c (1d), \u6700\u8fdc\u7684\u8fb9\u754c (1d)\u3011= 8d \u7684\u683c\u5f0f\u5b58\u50a8\u3002<\/li><li>\u5149\u7ebf\u5bf9\u5e94\u7684\u50cf\u7d20\uff0cRGB=3d \u7684\u683c\u5f0f\u5b58\u50a8\u3002<\/li><\/ul>\n\n\n\n<pre id=\"iLmU02cahA3Otqfb8riHWWRxKoUWtipH\" class=\"wp-block-code\"><code>self.all_rays += &#091;torch.cat(&#091;rays_o, rays_d,                                              
near*torch.ones_like(rays_o&#091;:, :1]),                                             far*torch.ones_like(rays_o&#091;:, :1])],                                             1)] <em># (h*w, 8)<\/em><\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u53c2\u8003\uff1aDasuda\u200b and Liwen.site \u53c2\u8003\u4ee3\u7801\uff1aNerf-pl:\u00a0https:\/\/github. &hellip; <a href=\"http:\/\/139.9.1.231\/index.php\/2023\/01\/03\/nerf-code\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u795e\u7ecf\u8f90\u5c04\u573a(NeRF)-\u4ee3\u7801\u89e3\u6790<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[4,31],"tags":[],"_links":{"self":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/11565"}],"collection":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/comments?post=11565"}],"version-history":[{"count":41,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/11565\/revisions"}],"predecessor-version":[{"id":11609,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/posts\/11565\/revisions\/11609"}],"wp:attachment":[{"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/media?parent=11565"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/categories?post=11565"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/139.9.1.231\/index.php\/wp-json\/wp\/v2\/tags?post=11565"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}