# Source code for escnn.nn.modules.normalization.fieldnorm

from collections import defaultdict

from escnn.gspaces import *
from escnn.nn import FieldType
from escnn.nn import GeometricTensor

from ..equivariant_module import EquivariantModule

import torch
from torch.nn import Parameter
from typing import List, Tuple, Any, Union
import numpy as np

__all__ = ["FieldNorm"]


class FieldNorm(EquivariantModule):

    def __init__(self,
                 in_type: FieldType,
                 eps: float = 1e-05,
                 affine: bool = True,
                 ):
        r"""

        Normalization module which normalizes each field individually.
        The statistics are only computed over the channels within a single field (not over the batch dimension or
        the spatial dimensions).
        Moreover, this layer does not track running statistics and uses only the current input, so it behaves
        similarly at train and eval time.

        For each individual field, the mean is given by the projection on the subspaces transforming under the
        trivial representation while the variance is the squared norm of the field, after the mean has been
        subtracted.

        If ``affine = True``, a single scale is learnt per input field and the bias is applied only to the
        trivial irreps (this scale and bias are shared over the spatial dimensions in order to preserve
        equivariance).

        .. warning::
            If a field contains only trivial irreps, this layer will just set its values to zero and, possibly,
            replace them with a learnable bias if ``affine = True``.

        Args:
            in_type (FieldType): the input field type
            eps (float, optional): a value added to the denominator for numerical stability. Default: ``1e-5``
            affine (bool, optional): if ``True``, this module has learnable affine parameters. Default: ``True``

        """

        assert isinstance(in_type.gspace, GSpace)

        super(FieldNorm, self).__init__()

        self.space = in_type.gspace
        self.in_type = in_type
        self.out_type = in_type

        self.affine = affine
        self.eps = eps

        # group fields by their type and
        #   - check if fields of the same type are contiguous
        #   - retrieve the indices of the fields

        # number of fields of each type
        self._nfields = defaultdict(int)

        # indices of the channels corresponding to fields belonging to each group
        _indices = defaultdict(list)

        # whether each group of fields is contiguous or not
        self._contiguous = {}

        position = 0
        last_field = None
        for r in self.in_type.representations:

            if r.name != last_field:
                # a new run of this field type starts here: the type is contiguous only if this is the
                # first run seen; meeting it again after a different type makes it non-contiguous
                self._contiguous[r.name] = r.name not in self._contiguous

            last_field = r.name
            _indices[r.name] += list(range(position, position + r.size))
            self._nfields[r.name] += 1
            position += r.size

        for name, contiguous in self._contiguous.items():

            if contiguous:
                # for contiguous fields, only the first and last indices are kept (as a plain Python list,
                # later used to build a slice)
                _indices[name] = [min(_indices[name]), max(_indices[name]) + 1]
                setattr(self, f"{self._escape_name(name)}_indices", _indices[name])
            else:
                # otherwise, transform the list of indices into a tensor
                _indices[name] = torch.LongTensor(_indices[name])

                # register the indices tensor as a buffer of this module
                self.register_buffer(f"{self._escape_name(name)}_indices", _indices[name])

        # store the size of each field type
        self._sizes = []

        # store for each field type the indices of the trivial irreps in it
        self._trivial_idxs = {}

        # store for each field type the sizes and the indices of all its irreps, grouped by their size
        self._irreps_sizes = {}

        # whether each field type contains at least one trivial irrep
        self._has_trivial = {}

        # for each different representation in the input type
        for r in self.in_type._unique_representations:
            p = 0
            trivials = []

            # mask containing the location of the trivial irreps in the irrep decomposition of the representation
            S = np.zeros((r.size, r.size))

            # find all trivial irreps occurring in the representation
            for irr_id in r.irreps:
                irr = self.in_type.fibergroup.irrep(*irr_id)
                if irr.is_trivial():
                    trivials.append(p)
                    S[p, p] = 1.

                p += irr.size

            name = r.name
            prefix = self._escape_name(name)
            self._sizes.append((name, r.size))

            self._has_trivial[name] = len(trivials) > 0

            if self._has_trivial[name]:
                # averaging matrix which computes the expectation of an input vector, i.e. projects it on the
                # trivial subspace by masking out all non-trivial irreps
                P = r.change_of_basis @ S @ r.change_of_basis_inv
                self.register_buffer(f'{prefix}_avg', torch.tensor(P, dtype=torch.float))

                # columns of the change of basis associated with the trivial irreps; used to embed the bias
                Q = torch.tensor(r.change_of_basis, dtype=torch.float)[:, trivials]
                self.register_buffer(f'{prefix}_change_of_basis', Q)

            if self.affine:
                # scale all dimensions of the same field by the same weight
                weight = Parameter(torch.ones((self._nfields[name], 1)), requires_grad=True)
                self.register_parameter(f'{prefix}_weight', weight)
                if self._has_trivial[name]:
                    # the bias is applied only to the trivial channels
                    bias = Parameter(torch.zeros((self._nfields[name], len(trivials))), requires_grad=True)
                    self.register_parameter(f'{prefix}_bias', bias)

    def reset_parameters(self):
        r"""
        Restore the affine parameters to their initial values (weights to one, biases to zero).
        Does nothing if ``affine = False``.
        """
        if not self.affine:
            return

        with torch.no_grad():
            for name, size in self._sizes:
                prefix = self._escape_name(name)
                getattr(self, f"{prefix}_weight").fill_(1)
                # the bias only exists for field types containing trivial irreps
                if hasattr(self, f"{prefix}_bias"):
                    getattr(self, f"{prefix}_bias").fill_(0)

    def reset_running_stats(self):
        r"""
        No-op: this module does not track running statistics.
        """
        pass

    def _estimate_stats(self, slice, name: str):
        r"""
        Compute the per-field mean and variance of the fields of type ``name``.

        Args:
            slice: tensor indexed as ``(batch, field, channel, *spatial)``, as built in :meth:`forward`
            name (str): name of the field type (key of ``self._has_trivial``)

        Returns:
            a pair ``(means, vars)``; ``means`` is ``None`` if the field type contains no trivial irrep.
            Both statistics are computed on detached values, so no gradient flows through them.

        """

        if self._has_trivial[name]:
            P = getattr(self, f'{self._escape_name(name)}_avg')

            # compute the mean by projecting each field on its trivial subspace
            means = torch.einsum(
                'ij,bcj...->bci...',
                P,
                slice.detach()
            )
            centered = slice - means
        else:
            means = None
            centered = slice

        # Center the data and compute the variance
        # N.B.: we implicitly assume the dimensions to be iid,
        # i.e. the covariance matrix is a scalar multiple of the identity
        variances = (centered ** 2).mean(dim=2, keepdim=True).detach()

        return means, variances

    def forward(self, input: GeometricTensor) -> GeometricTensor:
        r"""
        Normalize the input feature map

        Args:
            input (GeometricTensor): the input feature map

        Returns:
            the resulting feature map

        """

        assert input.type == self.in_type

        coords = input.coords
        x = input.tensor
        b = x.shape[0]
        spatial_dims = x.shape[2:]
        # singleton dimensions used to broadcast per-field quantities over the spatial dimensions
        broadcast = (1,) * len(spatial_dims)

        output = torch.empty_like(x)

        # iterate through all field types
        for name, size in self._sizes:
            prefix = self._escape_name(name)
            indices = getattr(self, f"{prefix}_indices")
            contiguous = self._contiguous[name]

            if contiguous:
                fields = x[:, indices[0]:indices[1], ...]
            else:
                fields = x[:, indices, ...]

            # split the channel dimension into (number of fields, size of a field)
            fields = fields.view(b, -1, size, *spatial_dims)

            means, variances = self._estimate_stats(fields, name)

            if self._has_trivial[name]:
                # center data by subtracting the mean
                fields = fields - means

            # normalize dividing by the std and multiply by the new scale
            if self.affine:
                weight = getattr(self, f"{prefix}_weight").view(1, self._nfields[name], 1, *broadcast)
            else:
                weight = 1.

            # compute the scalar multipliers needed
            scales = weight / (variances + self.eps).sqrt()

            # scale features
            fields = fields * scales

            # shift the features with the learnable bias
            if self.affine and self._has_trivial[name]:
                bias = getattr(self, f"{prefix}_bias")
                Q = getattr(self, f'{prefix}_change_of_basis')
                fields = fields + torch.einsum(
                    'ij,cj->ci',
                    Q,
                    bias
                ).view(1, bias.shape[0], Q.shape[0], *broadcast)

            # needed for PyTorch's adaptive mixed precision
            fields = fields.to(output.dtype)

            if contiguous:
                output[:, indices[0]:indices[1], ...] = fields.view(b, -1, *spatial_dims)
            else:
                output[:, indices, ...] = fields.view(b, -1, *spatial_dims)

        # wrap the result in a GeometricTensor
        return GeometricTensor(output, self.out_type, coords)

    def evaluate_output_shape(self, input_shape: Tuple[int, ...]) -> Tuple[int, ...]:
        r"""
        Return the shape of the output for an input of shape ``input_shape``: only the channel dimension
        is replaced, by ``self.out_type.size``.
        """
        assert len(input_shape) > 1, input_shape
        assert input_shape[1] == self.in_type.size, input_shape

        return (input_shape[0], self.out_type.size, *input_shape[2:])

    def check_equivariance(self, atol: float = 1e-6, rtol: float = 1e-5) -> List[Tuple[Any, float]]:
        r"""
        Placeholder: no equivariance check is performed and ``None`` is returned.
        """
        # NOTE(review): the parent implementation is deliberately not called here - confirm whether
        # the generic check can be enabled for this module
        pass

    def _escape_name(self, name: str):
        # '.' is replaced since it is not accepted in the names of buffers and parameters
        return name.replace('.', '^')

    def __repr__(self):
        extra_lines = []
        extra_repr = self.extra_repr()
        if extra_repr:
            extra_lines = extra_repr.split('\n')

        main_str = self._get_name() + '('
        if len(extra_lines) == 1:
            main_str += extra_lines[0]
        else:
            main_str += '\n  ' + '\n  '.join(extra_lines) + '\n'

        main_str += ')'
        return main_str

    def extra_repr(self):
        return f'{self.in_type}, eps={self.eps}, affine={self.affine}'

    def export(self):
        r"""
        Exporting this module is not supported.

        Raises:
            NotImplementedError: always

        """
        raise NotImplementedError()