Source code for dfa_recommender.net

'''
Gated network for energy prediction.
'''
import torch
import numpy as np
from torch import nn
from torch import Tensor


def call_bn(bn: nn.BatchNorm1d, x: Tensor, update_batch_stats: bool = True) -> Tensor:
    '''
    Apply a batch normalization layer, optionally without updating its
    running statistics.

    Parameters
    ----------
    bn: nn.BatchNorm1d,
        batch normalization layer to apply.
    x: torch.Tensor,
        input handed to the layer.
    update_batch_stats: bool, default as True
        only consulted while ``bn`` is in training mode. If False, ``x`` is
        normalized with the statistics of the current batch and no running
        buffers are passed to the normalization call, so the running
        mean/variance stored in ``bn`` are left untouched.

    Returns:
    --------
    outputs: torch.Tensor,
        the normalized input.
    '''
    if bn.training and not update_batch_stats:
        # ``momentum`` may be None on the layer (cumulative average mode); it is
        # irrelevant here because no running buffers are passed, so fall back
        # to 0.0 to satisfy the functional signature.
        momentum = 0.0 if bn.momentum is None else bn.momentum
        return torch.nn.functional.batch_norm(
            x, None, None, bn.weight, bn.bias, True, momentum, bn.eps)
    # Evaluation mode, or training with regular statistic updates.
    return bn(x)


class MySoftplus(nn.Module):
    """
    Shifted Softplus such that MySoftplus(0) = 0.

    Computes ``softplus(x, beta, threshold) - log(2) / beta``. The shift is
    the value of softplus at zero for the configured ``beta``, so the
    activation passes through the origin for every ``beta``.

    Parameters
    ----------
    beta: int, default as 1
        beta value forwarded to ``torch.nn.functional.softplus``.
    threshold: int, default as 20
        threshold value forwarded to ``torch.nn.functional.softplus``.
    """
    __constants__ = ['beta', 'threshold']
    beta: int
    threshold: int

    def __init__(self, beta: int = 1, threshold: int = 20) -> None:
        super(MySoftplus, self).__init__()
        self.beta = beta
        self.threshold = threshold

    def forward(self, input: Tensor) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        input: torch.Tensor,
            activation input.

        Returns:
        --------
        outputs: torch.Tensor,
            shifted softplus of the input, same shape as ``input``.
        '''
        # softplus(0) = log(2) / beta. The shift is a plain Python float so the
        # subtraction keeps the dtype and device of the input, and it is
        # derived here rather than cached so it always follows ``self.beta``.
        shift = float(np.log(2.0)) / self.beta
        return torch.nn.functional.softplus(input, self.beta, self.threshold) - shift

    def extra_repr(self) -> str:
        '''
        Return the string shown inside the module's repr.
        '''
        return 'beta={}, threshold={}'.format(self.beta, self.threshold)


class MLP(nn.Module):
    '''
    Fully connected feed-forward network.
    One such network is meant to be used per element type, so that atoms of
    the same element share the same weights.
    '''

    def __init__(self, n_in: int, n_out: int,
                 n_hidden: int = 50, n_layers: int = 3,
                 droprate: float = 0.2) -> None:
        '''
        Build model

        Parameters
        ----------
        n_in: int,
            number of input neurons
        n_out: int,
            number of output neurons
        n_hidden: int,
            number of neurons in every hidden layer
        n_layers: int,
            total number of linear layers (hidden layers plus the output layer)
        droprate: float
            dropout rate applied after each hidden layer
        '''
        super().__init__()
        hidden_widths = [n_hidden] * (n_layers - 1)
        self.n_neurons = [n_in, *hidden_widths, n_out]

        # Hidden stack: linear -> shifted softplus -> dropout for each pair of
        # consecutive widths, excluding the final (output) transition.
        stack = []
        for fan_in, fan_out in zip(self.n_neurons[:-2], self.n_neurons[1:-1]):
            stack.append(nn.Linear(fan_in, fan_out, bias=True))
            stack.append(MySoftplus())
            stack.append(nn.Dropout(droprate))

        # Output layer: plain linear map, no activation and no dropout.
        stack.append(
            nn.Linear(self.n_neurons[-2], self.n_neurons[-1], bias=True))
        self.out_net = nn.Sequential(*stack)

    def forward(self, inputs: Tensor) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        inputs: torch.Tensor,
            model input.

        Returns:
        --------
        outputs: torch.Tensor,
            result of passing the input through the layer stack.
        '''
        return self.out_net(inputs)


class TiledMultiLayerNN(nn.Module):
    """
    Tiled multilayer networks.
    Holds a list of independent MLPs. Every MLP is applied to the same input
    and their outputs are concatenated along the last dimension.
    The purpose is to create element-wise predictions.
    Note that n_tiles should be the same as the number of element types in your data set.
    """

    def __init__(self, n_in: int, n_out: int, n_tiles: int,
                 n_hidden: int = 50, n_layers: int = 3,
                 droprate: float = 0.2) -> None:
        '''
        Build model

        Parameters
        ----------
        n_in: int,
            number of input neurons of each MLP
        n_out: int,
            number of output neurons of each MLP
        n_tiles: int,
            number of independent MLPs. This number should be the same as the
            number of element types in your data set.
        n_hidden: int,
            number of hidden neurons
        n_layers: int,
            number of total layers
        droprate: float
            dropout rate at each hidden layer
        '''
        super().__init__()
        tiles = []
        for _ in range(n_tiles):
            tiles.append(
                MLP(n_in, n_out, n_hidden=n_hidden,
                    n_layers=n_layers, droprate=droprate)
            )
        self.mlps = nn.ModuleList(tiles)

    def forward(self, inputs: Tensor) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        inputs: torch.Tensor,
            model input, fed unchanged to every MLP.

        Returns:
        --------
        outputs: torch.Tensor,
            outputs of all MLPs concatenated along the last dimension.
        '''
        per_tile = [mlp(inputs) for mlp in self.mlps]
        return torch.cat(per_tile, dim=-1)


class ElementalGate(nn.Module):
    """
    Element based masking.
    Maps every atomic number in the input to a gate vector of length
    ``len(elements) * n_out`` through an embedding table (self.gate).
    If onehot is set, the gate vector is a block-wise one-hot mask: the block
    of ``n_out`` entries that belongs to the element is one, everything else
    is zero. Otherwise the randomly initialized embedding is used.
    If the trainable flag is set to true, the gate values can be adapted during training.
    It is recommended to create a mapping dictionary for your elements. For example:
    mapping = {"X": 0, "H": 1, "C": 2, "N": 3, "O": 4, "F": 5}
    """

    def __init__(self, elements, n_out, onehot=True, trainable=False):
        '''
        Build model

        Parameters
        ----------
        elements: list,
            set of atomic numbers present in the data. The position of an
            element in this list selects its block in the gate vector.
        n_out: int,
            number of output neurons per element
        onehot: bool, default as True
            Use one-hot encoding for the elemental gate. If set to False, random embedding is used instead
        trainable: bool, default as False
            If set to true, gate can be learned during training
        '''
        super(ElementalGate, self).__init__()
        self.trainable = trainable
        self.n_out = n_out

        self.nelems = len(elements)
        maxelem = int(max(elements) + 1)
        gate_width = self.nelems * self.n_out

        # The embedding width must be nelems * n_out in both modes; otherwise
        # the random-embedding gate does not match the width of the one-hot
        # table built below.
        self.gate = nn.Embedding(maxelem, gate_width)

        if onehot:
            weights = torch.zeros(maxelem, gate_width)
            for idx, Z in enumerate(elements):
                # int() so that atomic numbers stored as floats still index.
                weights[int(Z), self.n_out * idx: self.n_out * (idx + 1)] = 1.0
            with torch.no_grad():
                self.gate.weight.copy_(weights)

        if not trainable:
            self.gate.weight.requires_grad = False

    def forward(self, inputs: Tensor) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        inputs: torch.Tensor,
            model input as atomic numbers (integer tensor)

        Returns:
        --------
        outputs: torch.Tensor,
            gate vectors looked up for every atomic number; the input shape
            with one trailing dimension of size ``len(elements) * n_out``.
            In one-hot mode it is unity in the block of the element and zero otherwise.
        '''
        return self.gate(inputs)


class finalMLP(nn.Module):
    '''
    Fully connected head that maps a vector of width len(elements) * n_out
    to a single output value.

    '''

    def __init__(
        self, elements, n_out, droprate=0.2,
    ):
        '''
        Build model

        Parameters
        ----------
        elements: list,
            set of atomic number present in the data
        n_out: int,
            number of output neurons per element
        droprate: float
            dropout rate applied after each hidden layer
        '''
        super().__init__()
        width = len(elements) * n_out
        self.fc1 = nn.Linear(width, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, 1)
        self.dropout1 = nn.Dropout(droprate)
        self.dropout2 = nn.Dropout(droprate)
        self.activation = MySoftplus()
        # The batch-norm layers are registered but not applied in forward().
        self.bn_fc1 = nn.BatchNorm1d(width)
        self.bn_fc2 = nn.BatchNorm1d(width)

    def forward(self, inputs: Tensor,
                update_batch_stats: bool = True) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        inputs: torch.Tensor,
            model inputs
        update_batch_stats: bool, Optional, default as True
            accepted for interface compatibility; currently unused because
            batch normalization is not applied in this method.

        Returns:
        --------
        outputs: torch.Tensor,
            model outputs.
        '''
        # Two hidden layers (linear -> activation -> dropout), then a linear
        # read-out without activation.
        hidden = self.dropout1(self.activation(self.fc1(inputs)))
        hidden = self.dropout2(self.activation(self.fc2(hidden)))
        return self.fc3(hidden)


class GatedNetwork(nn.Module):
    '''
    Behler-Parrinello type gated network that combines the building blocks
    of this module: per-element MLPs (TiledMultiLayerNN), an element gate
    (ElementalGate) and a final fully connected head (finalMLP).
    '''

    def __init__(
        self,
        nin: int,
        n_out: int,
        elements: list,
        n_hidden: int = 50,
        n_layers: int = 3,
        trainable: bool = False,
        onehot: bool = True,
        droprate: float = 0.2,
    ):
        '''
        Build model

        Parameters
        ----------
        nin: int,
            number of input neurons of the per-element MLPs
        n_out: int,
            number of output neurons of each per-element MLP
        elements: list,
            set of atomic number present in the data
        n_hidden: int,
            number of hidden neurons
        n_layers: int,
            number of total layers
        trainable: bool, default as False
            If set to true, gate can be learned during training
        onehot: bool, default as True
            Use one-hot encoding for elemental gate. If set to False, random embedding is used instead
        droprate: float
            dropout rate at each hidden layer
        '''
        super(GatedNetwork, self).__init__()
        self.nelem = len(elements)
        self.gate = ElementalGate(
            elements, n_out=n_out,
            trainable=trainable, onehot=onehot
        )
        # NOTE: the attribute name "fmpl" is kept as is, since it is part of
        # the parameter names stored in saved state dicts.
        self.fmpl = finalMLP(elements, n_out, droprate)
        self.network = TiledMultiLayerNN(
            nin,
            n_out,
            self.nelem,
            n_hidden=n_hidden,
            n_layers=n_layers,
            droprate=droprate,
        )

    def forward(self, inputs: Tensor,
                update_batch_stats: bool = True) -> Tensor:
        '''
        Compute output.

        Parameters
        ----------
        inputs: torch.Tensor,
            model inputs, [batch_size, max(natoms), :-1] are the molecule features,
            [batch_size, max(natoms), -1] encode the element type.
        update_batch_stats: bool, Optional, default as True
            forwarded to the final MLP.

        Returns:
        --------
        outputs: torch.Tensor,
            model outputs with the trailing singleton dimension of the final
            MLP removed, i.e. one value per batch entry.
        '''
        # .long() converts in place on the input's device; casting through
        # torch.LongTensor would always produce a CPU tensor and break the
        # embedding lookup for models living on another device.
        atomic_numbers = inputs[..., -1].long()
        representation = inputs[..., :-1]
        gated_network = self.gate(atomic_numbers) * \
            self.network(representation)
        ## ---direct summation without feed forward, original BP---
        # return torch.sum(gated_network, dim=[-2, -1], keepdim=False)
        ## ---element aggregation---
        # Sum over the atom axis, keeping the per-element feature blocks.
        aggre = torch.sum(gated_network, dim=-2, keepdim=False)
        outputs = self.fmpl(aggre, update_batch_stats=update_batch_stats)
        # Only drop the size-1 output axis; an unqualified squeeze would also
        # collapse a batch of size one into a 0-dim tensor.
        return torch.squeeze(outputs, dim=-1)