Commit 9bb94b09 authored by Artem Pulkin's avatar Artem Pulkin

ml_util: add LAMMPS parser

parent 8311fd7b
Pipeline #43534 passed with stage
in 2 minutes and 29 seconds
This diff is collapsed.
......@@ -1329,37 +1329,42 @@ class EnergyNNMixin:
class SequentialSoleEnergyNN(EnergyNNMixin, torch.nn.Sequential):
    def __init__(self, arg, n_layers=3, n_internal=15, bias=True):
        """
        Sequential neural network with descriptor inputs and
        energy output.

        Parameters
        ----------
        arg : int, list
            Either the number of input features (the network layout is
            then built from ``n_layers``/``n_internal``/``bias``) or an
            explicit sequence of ``torch.nn`` modules to assemble as-is.
        n_layers : int
            The number of linear layers (ignored when ``arg`` is a
            module sequence).
        n_internal : int
            The size of the hidden layers (ignored when ``arg`` is a
            module sequence).
        bias : bool, list, tuple
            Bias per layer or for all layers.

        Raises
        ------
        ValueError
            If ``bias`` is a sequence whose length differs from ``n_layers``.
        """
        if not isinstance(arg, int):
            # An explicit module sequence: hand it to Sequential verbatim.
            super().__init__(*arg)
            # BUG FIX: without this return, execution fell through to the
            # feature-count branch below, treating the module list as
            # ``n_features`` and re-initializing the network a second time.
            return
        n_features = arg
        if isinstance(bias, (list, tuple)):
            if len(bias) != n_layers:
                raise ValueError(f"len(bias) = {len(bias)} != n_layers = {n_layers}")
        else:
            # Broadcast a single flag to one entry per layer.
            bias = (bias,) * n_layers
        if n_layers == 1:
            # Degenerate case: a single linear map features -> energy.
            super().__init__(torch.nn.Linear(n_features, 1, bias=bias[0]))
        else:
            # input linear, (sigmoid, hidden linear) x (n_layers - 2),
            # sigmoid, output linear with a single energy output.
            super().__init__(
                torch.nn.Linear(n_features, n_internal, bias=bias[0]),
                *sum((
                    (torch.nn.Sigmoid(), torch.nn.Linear(n_internal, n_internal, bias=b))
                    for b in bias[1:-1]
                ), tuple()),
                torch.nn.Sigmoid(),
                torch.nn.Linear(n_internal, 1, bias=bias[-1]),
            )
def fw_cauldron(modules, dataset, grad=False, normalization=None):
......
......@@ -6,10 +6,12 @@ from string import ascii_lowercase
import numpy as np
from scipy.optimize import root_scalar
from dfttools.utypes import CrystalCell, RealSpaceBasis
from dfttools.parsers.generic import StringParser, cre_word
from .potentials import behler2_descriptor_family, behler5_descriptor_family, lj_potential_family,\
harmonic_repulsion_potential_family, behler_turning_point
from .ml import fw_cauldron, fw_cauldron_charges, Dataset
harmonic_repulsion_potential_family, behler_turning_point, behler4_descriptor_family
from .ml import fw_cauldron, fw_cauldron_charges, Dataset, Normalization, SequentialSoleEnergyNN,\
potentials_from_ml_data
from .kernel import NeighborWrapper
from .util import dict_reduce
......@@ -216,6 +218,111 @@ def parse_runner_input(f):
return result
def parse_lammps_input(f):
    """
    Parse LAMMPS input for NN potential.

    Parameters
    ----------
    f : file
        Text file to parse.

    Returns
    -------
    result : list
        NN potentials assembled from the file, one per element,
        in sorted element order (built by ``potentials_from_ml_data``).
    """
    # Column order of the numeric descriptor parameters as they
    # appear in the file, per descriptor type.
    behler2_parameter_order = "a", "eta", "r_sphere"
    behler4_parameter_order = "a", "eta", "zeta", "l"
    parser = StringParser(f.read())
    descriptors = {}  # element -> list of descriptor family instances
    scales = {}  # element -> [offsets-row, scales-row] feature normalization
    layers = {}  # element -> flat list of torch.nn modules
    # One "pot <element>" section per chemical element.
    while parser.present("pot"):
        parser.skip("pot")
        element = parser.next_match(cre_word)
        descriptors[element] = []
        scales[element] = []
        layers[element] = []
        parser.next_line()
        n_descriptors = parser.next_int()
        for i in range(n_descriptors):
            parser.next_line()
            parameters = parser.next_float(5)
            # Attach units: parameters[1] is the cutoff "a" (length),
            # parameters[2] is "eta" (inverse length squared).
            parameters[1] *= angstrom
            parameters[2] /= angstrom ** 2
            if parameters[0] == 2:
                # Radial (2-point) Behler descriptor: one partner element.
                elements = parser.next_match(cre_word, n=1)
                parameters[3] *= angstrom  # r_sphere is a length
                parameters = dict(zip(behler2_parameter_order, parameters[1:]))
                descriptors[element].append(behler2_descriptor_family(
                    **parameters, tag="-".join([element] + list(elements))
                ))
            elif parameters[0] == 4:
                # Angular (3-point) Behler descriptor: two partner elements.
                elements = parser.next_match(cre_word, n=2)
                parameters = dict(zip(behler4_parameter_order, parameters[1:]))
                if elements[0] == elements[1]:
                    parameters["epsilon"] = 0.5  # No double-counting
                descriptors[element].append(behler4_descriptor_family(
                    **parameters, tag="-".join([element] + list(elements))
                ))
            else:
                raise NotImplementedError(f"Unknown descriptor {parameters[0]}")
        # Feature normalization rows: "scale1" (used below as offsets)
        # followed by "scale2" (used below as scales), one value per descriptor.
        parser.skip("scale1")
        scales[element].append(parser.next_float(n_descriptors))
        parser.skip("scale2")
        scales[element].append(parser.next_float(n_descriptors))
        parser.skip("net")
        net_shape = parser.next_int("\n")
        # The "net" line lists a layer count followed by layer widths:
        # the count must equal the number of widths minus one, and the
        # final width must be 1 (a single energy output).
        # NOTE(review): asserts are stripped under -O; input validation
        # via exceptions may be preferable — confirm intent.
        assert net_shape[0] == len(net_shape) - 2
        assert net_shape[-1] == 1
        net_shape = net_shape[1:]
        input_dims = n_descriptors
        for i, output_dims in enumerate(net_shape):
            parser.skip(f"layer {i:d}")
            kind = parser.next_match(cre_word)  # activation: "sigmoid" or "linear"
            weights = []
            bias = []
            # One weight row ("w<j>") and one bias value ("b<j>")
            # per output neuron of this layer.
            for j in range(output_dims):
                parser.skip(f"w{j:d}")
                weights.append(parser.next_float(input_dims))
                parser.skip(f"b{j:d}")
                bias.append(parser.next_float())
            linear = torch.nn.Linear(input_dims, output_dims, bias=True)
            linear.weight = torch.nn.Parameter(torch.tensor(np.array(weights)))
            linear.bias = torch.nn.Parameter(torch.tensor(np.array(bias)))
            layers[element].append(linear)
            if kind == "sigmoid":
                layers[element].append(torch.nn.Sigmoid())
            elif kind == "linear":
                pass  # no activation after this layer
            else:
                raise NotImplementedError(f"Unknown layer: {kind}")
            input_dims = int(output_dims)
    # Assemble one network per element in deterministic (sorted) order;
    # the same ordering is used for the normalization rows below.
    nn = []
    for _, _layers in sorted(layers.items()):
        nn.append(SequentialSoleEnergyNN(_layers))
    normalization = Normalization(
        energy_scale=torch.tensor(np.array(Ry)),  # file energies are in Ry
        features_scale=[torch.tensor(v[1]) for _, v in sorted(scales.items())],
        energy_offsets=torch.tensor(np.zeros((len(scales), 1), dtype=float)),
        features_offsets=[torch.tensor(v[0]) for _, v in sorted(scales.items())],
    )
    return potentials_from_ml_data(nn=nn, descriptors=descriptors, normalization=normalization)
loss_result = namedtuple("stats_tuple", ("loss_id", "loss_value", "reference", "prediction", "components"))
......
from . import ml_util
from .potentials import behler2_descriptor_family as behler2, behler5_descriptor_family as behler5
from .ml import NNPotential
from .potentials import behler2_descriptor_family as behler2, behler5_descriptor_family as behler5,\
behler4_descriptor_family as behler4
from unittest import TestCase
from pathlib import Path
from numericalunits import aBohr
from numericalunits import aBohr, angstrom, Ry
from collections import Counter
import numpy as np
from numpy import testing
import torch
test_location = Path(__file__).parent
dataset_location = test_location.parent / "datasets"
......@@ -81,6 +86,96 @@ class RunnerParserTest(TestCase):
self.assertEqual(set(id_full), set(id_parsed) | set(id_blacklisted))
class LAMMPSParserTest(TestCase):
    """Regression test for ml_util.parse_lammps_input on a reference SiO2 file."""

    def test_file(self):
        # Parse the bundled SiO2 LAMMPS NN-potential sample.
        with open(dataset_location / "sio2-lammps", 'r') as f:
            potentials = ml_util.parse_lammps_input(f)
        # Two potentials: one per element, sorted order (O first, then Si).
        self.assertEqual(len(potentials), 2)

        # --- potential 0: oxygen ---
        potential_sample = potentials[0]
        self.assertIsInstance(potential_sample, NNPotential)
        self.assertEqual(potential_sample.tag, "O")
        self.assertEqual(len(potential_sample.parameters["descriptors"]), 70)
        # A radial (Behler type-2) descriptor, spot-checked against the file.
        descriptor_sample = potential_sample.parameters["descriptors"][4]
        self.assertIs(descriptor_sample.family, behler2)
        testing.assert_equal(descriptor_sample.parameters["eta"], 0.214264 / angstrom ** 2)
        testing.assert_equal(descriptor_sample.parameters["a"], 6 * angstrom)
        testing.assert_equal(descriptor_sample.parameters["r_sphere"], 0)
        testing.assert_equal(descriptor_sample.tag, "O-Si")
        # An angular (Behler type-4) descriptor.
        descriptor_sample = potential_sample.parameters["descriptors"][37]
        self.assertIs(descriptor_sample.family, behler4)
        testing.assert_equal(descriptor_sample.parameters["eta"], 0.000357 / angstrom ** 2)
        testing.assert_equal(descriptor_sample.parameters["a"], 6 * angstrom)
        testing.assert_equal(descriptor_sample.parameters["zeta"], 2)
        testing.assert_equal(descriptor_sample.parameters["l"], -1)
        testing.assert_equal(descriptor_sample.tag, "O-Si-O")

        # --- potential 1: silicon ---
        potential_sample = potentials[1]
        self.assertIsInstance(potential_sample, NNPotential)
        self.assertEqual(potential_sample.tag, "Si")
        self.assertEqual(len(potential_sample.parameters["descriptors"]), 70)
        descriptor_sample = potential_sample.parameters["descriptors"][10]
        self.assertIs(descriptor_sample.family, behler2)
        testing.assert_equal(descriptor_sample.parameters["eta"], 0.071421 / angstrom ** 2)
        testing.assert_equal(descriptor_sample.parameters["a"], 6 * angstrom)
        testing.assert_equal(descriptor_sample.parameters["r_sphere"], 0)
        testing.assert_equal(descriptor_sample.tag, "Si-O")
        descriptor_sample = potential_sample.parameters["descriptors"][42]
        self.assertIs(descriptor_sample.family, behler4)
        testing.assert_equal(descriptor_sample.parameters["eta"], 0.089277 / angstrom ** 2)
        testing.assert_equal(descriptor_sample.parameters["a"], 6 * angstrom)
        testing.assert_equal(descriptor_sample.parameters["zeta"], 4)
        testing.assert_equal(descriptor_sample.parameters["l"], -1)
        testing.assert_equal(descriptor_sample.tag, "Si-Si-O")

        # --- normalization: shared object, per-potential handles ---
        self.assertIs(potentials[0].parameters["normalization"], potentials[1].parameters["normalization"])
        self.assertEqual(potentials[0].parameters["normalization_handle"], 0)
        self.assertEqual(potentials[1].parameters["normalization_handle"], 1)
        normalization = potentials[0].parameters["normalization"]
        self.assertIs(normalization.dtype, torch.float64)
        testing.assert_equal(normalization.energy_scale.item(), Ry)
        testing.assert_equal(normalization.energy_offsets.numpy(), np.zeros((2, 1)))
        # O
        testing.assert_equal(
            normalization.features_offsets[0].numpy()[15:20],
            [0.001286592108231314, 1.615018174190524, 0.8992337695059681, 0.2891024449138566, 0.9410533294211596])
        testing.assert_equal(
            normalization.features_scale[0].numpy()[40:45],
            [0.690412761572473, 0.33830388432626607, 0.08148770379598626, 5.344496403824106, 2.9638922703877952])
        # Si
        testing.assert_equal(
            normalization.features_offsets[1].numpy()[0:5],
            [2.9579656875347435, 2.010843911436129, 1.3495712305969263, 0.7817472414666415, 0.3489072530470396])
        testing.assert_equal(
            normalization.features_scale[1].numpy()[0:5],
            [0.7019746179904176, 0.530114971386907, 0.40562997229952297, 0.2792499782562679, 0.16943875861884494])

        # --- network structure: Linear / Sigmoid alternation, float64 ---
        nn_sample = potentials[0].parameters["nn"]
        self.assertEqual(len(nn_sample), 5)
        for i in range(0, 5, 2):
            layer = nn_sample[i]
            self.assertIsInstance(layer, torch.nn.Linear, msg=f"#{i:d}")
            self.assertIs(layer.weight.dtype, torch.float64, msg=f"#{i:d}")
            self.assertIs(layer.bias.dtype, torch.float64, msg=f"#{i:d}")
        for i in range(1, 5, 2):
            self.assertIsInstance(nn_sample[i], torch.nn.Sigmoid, msg=f"#{i:d}")
        # Spot-check a few parsed weights/biases against the file contents.
        linear_sample = nn_sample[2]
        testing.assert_equal(linear_sample.weight.detach().numpy()[2, 3:6], [0.5721273035047503, 0.10739355013492079, 0.5264601494420822])
        testing.assert_equal(linear_sample.bias.detach().numpy()[4:6], [-0.194013890882, -0.691780401616])
        nn_sample = potentials[1].parameters["nn"]
        linear_sample = nn_sample[4]
        testing.assert_equal(linear_sample.weight.detach().numpy()[0, 5:7], [-0.1973799972276568, 0.4005192115707977])
        testing.assert_equal(linear_sample.bias.detach().item(), 0.132187475486)
class UtilTest(TestCase):
def test_default_behler_choice(self):
descriptors = ml_util.default_behler_descriptors({"a-a": 2, "a-b": 3, "b-b": 4}, 6, 12)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment