Commit e854ed81 authored by Artem Pulkin's avatar Artem Pulkin
Browse files

Merge branch 'dev'

parents b0efbf39 eb8ac49a
Pipeline #81821 passed with stages
in 17 minutes and 15 seconds
......@@ -8,8 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
+ ``dyn`` module and ``dyn.DynWrapper`` to simulate simplest molecular dynamics.
+ Support cell gradients.
+ ``dyn`` module and ``dyn.DynWrapper`` to simulate the simplest molecular dynamics with
or without Cell updates.
+ Generate reasonably randomized disordered atomic configurations as a part of ``ml_util.SDWorkflow``.
+ Support units in ``yml`` inputs through ``!nu`` directive.
......
......@@ -11,7 +11,7 @@ dependencies:
- numericalunits>=1.25
- numpy>=1.18.4
- pytorch>=1.5.1
- scipy>=1.4.1
- scipy>=1.6.0
- sphinx>=3.2
- sphinx_rtd_theme>=0.5
- PyYAML>=5.4.1
This diff is collapsed.
from .potentials import eval_potentials, NestedLocalPotential
from .util import dict_reduce, lattice_up_to_radius, _np_cache, _ro, __assert_dimension_count__,\
from .util import dict_reduce, lattice_up_to_radius, np_hash, _ro, __assert_dimension_count__,\
__assert_same_dimension__
from .units import load, dump
from ._util import adf as _adf
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix
from scipy.spatial import cKDTree
from scipy.spatial import KDTree
from itertools import product
from collections import namedtuple, OrderedDict
try:
from functools import cached_property
except ImportError:
from .future import cached_property
from functools import wraps
from dataclasses import dataclass, field, InitVar
......@@ -122,7 +122,7 @@ class Cell:
def __post_init__(self, _vectors_inv):
d = self.__dict__
for i in 'vectors', 'coordinates', 'values':
d[i] = _ro(np.asanyarray(d[i]))
d[i] = _ro(np.asanyarray(d[i], dtype=float if i != 'values' else None))
__assert_dimension_count__(d, "vectors", 2, "coordinates", 2, "values", 1)
__assert_same_dimension__(d, "size", "coordinates", 0, "values", 0)
__assert_same_dimension__(d, "basis size", "coordinates", 1, "vectors", 0)
......@@ -134,12 +134,13 @@ class Cell:
np.all(self.coordinates == other.coordinates) and np.all(self.values == other.values)
@classmethod
def from_cartesian(cls, vectors, cartesian, values, *args, proto=None, **kwargs):
def from_cartesian(cls, vectors, cartesian, values, *args, proto=None, _vectors_inv=None, **kwargs):
"""Constructs an instance from cartesian coordinates."""
vectors_inv = np.linalg.inv(vectors)
if _vectors_inv is None:
_vectors_inv = np.linalg.inv(vectors)
if proto is None:
proto = cls
return proto(vectors, cartesian @ vectors_inv, values, *args, **kwargs)
return proto(vectors, cartesian @ _vectors_inv, values, *args, _vectors_inv=_vectors_inv, **kwargs)
def copy(self, vectors=None, coordinates=None, cartesian=None, values=None, meta=None, proto=None):
"""Creates a copy with optional modifications."""
......@@ -154,7 +155,7 @@ class Cell:
if cartesian is not None:
return proto.from_cartesian(
self.vectors.copy() if vectors is None else vectors,
cartesian,
self.cartesian if cartesian is True else cartesian,
self.values.copy() if values is None else values,
self.meta.copy() if meta is None else meta,
_vectors_inv=vinv,
......@@ -404,8 +405,8 @@ def compute_images(cell, cutoff, reciprocal_cutoff=None, pbc=True):
cartesian_col = (cell.cartesian[np.newaxis, :, :] + (shift_vectors[:, np.newaxis, :] @ cell.vectors)).reshape(-1, 3)
# Collect close neighbors
t_row = cKDTree(cartesian_row)
t_col = cKDTree(cartesian_col)
t_row = KDTree(cartesian_row)
t_col = KDTree(cartesian_col)
spd = t_row.sparse_distance_matrix(t_col, cutoff, output_type="coo_matrix")
......@@ -513,7 +514,8 @@ def total(images, potentials, kname="kernel", squeeze=False, resolving=False, **
def grad(images, potentials, kname="kernel_gradient", **kwargs):
"""
Total energy gradients.
Total energy gradients with respect to cartesian atomic
coordinates.
Similarly to `self.total`, this function totally ignores
any symmetry issues related to double-counting, etc.
......@@ -540,7 +542,8 @@ def grad(images, potentials, kname="kernel_gradient", **kwargs):
def grad_cell(images, potentials, kname="kernel_cell_gradient", **kwargs):
"""
Total energy cell gradients.
Total energy gradients with respect to cell vectors assuming
cartesian atomic coordinates are fixed.
Parameters
----------
......@@ -562,6 +565,26 @@ def grad_cell(images, potentials, kname="kernel_cell_gradient", **kwargs):
return total(images, potentials, kname=kname, **kwargs)
def common_cutoff(potentials):
    """
    The maximal (common) cutoff of many potentials.

    Parameters
    ----------
    potentials : Iterable
        Potentials to compute the cutoff for. Each item is expected
        to expose a ``cutoff`` attribute.

    Returns
    -------
    result : float
        The resulting cutoff; 0 if no potentials were given.
    """
    # `default=0` reproduces the original empty-input behavior while
    # also accepting arbitrary (non-sized) iterables such as generators,
    # which the previous `len(potentials)` check could not handle.
    return max((p.cutoff for p in potentials), default=0)
@dataclass(eq=False, frozen=True)
class CellImages:
cell: Cell
......@@ -712,14 +735,25 @@ class SnapshotHistory(list):
self.append(cell)
sf_parameters = namedtuple("sf_parameters", ("cartesian", "vectors"))
def _parameters_cache(f):
    """
    Decorator memoizing the single most recent call of a method.

    The cache key is ``np_hash`` of the positional arguments; on a
    miss the previous entry is discarded before the new result is
    stored, so at most one result is retained at a time.
    """
    # NOTE(review): the cache is shared by every instance of the class the
    # decorated method belongs to, and the key does not include `self` —
    # confirm that either a single instance is used at a time or results
    # are independent of instance state.
    cache = {}

    @wraps(f)
    def _lru(self, *a):
        h = np_hash(*a)  # hash of the (array) arguments only
        if h in cache:
            return cache[h]
        else:
            # size-1 cache: evict the previous entry before computing
            cache.clear()
            result = f(self, *a)
            cache[h] = result
            return result
    return _lru
class ScalarFunctionWrapper:
_grad_dispatcher = {"cartesian": grad, "vectors": grad_cell}
_meta_key_map = {"cartesian": "forces", "vectors": "stress"}
def __init__(self, sample, potentials, include_coordinates=True, include_vectors=False, normalize=True,
class ScalarFunctionWrapper:
def __init__(self, sample, potentials, include_coordinates=True, include_vectors=False, normalize=None,
prefer_parallel=None, cell_logger=None, track_potential_fidelity=False, **kwargs):
"""
A wrapper providing interfaces to total energy and gradient computation.
......@@ -746,9 +780,12 @@ class ScalarFunctionWrapper:
kwargs
Additional arguments to ``compute_images``.
"""
if normalize is None:
normalize = kwargs.get("pbc", True)
self.sample = sample
self.potentials = potentials
self.include = OrderedDict([("cartesian", include_coordinates), ("vectors", include_vectors)])
self.include_coordinates = include_coordinates
self.include_vectors = include_vectors
self.normalize = normalize
self.prefer_parallel = prefer_parallel
self.cell_logger = cell_logger
......@@ -759,27 +796,7 @@ class ScalarFunctionWrapper:
i.get_kernel_by_name("fidelity")
except KeyError:
raise ValueError("One or more potentials do not include the 'fidelity' kernel")
self.compute_images_kwargs = {"cutoff": max(i.cutoff for i in potentials), **kwargs}
def p2c(self, parameters: np.ndarray) -> sf_parameters:
"""Unpacks parameters."""
result = []
for k, v in self.include.items():
if v:
ref = getattr(self.sample, k)
result.append(parameters[:ref.size].reshape(*ref.shape))
parameters = parameters[ref.size:]
else:
result.append(None)
return sf_parameters(*result)
def c2p(self, coordinates: sf_parameters) -> np.ndarray:
"""Packs parameters."""
result = []
for k, v in self.include.items():
if v:
result.append(getattr(coordinates, k).reshape(-1))
return np.concatenate(result)
self.compute_images_kwargs = {"cutoff": common_cutoff(potentials), **kwargs}
def start_recording(self):
"""Starts recording of coordinates passed."""
......@@ -791,85 +808,99 @@ class ScalarFunctionWrapper:
result, self.cell_logger = self.cell_logger, None
return result
@_np_cache
def eval(self, parameters):
def make_cell(self, coordinates, vectors):
    # Copy of the wrapped sample with the given coordinates and vectors.
    # The "forces" and "_stress" meta entries are dropped — presumably
    # because they describe the previous geometry and would be stale here.
    return self.sample.copy(coordinates=coordinates, vectors=vectors,
                            meta={k: v for k, v in self.sample.meta.items() if k not in ("forces", "_stress")})
@_parameters_cache
def eval_(self, coordinates, vectors):
"""
Computes function and gradients.
(Skips saving into history.)
Parameters
----------
parameters : np.ndarray
Input parameters: coordinates, vectors.
coordinates : np.ndarray
Cell (crystal) coordinates.
vectors : np.ndarray
Cell vectors.
Returns
-------
images : CellImages
cell : Cell
The resulting cell with energy and gradients set.
f : float
The energy value.
g : np.ndarray
Energy gradient.
gc : np.ndarray
Cartesian gradients.
gv : np.ndarray
Vector gradients.
"""
parameters = self.p2c(parameters)
cell = self.sample.copy(**parameters._asdict(), meta={})
cell = self.make_cell(coordinates, vectors)
if self.normalize:
cell = cell.normalized()
images = compute_images(cell, **self.compute_images_kwargs)
f = cell.meta["total-energy"] = total(images, self.potentials, prefer_parallel=self.prefer_parallel)
g = []
for k, v in self.include.items():
if v:
kernel = self._grad_dispatcher[k]
cache_key = self._meta_key_map[k]
# pick up cache if present, minus sign stands for force=-gradient
_result = cell.meta[cache_key] = - kernel(images, self.potentials, prefer_parallel=self.prefer_parallel)
g.append(-_result)
if self.include_coordinates:
gc = grad(images, self.potentials, prefer_parallel=self.prefer_parallel)
cell.meta['forces'] = - gc
else:
gc = None
if self.include_vectors:
gv = grad_cell(images, self.potentials, prefer_parallel=self.prefer_parallel)
cell.meta['_stress'] = - gv
else:
gv = None
if self.track_potential_fidelity:
images.cell.meta["fidelity"] = total(images, self.potentials, "fidelity", resolving=True,
prefer_parallel=self.prefer_parallel)
self._notify(images)
return images, f, np.concatenate(tuple(i.flatten() for i in g))
def _notify(self, images):
"""Notifies of a new call."""
if self.cell_logger is not None:
self.cell_logger(images.cell)
return images.cell, f, gc, gv
def f(self, parameters):
def eval(self, coordinates, vectors):
"""
Total energy value interface.
Computes function and gradients.
Parameters
----------
parameters : np.ndarray
Cartesian coordinates presented as a 1D array.
coordinates : np.ndarray
Cell (crystal) coordinates.
vectors : np.ndarray
Cell vectors.
Returns
-------
energy : float
The total energy value.
cell : Cell
The resulting cell with energy and gradients set.
f : float
The energy value.
gc : np.ndarray
Cartesian gradients.
gv : np.ndarray
Vector gradients.
"""
images, energy, gradients = self.eval(parameters)
return energy
cell, f, gc, gv = self.eval_(coordinates, vectors)
self._notify(cell)
return cell, f, gc, gv
def g(self, parameters):
"""
Total energy gradient interface.
def _notify(self, cell):
    """Notifies of a new call by forwarding the cell to ``cell_logger``, if one is set."""
    if self.cell_logger is not None:
        self.cell_logger(cell)
Parameters
----------
parameters : np.ndarray
Cartesian coordinates presented as a 1D array.
def eval_to_cell(self, coordinates: np.ndarray, vectors: np.ndarray) -> Cell:
    """Computes energy and gradients; returns only the resulting cell (element 0 of ``eval``)."""
    return self.eval(coordinates, vectors)[0]
Returns
-------
gradient : np.ndarray
The total energy gradient.
"""
images, energy, gradients = self.eval(parameters)
return gradients
def f(self, coordinates: np.ndarray, vectors: np.ndarray) -> float:
    """Total energy value (element 1 of ``eval``)."""
    return self.eval(coordinates, vectors)[1]
def gc(self, coordinates: np.ndarray, vectors: np.ndarray) -> np.ndarray:
    """Cartesian energy gradients (element 2 of ``eval``)."""
    return self.eval(coordinates, vectors)[2]
def gv(self, coordinates: np.ndarray, vectors: np.ndarray) -> np.ndarray:
    """Cell-vector energy gradients (element 3 of ``eval``)."""
    return self.eval(coordinates, vectors)[3]
def batch_rdf(cells, *args, inner=CellImages.rdf, **kwargs):
......@@ -924,7 +955,7 @@ def profile(potentials, f, *args, **kwargs):
result_shape = result.shape
result.shape = result.size,
cutoff = max(i.cutoff for i in potentials)
cutoff = common_cutoff(potentials)
for i, pt in enumerate(product(*args)):
cell = f(*pt)
......
......@@ -16,11 +16,12 @@ from .potentials import behler2_descriptor_family, PotentialRuntimeWarning, behl
behler4_descriptor_family, behler5_descriptor_family, potential_from_state_dict, behler5x_descriptor_family
from .ml import fw_cauldron, fw_cauldron_charges, Dataset, Normalization, potentials_from_ml_data, learn_cauldron,\
cpu_copy
from .kernel import Cell, ScalarFunctionWrapper, compute_images
from .util import dict_reduce, lookup
from .units import UnitsDict, check_units_known, element_masses, delayed
from .kernel import Cell, compute_images
from .util import dict_reduce, split2
from .units import UnitsDict, check_units_known, delayed
from .presentation import plot_convergence, plot_diagonal, text_bars
from .dyn import DynWrapper, random_cell
from .dyn import random_cell, for_name as dynamics_for_name, relax as dyn_relax, integrate as dyn_integrate,\
nvt_vs as dyn_nvt_vs
def behler_nn(n_inputs, n_internal=15, n_layers=3, bias=True, activation=None):
......@@ -158,14 +159,13 @@ def default_behler_descriptors_3(arg, a, eta=0, zeta=(1, 2, 4, 16), family=4, co
Parameters
----------
arg : dict, list, tuple
A dict with minimal distance between atoms
per specimen pair or wrapped cells to deduce
these distances from.
arg : list, tuple
A list of species or images to take species from.
a : float
The function cutoff.
eta : float, list
A list of etas.
eta : float, list, dict
A single eta, a list of etas or a dictionary with etas per
specimen pair.
zeta : float, tuple, list
A list of zetas.
family : {4, 5, "5x", LocalPotentialFamily}
......@@ -199,27 +199,37 @@ def default_behler_descriptors_3(arg, a, eta=0, zeta=(1, 2, 4, 16), family=4, co
for i in arg:
_species |= set(i.cell.values)
arg = _species
arg = sorted(arg)
if isinstance(eta, (list, tuple, np.ndarray)):
eta = {f"{i}-{j}": eta for i in arg for j in arg}
descriptors = {k: [] for k in arg}
for p1 in arg:
for i, p2 in enumerate(arg):
for p3 in arg[i:]:
tag = f"{p1}-{p2}-{p3}"
for _i_eta, _eta in enumerate(eta):
if family is behler5x_descriptor_family:
if family is behler5x_descriptor_family:
etas1 = eta[f"{p1}-{p2}"]
etas2 = eta[f"{p1}-{p3}"]
for _i_eta, _eta in enumerate(etas1):
if amount_5x == "full":
_etas2 = eta[_i_eta:]
_etas2 = etas2[_i_eta:]
elif amount_5x == "half":
_etas2 = eta[_i_eta:len(eta) - _i_eta]
_etas2 = etas2[_i_eta:len(eta) - _i_eta]
elif amount_5x in ("diagonal", "diag"):
_etas2 = eta[_i_eta:_i_eta + 1]
_etas2 = etas2[_i_eta:_i_eta + 1]
else:
raise ValueError(f"Unknown amount_5x={amount_5x}")
for _eta2 in _etas2:
for _cos in cos_theta0:
descriptors[p1].append(family(eta1=_eta, eta2=_eta2, cos_theta0=_cos, a=a, tag=tag))
else:
else:
etas = (np.array(eta[f"{p1}-{p2}"]) + np.array(eta[f"{p1}-{p3}"])) / 2
for _eta in etas:
for _zeta in zeta:
for l in 1, -1:
descriptors[p1].append(family(eta=_eta, l=l, zeta=_zeta, a=a, tag=tag))
......@@ -1163,9 +1173,14 @@ class FitWorkflow(Workflow):
self.log.info(f" common grid: {common_grid}")
self.log.info(f" 3p: {three_point}, family: {three_point_family}")
descriptors = default_behler_descriptors(self.images, n=n, a=a, left=left, common_eta=common_grid)
all_descriptors = sum(map(tuple, descriptors.values()), tuple())
all_etas = sorted(set(i.parameters["eta"] for i in all_descriptors))
self.log.info(f" etas: {all_etas}")
all_etas = {}
for ds in descriptors.values():
for d in ds:
if d.tag not in all_etas:
all_etas[d.tag] = []
all_etas[d.tag].append(d.parameters["eta"])
for k, eta in all_etas.items():
self.log.info(f" etas {k}: {eta}")
if three_point:
for k, v in default_behler_descriptors_3(
self.images, a=a,
......@@ -2140,13 +2155,13 @@ class SDWorkflow(Workflow):
self.cells = [random_cell(density, atoms, a, **kwargs) for _ in range(n)]
return self.cells
def load_potentials(self, *potentials):
def load_potentials(self, potentials):
"""
Load previously saved potentials.
Parameters
----------
potentials
potentials : list
File names to load from or an explicit list of potentials.
Returns
......@@ -2154,16 +2169,21 @@ class SDWorkflow(Workflow):
potentials : list
The loaded potentials.
"""
if not isinstance(potentials, list):
potentials = [potentials]
if len(potentials) == 0:
potentials = ["learn.pt"]
result = []
for src in potentials:
self.log.info(f"Restoring potentials from {src} ...")
if isinstance(src, str):
self.log.info(f" file {src}")
result.extend(list(map(potential_from_state_dict, torch.load(src))))
elif isinstance(src, (list, tuple)):
result.extend(src)
elif isinstance(src, dict):
self.log.info(f" serialized {src}")
result.append(potential_from_state_dict(src))
else:
self.log.info(f" object {src}")
result.append(src)
self.potentials = result
return result
......@@ -2182,14 +2202,19 @@ class SDWorkflow(Workflow):
return self.cutoff
@requires_fields("potentials", "cells")
def setup_dynamics(self, masses=None, **kwargs):
def setup_dynamics(self, name='fixed-cell', ek=None, **kwargs):
"""
Sets up dynamics wrappers.
Parameters
----------
masses : dict, bool
A dictionary with species masses.
name : str
The dynamics to setup.
ek : float
Setup initial (random) velocities to match
the provided value. This option is required
(but does not guarantee) reproducible parallel
runs.
kwargs
Arguments to ``ScalarFunctionWrapper``.
......@@ -2198,12 +2223,15 @@ class SDWorkflow(Workflow):
wrappers : list
A list of wrappers.
"""
if masses is None:
masses = element_masses()
constructor = dynamics_for_name[name].from_cell
self.wrappers = list(
DynWrapper(ScalarFunctionWrapper(i, self.potentials, **kwargs), masses=lookup(i.values, masses) if masses else None)
constructor(i, self.potentials, **kwargs)
for i in self.cells
)
if ek:
for i in self.wrappers:
actual = i.ek(i.p)
i.p = i.velocity_for_ek(np.full_like(actual, ek), i.p)
return self.wrappers
@requires_fields("cells_result")
......@@ -2231,13 +2259,13 @@ class SDWorkflow(Workflow):
job_id, cell = cell
if what == "relax":
default_kwargs = dict(options=dict(maxiter=10000), method="L-BFGS-B")
driver = DynWrapper.relax
elif what == "interate":
driver = dyn_relax
elif what == "integrate":
default_kwargs = {}
driver = DynWrapper.integrate
driver = dyn_integrate
elif what == "nvt-vs":
default_kwargs = {}
driver = DynWrapper.nvt_vs
driver = dyn_nvt_vs
else:
raise NotImplementedError
default_kwargs.update(kwargs)
......@@ -2247,12 +2275,15 @@ class SDWorkflow(Workflow):
warnings_fw = []
with warnings.catch_warnings(record=True) as warnings_list:
driver(cell, **kwargs)
_last = cell.state
_last.meta['flag-potential-warning'] = False
result = driver(cell, **kwargs)
if kwargs.get("snapshots", False):
_, result = result
else:
result = cell.interpret(*split2(result))
result.meta['flag-potential-warning'] = False
for w in warnings_list:
if issubclass(w.category, PotentialRuntimeWarning):
_last.meta['flag-potential-warning'] = True
result.meta['flag-potential-warning'] = True
else:
warnings_fw.append(w)
for w in warnings_fw:
......@@ -2262,10 +2293,10 @@ class SDWorkflow(Workflow):
filename=w.filename,
lineno=w.lineno,
)
return _last, cell.snapshots, job_id
return result, job_id
@requires_fields("wrappers")
def run(self, parallel=False, pool_kwargs=None, save=None, **kwargs):
def run(self, parallel=False, pool_kwargs=None, save=None, update_state=True, **kwargs):
"""
Computes descriptors.
......@@ -2277,6 +2308,9 @@ class SDWorkflow(Workflow):
Arguments to Pool constructor in parallel mode.
save : str
Saves the resulting cells to a file.
update_state : bool
If True, keeps the final state of dynamics as
a starting point for subsequent runs.
kwargs
Arguments to ``self.worker``.
......@@ -2304,7 +2338,7 @@ class SDWorkflow(Workflow):
else:
kwargs["prefer_parallel"] = False
worker = partial(self.worker, **kwargs)
worker = partial(self.worker, update_state=update_state, **kwargs)
if parallel:
# Disable OpenMP because it causes deadlocks