"""On-the-fly training classes adapted for CHAMP."""
# MIT License
# Copyright (c) 2019 Harvard University
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE
from time import time
from copy import deepcopy
import numpy as np
from flare.otf import OTF
from flare.ase.otf import ASE_OTF
from flare.ase.atoms import FLARE_Atoms
import flare.ase.dft as dft_source
from flare.utils.learner import is_std_in_bound
from ase import units
from .verlet import CustomVerlet
[docs]class C_OTF(OTF):
"""Trains a Gaussian process force field on the fly during molecular dynamics.
Args:
dt (float): MD timestep.
number_of_steps (:obj:`int`): Number of timesteps in the training
simulation.
prev_pos_init ([type], optional): Previous positions. Defaults
to None.
rescale_steps (List[:obj:`int`], optional): List of frames for which the
velocities of the atoms are rescaled. Defaults to [].
rescale_temps (List[:obj:`int`], optional): List of rescaled temperatures.
Defaults to [].
gp (gp.GaussianProcess): Initial GP model.
calculate_energy (:obj:`bool`, optional): If True, the energy of each
frame is calculated with the GP. Defaults to False.
calculate_efs (:obj:`bool`, optional): If True, the energy and stress of each
frame is calculated with the GP. Defaults to False.
write_model (:obj:`int`, optional): If 0, write never. If 1, write at
end of run. If 2, write after each training and end of run.
If 3, write after each time atoms are added and end of run.
If 4, write after each training and end of run, and back up
after each write.
force_only (:obj:`bool`, optional): If True, only use forces for training.
Default to False, use forces, energy and stress for training.
std_tolerance_factor (float, optional): Threshold that determines
when DFT is called. Specifies a multiple of the current noise
hyperparameter. If the epistemic uncertainty on a force
component exceeds this value, DFT is called. Defaults to 1.
skip (:obj:`int`, optional): Number of frames that are skipped when
dumping to the output file. Defaults to 0.
init_atoms (List[:obj:`int`], optional): List of atoms from the input
structure whose local environments and force components are
used to train the initial GP model. If None is specified, all
atoms are used to train the initial GP. Defaults to None.
output_name (:obj:`str`, optional): Name of the output file. Defaults to
'otf_run'.
max_atoms_added (:obj:`int`, optional): Number of atoms added each time
DFT is called. Defaults to 1.
freeze_hyps (:obj:`int`, optional): Specifies the number of times the
hyperparameters of the GP are optimized. After this many
updates to the GP, the hyperparameters are frozen.
Defaults to 10.
min_steps_with_model (:obj:`int`, optional): Minimum number of steps the
model takes in between calls to DFT. Defaults to 0.
force_source (Union[:obj:`str`, object], optional): DFT code used to calculate
ab initio forces during training. A custom module can be used here
in place of the DFT modules available in the FLARE package. The
module must contain two functions: parse_dft_input, which takes a
file name (in string format) as input and returns the positions,
species, cell, and masses of a structure of atoms; and run_dft_par,
which takes a number of DFT related inputs and returns the forces
on all atoms. Defaults to "qe".
npool (:obj:`int`, optional): Number of k-point pools for DFT
calculations. Defaults to None.
mpi (:obj:`str`, optional): Determines how mpi is called. Defaults to
"srun".
dft_loc (:obj:`str`): Location of DFT executable.
dft_input (:obj:`str`): Input file.
dft_output (:obj:`str`): Output file.
dft_kwargs ([type], optional): Additional arguments which are
passed when DFT is called; keyword arguments vary based on the
program (e.g. ESPRESSO vs. VASP). Defaults to None.
store_dft_output (Tuple[Union[:obj:`str`,List[:obj:`str`]],:obj:`str`], optional):
After DFT calculations are called, copy the file or files
specified in the first element of the tuple to a directory
specified as the second element of the tuple.
Useful when DFT calculations are expensive and want to be kept
for later use. The first element of the tuple can either be a
single file name, or a list of several. Copied files will be
prepended with the date and time with the format
'Year.Month.Day:Hour:Minute:Second:'.
n_cpus (:obj:`int`, optional): Number of cpus used during training.
Defaults to 1.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
[docs] def run(self):
"""Performs an on-the-fly training run.
If OTF has store_dft_output set, then the specified DFT files will
be copied with the current date and time prepended in the format
'Year.Month.Day:Hour:Minute:Second:'.
"""
optional_dict = {"Restart": self.curr_step}
self.output.write_header(
str(self.gp),
self.dt,
self.number_of_steps,
self.structure,
self.std_tolerance,
optional_dict,
)
counter = 0
self.start_time = time()
while self.curr_step < self.number_of_steps:
# run DFT and train initial model if first step and DFT is on
if (
(self.curr_step == 0)
and (self.std_tolerance != 0)
and (len(self.gp.training_data) == 0)
):
# Are the recorded forces from the GP or DFT in ASE OTF?
# When DFT is called, ASE energy, forces, and stresses should
# get updated.
self.initialize_train()
# after step 1, try predicting with GP model
else:
# compute forces and stds with GP
self.dft_step = False
self.compute_properties()
# get max uncertainty atoms
std_in_bound, target_atoms = is_std_in_bound(
self.std_tolerance,
self.gp.force_noise,
self.structure,
max_atoms_added=self.max_atoms_added,
update_style=self.update_style,
update_threshold=self.update_threshold,
)
steps_since_dft = self.curr_step - self.last_dft_step
if (not std_in_bound) and (steps_since_dft > self.min_steps_with_model):
# record GP forces
self.update_temperature()
self.record_state()
gp_energy = self.structure.potential_energy
gp_forces = deepcopy(self.structure.forces)
gp_stress = deepcopy(self.structure.stress)
# run DFT and record forces
self.dft_step = True
self.last_dft_step = self.curr_step
self.run_dft()
dft_frcs = deepcopy(self.structure.forces)
dft_stress = deepcopy(self.structure.stress)
dft_energy = self.structure.potential_energy
# run MD step & record the state
self.record_state()
# record DFT data into an .xyz file with filename self.dft_xyz.
# the file includes the structure, e/f/s labels and atomic
# indices of environments added to gp
self.record_dft_data(self.structure, target_atoms)
# compute mae and write to output
self.compute_mae(
gp_energy,
gp_forces,
gp_stress,
dft_energy,
dft_frcs,
dft_stress,
)
# add max uncertainty atoms to training set
self.update_gp(
target_atoms,
dft_frcs,
dft_stress=dft_stress,
dft_energy=dft_energy,
)
if self.write_model == 4:
self.checkpoint()
self.backup_checkpoint()
# write gp forces
if counter >= self.skip and not self.dft_step:
self.update_temperature()
self.record_state()
counter = 0
counter += 1
self.md_step(forces=self.structure.forces) # update positions by Verlet
self.rescale_temperature(self.structure.positions)
if self.write_model == 3:
self.checkpoint()
self.output.conclude_run()
if self.write_model >= 1:
self.write_gp()
self.checkpoint()
[docs] def initialize_train(self):
# call dft and update positions
self.run_dft()
dft_frcs = deepcopy(self.structure.forces)
dft_stress = deepcopy(self.structure.stress)
dft_energy = self.structure.potential_energy
self.update_temperature()
self.record_state()
self.record_dft_data(self.structure, self.init_atoms)
# make initial gp model and predict forces
self.update_gp(
self.init_atoms, dft_frcs, dft_stress=dft_stress, dft_energy=dft_energy
)
self.structure.forces = dft_frcs
[docs]class C_ASE_OTF(ASE_OTF, C_OTF):
"""On-the-fly training module using ASE MD engine, a subclass of OTF.
Args:
atoms (ASE Atoms): the ASE Atoms object for the on-the-fly MD run.
timestep (:obj:`float`): the timestep in MD. Please use ASE units, e.g. if the
timestep is 1 fs, then set `timestep = 1 * units.fs`
number_of_steps (:obj:`int`): the total number of steps for MD.
dft_calc (ASE Calculator): any ASE calculator is supported,
e.g. Espresso, VASP etc.
md_engine (:obj:`str`): the name of MD thermostat, only `VelocityVerlet`,
`NVTBerendsen`, `NPTBerendsen`, `NPT` and `Langevin`, `NoseHoover`
are supported.
md_kwargs (dict): specify the args for MD as a dictionary, the args are
as required by the ASE MD modules consistent with the `md_engine`.
update_settings (List[List[dict]]): array containg CHAMP simulation
parameters to update.
calculator (:obj:`Calculator <ase.calculators.calculator.Calculator>`): ASE calculator.
Must have "get_uncertainties" method
implemented.
trajectory (ASE Trajectory): default `None`, not recommended,
currently in experiment.
The following arguments are for on-the-fly training, the user can also
refer to :class:`flare.otf.OTF`
Args:
prev_pos_init ([type], optional): Previous positions. Defaults
to None.
rescale_steps (List[:obj:`int`], optional): List of frames for which the
velocities of the atoms are rescaled. Defaults to [].
rescale_temps (List[:obj:`int`], optional): List of rescaled temperatures.
Defaults to [].
calculate_energy (:obj:`bool`, optional): If True, the energy of each
frame is calculated with the GP. Defaults to False.
write_model (:obj:`int`, optional): If 0, write never. If 1, write at
end of run. If 2, write after each training and end of run.
If 3, write after each time atoms are added and end of run.
If 4, write after each training and end of run, and back up
after each write.
std_tolerance_factor (float, optional): Threshold that determines
when DFT is called. Specifies a multiple of the current noise
hyperparameter. If the epistemic uncertainty on a force
component exceeds this value, DFT is called. Defaults to 1.
skip (:obj:`int`, optional): Number of frames that are skipped when
dumping to the output file. Defaults to 0.
init_atoms (List[:obj:`int`], optional): List of atoms from the input
structure whose local environments and force components are
used to train the initial GP model. If None is specified, all
atoms are used to train the initial GP. Defaults to None.
output_name (:obj:`str`, optional): Name of the output file. Defaults to
'otf_run'.
max_atoms_added (:obj:`int`, optional): Number of atoms added each time
DFT is called. Defaults to 1.
freeze_hyps (:obj:`int`, optional): Specifies the number of times the
hyperparameters of the GP are optimized. After this many
updates to the GP, the hyperparameters are frozen.
Defaults to 10.
n_cpus (:obj:`int`, optional): Number of cpus used during training.
Defaults to 1.
"""
def __init__(
self,
atoms,
timestep,
number_of_steps,
dft_calc,
md_engine,
md_kwargs,
update_settings,
calculator=None,
trajectory=None,
**otf_kwargs
):
self.structure = FLARE_Atoms.from_ase_atoms(atoms)
if calculator is not None:
self.structure.calc = calculator
self.timestep = timestep
self.md_engine = md_engine
self.md_kwargs = md_kwargs
self._kernels = None
self.update_settings = np.array(update_settings)
if md_engine == "CustomVerlet":
MD = CustomVerlet
else:
raise NotImplementedError(md_engine + " is not implemented in ASE")
self.md = MD(
atoms=self.structure,
timestep=timestep,
trajectory=trajectory,
**md_kwargs,
)
force_source = dft_source
self.flare_calc = self.structure.calc
# Convert ASE timestep to ps for the output file.
flare_dt = timestep / (units.fs * 1e3)
C_OTF.__init__(
self,
dt=flare_dt,
number_of_steps=number_of_steps,
gp=self.flare_calc.gp_model,
force_source=force_source,
dft_loc=dft_calc,
dft_input=self.structure,
**otf_kwargs,
)
self.flare_name = self.output_name + "_flare.json"
self.dft_name = self.output_name + "_dft.pickle"
self.structure_name = self.output_name + "_atoms.json"
self.checkpt_files = [
self.checkpt_name,
self.flare_name,
self.dft_name,
self.structure_name,
self.dft_xyz,
]
[docs] def md_step(self, forces=None):
"""Perform an MD step.
Get new position in molecular dynamics based on the forces predicted by
FLARE_Calculator or DFT calculator
Args:
forces (List[float]): array containing the forces as calculated by CHAMP (or FLARE).
"""
if self.curr_step in self.update_settings and self.dft_loc.name == "CHAMP":
sets = self.dft_loc.parameters.settings
ind = np.where(self.update_settings == self.curr_step)[0][0]
changes = self.update_settings[ind][1]
for key, val in changes.items():
if isinstance(val, dict):
for key2, val2 in val.items():
setattr(getattr(sets, key), key2, val2)
else:
setattr(sets, key, val)
# Update previous positions.
self.structure.prev_positions = np.copy(self.structure.positions)
# Reset FLARE calculator.
if self.dft_step:
self.flare_calc.reset()
self.structure.calc = self.flare_calc
# Take MD step.
# Inside the step() function, get_forces() is called
self.md.step(forces=forces)
self.curr_step += 1