Source code for qmcblip.flare.otf

"""On-the-fly training classes adapted for CHAMP."""
# MIT License

# Copyright (c) 2019 Harvard University

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE

from time import time
from copy import deepcopy
import numpy as np

from flare.otf import OTF
from flare.ase.otf import ASE_OTF
from flare.ase.atoms import FLARE_Atoms
import flare.ase.dft as dft_source
from flare.utils.learner import is_std_in_bound

from ase import units

from .verlet import CustomVerlet

[docs]class C_OTF(OTF):
    """Trains a Gaussian process force field on the fly during molecular dynamics.

    Args:
        dt (float): MD timestep.
        number_of_steps (:obj:`int`): Number of timesteps in the training
            simulation.
        prev_pos_init ([type], optional): Previous positions. Defaults
            to None.
        rescale_steps (List[:obj:`int`], optional): List of frames for which the
            velocities of the atoms are rescaled. Defaults to [].
        rescale_temps (List[:obj:`int`], optional): List of rescaled temperatures.
            Defaults to [].
        gp (gp.GaussianProcess): Initial GP model.
        calculate_energy (:obj:`bool`, optional): If True, the energy of each
            frame is calculated with the GP. Defaults to False.
        calculate_efs (:obj:`bool`, optional): If True, the energy and stress of each
            frame is calculated with the GP. Defaults to False.
        write_model (:obj:`int`, optional): If 0, write never. If 1, write at
            end of run. If 2, write after each training and end of run.
            If 3, write after each time atoms are added and end of run.
            If 4, write after each training and end of run, and back up
            after each write.
        force_only (:obj:`bool`, optional): If True, only use forces for training.
            Default to False, use forces, energy and stress for training.
        std_tolerance_factor (float, optional): Threshold that determines
            when DFT is called. Specifies a multiple of the current noise
            hyperparameter. If the epistemic uncertainty on a force
            component exceeds this value, DFT is called. Defaults to 1.
        skip (:obj:`int`, optional): Number of frames that are skipped when
            dumping to the output file. Defaults to 0.
        init_atoms (List[:obj:`int`], optional): List of atoms from the input
            structure whose local environments and force components are
            used to train the initial GP model. If None is specified, all
            atoms are used to train the initial GP. Defaults to None.
        output_name (:obj:`str`, optional): Name of the output file. Defaults to
            'otf_run'.
        max_atoms_added (:obj:`int`, optional): Number of atoms added each time
            DFT is called. Defaults to 1.
        freeze_hyps (:obj:`int`, optional): Specifies the number of times the
            hyperparameters of the GP are optimized. After this many
            updates to the GP, the hyperparameters are frozen.
            Defaults to 10.
        min_steps_with_model (:obj:`int`, optional): Minimum number of steps the
            model takes in between calls to DFT. Defaults to 0.
        force_source (Union[:obj:`str`, object], optional): DFT code used to calculate
            ab initio forces during training. A custom module can be used here
            in place of the DFT modules available in the FLARE package. The
            module must contain two functions: parse_dft_input, which takes a
            file name (in string format) as input and returns the positions,
            species, cell, and masses of a structure of atoms; and run_dft_par,
            which takes a number of DFT related inputs and returns the forces
            on all atoms.  Defaults to "qe".
        npool (:obj:`int`, optional): Number of k-point pools for DFT
            calculations. Defaults to None.
        mpi (:obj:`str`, optional): Determines how mpi is called. Defaults to
            "srun".
        dft_loc (:obj:`str`): Location of DFT executable.
        dft_input (:obj:`str`): Input file.
        dft_output (:obj:`str`): Output file.
        dft_kwargs ([type], optional): Additional arguments which are
            passed when DFT is called; keyword arguments vary based on the
            program (e.g. ESPRESSO vs. VASP). Defaults to None.
        store_dft_output (Tuple[Union[:obj:`str`,List[:obj:`str`]],:obj:`str`], optional):
            After DFT calculations are called, copy the file or files
            specified in the first element of the tuple to a directory
            specified as the second element of the tuple.
            Useful when DFT calculations are expensive and want to be kept
            for later use. The first element of the tuple can either be a
            single file name, or a list of several. Copied files will be
            prepended with the date and time with the format
            'Year.Month.Day:Hour:Minute:Second:'.
        n_cpus (:obj:`int`, optional): Number of cpus used during training.
            Defaults to 1.
    """

    def __init__(self, *args, **kwargs):

        super().__init__(*args, **kwargs)

[docs]    def run(self):
        """Performs an on-the-fly training run.

        If OTF has store_dft_output set, then the specified DFT files will
        be copied with the current date and time prepended in the format
        'Year.Month.Day:Hour:Minute:Second:'.
        """

        optional_dict = {"Restart": self.curr_step}
        self.output.write_header(
            str(self.gp),
            self.dt,
            self.number_of_steps,
            self.structure,
            self.std_tolerance,
            optional_dict,
        )

        counter = 0
        self.start_time = time()

        while self.curr_step < self.number_of_steps:
            # run DFT and train initial model if first step and DFT is on
            if (
                (self.curr_step == 0)
                and (self.std_tolerance != 0)
                and (len(self.gp.training_data) == 0)
            ):

                # Are the recorded forces from the GP or DFT in ASE OTF?
                # When DFT is called, ASE energy, forces, and stresses should
                # get updated.
                self.initialize_train()

            # after step 1, try predicting with GP model
            else:
                # compute forces and stds with GP
                self.dft_step = False
                self.compute_properties()

                # get max uncertainty atoms
                std_in_bound, target_atoms = is_std_in_bound(
                    self.std_tolerance,
                    self.gp.force_noise,
                    self.structure,
                    max_atoms_added=self.max_atoms_added,
                    update_style=self.update_style,
                    update_threshold=self.update_threshold,
                )

                steps_since_dft = self.curr_step - self.last_dft_step
                if (not std_in_bound) and (steps_since_dft > self.min_steps_with_model):
                    # record GP forces
                    self.update_temperature()
                    self.record_state()
                    gp_energy = self.structure.potential_energy
                    gp_forces = deepcopy(self.structure.forces)
                    gp_stress = deepcopy(self.structure.stress)

                    # run DFT and record forces
                    self.dft_step = True
                    self.last_dft_step = self.curr_step
                    self.run_dft()
                    dft_frcs = deepcopy(self.structure.forces)
                    dft_stress = deepcopy(self.structure.stress)
                    dft_energy = self.structure.potential_energy

                    # run MD step & record the state
                    self.record_state()

                    # record DFT data into an .xyz file with filename self.dft_xyz.
                    # the file includes the structure, e/f/s labels and atomic
                    # indices of environments added to gp
                    self.record_dft_data(self.structure, target_atoms)

                    # compute mae and write to output
                    self.compute_mae(
                        gp_energy,
                        gp_forces,
                        gp_stress,
                        dft_energy,
                        dft_frcs,
                        dft_stress,
                    )

                    # add max uncertainty atoms to training set
                    self.update_gp(
                        target_atoms,
                        dft_frcs,
                        dft_stress=dft_stress,
                        dft_energy=dft_energy,
                    )

                    if self.write_model == 4:
                        self.checkpoint()
                        self.backup_checkpoint()

            # write gp forces
            if counter >= self.skip and not self.dft_step:
                self.update_temperature()
                self.record_state()
                counter = 0

            counter += 1

            self.md_step(forces=self.structure.forces)  # update positions by Verlet
            self.rescale_temperature(self.structure.positions)

            if self.write_model == 3:
                self.checkpoint()

        self.output.conclude_run()

        if self.write_model >= 1:
            self.write_gp()
            self.checkpoint()

[docs]    def initialize_train(self):
        # call dft and update positions
        self.run_dft()
        dft_frcs = deepcopy(self.structure.forces)
        dft_stress = deepcopy(self.structure.stress)
        dft_energy = self.structure.potential_energy

        self.update_temperature()
        self.record_state()
        self.record_dft_data(self.structure, self.init_atoms)

        # make initial gp model and predict forces
        self.update_gp(
            self.init_atoms, dft_frcs, dft_stress=dft_stress, dft_energy=dft_energy
        )
        self.structure.forces = dft_frcs

[docs]class C_ASE_OTF(ASE_OTF, C_OTF):
    """On-the-fly training module using ASE MD engine, a subclass of OTF.

    Args:
        atoms (ASE Atoms): the ASE Atoms object for the on-the-fly MD run.
        timestep (:obj:`float`): the timestep in MD. Please use ASE units, e.g. if the
            timestep is 1 fs, then set `timestep = 1 * units.fs`
        number_of_steps (:obj:`int`): the total number of steps for MD.
        dft_calc (ASE Calculator): any ASE calculator is supported,
            e.g. Espresso, VASP etc.
        md_engine (:obj:`str`): the name of MD thermostat, only `VelocityVerlet`,
            `NVTBerendsen`, `NPTBerendsen`, `NPT` and `Langevin`, `NoseHoover`
            are supported.
        md_kwargs (dict): specify the args for MD as a dictionary, the args are
            as required by the ASE MD modules consistent with the `md_engine`.
        update_settings (List[List[dict]]): array containg CHAMP simulation
            parameters to update.
        calculator (:obj:`Calculator <ase.calculators.calculator.Calculator>`): ASE calculator.
            Must have "get_uncertainties" method
          implemented.
        trajectory (ASE Trajectory): default `None`, not recommended,
            currently in experiment.

    The following arguments are for on-the-fly training, the user can also
    refer to :class:`flare.otf.OTF`

    Args:
        prev_pos_init ([type], optional): Previous positions. Defaults
            to None.
        rescale_steps (List[:obj:`int`], optional): List of frames for which the
            velocities of the atoms are rescaled. Defaults to [].
        rescale_temps (List[:obj:`int`], optional): List of rescaled temperatures.
            Defaults to [].
        calculate_energy (:obj:`bool`, optional): If True, the energy of each
            frame is calculated with the GP. Defaults to False.
        write_model (:obj:`int`, optional): If 0, write never. If 1, write at
            end of run. If 2, write after each training and end of run.
            If 3, write after each time atoms are added and end of run.
            If 4, write after each training and end of run, and back up
            after each write.
        std_tolerance_factor (float, optional): Threshold that determines
            when DFT is called. Specifies a multiple of the current noise
            hyperparameter. If the epistemic uncertainty on a force
            component exceeds this value, DFT is called. Defaults to 1.
        skip (:obj:`int`, optional): Number of frames that are skipped when
            dumping to the output file. Defaults to 0.
        init_atoms (List[:obj:`int`], optional): List of atoms from the input
            structure whose local environments and force components are
            used to train the initial GP model. If None is specified, all
            atoms are used to train the initial GP. Defaults to None.
        output_name (:obj:`str`, optional): Name of the output file. Defaults to
            'otf_run'.
        max_atoms_added (:obj:`int`, optional): Number of atoms added each time
            DFT is called. Defaults to 1.
        freeze_hyps (:obj:`int`, optional): Specifies the number of times the
            hyperparameters of the GP are optimized. After this many
            updates to the GP, the hyperparameters are frozen.
            Defaults to 10.
        n_cpus (:obj:`int`, optional): Number of cpus used during training.
            Defaults to 1.
    """

    def __init__(
        self,
        atoms,
        timestep,
        number_of_steps,
        dft_calc,
        md_engine,
        md_kwargs,
        update_settings,
        calculator=None,
        trajectory=None,
        **otf_kwargs
    ):

        self.structure = FLARE_Atoms.from_ase_atoms(atoms)
        if calculator is not None:
            self.structure.calc = calculator
        self.timestep = timestep
        self.md_engine = md_engine
        self.md_kwargs = md_kwargs
        self._kernels = None
        self.update_settings = np.array(update_settings)

        if md_engine == "CustomVerlet":
            MD = CustomVerlet
        else:
            raise NotImplementedError(md_engine + " is not implemented in ASE")

        self.md = MD(
            atoms=self.structure,
            timestep=timestep,
            trajectory=trajectory,
            **md_kwargs,
        )

        force_source = dft_source
        self.flare_calc = self.structure.calc

        # Convert ASE timestep to ps for the output file.
        flare_dt = timestep / (units.fs * 1e3)

        C_OTF.__init__(
            self,
            dt=flare_dt,
            number_of_steps=number_of_steps,
            gp=self.flare_calc.gp_model,
            force_source=force_source,
            dft_loc=dft_calc,
            dft_input=self.structure,
            **otf_kwargs,
        )

        self.flare_name = self.output_name + "_flare.json"
        self.dft_name = self.output_name + "_dft.pickle"
        self.structure_name = self.output_name + "_atoms.json"
        self.checkpt_files = [
            self.checkpt_name,
            self.flare_name,
            self.dft_name,
            self.structure_name,
            self.dft_xyz,
        ]

[docs]    def md_step(self, forces=None):
        """Perform an MD step.

        Get new position in molecular dynamics based on the forces predicted by
        FLARE_Calculator or DFT calculator

        Args:
            forces (List[float]): array containing the forces as calculated by CHAMP (or FLARE).
        """

        if self.curr_step in self.update_settings and self.dft_loc.name == "CHAMP":
            sets = self.dft_loc.parameters.settings
            ind = np.where(self.update_settings == self.curr_step)[0][0]
            changes = self.update_settings[ind][1]
            for key, val in changes.items():
                if isinstance(val, dict):
                    for key2, val2 in val.items():
                        setattr(getattr(sets, key), key2, val2)
                else:
                    setattr(sets, key, val)

        # Update previous positions.
        self.structure.prev_positions = np.copy(self.structure.positions)

        # Reset FLARE calculator.
        if self.dft_step:
            self.flare_calc.reset()
            self.structure.calc = self.flare_calc

        # Take MD step.
        # Inside the step() function, get_forces() is called
        self.md.step(forces=forces)
        self.curr_step += 1