Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 95 additions & 10 deletions n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,105 @@
"""
The class MetaLayer is an extension of the backend Layer class
with a number of methods and helpers to facilitate writing new custom layers
in such a way that new custom layers don't need to rely on anything backend-dependent
The class MetaLayer is an extension of the backend Layer class
with a number of methods and helpers to facilitate writing new custom layers
in such a way that new custom layers don't need to rely on anything backend-dependent

In other words, if you want to implement a new layer and need functions not included here
it is better to add a new method which is just a call to the relevant backend-dependent function
For instance: np_to_tensor is just a call to K.constant
In other words, if you want to implement a new layer and need functions not included here
it is better to add a new method which is just a call to the relevant backend-dependent function
For instance: np_to_tensor is just a call to K.constant
"""

from keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform
import math

from keras import random
from keras.initializers import Constant, RandomUniform, VarianceScaling, glorot_uniform
from keras.layers import Layer


class GammaVarianceScaling(VarianceScaling):
    """Variance-scaling initializer whose variance is raised to the power ``gamma``.

    The parent ``VarianceScaling`` samples weights with variance ``scale / fan``
    (standard deviation ``sqrt(scale / fan)``). This subclass exponentiates that
    variance before sampling:

        variance = (scale / fan) ** gamma
        std      = (scale / fan) ** (gamma / 2)

    With ``gamma = 1`` the standard deviation is ``sqrt(scale / fan)``, i.e. the
    same width as ``VarianceScaling``. Whenever ``scale / fan < 1``, ``gamma > 1``
    gives a narrower initialisation and ``gamma < 1`` a wider one.

    ``fan`` is selected by ``mode``: ``fan_in``, ``fan_out``, or (for any other
    value of ``mode``) the average of the two. It is never taken smaller than 1.

    Parameters
    ----------
        gamma: float
            exponent applied to the variance (default: 1.0)
        scale, mode, distribution, seed:
            forwarded unchanged to ``VarianceScaling``
    """

    # Correction used by keras so that a truncated normal ends up with the
    # requested standard deviation once the truncation has been applied.
    _TRUNCATED_CORRECTION = 0.87962566103423978

    def __init__(
        self, gamma=1.0, scale=1.0, mode="fan_in", distribution="truncated_normal", seed=None
    ):
        super().__init__(scale=scale, mode=mode, distribution=distribution, seed=seed)
        self.gamma = gamma

    @staticmethod
    def _compute_fans(shape):
        """Return ``(fan_in, fan_out)`` as floats for a weight of the given shape.

        - rank 0: both fans are 1
        - rank 1: both fans are the single dimension
        - rank 2: the two dimensions, in order
        - rank > 2: the last two dimensions, each multiplied by the product of
          all the leading dimensions (the receptive field size)
        """
        dims = tuple(shape)
        rank = len(dims)
        if rank == 0:
            n_in = n_out = 1
        elif rank == 1:
            n_in = n_out = dims[0]
        elif rank == 2:
            n_in, n_out = dims
        else:
            field_size = math.prod(dims[:-2])
            n_in = dims[-2] * field_size
            n_out = dims[-1] * field_size
        return float(n_in), float(n_out)

    def __call__(self, shape, dtype=None):
        """Sample a tensor of the given ``shape`` (and ``dtype``) from the configured distribution."""
        n_in, n_out = self._compute_fans(shape)
        # Any mode other than fan_in / fan_out falls back to the average of both fans
        fan_by_mode = {"fan_in": n_in, "fan_out": n_out}
        fan = fan_by_mode.get(self.mode, (n_in + n_out) / 2.0)

        # scale / fan is the variance the parent class would use; raising the
        # *variance* to gamma means raising the std to gamma / 2
        variance = self.scale / max(1.0, fan)
        sigma = variance ** (self.gamma / 2.0)

        if self.distribution == "truncated_normal":
            return random.truncated_normal(
                shape,
                mean=0.0,
                stddev=sigma / self._TRUNCATED_CORRECTION,
                dtype=dtype,
                seed=self.seed,
            )
        if self.distribution == "untruncated_normal":
            return random.normal(shape, mean=0.0, stddev=sigma, dtype=dtype, seed=self.seed)

        # Remaining case is the uniform distribution: a uniform in [-b, b] has
        # variance b**2 / 3, hence b = sqrt(3) * std
        bound = math.sqrt(3.0) * sigma
        return random.uniform(shape, minval=-bound, maxval=bound, dtype=dtype, seed=self.seed)

    def get_config(self):
        """Return the parent configuration extended with ``gamma``."""
        config = dict(super().get_config())
        config["gamma"] = self.gamma
        return config


# Registry of the available initializers: name -> (class, default keyword arguments).
# Define new initializers here, together with the arguments they accept
# (with default values if need be).
initializers = {
    "random_uniform": (RandomUniform, {"minval": -0.5, "maxval": 0.5}),
    "glorot_uniform": (glorot_uniform, {}),
    # glorot_normal expressed via GammaVarianceScaling so its width is tunable through
    # `scale` (variance multiplier) and `gamma` (exponent on the variance:
    # variance = (scale/fan)**gamma); gamma>1 narrower, gamma<1 wider.
    # keras' glorot_normal is VarianceScaling(scale=1.0, mode="fan_avg",
    # distribution="truncated_normal"), so the distribution must stay truncated for
    # scale=1.0, gamma=1.0 to reproduce it exactly.
    "glorot_normal": (
        GammaVarianceScaling,
        {"scale": 1.0, "gamma": 1.0, "mode": "fan_avg", "distribution": "truncated_normal"},
    ),
}


Expand Down Expand Up @@ -91,10 +175,11 @@ def select_initializer(ini_name, seed=None, **kwargs):
) from e

ini_class = ini_tuple[0]
ini_args = ini_tuple[1]
# Copy so per-call overrides (seed, scale, ...) don't leak into the shared defaults
ini_args = dict(ini_tuple[1])
ini_args["seed"] = seed

for key, value in kwargs.items():
if key in ini_args.keys():
if key in ini_args:
ini_args[key] = value
return ini_class(**ini_args)
66 changes: 66 additions & 0 deletions n3fit/src/n3fit/backends/keras_backend/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"""

import logging
from pathlib import Path
from time import time

from keras import backend as K
Expand Down Expand Up @@ -196,6 +197,71 @@ def on_step_end(self, epoch, logs=None):
self._update_weights()


class StoreCallback(CallbackStep):
    """
    Callback that stores the trainable parameters of every replica on disk.

    For each replica a ``parameters`` folder is created inside the corresponding
    path and the flattened trainable weights are written there as
    ``params_<epoch>.npz`` (a single array stored under the key ``params``):

    - at the beginning of training (labelled as epoch 0),
    - every ``check_freq`` epochs,
    - at the end of training, for the best weights held by ``stopping_object``.

    Parameters
    ----------
        pdf_model: MetaModel
            The multi-replica PDF model
        replica_paths: list[Path]
            One path per replica. Parameters are saved under <path>/parameters/.
        stopping_object:
            Object holding, per replica, the best weights (``_best_weights``)
            and the epoch at which they were found (``_best_epochs``)
        check_freq: int
            Save every this many epochs (default: 100)
    """

    def __init__(self, pdf_model, replica_paths, stopping_object, check_freq=100):
        super().__init__()
        self.check_freq = check_freq
        self.pdf_model = pdf_model
        self.stopping_object = stopping_object
        self.weight_dirs = []
        for path in replica_paths:
            weight_dir = path / "parameters"
            weight_dir.mkdir(parents=True, exist_ok=True)
            self.weight_dirs.append(weight_dir)

    def _save_weights(self, epoch, tr_weights, weight_dir):
        """Flatten and concatenate ``tr_weights`` and write them to
        ``weight_dir / params_<epoch>.npz``."""
        filepath = weight_dir / f"params_{epoch}.npz"
        # save parameters as expected by colibri
        trainable_weights_flat = np.concatenate([np.asarray(w).flatten() for w in tr_weights])
        np.savez(filepath, params=trainable_weights_flat)
        log.info("Saved parameters at epoch %s in %s", epoch, filepath)

    def _save_current_parameters(self, epoch):
        """Save the current trainable weights of every replica, labelled with ``epoch``."""
        pdf_replicas = self.pdf_model.split_replicas()
        for replica_model, weight_dir in zip(pdf_replicas, self.weight_dirs):
            self._save_weights(epoch, replica_model.trainable_weights, weight_dir)

    def on_train_begin(self, logs=None):
        """Store the model parameters at initialisation (epoch 0), before any
        gradient step has been taken."""
        self._save_current_parameters(0)

    def on_step_end(self, epoch, logs=None):
        """Function to be called at the end of every epoch
        Every ``check_freq`` number of epochs, the parameters of the model will
        be stored in the indicated directory, labelled with ``epoch + 1``.
        """
        if ((epoch + 1) % self.check_freq) == 0:
            self._save_current_parameters(epoch + 1)

    def on_train_end(self, logs=None):
        """Store the best parameters of each replica, labelled with its best epoch.
        Replicas for which the stopping object holds no best weights are skipped
        with a warning."""
        for idx, weight_dir in enumerate(self.weight_dirs):
            weights = self.stopping_object._best_weights[idx]
            if weights is None:
                log.warning(
                    "No best weights found for replica %s, skipping saving best parameters.",
                    idx + 1,
                )
                continue
            best_epoch = self.stopping_object._best_epochs[idx]
            self._save_weights(best_epoch, weights['all_NNs'], weight_dir)


def gen_tensorboard_callback(log_dir, profiling=False, histogram_freq=0):
"""
Generate tensorboard logging details at ``log_dir``.
Expand Down
39 changes: 25 additions & 14 deletions n3fit/src/n3fit/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@


class WriterWrapper:
def __init__(self, replica_numbers, pdf_objects, stopping_object, all_chi2s, theory, timings, trials):
def __init__(
self, replica_numbers, pdf_objects, stopping_object, all_chi2s, theory, timings, trials
):
"""
Initializes the writer for all replicas.

Expand Down Expand Up @@ -298,18 +300,18 @@ def _hyperparam_settings(self, replica_number):
trials_number = self.trials["number_of_trials"]
idx_trial = replica_number % trials_number
hyperparam_info = {}
hyperparam_info["optimizer"]=self.trials["optimizer"][idx_trial]
hyperparam_info["learning_rate"]=self.trials["learning_rate"][idx_trial]
hyperparam_info["clipnorm"]=self.trials["clipnorm"][idx_trial]
hyperparam_info["epochs"]=self.trials["epochs"][idx_trial]
hyperparam_info["stopping_patience"]=self.trials["stopping_patience"][idx_trial]
hyperparam_info["initial"]=self.trials["initial"][idx_trial]
hyperparam_info["nodes_per_layer"]=self.trials["nodes_per_layer"][idx_trial]
hyperparam_info["number_of_layers"]=self.trials["number_of_layers"][idx_trial]
hyperparam_info["activation"]=self.trials["activation_per_layer"][idx_trial]
hyperparam_info["layer_type"]=self.trials["layer_type"][idx_trial]
hyperparam_info["initializer"]=self.trials["initializer"][idx_trial]
hyperparam_info["dropout"]=self.trials["dropout"][idx_trial]
hyperparam_info["optimizer"] = self.trials["optimizer"][idx_trial]
hyperparam_info["learning_rate"] = self.trials["learning_rate"][idx_trial]
hyperparam_info["clipnorm"] = self.trials["clipnorm"][idx_trial]
hyperparam_info["epochs"] = self.trials["epochs"][idx_trial]
hyperparam_info["stopping_patience"] = self.trials["stopping_patience"][idx_trial]
hyperparam_info["initial"] = self.trials["initial"][idx_trial]
hyperparam_info["nodes_per_layer"] = self.trials["nodes_per_layer"][idx_trial]
hyperparam_info["number_of_layers"] = self.trials["number_of_layers"][idx_trial]
hyperparam_info["activation"] = self.trials["activation_per_layer"][idx_trial]
hyperparam_info["layer_type"] = self.trials["layer_type"][idx_trial]
hyperparam_info["initializer"] = self.trials["initializer"][idx_trial]
hyperparam_info["dropout"] = self.trials["dropout"][idx_trial]
return hyperparam_info
else:
hyperparam_info = "from runcard"
Expand All @@ -329,6 +331,11 @@ def _write_metadata_json(self, i, replica_number, out_path):
# Note: the 2 arguments below are the same for all replicas, unless run separately
timing=self.timings,
stop_epoch=self.stopping_object.stop_epoch,
would_stop_epoch=(
self.stopping_object.would_stop_epoch
if self.stopping_object._dont_stop
else self.stopping_object.stop_epoch
),
)

with open(out_path, "w", encoding="utf-8") as fs:
Expand Down Expand Up @@ -373,6 +380,7 @@ def jsonfit(
true_chi2,
stop_epoch,
timing,
would_stop_epoch,
hyperparam_info,
):
"""Generates a dictionary containing all relevant metadata for the fit
Expand All @@ -399,7 +407,9 @@ def jsonfit(
epoch at which the stopping stopped (not the one for the best fit!)
timing: dict
dictionary of the timing of the different events that happened
hyperparam_info: dict
would_stop_epoch: int
epoch at which the stopping would have stopped if it were not set to "dont_stop"
hyperparam_info: dict
dictionary of hyperparameter settings
"""
all_info = {}
Expand All @@ -415,6 +425,7 @@ def jsonfit(
all_info["arc_lengths"] = arc_lengths
all_info["integrability"] = integrability_numbers
all_info["timing"] = timing
all_info["would_stop_epoch"] = would_stop_epoch
all_info["hyperparameters"] = hyperparam_info
# Versioning info
all_info["version"] = version()
Expand Down
22 changes: 20 additions & 2 deletions n3fit/src/n3fit/model_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,13 @@ class ReplicaSettings:
e.g. ``dense`` or ``dense_per_flavour``
initializer: str
initializer to be used for this replica
initializer_scale: float
width multiplier for the initializer distribution. Only affects ``glorot_normal``
(weight std scales as sqrt(scale)); 1.0 reproduces standard glorot_normal
initializer_gamma: float
exponent on the initializer variance: ``variance = (scale/fan)**gamma``
(``std = (scale/fan)**(gamma/2)``). Only affects ``glorot_normal``; 1.0
reproduces standard glorot_normal
dropout: float
rate of dropout for each layer
regularizer: str
Expand All @@ -360,6 +367,8 @@ class ReplicaSettings:
activations: list[str]
architecture: str = "dense"
initializer: str = "glorot_normal"
initializer_scale: float = 1.0
initializer_gamma: float = 1.0
dropout_rate: float = 0.0
regularizer: str = None
regularizer_args: dict = field(default_factory=dict)
Expand Down Expand Up @@ -806,6 +815,8 @@ def _generate_nn(
activations: list[str] = None,
architecture: str = "dense",
initializer: str = None,
initializer_scale: float = 1.0,
initializer_gamma: float = 1.0,
dropout_rate: float = 0.0,
regularizer: str = None,
regularizer_args: dict = field(default_factory=dict),
Expand Down Expand Up @@ -848,7 +859,9 @@ def layer_generator(i_layer, nodes_out, activation):
"""Generate the ``i_layer``-th dense_per_flavour layer for all replicas."""
l_seed = int(seed + i_layer * n_flavours)
initializers = [
MetaLayer.select_initializer(initializer, seed=l_seed + b)
MetaLayer.select_initializer(
initializer, seed=l_seed + b, scale=initializer_scale, gamma=initializer_gamma
)
for b in range(n_flavours)
]
layer = base_layer_selector(
Expand All @@ -863,7 +876,12 @@ def layer_generator(i_layer, nodes_out, activation):
elif architecture == "dense":

def layer_generator(i_layer, nodes_out, activation):
kini = MetaLayer.select_initializer(initializer, seed=int(seed + i_layer))
kini = MetaLayer.select_initializer(
initializer,
seed=int(seed + i_layer),
scale=initializer_scale,
gamma=initializer_gamma,
)
return base_layer_selector(
architecture,
kernel_initializer=kini,
Expand Down
Loading
Loading