-
Notifications
You must be signed in to change notification settings - Fork 14
ATLAS_WCHARM_13TEV #2337
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
ATLAS_WCHARM_13TEV #2337
Changes from 9 commits
f205bfe
bf1e7d8
68e203d
d1adbcc
43cf998
d8e6aec
8ef8942
0580b11
15e98dd
dbef8c6
3f24200
6bde456
cc395a8
421a5bb
6019a3a
109f27a
202b0e2
2330693
52ebd23
7f15390
883bca9
eb117c5
64aba29
5d820f2
77b5b84
665b6af
5430028
21ff988
70f0be3
f6e22dd
f40ed74
2c870b6
0522fa7
9665cd0
de68a7d
18eda5c
3d6dce2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| data_central: | ||
| - 12.27 | ||
| - 11.57 | ||
| - 10.41 | ||
| - 9.09 | ||
| - 6.85 | ||
| - 11.87 | ||
| - 11.55 | ||
| - 10.09 | ||
| - 8.6 | ||
| - 6.25 | ||
| - 12.18 | ||
| - 11.77 | ||
| - 10.61 | ||
| - 8.85 | ||
| - 7.22 | ||
| - 12.52 | ||
| - 12.14 | ||
| - 10.29 | ||
| - 8.38 | ||
| - 6.55 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| """ | ||
| When running `python filter.py` the relevant data yaml | ||
| file will be created in the `nnpdf_data/commondata/ATLAS_WPWM_7TEV_46FB` directory. | ||
| """ | ||
|
|
||
| import yaml | ||
| from filter_utils import ( | ||
| get_data_values, | ||
| get_kinematics, | ||
| get_artificial_uncertainties, | ||
| get_uncertainties, | ||
| ) | ||
| from nnpdf_data.filter_utils.utils import prettify_float | ||
|
|
||
| yaml.add_representer(float, prettify_float) | ||
|
|
||
|
|
||
| def filter_ATLAS_WCHARM_13TEV_data_kinematic(): | ||
| """ | ||
| This function writes the systematics to yaml files. | ||
| """ | ||
|
|
||
| central_values = get_data_values() | ||
|
|
||
| kin = get_kinematics() | ||
|
|
||
| data_central_yaml = {"data_central": central_values} | ||
|
|
||
| kinematics_yaml = {"bins": kin} | ||
|
|
||
| # write central values and kinematics to yaml file | ||
| with open("data.yaml", "w") as file: | ||
| yaml.dump(data_central_yaml, file, sort_keys=False) | ||
|
|
||
| with open("kinematics.yaml", "w") as file: | ||
| yaml.dump(kinematics_yaml, file, sort_keys=False) | ||
|
|
||
|
|
||
| def filter_get_artificial_uncertainties(): | ||
| with open("metadata.yaml", "r") as file: | ||
| metadata = yaml.safe_load(file) | ||
|
|
||
| systematics = get_artificial_uncertainties() | ||
|
|
||
| error_definitions = {} | ||
| errors = [] | ||
|
|
||
| for sys in systematics: | ||
| if sys[0]['name'] == 'stat': | ||
| error_definitions[sys[0]['name']] = { | ||
| "description": "Uncorrelated statistical uncertainties", | ||
| "treatment": "ADD", | ||
| "type": "UNCORR", | ||
| } | ||
| else: | ||
| error_definitions[sys[0]['name']] = { | ||
| "description": "Systematic uncertainty", | ||
| "treatment": "MULT", # Not sure if this is correct | ||
| "type": "CORR", | ||
| } | ||
|
|
||
| for i in range(metadata['implemented_observables'][0]['ndata']): | ||
| error_value = {} | ||
|
|
||
| for sys in systematics: | ||
| error_value[sys[0]['name']] = float(sys[0]['values'][i]) | ||
|
|
||
| errors.append(error_value) | ||
|
|
||
| uncertainties_yaml = {"definitions": error_definitions, "bins": errors} | ||
|
|
||
| # write uncertainties | ||
| with open(f"uncertainties_covariances.yaml", 'w') as file: | ||
| yaml.dump(uncertainties_yaml, file, sort_keys=False) | ||
|
|
||
|
|
||
| def filter_get_systematics(): | ||
| with open("metadata.yaml", "r") as file: | ||
| metadata = yaml.safe_load(file) | ||
|
|
||
| systematics = get_uncertainties() | ||
|
|
||
| error_definitions = {} | ||
| errors = [] | ||
| # print("Systematics:", systematics) | ||
| for sys in systematics: | ||
| if sys[0]['name'] == 'stat': | ||
| error_definitions[sys[0]['name']] = { | ||
| "description": "Uncorrelated statistical uncertainties", | ||
| "treatment": "ADD", | ||
| "type": "UNCORR", | ||
| } | ||
| else: | ||
| error_definitions[sys[0]['name']] = { | ||
| "description": "Systematic uncertainty", | ||
| "treatment": "MULT", # Not sure if this is correct | ||
| "type": "CORR", | ||
| } | ||
|
|
||
| for i in range(metadata['implemented_observables'][0]['ndata']): | ||
| error_value = {} | ||
|
|
||
| for sys in systematics: | ||
|
|
||
| error_value[sys[0]['name']] = float(sys[0]['values'][i]) | ||
|
|
||
| errors.append(error_value) | ||
|
|
||
| uncertainties_yaml = {"definitions": error_definitions, "bins": errors} | ||
|
|
||
| # write uncertainties | ||
| with open(f"uncertainties.yaml", 'w') as file: | ||
| yaml.dump(uncertainties_yaml, file, sort_keys=False) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| filter_ATLAS_WCHARM_13TEV_data_kinematic() | ||
| filter_get_artificial_uncertainties() | ||
| filter_get_systematics() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,221 @@ | ||
| """ | ||
| This module contains helper functions that are used to extract the data values | ||
| from the rawdata files. | ||
| """ | ||
|
|
||
| import yaml | ||
| import pandas as pd | ||
| import numpy as np | ||
| from numpy.linalg import eig | ||
|
|
||
|
|
||
| def get_data_values(): | ||
| """ | ||
| returns the central data values in the form of a list. | ||
| """ | ||
|
|
||
| data_central = [] | ||
|
|
||
| for i in range(19, 23): | ||
| hepdata_table = f"rawdata/HEPData-ins2628732-v1-Table_{i}.yaml" | ||
|
|
||
| with open(hepdata_table, 'r') as file: | ||
| input = yaml.safe_load(file) | ||
|
|
||
| values = input['dependent_variables'][0]['values'] | ||
|
|
||
| for value in values: | ||
| # store data central and convert the units and apply the correction factor | ||
| data_central.append(value['value']) | ||
|
|
||
| return data_central | ||
|
|
||
|
|
||
| def get_kinematics(): | ||
| """ | ||
| returns the kinematics in the form of a list of dictionaries. | ||
| """ | ||
| kin = [] | ||
|
|
||
| for i in range(19, 23): | ||
| hepdata_table = f"rawdata/HEPData-ins2628732-v1-Table_{i}.yaml" | ||
|
|
||
| with open(hepdata_table, 'r') as file: | ||
| input = yaml.safe_load(file) | ||
|
|
||
| for i, M in enumerate(input["independent_variables"][0]['values']): | ||
| kin_value = { | ||
| 'abs_eta': {'min': None, 'mid': (0.5 * (M['low'] + M['high'])), 'max': None}, | ||
| 'm_W2': {'min': None, 'mid': 6.46046213e03, 'max': None}, | ||
| 'sqrts': {'min': None, 'mid': 13000.0, 'max': None}, | ||
| } | ||
| kin.append(kin_value) | ||
|
|
||
| return kin | ||
|
|
||
|
|
||
| def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0): | ||
| r"""Convert the covariance matrix to a matrix of | ||
| artificial uncertainties. | ||
|
|
||
| NOTE: This function has been taken from validphys.newcommondata_utils. | ||
| If those utils get merged in the future, we can replace this. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| ndata : integer | ||
| Number of data points | ||
| covmat_list : list | ||
| A one dimensional list which contains the elements of | ||
| the covariance matrix row by row. Since experimental | ||
| datasets provide these matrices in a list form, this | ||
| simplifies the implementation for the user. | ||
| no_of_norm_mat : int | ||
| Normalized covariance matrices may have an eigenvalue | ||
| of 0 due to the last data point not being linearly | ||
| independent. To allow for this, the user should input | ||
| the number of normalized matrices that are being treated | ||
| in an instance. For example, if a single covariance matrix | ||
| of a normalized distribution is being processed, the input | ||
| would be 1. If a covariance matrix contains pertains to | ||
| 3 normalized datasets (i.e. cross covmat for 3 | ||
| distributions), the input would be 3. The default value is | ||
| 0 for when the covariance matrix pertains to an absolute | ||
| distribution. | ||
|
|
||
| Returns | ||
| ------- | ||
| artunc : list | ||
| A two dimensional matrix (given as a list of lists) | ||
| which contains artificial uncertainties to be added | ||
| to the commondata. i^th row (or list) contains the | ||
| artificial uncertainties of the i^th data point. | ||
|
|
||
| """ | ||
| epsilon = -0.0000000001 | ||
| neg_eval_count = 0 | ||
| psd_check = True | ||
| covmat = np.zeros((ndata, ndata)) | ||
| artunc = np.zeros((ndata, ndata)) | ||
| for i in range(len(covmat_list)): | ||
| a = i // ndata | ||
| b = i % ndata | ||
| covmat[a][b] = covmat_list[i] | ||
| eigval, eigvec = eig(covmat) | ||
| for j in range(len(eigval)): | ||
| if eigval[j] < epsilon: | ||
| psd_check = False | ||
| elif eigval[j] > epsilon and eigval[j] <= 0: | ||
| neg_eval_count = neg_eval_count + 1 | ||
| if neg_eval_count == (no_of_norm_mat + 1): | ||
| psd_check = False | ||
| elif eigval[j] > 0: | ||
| continue | ||
| if psd_check == False: | ||
| raise ValueError("The covariance matrix is not positive-semidefinite") | ||
| else: | ||
| for i in range(ndata): | ||
| for j in range(ndata): | ||
| if eigval[j] < 0: | ||
| continue | ||
| else: | ||
| artunc[i][j] = eigvec[i][j] * np.sqrt(eigval[j]) | ||
| return artunc.tolist() | ||
|
|
||
|
|
||
| def get_artificial_uncertainties(): | ||
| """ | ||
| returns the uncertainties. | ||
| """ | ||
|
|
||
| ndat = 5 | ||
| # Produce covmat of form [[W-/W+],[0], | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @enocera We are given the covariance matrices for W-/W+ and for W-( We are given the systematics for each point but I am not sure if these include the correlations from this covmat. The covariances matrices are also combined statistical and systematic uncertainty covariance matrices so does this mean that decomposing this
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dear @ecole41 after looking into this data set a little, I suggest to implement two variants insofar as uncertainties are concerned. This means that you have to generate two
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you, that makes a lot of sense. I just wanted to check the function used to symmetrise the errors. The function in: nnpdf_data/nnpdf_data/filter_utils/uncertainties.py, shows that the se_delta is equal to the average of the two errors and the se_sigma is related to their difference. Should this be the other way around?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also wanted to check how we should treat the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You are totally right, that function looks wrong, it should be the other way around to be consistent with Eqs. (23)-(24) and (27) of https://arxiv.org/pdf/physics/0403086. We should check how many data sets have been affected by that typo. Thanks.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You have to compute |
||
| # [0],[W-*/W+*]] | ||
| covmat = np.zeros((4 * ndat, 4 * ndat)) # Multiply by 4 because of W+/- and */not * | ||
|
|
||
| def edit_covmat(filename, offset): | ||
| with open(filename) as f: | ||
| data = yaml.safe_load(f) | ||
| flat_values = [v["value"] for v in data["dependent_variables"][0]["values"]] | ||
| matrix = np.array(flat_values).reshape((2 * ndat, 2 * ndat)) | ||
| covmat[offset : offset + 2 * ndat, offset : offset + 2 * ndat] = matrix | ||
|
|
||
| edit_covmat("rawdata/HEPData-ins2628732-v1-Table_16.yaml", offset=0) | ||
| edit_covmat("rawdata/HEPData-ins2628732-v1-Table_18.yaml", offset=2 * ndat) | ||
|
|
||
| covmat_list = covmat.flatten().tolist() | ||
| artificial_sys = np.array(covmat_to_artunc(4 * ndat, covmat_list)) | ||
| uncertainties = [] | ||
| uncertainties.append([{"name": "stat", "values": np.zeros(4 * ndat)}]) | ||
|
|
||
| for i in range(len(artificial_sys)): | ||
| name = f"sys_{i}" | ||
| values = artificial_sys[:, i] | ||
| uncertainties.append([{"name": name, "values": values}]) | ||
|
|
||
| return uncertainties | ||
|
|
||
|
|
||
| def symmetrize_errors(delta_plus, delta_minus): | ||
| r"""Compute the symmetrized uncertainty and the shift in data point. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| delta_plus : float | ||
| The top/plus uncertainty with sign | ||
| delta_minus : float | ||
| The bottom/minus uncertainty with sign | ||
|
|
||
| Returns | ||
| ------- | ||
| se_delta : float | ||
| The value to be added to the data point | ||
| se_sigma : float | ||
| The symmetrized uncertainty to be used in commondata | ||
|
|
||
| """ | ||
| semi_diff = (delta_plus + delta_minus) / 2 | ||
| average = (delta_plus - delta_minus) / 2 | ||
| se_delta = semi_diff | ||
| se_sigma = np.sqrt(average * average + 2 * semi_diff * semi_diff) | ||
| return se_delta, se_sigma | ||
|
|
||
|
|
||
| def get_uncertainties(): | ||
| syst_dict = {} | ||
| value_id = 0 | ||
| ndat = 20 | ||
| for i in range(19, 23): | ||
| hepdata_table = f"rawdata/HEPData-ins2628732-v1-Table_{i}.yaml" | ||
|
|
||
| with open(hepdata_table, 'r') as file: | ||
| input = yaml.safe_load(file) | ||
|
|
||
| values = input['dependent_variables'][1]['values'] | ||
|
|
||
| for point_idx, point in enumerate(values): | ||
| for err in point['errors']: | ||
| label = err['label'] | ||
| if 'asymerror' in err: | ||
| minus = err['asymerror']['minus'] | ||
| plus = err['asymerror']['plus'] | ||
| elif 'symerror' in err: | ||
| minus = plus = err['symerror'] | ||
| else: | ||
| raise ValueError(f"Unknown error type in {hepdata_table} for point {point_idx}") | ||
|
|
||
| if label not in syst_dict: | ||
| syst_dict[label] = np.zeros(ndat) | ||
|
|
||
| symmetrized_error = symmetrize_errors(plus, minus) | ||
| syst_dict[label][value_id] = symmetrized_error[1] | ||
| value_id += 1 | ||
|
|
||
| syst_list = [] | ||
| for label, values in syst_dict.items(): | ||
| syst_list.append([{"name": label, "values": values.tolist()}]) | ||
| return syst_list | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| get_uncertainties() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@enocera Do we want to have all of the channels ((W−+D+), (W++D−), (W−+D∗+), (W++D∗−)) put together into one dataset like this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ecole41 Yes we want a single data set with all the channels. Two remarks.