[added] convergence of sampled variables

86ce7490 · Greg Henning · c25b3669 · 86ce7490 · 86ce7490 · 86ce7490
Commit 86ce7490 authored 1 year ago by Greg Henning
--- a/analysis/run_on_cc.sh
+++ b/analysis/run_on_cc.sh
@@ -85,6 +85,7 @@ echo "#... Collecting ouputs of MC iteration"
 echo "#... Checking convergence"
 ./run_task tasks/do_MC_convergence_report.py
 echo "#... Making mean, std, cov and corr"
+./run_task tasks/do_MC_calcmean.py
 echo "# Removing job venv"
 rm -rf `hostname`_env


--- a/analysis/scripts/check_sampled_values.py
+++ b/analysis/scripts/check_sampled_values.py
+#!/usr/env python
+# -*- coding: utf-8 -*-
+# -*- format: python -*-
+# -*- author: G. Henning -*-
+# -*- created: feb 2024 -*-
+
+'''
+Parse MC sampled configuration files and check whether the sampled distributions overlap with the target ones
+'''
+
+import argparse
+
+import yaml 
+import pathlib
+
+from statistics import NormalDist
+
+import numpy as np
+
+
+def find_in_files(key: str,
+                  flist: list,
+                  superkey: str = None) -> list:
+    '''
+    Collect the value stored under `key` in each yaml file of `flist`.
+
+    If `superkey` is given, `key` is looked up in the mapping stored under
+    `superkey`; a file without `superkey` is searched at its top level.
+    A file in which `key` is absent contributes `None`.
+
+    Returns one value per file, in the order of `flist`.
+    '''
+    values = []
+    for one_file in flist:
+        # Context manager so that each handle is closed right away instead of
+        # staying open until garbage collection (flist can be long).
+        # NOTE(review): yaml.Loader can build arbitrary python objects;
+        # consider yaml.safe_load if the sampled files hold plain data only.
+        with open(one_file, 'r') as file_handle:
+            file_dict = yaml.load(file_handle, Loader=yaml.Loader)
+        if superkey:
+            values.append(file_dict.get(superkey, file_dict).get(key))
+        else:
+            values.append(file_dict.get(key))
+    return values
+
+def get_mean_std(arr: list) -> tuple:
+    '''Return (mean, standard deviation) of `arr` as plain python floats.'''
+    samples = np.array(arr)
+    mean_value = float(np.mean(samples))
+    # numpy default (ddof=0), i.e. the population standard deviation
+    std_value = float(np.std(samples))
+    return mean_value, std_value
+
+
+def get_overlap(m1, s1,
+                m2, s2) -> float:
+    '''
+    Overlapping area of two normal distributions.
+
+    (m1, s1): mean and standard deviation of the reference distribution,
+    (m2, s2): mean and standard deviation of the sampled distribution.
+    The value is the one computed by statistics.NormalDist.overlap.
+    '''
+    return NormalDist(mu=m1, sigma=s1).overlap(NormalDist(mu=m2, sigma=s2))
+
+def parse_dict(data: dict,
+               mc_files: list,
+               superkey = None) -> dict:
+    '''
+    Compare every uncertain variable of `data` with its sampled values.
+
+    A variable `k` is considered when `data` also holds `u_k` (reported as a
+    standard deviation) or `pm_k` (reported as a half width); `u_k` takes
+    precedence when both exist. Nested dictionaries are parsed recursively
+    and their variables are reported as "<parent key>/<name>".
+
+    Returns a dict mapping each variable name to its 'reference value', its
+    'sampled value' (mean and spread over `mc_files`) and the 'overlap' of
+    the two corresponding normal distributions.
+    '''
+    # Factor turning a half width into a standard deviation.
+    # NOTE(review): 0.577 ~ 1/sqrt(3), the ratio for a uniform distribution;
+    # confirm that this matches how the values are actually sampled.
+    half_width_to_std = 0.577
+    r_dict = {}
+    for k, v in data.items():
+        if isinstance(v, dict):
+            # NOTE(review): only the direct parent key is passed down, so the
+            # lookup in the sampled files is at most one level deep.
+            r_dict.update(parse_dict(v, mc_files, superkey=k))
+            continue
+        if f'u_{k}' in data:
+            spread_name = 'stdev'
+            ref_spread = data[f'u_{k}']
+            ref_std = ref_spread
+        elif f'pm_{k}' in data:
+            spread_name = 'half width'
+            ref_spread = data[f'pm_{k}']
+            ref_std = ref_spread * half_width_to_std
+        else:
+            # no uncertainty attached to this entry: nothing to check
+            continue
+        sampled_mean, sampled_std = get_mean_std(find_in_files(k, mc_files, superkey))
+        # report the sampled spread in the same convention as the reference
+        if spread_name == 'stdev':
+            sampled_spread = sampled_std
+        else:
+            sampled_spread = sampled_std / half_width_to_std
+        key_name = f"{superkey}/{k}" if superkey else k
+        r_dict[key_name] = {
+            'reference value': {'mean': v, spread_name: ref_spread},
+            'sampled value': {'mean': sampled_mean, spread_name: sampled_spread},
+            'overlap': get_overlap(v, ref_std, sampled_mean, sampled_std),
+        }
+    return r_dict
+
+
+
+def _main(mc_source_file: str,
+          mc_files_pattern: str) -> None:
+    '''
+    Print, as yaml, the comparison of reference and sampled variables.
+
+    mc_source_file: yaml file with a 'dir' entry (directory of the reference
+        files) and a 'files' entry (names of the reference files).
+    mc_files_pattern: glob pattern prefix; each reference file name is
+        appended to it to find the sampled copies of that file.
+    '''
+    gen_dict = {}
+    # Files are opened through context managers so that they are closed
+    # as soon as they have been parsed.
+    # NOTE(review): yaml.Loader can build arbitrary python objects;
+    # consider yaml.safe_load here, as is done for the data files below.
+    with open(mc_source_file, 'r') as source_handle:
+        src_files_info = yaml.load(source_handle, Loader=yaml.Loader)
+    src_files_dir = src_files_info['dir']
+    for src_file in src_files_info['files']:
+        # reference values and their uncertainties
+        with open(f"./{src_files_dir}/{src_file}", 'r') as data_handle:
+            data = yaml.safe_load(data_handle)
+        # sampled copies of this file
+        pattern = f"{mc_files_pattern}{src_file}"
+        mc_files = list(pathlib.Path('.').glob(pattern))
+        gen_dict[src_file] = {
+            'pattern': pattern,
+            'number of sample files': len(mc_files),
+            'variables': parse_dict(data, mc_files),
+        }
+
+    print(yaml.dump(gen_dict))
+    
+if __name__ == "__main__":
+    # Command line entry point; the module docstring is the help description.
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument('--mc_source_file',
+                     type=str, nargs='?',
+                     help="yaml file with the files to sample")
+    cli.add_argument('--mc_files_pattern',
+                     type=str, nargs='?',
+                     default='./output/MC_output/*/etc_',
+                     help="pattern of sampled files")
+    options = cli.parse_args()
+    try:
+        _main(mc_source_file=options.mc_source_file,
+              mc_files_pattern=options.mc_files_pattern)
+    except Exception as failure:
+        # Any failure is reported on stdout; the script still ends normally.
+        print("error")
+        print(failure)
--- a/analysis/tasks/do_MC_convergence_report.py
+++ b/analysis/tasks/do_MC_convergence_report.py
@@ -19,6 +19,24 @@ the_transitions = yaml.load(open('etc/transitions_to_look_at.yaml'),
                        Loader=yaml.Loader)


+
+def task_convergence_of_sampled_values():
+    '''Testing if sampled parameters match the "target" distribution'''
+    # The '*' is backslash-escaped because the pattern is placed on the
+    # command line below -- presumably so that the script, not the shell,
+    # expands it.
+    sampled_pattern = './output/MC_output/\\*/etc_'
+    source_list = 'etc/mc_source_files.yaml'
+    report_dir = "./output/MC_convergence"
+    report_file = f"{report_dir}/sampling_report.yaml"
+    # the report is whatever the script prints on stdout
+    check_cmd = ("./env.run scripts/check_sampled_values.py"
+                 f" --mc_files_pattern={sampled_pattern}"
+                 f" --mc_source_file={source_list}"
+                 f" > {report_file}")
+    return {
+        'file_dep': [source_list],
+        'targets': [report_file],
+        'actions': [
+            (create_folder, (report_dir,)),
+            check_cmd,
+        ],
+    }
+
+
+
 def task_CV_flux():
    '''Cross validation of flux'''
    file_pattern = f"./output/MC_output/*/*_flux_smooth.h1.txt"
@@ -57,5 +75,18 @@ def task_CV_transitions():
            ]
        }

-
+# def task_CV_transitions_merge():
+#     '''all previous files merged into one'''
+#     output_dir = "./output/MC_convergence"
+#     report_partern = f"{output_dir}/*_cv.report.yaml"
+#     source_files = tuple(pathlib.Path('.').glob(report_partern))
+#     target_file = f"{output_dir}/transitions_cv_report.yaml"
+#     return {
+#         'file_dep': source_files,
+#         'targets': [target_file],
+#         'actions':[
+#             (create_folder, (output_dir,)),
+#             f"cat {' '.join(map(str, source_files))} > {target_file}",
+#             ]
+#     }
 # EOF
--- a/analysis/tasks/do_ge_timelimits.py
+++ b/analysis/tasks/do_ge_timelimits.py
@@ -44,7 +44,8 @@ def task_make_tof_window():
                    'etc/transitions_to_look_at.yaml',
                    'etc/ge_dof.yaml',
                    'scripts/energy_to_time_list.py',
-                    'etc/detectors_jitters.yaml'
+                    'etc/detectors_jitters.yaml',
+                    'etc/uuid.txt'
                            ],
                'actions': [
                            (create_folder, (f"./output/transitions/{this_transition}/",)),

--- a/analysis/tasks/do_start_new_iter.py
+++ b/analysis/tasks/do_start_new_iter.py
@@ -35,7 +35,7 @@ iter_type = 'mc' if get_var('iter', 'mc')=='mc' else 'center'

    
 def mc_this_dict(src_d: dict) -> dict:
-    '''Tool function to automatically vary the variables in a '''
+    '''Tool function to automatically vary the variables in a dictionary'''
    new_k = {}
    for k, v in src_d.items():
        if type(v) == dict: