|
@@ -244,8 +244,73 @@ def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame,
|
|
|
'used_lum': 0,
|
|
|
'comment': 'indirect measurement #2',
|
|
|
}, pd.DataFrame([])
|
|
|
+
|
|
|
+def averager_on_lums(df: pd.DataFrame) -> dict:
|
|
|
+    r"""Averaging as \frac{\sum{L_i E_i}}{\sum{L_i}},
|
|
|
+    \delta E^2 = \frac{\sum{(L_i \delta E_i)^2}}{(\sum L_i)^2}
|
|
|
+
|
|
|
+    Attention: I think it's an incorrect way of averaging.
|
|
|
+
|
|
|
+ Parameters
|
|
|
+ ----------
|
|
|
+ df : pd.DataFrame
|
|
|
+        input dataframe containing means and spreads
|
|
|
+
|
|
|
+ Returns
|
|
|
+ -------
|
|
|
+ dict
|
|
|
+ averaged mean and spread
|
|
|
+ """
|
|
|
+
|
|
|
+ mean_en = (df.e_mean * df.luminosity).sum() / df.luminosity.sum()
|
|
|
+ sys_err = df.e_mean.std()
|
|
|
+ stat_err = np.sqrt( np.sum((df.luminosity * df.e_std)**2) ) / df.luminosity.sum()
|
|
|
+
|
|
|
+ mean_spread = (df.spread_mean * df.luminosity).sum() / df.luminosity.sum()
|
|
|
+ std_spread = np.sqrt( np.sum((df.luminosity * df.spread_std)**2) ) / df.luminosity.sum()
|
|
|
+
|
|
|
+ return {
|
|
|
+ 'mean_energy': mean_en,
|
|
|
+ 'mean_energy_stat_err': stat_err,
|
|
|
+ 'mean_energy_sys_err': sys_err,
|
|
|
+ 'mean_spread': mean_spread,
|
|
|
+ 'mean_spread_stat_err': std_spread,
|
|
|
+ }
|
|
|
+
|
|
|
+def ultimate_averager(df: pd.DataFrame) -> dict:
|
|
|
+ """Complete averager for estimation of mean energy and energy spread
|
|
|
+
|
|
|
+ Parameters
|
|
|
+ ----------
|
|
|
+ df : pd.DataFrame
|
|
|
+ input dataframe containing means and spreads
|
|
|
+
|
|
|
+ Returns
|
|
|
+ -------
|
|
|
+ dict
|
|
|
+ averaged mean and spread
|
|
|
+ """
|
|
|
+
|
|
|
+ m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
|
|
|
+ m.errordef = 0.5
|
|
|
+ m.limits['sigma'] = (0, None)
|
|
|
+ m.migrad();
|
|
|
+ # print(m.migrad())
|
|
|
+ sys_err = m.values['sigma']
|
|
|
+ mean_en = m.values['mean']
|
|
|
+
|
|
|
+ mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
|
|
|
+ std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
|
|
|
+ return {
|
|
|
+ 'mean_energy': mean_en,
|
|
|
+ 'mean_energy_stat_err': m.errors['mean'],
|
|
|
+ 'mean_energy_sys_err': sys_err,
|
|
|
+ 'mean_spread': mean_spread,
|
|
|
+ 'mean_spread_stat_err': std_spread,
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
|
|
|
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb, averager: callable = ultimate_averager) -> dict:
|
|
|
"""Calculates parameters of the energy (mean, std, spread) in this dataFrame
|
|
|
|
|
|
Parameters
|
|
@@ -256,6 +321,8 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
|
|
|
table of the runs
|
|
|
compton_df : pd.DataFrame
|
|
|
table of the comptons
|
|
|
+ averager : callable
|
|
|
+ function for averaging (ultimate_averager or averager_on_lums)
|
|
|
|
|
|
Returns
|
|
|
-------
|
|
@@ -314,42 +381,32 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
|
|
|
|
|
|
|
|
|
df = comb_df.loc[~comb_df.compton_start.isna()].copy()
|
|
|
- # df.spread_mean = np.where(df.spread_mean < 1e-3, 1e-3, df.spread_mean)
|
|
|
df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
|
|
|
|
|
|
df = df[df.e_std > 0]
|
|
|
mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
|
|
|
- # std_energy = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.e_std**2))
|
|
|
|
|
|
good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std(ddof=0)**2 + df.e_std**2)) < 5
|
|
|
- # print('vals', np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) )
|
|
|
- # print(df[~good_criterion].elabel.value_counts())
|
|
|
df = df[good_criterion]
|
|
|
|
|
|
- m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
|
|
|
- m.errordef = 0.5
|
|
|
- m.limits['sigma'] = (0, None)
|
|
|
- m.migrad();
|
|
|
- # print(m.migrad())
|
|
|
- sys_err = m.values['sigma']
|
|
|
- mean_en = m.values['mean']
|
|
|
-
|
|
|
- mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
|
|
|
- std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
|
|
|
+ averages = averager(df)
|
|
|
|
|
|
res_dict = {
|
|
|
'energy_point': comb_df.elabel.min(),
|
|
|
'first_run': comb_df.run_first.min(),
|
|
|
'last_run': comb_df.run_last.max(),
|
|
|
- 'mean_energy': mean_en,
|
|
|
- 'mean_energy_stat_err': m.errors['mean'],
|
|
|
- 'mean_energy_sys_err': sys_err,
|
|
|
- 'mean_spread': mean_spread,
|
|
|
- 'mean_spread_stat_err': std_spread,
|
|
|
+ 'mean_energy': averages['mean_energy'],
|
|
|
+ 'mean_energy_stat_err': averages['mean_energy_stat_err'],
|
|
|
+ 'mean_energy_sys_err': averages['mean_energy_sys_err'],
|
|
|
+ 'mean_spread': averages['mean_spread'],
|
|
|
+ 'mean_spread_stat_err': averages['mean_spread_stat_err'],
|
|
|
'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(),
|
|
|
'comment': '',
|
|
|
}
|
|
|
- return res_dict, df
|
|
|
+
|
|
|
+ comb_df['accepted'] = 0
|
|
|
+ comb_df.loc[df.index, 'accepted'] = 1
|
|
|
+ return res_dict, comb_df
|
|
|
|
|
|
def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
|
|
|
"""Replaces compton measurements writed on the border of two energy points on NaNs
|
|
@@ -361,16 +418,15 @@ def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
|
|
|
df_comb = combined_df.set_index('point_idx').join(energy_point_borders, how='left')
|
|
|
|
|
|
df_comb['comptonmeas_in_elabel'] = (df_comb[['elabel_stop_time', 'compton_stop']].min(axis=1) - df_comb[['elabel_start_time', 'compton_start']].max(axis=1))/(df_comb['compton_stop'] - df_comb['compton_start'])
|
|
|
- #print(df_comb['comptonmeas_in_elabel'].dropna().sort_values())
|
|
|
+
|
|
|
df_comb = df_comb.query('comptonmeas_in_elabel < 0.7')
|
|
|
border_comptons = df_comb.compton_start.values
|
|
|
- #print(combined_df.compton_start.isin(border_comptons).sum())
|
|
|
- #print(combined_df.loc[combined_df.compton_start.isin(border_comptons)].elabel.value_counts())
|
|
|
|
|
|
- combined_df.loc[combined_df.compton_start.isin(border_comptons), ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std']] = np.nan
|
|
|
+ combined_df.loc[combined_df.compton_start.isin(border_comptons),
|
|
|
+ ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'luminosity']] = np.nan
|
|
|
return combined_df
|
|
|
|
|
|
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
|
|
|
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None, old_averager: bool = False, energy_point_csv_folder: Optional[str] = None) -> pd.DataFrame:
|
|
|
|
|
|
if pics_folder is not None:
|
|
|
plt.ioff()
|
|
@@ -383,12 +439,14 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
|
|
|
formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
|
|
|
|
|
|
runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
|
|
|
+
|
|
|
combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
|
|
|
combined_df = combined_df.sort_values(by='run')
|
|
|
- combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
|
|
|
-
|
|
|
+
|
|
|
combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
|
|
|
combined_df = process_intersected_compton_meas(combined_df)
|
|
|
+ combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
|
|
|
+ # combined_df.to_csv('file.csv')
|
|
|
|
|
|
combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
|
|
|
elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
|
|
@@ -406,9 +464,16 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
|
|
|
|
|
|
for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
|
|
|
try:
|
|
|
- res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
|
|
|
+ res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb, averager_on_lums if old_averager else ultimate_averager)
|
|
|
+
|
|
|
+ if energy_point_csv_folder is not None:
|
|
|
+ save_columns = ['elabel', 'run_first', 'run_last', 'luminosity', 'compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'accepted']
|
|
|
+ save_csv(good_df[save_columns].dropna(), f'{energy_point_csv_folder}/{res_dict["energy_point"]}_{res_dict["first_run"]}.csv', update_current=False)
|
|
|
+
|
|
|
+ good_df = good_df.query('accepted==1')
|
|
|
except Exception:
|
|
|
continue
|
|
|
+
|
|
|
result_df = result_df.append(res_dict, ignore_index=True)
|
|
|
|
|
|
if pics_folder is not None:
|
|
@@ -475,7 +540,9 @@ def main():
|
|
|
parser.add_argument('--csv_dir', help = 'Save csv file with data in the folder or not if skip it')
|
|
|
parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
|
|
|
parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
|
|
|
+ parser.add_argument('--energy_point_csv_folder', help = 'Path to the directory for saving the result in detail for each energy point')
|
|
|
parser.add_argument('--only_last', action = 'store_true', help = 'Compute values of the last (in Compton_run_avg clbrdb) and new points only')
|
|
|
+ parser.add_argument('--old_averaging', action = 'store_true', help = 'Use old incomplete <E> = \frac{\sum{L_i E_i}{\sum{L_i}} averaging')
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
# logging.info(f"Arguments: season: {args.season}, config {args.config}")
|
|
@@ -508,7 +575,7 @@ def main():
|
|
|
|
|
|
compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
|
|
|
|
|
|
- cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
|
|
|
+ cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb, args.old_averaging, args.energy_point_csv_folder)
|
|
|
|
|
|
if args.csv_dir is not None:
|
|
|
csv_path = os.path.join(args.csv_dir, f'{args.season}.csv')
|