Add sum(LE)/sum(L) average function and tables

compton committed 3 years ago
commit ed2e2d13de
2 changed files with 101 additions and 32 deletions
  1. compton_combiner.py: +97 -30
  2. compton_filter.py: +4 -2
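For reference, the sum(LE)/sum(L) averaging added here (see the averager_on_lums docstring in the diff below) is a luminosity-weighted mean with its propagated statistical error; written out in LaTeX:

    \langle E \rangle = \frac{\sum_i L_i E_i}{\sum_i L_i},
    \qquad
    \delta E^2 = \frac{\sum_i (L_i \, \delta E_i)^2}{\left( \sum_i L_i \right)^2}

where L_i is the luminosity of measurement i and E_i \pm \delta E_i its mean energy. The likelihood-based ultimate_averager stays the default; the old scheme is exposed through the new --old_averaging flag.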

+ 97 - 30
compton_combiner.py

@@ -244,8 +244,73 @@ def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame,
         'used_lum': 0, 
         'comment': 'indirect measurement #2',
     }, pd.DataFrame([])
+
+def averager_on_lums(df: pd.DataFrame) -> dict:
+    """Averaging as <E> = \frac{\sum{L_i E_i}{\sum{L_i}}, 
+    \deltaE^2 = \frac{\sum{(L_i \delta E_i)^2}}{(\sum L_i)^2}
+    
+    Attention: I think it's incorrect way of avaraging.
+    
+    Parameters
+    ----------
+    df : pd.DataFrame
+        input dataframe containg means and spreads
+        
+    Returns
+    -------
+    dict
+        averaged mean and spread
+    """
+    
+    mean_en = (df.e_mean * df.luminosity).sum() / df.luminosity.sum()
+    sys_err = df.e_mean.std()
+    stat_err = np.sqrt( np.sum((df.luminosity * df.e_std)**2) ) / df.luminosity.sum()
+    
+    mean_spread = (df.spread_mean * df.luminosity).sum() / df.luminosity.sum()
+    std_spread = np.sqrt( np.sum((df.luminosity * df.spread_std)**2) ) / df.luminosity.sum()
+        
+    return {
+        'mean_energy': mean_en, 
+        'mean_energy_stat_err': stat_err, 
+        'mean_energy_sys_err': sys_err, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread,
+    }
+
+def ultimate_averager(df: pd.DataFrame) -> dict:
+    """Complete averager for estimation of mean energy and energy spread
+    
+    Parameters
+    ----------
+    df : pd.DataFrame
+        input dataframe containing means and spreads
+        
+    Returns
+    -------
+    dict
+        averaged mean and spread
+    """
+    
+    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
+    m.errordef = 0.5 
+    m.limits['sigma'] = (0, None)
+    m.migrad();
+    # print(m.migrad())
+    sys_err = m.values['sigma']
+    mean_en = m.values['mean']
+    
+    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
+    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
+    return {
+        'mean_energy': mean_en, 
+        'mean_energy_stat_err': m.errors['mean'], 
+        'mean_energy_sys_err': sys_err, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread,
+    }
+
     
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb, averager: callable = ultimate_averager) -> dict:
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     
     
     Parameters
     Parameters
@@ -256,6 +321,8 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
         table of the runs
     compton_df : pd.DataFrame
         table of the comptons
+    averager : callable
+        function for averaging (ultimate_averager or averager_on_lums)
     
     Returns
     -------
@@ -314,42 +381,32 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
         
     
     df = comb_df.loc[~comb_df.compton_start.isna()].copy()
-    # df.spread_mean = np.where(df.spread_mean < 1e-3, 1e-3, df.spread_mean)
     df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
     
     df = df[df.e_std > 0]
     mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
-    # std_energy = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.e_std**2))
     
     good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std(ddof=0)**2 + df.e_std**2)) < 5
-    # print('vals', np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) )
-    # print(df[~good_criterion].elabel.value_counts())
     df = df[good_criterion]
     
-    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
-    m.errordef = 0.5 
-    m.limits['sigma'] = (0, None)
-    m.migrad();
-    # print(m.migrad())
-    sys_err = m.values['sigma']
-    mean_en = m.values['mean']
-    
-    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
-    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
+    averages = averager(df)
     
     res_dict = {
         'energy_point': comb_df.elabel.min(), 
         'first_run': comb_df.run_first.min(),
         'last_run': comb_df.run_last.max(), 
-        'mean_energy': mean_en, 
-        'mean_energy_stat_err': m.errors['mean'], 
-        'mean_energy_sys_err': sys_err, 
-        'mean_spread': mean_spread,
-        'mean_spread_stat_err': std_spread, 
+        'mean_energy': averages['mean_energy'], 
+        'mean_energy_stat_err': averages['mean_energy_stat_err'], 
+        'mean_energy_sys_err': averages['mean_energy_sys_err'], 
+        'mean_spread': averages['mean_spread'],
+        'mean_spread_stat_err': averages['mean_spread_stat_err'], 
         'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(), 
         'comment': '',
     }
-    return res_dict, df
+    
+    comb_df['accepted'] = 0
+    comb_df.loc[df.index, 'accepted'] = 1
+    return res_dict, comb_df
     
 def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
     """Replaces compton measurements written on the border of two energy points with NaNs
@@ -361,16 +418,15 @@ def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
     df_comb = combined_df.set_index('point_idx').join(energy_point_borders, how='left')
     
     df_comb['comptonmeas_in_elabel'] = (df_comb[['elabel_stop_time', 'compton_stop']].min(axis=1) - df_comb[['elabel_start_time', 'compton_start']].max(axis=1))/(df_comb['compton_stop'] - df_comb['compton_start'])
-    #print(df_comb['comptonmeas_in_elabel'].dropna().sort_values())
+    
     df_comb = df_comb.query('comptonmeas_in_elabel < 0.7')
     border_comptons = df_comb.compton_start.values
-    #print(combined_df.compton_start.isin(border_comptons).sum())
-    #print(combined_df.loc[combined_df.compton_start.isin(border_comptons)].elabel.value_counts())
     
-    combined_df.loc[combined_df.compton_start.isin(border_comptons), ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std']] = np.nan
+    combined_df.loc[combined_df.compton_start.isin(border_comptons), 
+                    ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'luminosity']] = np.nan
     return combined_df
 
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None, old_averager: bool = False, energy_point_csv_folder: Optional[str] = None) -> pd.DataFrame:
     
     if pics_folder is not None:
         plt.ioff()
@@ -383,12 +439,14 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
         formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
     
     runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
+    
     combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
     combined_df = combined_df.sort_values(by='run')
-    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
-    
+        
     combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
     combined_df = process_intersected_compton_meas(combined_df)
+    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
+    # combined_df.to_csv('file.csv')
     
     combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
         elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
@@ -406,9 +464,16 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
     
     for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
         try:
-            res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
+            res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb, averager_on_lums if old_averager else ultimate_averager)
+            
+            if energy_point_csv_folder is not None:
+                save_columns = ['elabel', 'run_first', 'run_last', 'luminosity', 'compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'accepted']
+                save_csv(good_df[save_columns].dropna(), f'{energy_point_csv_folder}/{res_dict["energy_point"]}_{res_dict["first_run"]}.csv', update_current=False)
+            
+            good_df = good_df.query('accepted==1')
         except Exception:
             continue
+            
         result_df = result_df.append(res_dict, ignore_index=True)
         
         if pics_folder is not None:
@@ -475,7 +540,9 @@ def main():
     parser.add_argument('--csv_dir', help = 'Save csv file with data in the folder (skip to disable)')
     parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
     parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
+    parser.add_argument('--energy_point_csv_folder', help = 'Path to the directory for saving detailed results for each energy point')
     parser.add_argument('--only_last', action = 'store_true', help = 'Compute values of the last (in Compton_run_avg clbrdb) and new points only')
+    parser.add_argument('--old_averaging', action = 'store_true', help = 'Use old incomplete <E> = \frac{\sum{L_i E_i}}{\sum{L_i}} averaging')
     
     args = parser.parse_args()
     # logging.info(f"Arguments: season: {args.season}, config {args.config}")
@@ -508,7 +575,7 @@ def main():
     
     compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
     
-    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
+    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb, args.old_averaging, args.energy_point_csv_folder)
     
     if args.csv_dir is not None:
         csv_path = os.path.join(args.csv_dir, f'{args.season}.csv')
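For a quick feel of how the two averagers added above differ, here is a minimal runnable sketch on toy data. The column names (e_mean, e_std, luminosity) follow the diff; the repo's Likelihood class is not part of this commit, so the sketch substitutes an explicit luminosity-weighted Gaussian negative log-likelihood, which is an assumption about its behavior.

    import numpy as np
    import pandas as pd
    from iminuit import Minuit

    rng = np.random.default_rng(0)
    n = 20
    df = pd.DataFrame({
        'e_mean': 508.0 + rng.normal(0, 0.05, n),  # toy mean energies, MeV
        'e_std': np.full(n, 0.03),                 # per-measurement errors
        'luminosity': rng.uniform(0.5, 2.0, n),    # toy luminosities
    })

    # Old scheme (averager_on_lums): luminosity-weighted mean
    mean_en = (df.e_mean * df.luminosity).sum() / df.luminosity.sum()
    stat_err = np.sqrt(np.sum((df.luminosity * df.e_std)**2)) / df.luminosity.sum()

    # New scheme (ultimate_averager): fit a common mean plus an extra
    # scatter term sigma; the luminosity weighting here mirrors the
    # df.luminosity/df.luminosity.mean() factor seen in std_spread above
    def nll(mean, sigma):
        var = df.e_std**2 + sigma**2
        w = df.luminosity / df.luminosity.mean()
        return 0.5 * np.sum(w * ((df.e_mean - mean)**2 / var + np.log(var)))

    m = Minuit(nll, mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
    m.errordef = 0.5  # 0.5 for a negative log-likelihood
    m.limits['sigma'] = (0, None)
    m.migrad()

    print(f'old: {mean_en:.4f} +- {stat_err:.4f}')
    print(f'new: {m.values["mean"]:.4f} +- {m.errors["mean"]:.4f}, sys {m.values["sigma"]:.4f}')

The fitted sigma plays the role of mean_energy_sys_err, so the new averager separates point-to-point scatter from the purely statistical error instead of folding everything into one weight.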

+ 4 - 2
compton_filter.py

@@ -76,11 +76,11 @@ class SlowdbComptonHandler(PostgreSQLHandler):
             clear table
         """
         
-        if len(table) == 0:
+        n_rows = len(table)
+        if n_rows == 0:
             logging.info("Empty list. No overlapping rows")
             logging.info("Empty list. No overlapping rows")
             return table
             return table
         
         
-        logging.info("Drop overlapping rows in list representation")
         table = table[::-1] # sort table by time from newest to oldest
         min_time = table[0][6]
         overlapped_idxs = list()
@@ -95,6 +95,8 @@ class SlowdbComptonHandler(PostgreSQLHandler):
         for index in sorted(overlapped_idxs, reverse=True): # pop from the back so remaining indices stay valid
             table.pop(index)
         
+        logging.info(f"Drop overlapping rows in list representation. Survived {len(table)} from {n_rows}")
+        
         return table[::-1]
     
     def load_tables(self, tables: List[str], daterange: Optional[datetime] = None):
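A hypothetical invocation exercising the new options (argument names are taken from the diff; the season value is a placeholder, and other required arguments may exist in parts of main() not shown here):

    python compton_combiner.py --season SEASON_NAME --csv_dir ./avg_csv \
        --energy_point_csv_folder ./point_csv --old_averaging

With --energy_point_csv_folder set, calculate_point's per-measurement table (including the new accepted column) is written out via save_csv for every energy point; --old_averaging switches the averaging from ultimate_averager back to averager_on_lums.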