|
|
@@ -35,7 +35,7 @@ class RunsDBHandler():
|
|
|
self.cur.execute("""DESCRIBE Runlog""")
|
|
|
return self.cur.fetchall()
|
|
|
|
|
|
- def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
|
|
|
+ def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
|
|
|
"""
|
|
|
Parameters
|
|
|
----------
|
|
|
@@ -43,6 +43,10 @@ class RunsDBHandler():
|
|
|
selection range
|
|
|
int range defines an interval in runs
|
|
|
datetime range defines a time interval (NSK: +7:00 time)
|
|
|
+ energy_point : Optional[float]
|
|
|
+ energy point name, MeV (default is None)
|
|
|
+ select_bad_runs : bool
|
|
|
+ if True, select only runs whose quality label is not "Y" (default is False)
|
|
|
"""
|
|
|
|
|
|
cond = ""
|
|
|
@@ -56,6 +60,14 @@ class RunsDBHandler():
|
|
|
cond += " AND stoptime <= %s"
|
|
|
else:
|
|
|
range = (range[0], )
|
|
|
+
|
|
|
+ energy_cond = ""
|
|
|
+ if energy_point is not None:
|
|
|
+ energy_cond = f" AND energy = {energy_point}"
|
|
|
+
|
|
|
+ quality_cond = ' quality = "Y" '
|
|
|
+ if select_bad_runs:
|
|
|
+ quality_cond = ' quality <> "Y" '
|
|
|
|
|
|
sql_query = f"""
|
|
|
SELECT
|
|
|
@@ -66,8 +78,9 @@ class RunsDBHandler():
|
|
|
luminosity
|
|
|
FROM Runlog
|
|
|
WHERE
|
|
|
- quality = "Y"
|
|
|
+ {quality_cond}
|
|
|
{cond}
|
|
|
+ {energy_cond}
|
|
|
AND luminosity > 0
|
|
|
AND stoptime > starttime
|
|
|
AND nevent > 0
|
|
|
@@ -196,7 +209,41 @@ class Likelihood():
|
|
|
ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
|
|
|
return -ln_L
|
|
|
|
|
|
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> dict:
|
|
|
+def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
|
|
|
+ # estimate energy by the nearest points
|
|
|
+ min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
|
|
|
+ max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
|
|
|
+
|
|
|
+ nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
|
|
|
+ nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
|
|
|
+
|
|
|
+ # regularization: clamp uncertainties away from zero so the inverse-variance weights below stay finite
+ nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
|
|
|
+ nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
|
|
|
+ nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
|
|
|
+ nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
|
|
|
+
|
|
|
+ mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
|
|
|
+ mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
|
|
|
+ std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
|
|
|
+ std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
|
|
|
+ sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
|
|
|
+
|
|
|
+
|
|
|
+ return {
|
|
|
+ 'energy_point': comb_df.elabel.min(),
|
|
|
+ 'first_run': comb_df.run_first.min(),
|
|
|
+ 'last_run': comb_df.run_last.max(),
|
|
|
+ 'mean_energy': mean_energy,
|
|
|
+ 'mean_energy_stat_err': std_energy,
|
|
|
+ 'mean_energy_sys_err': sys_energy,
|
|
|
+ 'mean_spread': mean_spread,
|
|
|
+ 'mean_spread_stat_err': std_spread,
|
|
|
+ 'used_lum': 0,
|
|
|
+ 'comment': 'indirect measurement #2',
|
|
|
+ }, pd.DataFrame([])
|
|
|
+
|
|
|
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
|
|
|
"""Calculates parameters of the energy (mean, std, spread) in this dataFrame
|
|
|
|
|
|
Parameters
|
|
|
@@ -216,40 +263,45 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
|
|
|
|
|
|
if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
|
|
|
# no direct measurements of the compton during data runs
|
|
|
- # estimate energy by the nearest points
|
|
|
- # print('Hello')
|
|
|
- min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
|
|
|
- max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
|
|
|
+
|
|
|
+ min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
|
|
|
+ max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
|
|
|
+ dlt0 = timedelta(days=1)
|
|
|
+ # asymmetric time window: the energy is stable only after the point starts, so extend the search forward only
|
|
|
+ runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point = comb_df.iloc[0].at['elabel'], select_bad_runs = True)
|
|
|
|
|
|
- nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
|
|
|
- nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
|
|
|
+ if len(runs_df_with_bads[0]) == 0:
|
|
|
+ return __estimate_point_with_closest(comb_df, runs_df, compton_df)
|
|
|
|
|
|
- # regulatization
|
|
|
- nearest_row_before['data'][1] = max(nearest_row_before['data'][3], 1e-3)
|
|
|
- nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
|
|
|
- nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
|
|
|
- nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
|
|
|
-
|
|
|
- mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
|
|
|
- mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
|
|
|
- std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
|
|
|
- std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
|
|
|
- sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
|
|
|
+ runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns = runs_df_with_bads[1])
|
|
|
+ min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
|
|
|
|
|
|
+ compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
|
|
|
+ res_df = pd.DataFrame(list(map(lambda x: {
|
|
|
+ 'compton_start': x[1]['begintime'],
|
|
|
+ 'compton_stop': x[1]['endtime'],
|
|
|
+ 'e_mean': float(x[1]['data'][0]),
|
|
|
+ 'e_std': float(x[1]['data'][1]),
|
|
|
+ 'spread_mean': float(x[1]['data'][2]),
|
|
|
+ 'spread_std': float(x[1]['data'][3]),
|
|
|
+ }, compton_meas.iterrows())))
|
|
|
+ res_df = res_df.query(f'abs(e_mean -{comb_df.iloc[0].at["elabel"]})<5')
|
|
|
|
|
|
+ if len(res_df) == 0:
|
|
|
+ return __estimate_point_with_closest(comb_df, runs_df, compton_df)
|
|
|
+
|
|
|
return {
|
|
|
'energy_point': comb_df.elabel.min(),
|
|
|
'first_run': comb_df.run_first.min(),
|
|
|
'last_run': comb_df.run_last.max(),
|
|
|
- 'mean_energy': mean_energy,
|
|
|
- 'mean_energy_stat_err': std_energy,
|
|
|
- 'mean_energy_sys_err': sys_energy,
|
|
|
- 'mean_spread': mean_spread,
|
|
|
- 'mean_spread_stat_err': std_spread,
|
|
|
+ 'mean_energy': res_df.e_mean.mean(),
|
|
|
+ 'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)), #res_df.e_std.mean()/np.sqrt(len(res_df)),
|
|
|
+ 'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()),
|
|
|
+ 'mean_spread': res_df.spread_mean.mean(),
|
|
|
+ 'mean_spread_stat_err':np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
|
|
|
'used_lum': 0,
|
|
|
- 'comment': 'indirect measurement',
|
|
|
- }, pd.DataFrame()
|
|
|
-
|
|
|
+ 'comment': 'indirect measurement #1',
|
|
|
+ }, res_df
|
|
|
|
|
|
|
|
|
df = comb_df.copy()
|
|
|
@@ -287,7 +339,7 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
|
|
|
}
|
|
|
return res_dict, df
|
|
|
|
|
|
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
|
|
|
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
|
|
|
|
|
|
if pics_folder is not None:
|
|
|
plt.ioff()
|
|
|
@@ -320,7 +372,7 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
|
|
|
result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
|
|
|
|
|
|
for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
|
|
|
- res_dict, good_df = calculate_point(table, runs_df, compton_df)
|
|
|
+ res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
|
|
|
result_df = result_df.append(res_dict, ignore_index=True)
|
|
|
|
|
|
if pics_folder is not None:
|
|
|
@@ -407,7 +459,7 @@ def main():
|
|
|
|
|
|
compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
|
|
|
|
|
|
- cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder)
|
|
|
+ cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
|
|
|
|
|
|
if args.csv:
|
|
|
cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
|