
New version of scripts

compton · 3 years ago
commit 05cd7d6ad0
3 changed files with 124 additions and 43 deletions
  1. compton_combiner.py (+98 -33)
  2. compton_filter.py (+25 -8)
  3. requirements.txt (+1 -2)

compton_combiner.py (+98 -33)

@@ -35,7 +35,7 @@ class RunsDBHandler():
         self.cur.execute("""DESCRIBE Runlog""")
         return self.cur.fetchall()
         
-    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
         """
         Parameters
         ----------
@@ -43,6 +43,10 @@ class RunsDBHandler():
             selection range
             int range defines an interval in runs
             datetime range defines a time interval (NSK: +7:00 time)
+        energy_point : Optional[float]
+            energy point value, MeV (default is None)
+        select_bad_runs : bool
+            if True, select runs with quality labels other than "Y" (default is False)
         """
         
         cond = ""
@@ -51,7 +55,19 @@ class RunsDBHandler():
             if range[1] is not None:
                 cond += f" AND run <= {range[1]} "
         elif isinstance(range[0], datetime):
-            cond = f" AND stoptime BETWEEN %s AND %s"
+            cond = f" AND starttime >= %s "
+            if range[1] is not None:
+                cond += " AND stoptime <= %s"
+            else:
+                range = (range[0], )
+                
+        energy_cond = ""
+        if energy_point is not None:
+            energy_cond = f" AND energy = {energy_point}"
+            
+        quality_cond = ' quality = "Y" '
+        if select_bad_runs:
+            quality_cond = ' quality <> "Y" '
             
         sql_query = f"""
         SELECT 
@@ -62,8 +78,9 @@ class RunsDBHandler():
             luminosity
         FROM Runlog 
         WHERE 
-            quality = "Y"
+            {quality_cond}
             {cond}
+            {energy_cond}
             AND luminosity > 0
             AND stoptime > starttime
             AND nevent > 0
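
For illustration, a minimal usage sketch of the extended `load_tables` interface (the date and energy value are hypothetical; assumes an initialized `RunsDBHandler` named `rdb` as in `main()` below):

```python
from datetime import datetime
import pandas as pd

# Hypothetical call: select "bad" runs (quality <> "Y") at the 508.5 MeV
# point, starting from 2021-05-01 with an open upper time bound
# (range[1] = None takes only the "starttime >= %s" branch above).
res = rdb.load_tables(
    (datetime(2021, 5, 1), None),
    energy_point=508.5,
    select_bad_runs=True,
)
bad_runs_df = pd.DataFrame(res[0], columns=res[1])
```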
@@ -192,7 +209,41 @@ class Likelihood():
         ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
         return -ln_L
     
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> dict:
+def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
+    # estimate energy by the nearest points
+    min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+    max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+
+    nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
+    nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+
+    # regularization: clamp the statistical errors away from zero
+    nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+    nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+    nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+    nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+
+    mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+    mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+    std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+    std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+    sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+
+
+    return {
+        'energy_point': comb_df.elabel.min(),
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(), 
+        'mean_energy': mean_energy, 
+        'mean_energy_stat_err': std_energy, 
+        'mean_energy_sys_err': sys_energy, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread, 
+        'used_lum': 0, 
+        'comment': 'indirect measurement #2',
+    }, pd.DataFrame([])
+    
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     
     Parameters
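
The statistical errors of the two nearest measurements above are combined by inverse variances, s = 1/sqrt(1/s1^2 + 1/s2^2); a minimal standalone sketch of that rule:

```python
import numpy as np

def combine_errors(s1: float, s2: float) -> float:
    # Inverse-variance combination of two independent estimates;
    # the result always lies below min(s1, s2).
    return 1.0 / np.sqrt(1.0 / s1**2 + 1.0 / s2**2)

assert abs(combine_errors(0.03, 0.04) - 0.024) < 1e-6
```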
@@ -212,40 +263,45 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
         
     if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
         # no direct measurements of the compton during data runs
-        # estimate energy by the nearest points
-        # print('Hello')
-        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
-        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+                
+        min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+        dlt0 = timedelta(days=1)
+        # asymmetric time window: the energy can be stable only after the point starts
+        runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point = comb_df.iloc[0].at['elabel'], select_bad_runs = True)
         
-        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
-        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+        if len(runs_df_with_bads[0]) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
         
-        # regulatization
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][3], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-                
-        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
-        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
-        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
-        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
-        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+        runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns = runs_df_with_bads[1])
+        min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
         
+        compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
+        res_df = pd.DataFrame(list(map(lambda x: {
+            'compton_start': x[1]['begintime'],
+            'compton_stop': x[1]['endtime'],
+            'e_mean': float(x[1]['data'][0]),
+            'e_std': float(x[1]['data'][1]),
+            'spread_mean': float(x[1]['data'][2]),
+            'spread_std': float(x[1]['data'][3]),
+        }, compton_meas.iterrows())))
+        res_df = res_df.query(f'abs(e_mean - {comb_df.iloc[0].at["elabel"]}) < 5')
         
+        if len(res_df) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
+                        
         return {
             'energy_point': comb_df.elabel.min(),
             'first_run': comb_df.run_first.min(),
             'last_run': comb_df.run_last.max(), 
-            'mean_energy': mean_energy, 
-            'mean_energy_stat_err': std_energy, 
-            'mean_energy_sys_err': sys_energy, 
-            'mean_spread': mean_spread,
-            'mean_spread_stat_err': std_spread, 
+            'mean_energy': res_df.e_mean.mean(), 
+            'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)),  # alternative: res_df.e_std.mean()/np.sqrt(len(res_df))
+            'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()), 
+            'mean_spread': res_df.spread_mean.mean(),
+            'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
             'used_lum': 0, 
-            'comment': 'indirect measurement',
-        }, pd.DataFrame()
-        
+            'comment': 'indirect measurement #1',
+        }, res_df
         
     
     df = comb_df.copy()
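
A condensed sketch of the "indirect measurement #1" selection above (inputs are hypothetical; assumes a `compton_df` with `begintime`/`endtime` columns and a `data` array `[e_mean, e_std, spread_mean, spread_std]` per row). Note that the predicate keeps measurements whose start or end falls inside the window; a measurement fully spanning the window would not match it:

```python
import pandas as pd

t0 = pd.Timestamp('2021-05-01 10:00')  # hypothetical window start
t1 = pd.Timestamp('2021-05-02 10:00')  # hypothetical window end
elabel = 508.5                         # hypothetical point label, MeV

# Keep measurements starting or ending inside the [t0, t1] window,
# then keep only those within 5 MeV of the point label.
window = compton_df.query(
    '((begintime >= @t0) & (begintime <= @t1)) | ((endtime >= @t0) & (endtime <= @t1))'
)
e_mean = window['data'].map(lambda d: float(d[0]))
window = window[(e_mean - elabel).abs() < 5]
```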
@@ -283,7 +339,7 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
     }
     return res_dict, df
 
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
     
     if pics_folder is not None:
         plt.ioff()
@@ -316,7 +372,7 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
     result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
     
     for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
-        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
         result_df = result_df.append(res_dict, ignore_index=True)
         
         if pics_folder is not None:
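
Side note: `DataFrame.append` is deprecated since pandas 1.4 and removed in 2.0; should the scripts move to a newer pandas, an equivalent accumulation could look like the sketch below (`tqdm` and the picture handling omitted; not part of this commit):

```python
import pandas as pd

# Collect per-point dicts in a list and build the frame once at the end;
# equivalent to the repeated result_df.append(...) above.
results = []
for i, table in combined_df.groupby('point_idx', dropna=False):
    res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
    results.append(res_dict)
result_df = pd.DataFrame(results)
```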
@@ -370,6 +426,7 @@ def main():
     parser.add_argument('--csv', action = 'store_true', help = 'Save csv file with data or not')
     parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
     parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
+    parser.add_argument('--only_last', action = 'store_true', help = 'Compute values of the last (in Compton_run_avg clbrdb) and new points only')
     
     args = parser.parse_args()
     # logging.info(f"Arguments: season: {args.season}, config {args.config}")
@@ -378,15 +435,23 @@ def main():
     parser.read(args.config);
     
     rdb = RunsDBHandler(**parser['cmdruns'])
+    clbrdb = CalibrdbHandler(**parser['clbrDB'])
     
     idx = SEASONS['name'].index(args.season)
     runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    
+    if args.only_last:
+        res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows = 1)
+        if len(res_avg[0]) != 0:
+            begintime = res_avg[0][0][res_avg[1].index("begintime")]
+            runs_range = (begintime, None)
+    
     res_rdb = rdb.load_tables(runs_range)
     runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
     
     tdlt0 = timedelta(days=2)
     time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
-    clbrdb = CalibrdbHandler(**parser['clbrDB'])
+    
     res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
     
     cb = Combiner(res_rdb, res_clbrdb)
@@ -394,7 +459,7 @@ def main():
     
     compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
     
-    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder)
+    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
     
     if args.csv:
         cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
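
A minimal sketch of the `--only_last` flow, assuming `clbrdb` and `rdb` as constructed in `main()` and `runs_range` already holding the season default. Re-reading from the *begintime* of the newest averaged point (rather than its endtime) lets a point that was still being taken on the previous invocation be re-averaged together with its newer runs instead of staying partially complete:

```python
# Take the newest Compton_run_avg row and recompute from its begintime on;
# the open upper bound makes load_tables() use only its "starttime >= %s" branch.
res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows=1)
if len(res_avg[0]) != 0:
    begintime = res_avg[0][0][res_avg[1].index('begintime')]
    runs_range = (begintime, None)  # datetime lower bound, open upper bound
res_rdb = rdb.load_tables(runs_range)
```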

compton_filter.py (+25 -8)

@@ -5,6 +5,7 @@ Script to fill calibration database with filtering slowdb compton measurements
 import argparse
 from configparser import ConfigParser
 from datetime import datetime, timedelta, timezone
+import sys
 from typing import Tuple, List, Dict, Union, Optional
 import warnings
 import logging
@@ -225,7 +226,18 @@ class CalibrdbHandler(PostgreSQLHandler):
         return table, fields_name
         
     def update(self, new_rows: list, system: str = "Misc", algo: str = "RunHeader", 
-               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = True):
+               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = False):
+        """Writes new_rows in clbrdb
+        
+        Parameters
+        ----------
+        new_rows : list
+            list of the data for writing
+        handle_last_time_row : bool
+            (DANGEROUS PLACE - keep default False or don't commit changes if you don't know what you want)
+            update current values or not: replace all values in interval from min(begintime in new_rows) to max(endtime in new_rows)
+        """
+        
         if len(new_rows) == 0:
             return
         
@@ -234,12 +246,9 @@ class CalibrdbHandler(PostgreSQLHandler):
         new_rows = list(map(lambda x: (sid, 'lxeuser', x[0], x[5], x[6], [x[1], x[2], x[3], x[4]]), new_rows))
         
         if handle_last_time_row:
-            last_written_row, _ = self.load_table(system, algo, name, version, num_last_rows = 1, return_timezone = True)
-            if len(last_written_row) > 0:
-                if last_written_row[0][5] > new_rows[0][3]:
-                    logging.info('Removing of overlapping written row')
-                    self.delete_row(sid = last_written_row[0][1], createdby = last_written_row[0][2], time = last_written_row[0][3])
-                
+            min_new_time, max_new_time = min(map(lambda x: x[3], new_rows)), max(map(lambda x: x[4], new_rows))
+            self.delete_rows(sid = sid, createdby = 'lxeuser', time = (min_new_time, max_new_time))
+        
         insert_query = """INSERT INTO clbrdata (sid, createdby, time, begintime, endtime, data) VALUES %s;"""
         execute_values(self.cur, insert_query, new_rows, fetch=False)
         logging.info(f"Inserted {len(new_rows)} new rows")
@@ -312,6 +321,14 @@ class CalibrdbHandler(PostgreSQLHandler):
         logging.info(f"Deleted ({sid}, {createdby}, {time}) row")
         return
     
+    def delete_rows(self, sid: int, createdby: str, time: Tuple[datetime, datetime]):
+        delete_query = f"""DELETE FROM clbrdata 
+        WHERE sid = %s AND createdby = %s AND endtime > %s AND begintime < %s
+        """
+        self.cur.execute(delete_query, (sid, createdby, time[0], time[1]))
+        logging.info(f"Deleted ({sid}, {createdby} from {time[0]} to {time[1]}) rows")
+        return
+    
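
The WHERE clause in `delete_rows` is the standard interval-overlap test; in Python terms:

```python
from datetime import datetime

def overlaps(begin: datetime, end: datetime,
             window_start: datetime, window_end: datetime) -> bool:
    # Two intervals overlap iff each starts before the other ends;
    # this mirrors the "endtime > %s AND begintime < %s" predicate above.
    return end > window_start and begin < window_end
```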
     def remove_duplicates(self, system: str = "Misc", algo: str = "RunHeader", name: str = "Compton_run", version: str = 'Default', keep: str = 'last'):
         sid = self.select_table(system, algo, name, version)
         
@@ -353,7 +370,7 @@ class CalibrdbHandler(PostgreSQLHandler):
     
 def main():
     log_format = '[%(asctime)s] %(levelname)s: %(message)s'
-    logging.basicConfig(filename="compton_filter.log", format=log_format, level=logging.INFO)
+    logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO)  # was: filename="compton_filter.log"
     logging.info("Program started")
     
     parser = argparse.ArgumentParser(description = 'Filter compton energy measurements from slowdb')

requirements.txt (+1 -2)

@@ -1,2 +1 @@
-psycopg2==2.9.1
-mysql-connector-python==8.0.23
+psycopg2==2.9.1