compton committed 4 years ago
Parent
Commit 05cd7d6ad0
3 changed files with 124 additions and 43 deletions
  1. compton_combiner.py (+98, -33)
  2. compton_filter.py (+25, -8)
  3. requirements.txt (+1, -2)

+ 98 - 33
compton_combiner.py

@@ -35,7 +35,7 @@ class RunsDBHandler():
         self.cur.execute("""DESCRIBE Runlog""")
         return self.cur.fetchall()
         
-    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
         """
         """
         Parameters
         Parameters
         ----------
         ----------
@@ -43,6 +43,10 @@ class RunsDBHandler():
             selection range
             int range defines an interval in runs
             datetime range defines a time interval (NSK: +7:00 time)
+        energy_point : Optional[float]
+            energy point name, MeV (default is None)
+        select_bad_runs : bool
+            if True, select only runs with quality labels other than "Y" (default is False)
         """
         """
         
         
         cond = ""
         cond = ""
@@ -51,7 +55,19 @@ class RunsDBHandler():
             if range[1] is not None:
                 cond += f" AND run <= {range[1]} "
         elif isinstance(range[0], datetime):
-            cond = f" AND stoptime BETWEEN %s AND %s"
+            cond = " AND starttime >= %s "
+            if range[1] is not None:
+                cond += " AND stoptime <= %s"
+            else:
+                range = (range[0], )
+                
+        energy_cond = ""
+        if energy_point is not None:
+            energy_cond = f" AND energy = {energy_point}"
+            
+        quality_cond = ' quality = "Y" '
+        if select_bad_runs:
+            quality_cond = ' quality <> "Y" '
             
         sql_query = f"""
         SELECT 
@@ -62,8 +78,9 @@ class RunsDBHandler():
             luminosity
         FROM Runlog 
         WHERE 
-            quality = "Y"
+            {quality_cond}
             {cond}
+            {energy_cond}
             AND luminosity > 0
             AND stoptime > starttime
             AND nevent > 0
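For reference, a minimal sketch of how the extended load_tables signature might be called (the run numbers, date, and the 508.5 MeV point are illustrative, not from the commit; rdb is a RunsDBHandler as constructed in main(), and datetime/timedelta are imported at the top of the script):

    # good runs of one energy point, selected by a run-number interval
    rows, fields = rdb.load_tables((48938, 49020), energy_point=508.5)

    # runs with a quality label other than "Y" inside a time window
    t0 = datetime(2021, 6, 1)
    bad_rows, bad_fields = rdb.load_tables((t0, t0 + timedelta(days=1)), select_bad_runs=True)
    bad_runs_df = pd.DataFrame(bad_rows, columns=bad_fields)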
@@ -192,7 +209,41 @@ class Likelihood():
         ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
         return -ln_L
     
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> dict:
+def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
+    # estimate energy by the nearest points
+    min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+    max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+
+    nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
+    nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+
+    # regularization of the statistical errors
+    nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+    nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+    nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+    nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+
+    mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+    mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+    std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+    std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+    sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+
+
+    return {
+        'energy_point': comb_df.elabel.min(),
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(), 
+        'mean_energy': mean_energy, 
+        'mean_energy_stat_err': std_energy, 
+        'mean_energy_sys_err': sys_energy, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread, 
+        'used_lum': 0, 
+        'comment': 'indirect measurement #2',
+    }, pd.DataFrame([])
+    
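The combination above is the standard inverse-variance weighting of the two nearest calibration points, with the systematic error taken as the scatter between them. A self-contained numeric sketch (values are illustrative only):

    import numpy as np

    # two neighbouring compton measurements: energy and its stat. error, MeV
    e1, s1 = 508.40, 0.05
    e2, s2 = 508.46, 0.07

    mean_energy = (e1 + e2) / 2                       # plain average, as in the code above
    std_energy = np.sqrt(1 / (1/s1**2 + 1/s2**2))     # ~0.041 MeV, below either single error
    sys_energy = np.std([e1, e2])                     # 0.03 MeV, half the difference of the two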
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     
     
     Parameters
     Parameters
@@ -212,40 +263,45 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
     
     if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
         # no direct measurements of the compton during data runs
-        # estimate energy by the nearest points
-        # print('Hello')
-        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
-        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+                
+        min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+        dlt0 = timedelta(days=1)
+        # asymmetric time window because the energy can be stable only after
+        runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point = comb_df.iloc[0].at['elabel'], select_bad_runs = True)
         
-        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
-        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+        if len(runs_df_with_bads[0]) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
         
-        # regulatization
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][3], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-                
-        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
-        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
-        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
-        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
-        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+        runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns = runs_df_with_bads[1])
+        min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
         
+        compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
+        res_df = pd.DataFrame(list(map(lambda x: {
+            'compton_start': x[1]['begintime'],
+            'compton_stop': x[1]['endtime'],
+            'e_mean': float(x[1]['data'][0]),
+            'e_std': float(x[1]['data'][1]),
+            'spread_mean': float(x[1]['data'][2]),
+            'spread_std': float(x[1]['data'][3]),
+        }, compton_meas.iterrows())))
+        res_df = res_df.query(f'abs(e_mean -{comb_df.iloc[0].at["elabel"]})<5')
         
+        if len(res_df) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
+                        
         return {
             'energy_point': comb_df.elabel.min(),
             'first_run': comb_df.run_first.min(),
             'last_run': comb_df.run_last.max(), 
-            'mean_energy': mean_energy, 
-            'mean_energy_stat_err': std_energy, 
-            'mean_energy_sys_err': sys_energy, 
-            'mean_spread': mean_spread,
-            'mean_spread_stat_err': std_spread, 
+            'mean_energy': res_df.e_mean.mean(), 
+            'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)), #res_df.e_std.mean()/np.sqrt(len(res_df)), 
+            'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()), 
+            'mean_spread': res_df.spread_mean.mean(),
+            'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
             'used_lum': 0, 
-            'comment': 'indirect measurement',
-        }, pd.DataFrame()
-        
+            'comment': 'indirect measurement #1',
+        }, res_df
         
     
     df = comb_df.copy()
@@ -283,7 +339,7 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
     }
     return res_dict, df
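A note on the compton_meas selection above: the query keeps a calibration row only if one of its endpoints falls inside [min_run_time, max_run_time], so a measurement that fully covers the window would be missed; the symmetric overlap test begintime <= max_run_time and endtime >= min_run_time also catches that case. A small sketch of the difference (illustrative data):

    import pandas as pd

    compton_df = pd.DataFrame({
        'begintime': pd.to_datetime(['2021-06-01 02:00']),
        'endtime':   pd.to_datetime(['2021-06-01 22:00']),
    })
    min_run_time = pd.Timestamp('2021-06-01 06:00')
    max_run_time = pd.Timestamp('2021-06-01 18:00')

    # endpoint-inside test, as in the commit: selects nothing here
    a = compton_df.query('((begintime >= @min_run_time) & (begintime <= @max_run_time))'
                         '| ((endtime >= @min_run_time) & (endtime <= @max_run_time))')

    # full interval-overlap test: also selects the row covering the whole window
    b = compton_df.query('(begintime <= @max_run_time) & (endtime >= @min_run_time)')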
 
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
     
     if pics_folder is not None:
         plt.ioff()
@@ -316,7 +372,7 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
     result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
     
     for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
-        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
         result_df = result_df.append(res_dict, ignore_index=True)
         
         if pics_folder is not None:
@@ -370,6 +426,7 @@ def main():
     parser.add_argument('--csv', action = 'store_true', help = 'Save csv file with data or not')
     parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
     parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
+    parser.add_argument('--only_last', action = 'store_true', help = 'Compute values of the last (in Compton_run_avg clbrdb) and new points only')
     
     args = parser.parse_args()
     # logging.info(f"Arguments: season: {args.season}, config {args.config}")
@@ -378,15 +435,23 @@ def main():
     parser.read(args.config);
     
     rdb = RunsDBHandler(**parser['cmdruns'])
+    clbrdb = CalibrdbHandler(**parser['clbrDB'])
     
     idx = SEASONS['name'].index(args.season)
     runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    
+    if args.only_last:
+        res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows = 1)
+        if len(res_avg[0]) != 0:
+            begintime = res_avg[0][0][res_avg[1].index("begintime")]
+            runs_range = (begintime, None)
+    
     res_rdb = rdb.load_tables(runs_range)
     runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
     
     tdlt0 = timedelta(days=2)
     time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
-    clbrdb = CalibrdbHandler(**parser['clbrDB'])
+    
     res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
     
     cb = Combiner(res_rdb, res_clbrdb)
@@ -394,7 +459,7 @@ def main():
     
     compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
     
-    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder)
+    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
     
     if args.csv:
         cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
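With the new flag, a typical invocation might look like this (the season name and config path are placeholders):

    python compton_combiner.py --season NNBAR2021 --config config.ini --csv --clbrdb --only_last

When --only_last is given, the runs range starts from the begintime of the last row already stored in the Compton_run_avg table, so only the most recent point and any newer ones are recomputed.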

+ 25 - 8
compton_filter.py

@@ -5,6 +5,7 @@ Script to fill calibration database with filtering slowdb compton measurements
 import argparse
 from configparser import ConfigParser
 from datetime import datetime, timedelta, timezone
+import sys
 from typing import Tuple, List, Dict, Union, Optional
 import warnings
 import logging
@@ -225,7 +226,18 @@ class CalibrdbHandler(PostgreSQLHandler):
         return table, fields_name
         
     def update(self, new_rows: list, system: str = "Misc", algo: str = "RunHeader", 
-               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = True):
+               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = False):
+        """Writes new_rows in clbrdb
+        
+        Parameters
+        ----------
+        new_rows : list
+            list of the data for writing
+        handle_last_time_row : bool
+            (DANGEROUS PLACE - keep default False or don't commit changes if you don't know what you want)
+            update current values or not: replace all values in interval from min(begintime in new_rows) to max(endtime in new_rows)
+        """
+        
         if len(new_rows) == 0:
             return
         
@@ -234,12 +246,9 @@ class CalibrdbHandler(PostgreSQLHandler):
         new_rows = list(map(lambda x: (sid, 'lxeuser', x[0], x[5], x[6], [x[1], x[2], x[3], x[4]]), new_rows))
         
         if handle_last_time_row:
-            last_written_row, _ = self.load_table(system, algo, name, version, num_last_rows = 1, return_timezone = True)
-            if len(last_written_row) > 0:
-                if last_written_row[0][5] > new_rows[0][3]:
-                    logging.info('Removing of overlapping written row')
-                    self.delete_row(sid = last_written_row[0][1], createdby = last_written_row[0][2], time = last_written_row[0][3])
-                
+            min_new_time, max_new_time = min(map(lambda x: x[3], new_rows)), max(map(lambda x: x[4], new_rows))
+            self.delete_rows(sid = sid, createdby = 'lxeuser', time = (min_new_time, max_new_time))
+        
         insert_query = """INSERT INTO clbrdata (sid, createdby, time, begintime, endtime, data) VALUES %s;"""
         execute_values(self.cur, insert_query, new_rows, fetch=False)
         logging.info(f"Inserted {len(new_rows)} new rows")
@@ -312,6 +321,14 @@ class CalibrdbHandler(PostgreSQLHandler):
         logging.info(f"Deleted ({sid}, {createdby}, {time}) row")
         return
     
+    def delete_rows(self, sid: int, createdby: str, time: Tuple[datetime, datetime]):
+        delete_query = f"""DELETE FROM clbrdata 
+        WHERE sid = %s AND createdby = %s AND endtime > %s AND begintime < %s
+        """
+        self.cur.execute(delete_query, (sid, createdby, time[0], time[1]))
+        logging.info(f"Deleted ({sid}, {createdby} from {time[0]} to {time[1]}) rows")
+        return
+    
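A minimal usage sketch of the new delete_rows method (handler construction as in main(); the time window is illustrative):

    clbrdb = CalibrdbHandler(**parser['clbrDB'])
    sid = clbrdb.select_table('Misc', 'RunHeader', 'Compton_run', 'Default')
    clbrdb.delete_rows(sid=sid, createdby='lxeuser',
                       time=(datetime(2021, 6, 1), datetime(2021, 6, 2)))

The WHERE clause (endtime > %s AND begintime < %s) removes every row whose [begintime, endtime] interval overlaps the given window; update() relies on this when handle_last_time_row is True.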
     def remove_duplicates(self, system: str = "Misc", algo: str = "RunHeader", name: str = "Compton_run", version: str = 'Default', keep: str = 'last'):
         sid = self.select_table(system, algo, name, version)
         
@@ -353,7 +370,7 @@ class CalibrdbHandler(PostgreSQLHandler):
     
 def main():
     log_format = '[%(asctime)s] %(levelname)s: %(message)s'
-    logging.basicConfig(filename="compton_filter.log", format=log_format, level=logging.INFO)
+    logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO)  # filename="compton_filter.log"
     logging.info("Program started")
     
     parser = argparse.ArgumentParser(description = 'Filter compton energy measurements from slowdb')

+ 1 - 2
requirements.txt

@@ -1,2 +1 @@
-psycopg2==2.9.1
-mysql-connector-python==8.0.23
+psycopg2==2.9.1