compton committed 4 years ago
Parent
Commit 05cd7d6ad0
3 changed files with 124 additions and 43 deletions
  1. compton_combiner.py (+98, -33)
  2. compton_filter.py (+25, -8)
  3. requirements.txt (+1, -2)

+ 98 - 33
compton_combiner.py

@@ -35,7 +35,7 @@ class RunsDBHandler():
         self.cur.execute("""DESCRIBE Runlog""")
         return self.cur.fetchall()
         
-    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
         """
         """
         Parameters
         Parameters
         ----------
         ----------
@@ -43,6 +43,10 @@ class RunsDBHandler():
             selection range
             int range defines an interval in runs
             datetime range defines a time interval (NSK: +7:00 time)
+        energy_point : Optional[float]
+            energy point name, MeV (default is None)
+        select_bad_runs : bool
+            if True, select only runs with quality labels other than "Y" (default is False)
         """
         """
         
         
         cond = ""
         cond = ""
@@ -51,7 +55,19 @@ class RunsDBHandler():
             if range[1] is not None:
                 cond += f" AND run <= {range[1]} "
         elif isinstance(range[0], datetime):
-            cond = f" AND stoptime BETWEEN %s AND %s"
+            cond = " AND starttime >= %s "
+            if range[1] is not None:
+                cond += " AND stoptime <= %s"
+            else:
+                range = (range[0], )
+                
+        energy_cond = ""
+        if energy_point is not None:
+            energy_cond = f" AND energy = {energy_point}"
+            
+        quality_cond = ' quality = "Y" '
+        if select_bad_runs:
+            quality_cond = ' quality <> "Y" '
             
         sql_query = f"""
         SELECT 
@@ -62,8 +78,9 @@ class RunsDBHandler():
             luminosity
         FROM Runlog 
         WHERE 
-            quality = "Y"
+            {quality_cond}
             {cond}
+            {energy_cond}
             AND luminosity > 0
             AND stoptime > starttime
             AND nevent > 0
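For reference, a minimal sketch of how the extended load_tables signature might be called (the run numbers, date, and the 508.5 MeV point are illustrative, not from the commit; rdb is a RunsDBHandler as constructed in main(), and datetime/timedelta are imported at the top of the script):

    # good runs of one energy point, selected by a run-number interval
    rows, fields = rdb.load_tables((48938, 49020), energy_point=508.5)

    # runs with a quality label other than "Y" inside a time window
    t0 = datetime(2021, 6, 1)
    bad_rows, bad_fields = rdb.load_tables((t0, t0 + timedelta(days=1)), select_bad_runs=True)
    bad_runs_df = pd.DataFrame(bad_rows, columns=bad_fields)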
@@ -192,7 +209,41 @@ class Likelihood():
         ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
         return -ln_L
     
-def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> dict:
+def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
+    # estimate energy by the nearest points
+    min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+    max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+
+    nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
+    nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+
+    # regularization of the statistical errors
+    nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+    nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+    nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+    nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+
+    mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+    mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+    std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+    std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+    sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+
+
+    return {
+        'energy_point': comb_df.elabel.min(),
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(), 
+        'mean_energy': mean_energy, 
+        'mean_energy_stat_err': std_energy, 
+        'mean_energy_sys_err': sys_energy, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread, 
+        'used_lum': 0, 
+        'comment': 'indirect measurement #2',
+    }, pd.DataFrame([])
+    
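The combination above is the standard inverse-variance weighting of the two nearest calibration points, with the systematic error taken as the scatter between them. A self-contained numeric sketch (values are illustrative only):

    import numpy as np

    # two neighbouring compton measurements: energy and its stat. error, MeV
    e1, s1 = 508.40, 0.05
    e2, s2 = 508.46, 0.07

    mean_energy = (e1 + e2) / 2                       # plain average, as in the code above
    std_energy = np.sqrt(1 / (1/s1**2 + 1/s2**2))     # ~0.041 MeV, below either single error
    sys_energy = np.std([e1, e2])                     # 0.03 MeV, half the difference of the two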
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> dict:
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     """Calculates parameters of the energy (mean, std, spread) in this dataFrame
     
     
     Parameters
     Parameters
@@ -212,40 +263,45 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
     
     if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
         # no direct measurements of the compton during data runs
-        # estimate energy by the nearest points
-        # print('Hello')
-        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
-        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+                
+        min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+        dlt0 = timedelta(days=1)
+        # asymmetric time window because the energy can be stable only after
+        runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point = comb_df.iloc[0].at['elabel'], select_bad_runs = True)
         
-        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
-        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
+        if len(runs_df_with_bads[0]) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
         
-        # regulatization
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][3], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
-        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-                
-        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
-        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
-        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
-        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
-        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+        runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns = runs_df_with_bads[1])
+        min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
         
+        compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
+        res_df = pd.DataFrame(list(map(lambda x: {
+            'compton_start': x[1]['begintime'],
+            'compton_stop': x[1]['endtime'],
+            'e_mean': float(x[1]['data'][0]),
+            'e_std': float(x[1]['data'][1]),
+            'spread_mean': float(x[1]['data'][2]),
+            'spread_std': float(x[1]['data'][3]),
+        }, compton_meas.iterrows())))
+        res_df = res_df.query(f'abs(e_mean -{comb_df.iloc[0].at["elabel"]})<5')
         
+        if len(res_df) == 0:
+            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
+                        
         return {
             'energy_point': comb_df.elabel.min(),
             'first_run': comb_df.run_first.min(),
             'last_run': comb_df.run_last.max(), 
-            'mean_energy': mean_energy, 
-            'mean_energy_stat_err': std_energy, 
-            'mean_energy_sys_err': sys_energy, 
-            'mean_spread': mean_spread,
-            'mean_spread_stat_err': std_spread, 
+            'mean_energy': res_df.e_mean.mean(), 
+            'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)), #res_df.e_std.mean()/np.sqrt(len(res_df)), 
+            'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()), 
+            'mean_spread': res_df.spread_mean.mean(),
+            'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
             'used_lum': 0, 
-            'comment': 'indirect measurement',
-        }, pd.DataFrame()
-        
+            'comment': 'indirect measurement #1',
+        }, res_df
         
     
     df = comb_df.copy()
@@ -283,7 +339,7 @@ def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd
     }
     return res_dict, df
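A note on the compton_meas selection above: the query keeps a calibration row only if one of its endpoints falls inside [min_run_time, max_run_time], so a measurement that fully covers the window would be missed; the symmetric overlap test begintime <= max_run_time and endtime >= min_run_time also catches that case. A small sketch of the difference (illustrative data):

    import pandas as pd

    compton_df = pd.DataFrame({
        'begintime': pd.to_datetime(['2021-06-01 02:00']),
        'endtime':   pd.to_datetime(['2021-06-01 22:00']),
    })
    min_run_time = pd.Timestamp('2021-06-01 06:00')
    max_run_time = pd.Timestamp('2021-06-01 18:00')

    # endpoint-inside test, as in the commit: selects nothing here
    a = compton_df.query('((begintime >= @min_run_time) & (begintime <= @max_run_time))'
                         '| ((endtime >= @min_run_time) & (endtime <= @max_run_time))')

    # full interval-overlap test: also selects the row covering the whole window
    b = compton_df.query('(begintime <= @max_run_time) & (endtime >= @min_run_time)')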
 
-def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
     
     if pics_folder is not None:
         plt.ioff()
@@ -316,7 +372,7 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
     result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
     
     for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
-        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
         result_df = result_df.append(res_dict, ignore_index=True)
         
         if pics_folder is not None:
@@ -370,6 +426,7 @@ def main():
     parser.add_argument('--csv', action = 'store_true', help = 'Save csv file with data or not')
     parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
     parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
+    parser.add_argument('--only_last', action = 'store_true', help = 'Compute values of the last (in Compton_run_avg clbrdb) and new points only')
     
     args = parser.parse_args()
     # logging.info(f"Arguments: season: {args.season}, config {args.config}")
@@ -378,15 +435,23 @@ def main():
     parser.read(args.config);
     
     rdb = RunsDBHandler(**parser['cmdruns'])
+    clbrdb = CalibrdbHandler(**parser['clbrDB'])
     
     idx = SEASONS['name'].index(args.season)
     runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    
+    if args.only_last:
+        res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows = 1)
+        if len(res_avg[0]) != 0:
+            begintime = res_avg[0][0][res_avg[1].index("begintime")]
+            runs_range = (begintime, None)
+    
     res_rdb = rdb.load_tables(runs_range)
     runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
     
     tdlt0 = timedelta(days=2)
     time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
-    clbrdb = CalibrdbHandler(**parser['clbrDB'])
+    
     res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
     
     cb = Combiner(res_rdb, res_clbrdb)
@@ -394,7 +459,7 @@ def main():
     
     compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
     
-    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder)
+    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)
     
     if args.csv:
         cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
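With the new flag, a typical invocation might look like this (the season name and config path are placeholders):

    python compton_combiner.py --season NNBAR2021 --config config.ini --csv --clbrdb --only_last

When --only_last is given, the runs range starts from the begintime of the last row already stored in the Compton_run_avg table, so only the most recent point and any newer ones are recomputed.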

+ 25 - 8
compton_filter.py

@@ -5,6 +5,7 @@ Script to fill calibration database with filtering slowdb compton measurements
 import argparse
 from configparser import ConfigParser
 from datetime import datetime, timedelta, timezone
+import sys
 from typing import Tuple, List, Dict, Union, Optional
 import warnings
 import logging
@@ -225,7 +226,18 @@ class CalibrdbHandler(PostgreSQLHandler):
         return table, fields_name
         
     def update(self, new_rows: list, system: str = "Misc", algo: str = "RunHeader", 
-               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = True):
+               name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = False):
+        """Writes new_rows in clbrdb
+        
+        Parameters
+        ----------
+        new_rows : list
+            list of the data for writing
+        handle_last_time_row : bool
+            (DANGEROUS PLACE - keep default False or don't commit changes if you don't know what you want)
+            update current values or not: replace all values in interval from min(begintime in new_rows) to max(endtime in new_rows)
+        """
+        
         if len(new_rows) == 0:
             return
         
@@ -234,12 +246,9 @@ class CalibrdbHandler(PostgreSQLHandler):
         new_rows = list(map(lambda x: (sid, 'lxeuser', x[0], x[5], x[6], [x[1], x[2], x[3], x[4]]), new_rows))
         
         if handle_last_time_row:
-            last_written_row, _ = self.load_table(system, algo, name, version, num_last_rows = 1, return_timezone = True)
-            if len(last_written_row) > 0:
-                if last_written_row[0][5] > new_rows[0][3]:
-                    logging.info('Removing of overlapping written row')
-                    self.delete_row(sid = last_written_row[0][1], createdby = last_written_row[0][2], time = last_written_row[0][3])
-                
+            min_new_time, max_new_time = min(map(lambda x: x[3], new_rows)), max(map(lambda x: x[4], new_rows))
+            self.delete_rows(sid = sid, createdby = 'lxeuser', time = (min_new_time, max_new_time))
+        
         insert_query = """INSERT INTO clbrdata (sid, createdby, time, begintime, endtime, data) VALUES %s;"""
         execute_values(self.cur, insert_query, new_rows, fetch=False)
         logging.info(f"Inserted {len(new_rows)} new rows")
@@ -312,6 +321,14 @@ class CalibrdbHandler(PostgreSQLHandler):
         logging.info(f"Deleted ({sid}, {createdby}, {time}) row")
         return
     
+    def delete_rows(self, sid: int, createdby: str, time: Tuple[datetime, datetime]):
+        delete_query = f"""DELETE FROM clbrdata 
+        WHERE sid = %s AND createdby = %s AND endtime > %s AND begintime < %s
+        """
+        self.cur.execute(delete_query, (sid, createdby, time[0], time[1]))
+        logging.info(f"Deleted ({sid}, {createdby} from {time[0]} to {time[1]}) rows")
+        return
+    
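A minimal usage sketch of the new delete_rows method (handler construction as in main(); the time window is illustrative):

    clbrdb = CalibrdbHandler(**parser['clbrDB'])
    sid = clbrdb.select_table('Misc', 'RunHeader', 'Compton_run', 'Default')
    clbrdb.delete_rows(sid=sid, createdby='lxeuser',
                       time=(datetime(2021, 6, 1), datetime(2021, 6, 2)))

The WHERE clause (endtime > %s AND begintime < %s) removes every row whose [begintime, endtime] interval overlaps the given window; update() relies on this when handle_last_time_row is True.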
     def remove_duplicates(self, system: str = "Misc", algo: str = "RunHeader", name: str = "Compton_run", version: str = 'Default', keep: str = 'last'):
         sid = self.select_table(system, algo, name, version)
         
@@ -353,7 +370,7 @@ class CalibrdbHandler(PostgreSQLHandler):
     
 def main():
     log_format = '[%(asctime)s] %(levelname)s: %(message)s'
-    logging.basicConfig(filename="compton_filter.log", format=log_format, level=logging.INFO)
+    logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO)  # filename="compton_filter.log"
     logging.info("Program started")
     
     parser = argparse.ArgumentParser(description = 'Filter compton energy measurements from slowdb')

+ 1 - 2
requirements.txt

@@ -1,2 +1 @@
-psycopg2==2.9.1
-mysql-connector-python==8.0.23
+psycopg2==2.9.1