@@ -0,0 +1,374 @@
+"""Script to combine compton measurements with runs and process the combined data
+"""
+
+import argparse
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+import sqlite3
+from typing import Union, Tuple, Optional
+
+from compton_filter import CalibrdbHandler
+from iminuit import Minuit
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+from mysql.connector import connect
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+SEASONS = {
+    'name': ['HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2021'],
+    'start_run': [36872, 48938, 70014, 85224, 89973, 98116, 107342, None],
+}
+
+class RunsDBHandler():
+    def __init__(self, host: str = 'cmddb', database: str = 'online', user: Optional[str] = None, password: Optional[str] = None):
+        self.conn = connect(host = host, database = database, user = user, password = password)
+        self.cur = self.conn.cursor()
+        self.cur.execute("SET time_zone = 'UTC';")
+
+    @property
+    def fields(self) -> list:
+        """Returns a list of available columns in the RunsDB
+        """
+
+        self.cur.execute("""DESCRIBE Runlog""")
+        return self.cur.fetchall()
+
+    def load_tables(self, rng: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+        """Loads the good runs from the RunsDB in the given range
+
+        Parameters
+        ----------
+        rng : Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]
+            (first run, last run) or (start time, stop time); a None upper bound means no upper limit
+        """
+
+        cond = ""
+        if isinstance(rng[0], int):
+            cond = f" AND run >= {rng[0]} "
+            if rng[1] is not None:
+                cond += f" AND run <= {rng[1]} "
+        elif isinstance(rng[0], datetime):
+            cond = " AND stoptime BETWEEN %s AND %s"
+
+        sql_query = f"""
+        SELECT
+            run,
+            starttime,
+            stoptime,
+            energy,
+            luminosity
+        FROM Runlog
+        WHERE
+            quality = "Y"
+            {cond}
+            AND luminosity > 0
+            AND stoptime > starttime
+            AND nevent > 0
+        ORDER BY run DESC"""
+
+        if isinstance(rng[0], datetime):
+            self.cur.execute(sql_query, rng)
+        else:
+            self.cur.execute(sql_query)
+
+        field_names = [i[0] for i in self.cur.description]
+        res = self.cur.fetchall()
+        return res, field_names
+
+    def __del__(self):
+        self.conn.close()
+
+class Combiner():
+    """Combines a dataframe with runs and a dataframe with compton measurements
+    """
+
+    def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
+        """
+        Parameters
+        ----------
+        runsdb : Tuple[list, list]
+            table of runs (rows and field names)
+        clbrdb : Tuple[list, list]
+            table of compton measurements (rows and field names)
+        """
+
+        rdb_rows, r_fld = runsdb
+        cdb_rows, c_fld = clbrdb
+
+        self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
+        self.cur = self.conn.cursor()
+        self.cur.execute("CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
+        self.cur.execute("CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
+
+        run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
+                                           x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
+                                           x[r_fld.index("luminosity")]), rdb_rows)
+        c_data_idx = c_fld.index("data")
+        compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
+                                               float(x[c_data_idx][0]), float(x[c_data_idx][1]),
+                                               float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
+
+        self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
+        self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
+
+        self.__create_combined_table()
+
+    def __create_combined_table(self):
+        # a run and a compton measurement are linked if their time intervals overlap
+        create_combined_query = """
+        CREATE TABLE combined_table AS
+        SELECT
+            runs.run AS run,
+            runs.elabel AS elabel,
+            runs.starttime AS "run_start [timestamp]",
+            runs.stoptime AS "run_stop [timestamp]",
+            compton.begintime AS "compton_start [timestamp]",
+            compton.endtime AS "compton_stop [timestamp]",
+            runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
+        FROM runs, compton
+        WHERE
+            (runs.starttime BETWEEN compton.begintime AND compton.endtime)
+            OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
+            OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
+            OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
+        """
+        self.cur.execute(create_combined_query)
+
+    def combined_table(self) -> pd.DataFrame:
+        """Returns the combined dataframe; if a run overlaps several measurements,
+        the measurement covering the largest fraction of the run is kept
+        """
+
+        sql_query = """
+        SELECT * FROM combined_table;
+        """
+        df = pd.read_sql(sql_query, self.conn)
+        df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
+        df['run_duration'] = df['run_stop'] - df['run_start']
+        df['run_in_measurement'] = df['common_duration']/df['run_duration']
+        df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
+        df = df.drop(['run_duration', 'common_duration', 'run_start', 'run_stop'], axis=1)
+        return df
+
+    def __del__(self):
+        self.conn.close()
+
+class Likelihood():
+    """
+    Negative log-likelihood for the luminosity-weighted energy measurements
+    """
+
+    def __init__(self, means: np.ndarray, sigmas: np.ndarray, weights: np.ndarray):
+        """
+        Parameters
+        ----------
+        means : np.ndarray
+            array of means, [MeV]
+        sigmas : np.ndarray
+            array of standard deviations, [MeV]
+        weights : np.ndarray
+            array of luminosities
+        """
+
+        self.means = means
+        self.sigmas = sigmas
+        self.weights = weights/weights.mean()
+
+    def __call__(self, mean: float, sigma: float):
+        """
+        Computes the negative log-likelihood (to be minimized with errordef = 0.5)
+
+        Parameters
+        ----------
+        mean : float
+            expected mean
+        sigma : float
+            expected standard deviation
+        """
+
+        sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
+        ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
+        return -ln_L
+
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> Tuple[dict, pd.DataFrame]:
+    """Calculates the energy parameters (mean, std, spread) for one energy point
+
+    Parameters
+    ----------
+    comb_df : pd.DataFrame
+        table of the measurements linked with runs
+    runs_df : pd.DataFrame
+        table of the runs
+    compton_df : pd.DataFrame
+        table of the compton measurements
+
+    Returns
+    -------
+    Tuple[dict, pd.DataFrame]
+        average parameters of this energy point and the dataframe of good measurements used
+    """
+
+    if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
+        # no direct compton measurements during the data runs:
+        # estimate the energy from the nearest measurements before and after
+        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+
+        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
+        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
+
+        # regularization of the uncertainties (data = [e_mean, e_std, spread_mean, spread_std])
+        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+        nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+        nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+
+        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+
+        return {
+            'energy_point': comb_df.elabel.min(),
+            'first_run': comb_df.run_first.min(),
+            'last_run': comb_df.run_last.max(),
+            'mean_energy': mean_energy,
+            'mean_energy_stat_err': std_energy,
+            'mean_energy_sys_err': sys_energy,
+            'mean_spread': mean_spread,
+            'mean_spread_stat_err': std_spread,
+            'used_lum': 0,
+            'comment': 'indirect measurement',
+        }, pd.DataFrame()
+
+    df = comb_df.copy()
+    df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
+
+    df = df[df.e_std > 0]
+    # luminosity-weighted mean energy: sum(E_i * L_i / sigma_i^2) / sum(L_i / sigma_i^2)
+    mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
+
+    # drop measurements more than 5 combined standard deviations away from the mean
+    good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) < 5
+    df = df[good_criterion]
+
+    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std())
+    m.errordef = 0.5
+    m.limits['sigma'] = (0, None)
+    m.migrad()
+    sys_err = m.values['sigma']
+    mean_en = m.values['mean']
+
+    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
+    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
+
+    res_dict = {
+        'energy_point': comb_df.elabel.min(),
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(),
+        'mean_energy': mean_en,
+        'mean_energy_stat_err': m.errors['mean'],
+        'mean_energy_sys_err': sys_err,
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread,
+        'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(),
+        'comment': '',
+    }
+    return res_dict, df
+
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+    """Averages the measurements over energy points and, optionally, saves a plot per point
+
+    Parameters
+    ----------
+    combined_df : pd.DataFrame
+        table of the measurements linked with runs
+    runs_df : pd.DataFrame
+        table of the runs
+    compton_df : pd.DataFrame
+        table of the compton measurements
+    pics_folder : Optional[str]
+        folder to save the per-point plots into (no plots if None)
+
+    Returns
+    -------
+    pd.DataFrame
+        table of averaged parameters, one row per energy point
+    """
+
+    if pics_folder is not None:
+        plt.ioff()
+        plt.style.use('ggplot')
+        locator = mdates.AutoDateLocator(minticks=5)
+        formatter = mdates.ConciseDateFormatter(locator)
+        formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
+        formatter.zero_formats = [''] + formatter.formats[:-1]
+        formatter.zero_formats[3] = '%d-%b'
+        formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
+
+    runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
+    combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
+    combined_df = combined_df.sort_values(by='run')
+    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
+
+    # a new energy point starts whenever the run energy label changes
+    combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
+    combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
+        elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
+        run_first=('run', 'min'), run_last=('run', 'max'),
+        luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
+        compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
+        e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
+        e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
+        spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
+        spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
+    ).reset_index().set_index('point_idx')
+
+    rows = []
+    for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
+        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        rows.append(res_dict)
+
+        if pics_folder is not None:
+            plt_table = good_df.dropna()
+            if len(plt_table) == 0:
+                continue
+
+            total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
+            half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
+            time = plt_table.compton_start + half_timedelta
+
+            fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
+            ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
+            ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
+            ax.fill_between([plt_table.compton_stop.min(), plt_table.compton_stop.max()],
+                            [res_dict['mean_energy'] - total_error]*2,
+                            [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
+            ax.tick_params(axis='x', labelrotation=45)
+            ax.xaxis.set_major_locator(locator)
+            ax.xaxis.set_major_formatter(formatter)
+            ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV', xlabel='Time, UTC', ylabel='Energy, [MeV]',
+                   xlim=[plt_table.compton_stop.min(), plt_table.compton_stop.max()])
+            plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png')
+            plt.close()
+
+    result_df = pd.DataFrame(rows, columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
+    return result_df
+
+# python scripts/compton_combiner.py --season NNBAR2021 --config database.ini
+def main():
+    parser = argparse.ArgumentParser(description = 'Averages compton energy measurements from the clbrdb')
+    parser.add_argument('--season', help = 'Name of the season')
+    parser.add_argument('--config', help = 'Config file containing information for access to databases')
+
+    args = parser.parse_args()
+
+    config = ConfigParser()
+    config.read(args.config)
+
+    rdb = RunsDBHandler(**config['cmdruns'])
+
+    idx = SEASONS['name'].index(args.season)
+    runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    res_rdb = rdb.load_tables(runs_range)
+    runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
+
+    # pad the time range so that compton measurements just outside the runs are also picked up
+    tdlt0 = timedelta(days=2)
+    time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
+    clbrdb = CalibrdbHandler(**config['clbrDB'])
+    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
+
+    cb = Combiner(res_rdb, res_clbrdb)
+    comb_df = cb.combined_table()
+
+    compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
+
+    cdf = process_combined(comb_df, runs_df, compton_df, './pics')
+    cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
+
+if __name__ == "__main__":
+    main()