Add source code

compton committed 3 years ago
commit bcc0dc42df
4 changed files with 704 additions and 0 deletions
  1. bad_comptons.txt (+7 −0)
  2. compton_combiner.py (+374 −0)
  3. compton_filter.py (+321 −0)
  4. requirements.txt (+2 −0)

+ 7 - 0
bad_comptons.txt

@@ -0,0 +1,7 @@
+1622157426 #incorrect laser line
+1622155918 #incorrect laser line
+1622158934 #incorrect laser line
+1622160433 #incorrect laser line
+1622161930 #incorrect laser line
+1622163439 #incorrect laser line
+1583279091 #discrepancy with energy label

+ 374 - 0
compton_combiner.py

@@ -0,0 +1,374 @@
+"""Script to combine compton measurements with runs and process these data
+"""
+
+import argparse
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+import sqlite3
+from typing import Union, Tuple, Optional
+
+from compton_filter import CalibrdbHandler
+from iminuit import Minuit
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+from mysql.connector import connect, Error
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
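+# start_run has one more entry than name: the trailing None marks the open end of the
+# last season, so (start_run[i], start_run[i+1]) bounds season name[i] (see main()).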
+SEASONS = {
+    'name': ['HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2021'],
+    'start_run': [36872, 48938, 70014, 85224, 89973, 98116, 107342, None],
+}
+
+class RunsDBHandler():
+    def __init__(self, host: str = 'cmddb', database: str = 'online', user: str = None, password: str = None):
+        self.conn = connect(host = host, database = database, user = user, password = password)
+        self.cur = self.conn.cursor()
+        self.cur.execute("SET time_zone = 'UTC';")
+        
+    @property
+    def fields(self) -> list:
+        """Returns a list of available columns in the RunsDB
+        """
+        
+        self.cur.execute("""DESCRIBE Runlog""")
+        return self.cur.fetchall()
+        
+    def load_tables(self, rng: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+        
+        cond = ""
+        if isinstance(rng[0], int):
+            cond = f" AND run >= {rng[0]} "
+            if rng[1] is not None:
+                cond += f" AND run <= {rng[1]} "
+        elif isinstance(rng[0], datetime):
+            cond = " AND stoptime BETWEEN %s AND %s"
+            
+        sql_query = f"""
+        SELECT 
+            run, 
+            starttime, 
+            stoptime, 
+            energy, 
+            luminosity
+        FROM Runlog 
+        WHERE 
+            quality = "Y"
+            {cond}
+            AND luminosity > 0
+            AND stoptime > starttime
+            AND nevent > 0
+        ORDER BY run DESC"""
+        
+        if isinstance(rng[0], datetime):
+            self.cur.execute(sql_query, rng)
+        else:
+            self.cur.execute(sql_query)
+        
+        field_names = [i[0] for i in self.cur.description]
+        res = self.cur.fetchall()
+        return res, field_names
+        
+    def __del__(self):
+        self.conn.close()
+        
+class Combiner():
+    """Combines a dataframe with runs and a dataframe with compton measurements together
+    """
+    
+    def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
+        """
+        Parameters
+        ----------
+        runsdb : Tuple[list, list]
+            table of runs (rows and field names)
+        clbrdb : Tuple[list, list]
+            table of compton measurements (rows and field names)            
+        """
+        
+        rdb_rows, r_fld = runsdb
+        cdb_rows, c_fld = clbrdb
+        
+        self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
+        self.cur = self.conn.cursor()
+        self.cur.execute(f"CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
+        self.cur.execute(f"CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
+        
+        run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
+                                           x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
+                                           x[r_fld.index("luminosity")]), rdb_rows)
+        c_data_idx = c_fld.index("data")
+        compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")], 
+                                               float(x[c_data_idx][0]), float(x[c_data_idx][1]), 
+                                               float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
+        
+        self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
+        self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
+        
+        self.__create_combined_table()        
+        
+    def __create_combined_table(self):
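+        # Match each run with every compton measurement whose time interval overlaps it:
+        # two intervals overlap iff an endpoint of one lies inside the other, hence the
+        # four BETWEEN clauses below. A run overlapping several measurements yields
+        # several rows; combined_table() later keeps the best-covered one per run.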
+        create_combined_query = """
+        CREATE TABLE combined_table AS
+        SELECT 
+            runs.run AS run,
+            runs.elabel AS elabel,
+            runs.starttime as "run_start [timestamp]", 
+            runs.stoptime AS "run_stop [timestamp]", 
+            compton.begintime AS "compton_start [timestamp]", 
+            compton.endtime AS "compton_stop [timestamp]",
+            runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
+        FROM runs, compton
+        WHERE 
+            (runs.starttime BETWEEN compton.begintime AND compton.endtime) 
+            OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
+            OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
+            OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
+        """
+        self.cur.execute(create_combined_query)
+        return
+        
+    def combined_table(self) -> pd.DataFrame:
+        """Returns combined dataframe
+        """
+        
+        sql_query = """
+        SELECT * FROM combined_table;
+        """
+        df = pd.read_sql(sql_query, self.conn)
+        df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
+        df['run_duration'] = df['run_stop'] - df['run_start']
+        df['run_in_measurement'] = df['common_duration']/df['run_duration']
+        df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
+        df = df.drop(['run_duration', 'common_duration', 'run_start', 'run_stop'], axis=1)
+        return df
+        
+    def __del__(self):
+        self.conn.close()
+        
+class Likelihood():
+    """
+    Likelihood function
+    """
+    
+    def __init__(self, means: np.array, sigmas: np.array, weights: np.array):
+        """
+        Parameters
+        ----------
+        means : np.array
+            array of means, [MeV]
+        sigmas : np.array
+            array of standard deviations, [MeV]
+        weights : np.array
+            array of luminosities
+        """
+        
+        self.means = means
+        self.sigmas = sigmas
+        self.weights = weights/weights.mean()
+        
+    def __call__(self, mean: float, sigma: float):
+        """
+        Calls likelihood calculation
+        
+        Parameters
+        ----------
+        mean : float
+            expected mean
+        sigma : float
+            expected standard deviation
+        """
+        
+        sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
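+        # returned value is the weighted negative log-likelihood (constants dropped):
+        # -ln L = Σ_i w_i [ (mean - m_i)² / (2 σ_tot,i²) + ln σ_tot,i ],  σ_tot,i² = sigma² + sigmas_i²
+        # minimized by Minuit with errordef = 0.5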
+        ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
+        return -ln_L
+    
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> Tuple[dict, pd.DataFrame]:
+    """Calculates parameters of the energy (mean, std, spread) in this dataFrame
+    
+    Parameters
+    ----------
+    comb_df : pd.DataFrame
+        table of the measurements linked with runs
+    runs_df : pd.DataFrame
+        table of the runs
+    compton_df : pd.DataFrame
+        table of the comptons
+    
+    Returns
+    -------
+    Tuple[dict, pd.DataFrame]
+        average parameters of this dataframe and the table of measurements used in the fit
+    """
+        
+    if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
+        # no direct measurements of the compton during data runs
+        # estimate energy by the nearest points
+        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+        
+        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
+        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
+        
+        # regularization: enforce a floor on the uncertainties
+        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+        nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+        nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+                
+        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+        
+        return {
+            'energy_point': comb_df.elabel.min(),
+            'first_run': comb_df.run_first.min(),
+            'last_run': comb_df.run_last.max(), 
+            'mean_energy': mean_energy, 
+            'mean_energy_stat_err': std_energy, 
+            'mean_energy_sys_err': sys_energy, 
+            'mean_spread': mean_spread,
+            'mean_spread_stat_err': std_spread, 
+            'used_lum': 0, 
+            'comment': 'indirect measurement',
+        }, pd.DataFrame()
+    
+    df = comb_df.copy()
+    df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
+    
+    df = df[df.e_std > 0]
+    mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
+    # std_energy = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.e_std**2))
+    
+    good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) < 5
+    df = df[good_criterion]
+    
+    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std())
+    m.errordef = 0.5 
+    m.limits['sigma'] = (0, None)
+    m.migrad()
+    sys_err = m.values['sigma']
+    mean_en = m.values['mean']
+    
+    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
+    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
+    
+    res_dict = {
+        'energy_point': comb_df.elabel.min(), 
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(), 
+        'mean_energy': mean_en, 
+        'mean_energy_stat_err': m.errors['mean'], 
+        'mean_energy_sys_err': sys_err, 
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread, 
+        'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(), 
+        'comment': '',
+    }
+    return res_dict, df
+
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+    
+    if pics_folder is not None:
+        plt.ioff()
+        plt.style.use('ggplot')
+        locator = mdates.AutoDateLocator(minticks=5)
+        formatter = mdates.ConciseDateFormatter(locator)
+        formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
+        formatter.zero_formats = [''] + formatter.formats[:-1]
+        formatter.zero_formats[3] = '%d-%b'
+        formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
+    
+    runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
+    combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
+    combined_df = combined_df.sort_values(by='run')
+    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
+    
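+    # a new "point" starts whenever the energy label changes between consecutive runs (atol 1e-4 MeV)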
+    combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
+    combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
+        elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
+        run_first=('run', 'min'), run_last=('run', 'max'),
+        luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
+        compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
+        e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
+        e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
+        spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
+        spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
+    ).reset_index().set_index('point_idx')
+    
+    result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
+    
+    for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
+        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        result_df = pd.concat([result_df, pd.DataFrame([res_dict])], ignore_index=True)
+        
+        if pics_folder is not None:
+            plt_table = good_df.dropna()
+            if len(plt_table) == 0:
+                continue
+            
+            total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
+            half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2 
+            time = plt_table.compton_start + half_timedelta
+            
+            fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
+            ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
+            ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
+            ax.fill_between([plt_table.compton_stop.min(), plt_table.compton_stop.max()], 
+                            [res_dict['mean_energy'] - total_error]*2, 
+                            [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
+            ax.tick_params(axis='x', labelrotation=45)
+            ax.xaxis.set_major_locator(locator)
+            ax.xaxis.set_major_formatter(formatter)
+            ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV', xlabel='Time, UTC', ylabel='Energy, [MeV]', 
+                   xlim=[plt_table.compton_stop.min(), plt_table.compton_stop.max()])
+            plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png')
+            plt.close()
+    
+    return result_df
+    
+# python scripts/compton_combiner.py --season NNBAR2021 --config database.ini
+def main():
+    parser = argparse.ArgumentParser(description = 'Mean compton energy measurements from clbrdb')
+    parser.add_argument('--season', help = 'Name of the season')
+    parser.add_argument('--config', help = 'Config file containing information for access to databases')
+    
+    args = parser.parse_args()
+
+    config = ConfigParser()
+    config.read(args.config)
+    
+    rdb = RunsDBHandler(**config['cmdruns'])
+    
+    idx = SEASONS['name'].index(args.season)
+    runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    res_rdb = rdb.load_tables(runs_range)
+    runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
+    
+    tdlt0 = timedelta(days=2)
+    time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
+    clbrdb = CalibrdbHandler(**config['clbrDB'])
+    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
+    
+    cb = Combiner(res_rdb, res_clbrdb)
+    comb_df = cb.combined_table()
+    
+    compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
+    
+    cdf = process_combined(comb_df, runs_df, compton_df, './pics')
+    cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
+    return 
+
+if __name__ == "__main__":
+    main()
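
A quick way to sanity-check the Likelihood/Minuit machinery above is a toy fit: draw measurements around a known mean with a known extra spread and verify that the fit recovers both. This is only a sketch, not part of the commit; the numbers and the measurement count are invented, and it assumes the script's directory is on the import path.

    import numpy as np
    from iminuit import Minuit
    from compton_combiner import Likelihood

    rng = np.random.default_rng(0)
    true_mean, true_spread = 508.5, 0.05              # MeV, invented toy values
    stat_err = np.full(100, 0.03)                     # per-measurement stat errors, MeV
    means = rng.normal(true_mean, np.hypot(true_spread, stat_err))
    lum = rng.uniform(0.5, 1.5, size=100)             # mock luminosity weights

    m = Minuit(Likelihood(means, stat_err, lum), mean=means.mean(), sigma=means.std())
    m.errordef = 0.5                                  # 0.5 for a negative log-likelihood
    m.limits['sigma'] = (0, None)
    m.migrad()
    print(m.values['mean'], m.values['sigma'])        # should come out near 508.5 and 0.05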

+ 321 - 0
compton_filter.py

@@ -0,0 +1,321 @@
+"""
+Script to fill calibration database with filtering slowdb compton measurements
+"""
+
+import argparse
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+from typing import Tuple, List, Optional
+import logging
+
+import psycopg2
+from psycopg2.extras import execute_values
+
+class PostgreSQLHandler():
+    """A common class for processing postgresql databases
+    """
+    
+    def __init__(self, host: str = 'cmddb', database: str = 'slowdb', user: str = None, password: str = None):
+        """
+        Parameters
+        ----------
+        host : str
+            host name (default is "cmddb")
+        database : str
+            database name (default is "slowdb")
+        user : str
+            username (default is None)
+        password : str
+            password (default is None)
+        """
+        
+        self.conn = psycopg2.connect(host = host, database = database, user = user, password = password)
+        self.cur = self.conn.cursor()
+        logging.info("PostgreSQL Hander created")
+        
+    @property
+    def list_tables(self) -> List[str]:
+        """Returns list of existed tables in the compton measurements slowDB
+        
+        Returns
+        -------
+        List[str]
+            list of tables
+        """
+        
+        logging.info("Get list of the slowdb tables")
+        self.cur.execute("""
+        SELECT table_name FROM information_schema.tables
+        WHERE table_schema = 'public'
+        """)
+        return list(map(lambda x: x[0], self.cur.fetchall()))
+    
+    def table_columns(self, table: str) -> List[str]:
+        """Returns list of the fields of the table
+        
+        Parameters
+        ----------
+        table : str
+            table from slowdb compton measurements
+        
+        Returns
+        -------
+        List[str]
+            list of the fields
+        """
+        
+        self.cur.execute("""
+        SELECT column_name FROM information_schema.columns
+        WHERE table_schema = 'public' AND table_name = %s
+        """, (table,))
+        return list(map(lambda x: x[0], self.cur.fetchall()))
+
+class SlowdbComptonHandler(PostgreSQLHandler):
+    """A class for processing and filtering of compton measurements from slowdb
+    """
+    
+    def __is_overlapped_row(self, start_time_next: datetime, stop_time_prev: datetime):
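+        # a row counts as overlapping only if its start precedes the previous stop
+        # by more than a 2-second tolerance; smaller overlaps are logged but kept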
+        gap = timedelta(seconds=2)
+        if start_time_next < stop_time_prev:
+            logging.debug(f'time gap {abs(start_time_next - stop_time_prev)}')
+        return start_time_next < stop_time_prev - gap
+    
+    def __drop_overlapping_rows_list(self, table: list) -> list:
+        """Removes rows with overlapping time intervals from the table
+        
+        Parameters
+        ----------
+        table : list
+            the table MUST BE ORDERED BY TIME, with column 5 (0-based) holding start_time
+            and column 6 holding stop_time
+            
+        Returns
+        -------
+        list
+            the table with overlapping rows removed
+        """
+        
+        if len(table) == 0:
+            logging.info("Empty list. No overlapping rows")
+            return table
+        
+        logging.info("Drop overlapping rows in list representation")
+        table = table[::-1] # reverse so we iterate from the latest row back to the earliest
+        min_time = table[0][6]
+        overlapped_idxs = list()
+        
+        for idx, row in enumerate(table):
+            start_time, stop_time = row[5], row[6]
+            if self.__is_overlapped_row(min_time, stop_time):
+                overlapped_idxs.append(idx)
+            else:
+                min_time = start_time
+        
+        for index in sorted(overlapped_idxs, reverse=True): # pop from the end so earlier indices stay valid
+            table.pop(index)
+        
+        return table[::-1]
+    
+    def load_tables(self, tables: List[str], daterange: Optional[datetime] = None):
+        """Returns tables containing compton energy measurements
+        
+        Parameters
+        ----------
+        tables : List[str]
+            names of tables in the slowdb compton measurements database 
+            (the full list of available tables is given by the list_tables property)
+        daterange : Optional[datetime]
+            minimum time for selection (UTC)
+        
+        Returns
+        -------
+        list
+            table containing compton energy measurements with fields:
+            write_time - time when the row was written (UTC)
+            mean_energy - compton mean of the energy measurement [MeV]
+            std_energy - compton std of the energy measurement [MeV]
+            mean_spread - compton mean of the spread measurement [MeV]
+            std_spread - compton std of the spread measurement [MeV]
+            start_time - beginning time of the compton measurement (UTC)
+            end_time - end time of the compton measurement (UTC)
+        """
+        
+        time_condition = f"AND time>(%(date)s AT TIME ZONE 'UTC')" if daterange is not None else ""
+        
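+        # values_array indices used below: [1]/[2] = energy mean/std [MeV],
+        # [5]/[6] = spread mean/std (divided by 1000 to get MeV), [7] = measurement
+        # duration [s], [8] = offset of the measurement start from the write time [s]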
+        sql_query = lambda table: f"""SELECT 
+            time AT TIME ZONE 'UTC' AS time, 
+            CAST(values_array[1] AS numeric) AS mean_energy, 
+            CAST(values_array[2] AS numeric) AS std_energy, 
+            ROUND(CAST(values_array[5]/1000 AS numeric), 6) AS mean_spread,
+            ROUND(CAST(values_array[6]/1000 AS numeric), 6) AS std_spread, 
+            date_trunc('second', time AT TIME ZONE 'UTC' + (values_array[8] * interval '1 second')) AS start_time, 
+            date_trunc('second', time AT TIME ZONE 'UTC' + 
+            (values_array[8] * interval '1 second') + (values_array[7] * interval '1 second')) AS stop_time
+            FROM {table} WHERE g_id=43 AND dt>0 {time_condition}"""
+        
+        full_sql_query = '\nUNION ALL\n'.join([sql_query(table) for table in tables]) + '\nORDER BY time;'
+        
+        logging.debug(f"Full sql query {full_sql_query}")
+        
+        self.cur.execute(full_sql_query, {'date': daterange})
+        table = self.cur.fetchall()
+        table = self.__drop_overlapping_rows_list(table)
+        return table
+    
+class CalibrdbHandler(PostgreSQLHandler):
+    """A class for processing of calibration database
+    """
+    
+    def select_table(self, system: str, algo: str, name: str, version: str = 'Default', verbose: bool = True) -> int:
+        """Selects the table from database
+        
+        Parameters
+        ----------
+        system : str
+            name of the system
+        algo : str
+            name of the algorithm
+        name : str
+            name of the calibration
+        version : str
+            name of the calibration version (default is Default)
+        verbose : bool
+            print additional information or not (default is True; currently unused)
+            
+        Returns
+        -------
+        sid : int
+            value corresponding the table
+        """
+        
+        self.cur.execute(f"""SELECT * FROM clbrset 
+        WHERE system='{system}' AND algo='{algo}' AND name='{name}' AND version='{version}'""")
+        result = self.cur.fetchall()
+        logging.debug(f"selected clbrset: {result}")
+        if len(result) > 1:
+            logging.warning('Multiple equal calibration sets. clbrset DB problem')
+        sid = result[0][0]
+        return sid
+    
+    def load_table(self, system: str, algo: str, name: str, version: str = 'Default', num_last_rows: Optional[int] = None, timerange: Optional[Tuple[datetime, datetime]] = None) -> Tuple[list, list]:
+        """Loads the calibration table
+        
+        Parameters
+        ----------
+        system : str
+            name of the system
+        algo : str
+            name of the algorithm
+        name : str
+            name of the calibration
+        version : str
+            name of the calibration version (default is Default)
+        num_last_rows : Optional[int]
+            the number of last rows of the table
+        timerange : Optional[Tuple[datetime, datetime]]
+            time range (UTC) condition on the selection of the table (default is None)
+        
+        Returns
+        -------
+        Tuple[list, list]
+            the calibration table and name of fields
+        """
+        
+        sid = self.select_table(system, algo, name, version)
+        time_condition = "AND begintime BETWEEN %s AND %s" if timerange is not None else ""
+        sql_query = f"""SELECT * FROM clbrdata WHERE sid={sid} {time_condition} ORDER BY time DESC """
+        if num_last_rows is not None:
+            sql_query += f"LIMIT {num_last_rows}"
+            
+        if timerange is None:
+            self.cur.execute(sql_query)
+        else:
+            self.cur.execute(sql_query, timerange)
+        fields_name = [i[0] for i in self.cur.description]
+        table = self.cur.fetchall()
+        return table, fields_name
+        
+    def update(self, new_rows: list, system: str = "Misc", algo: str = "RunHeader", name: str = "Compton_run", version: str = 'Default', handle_last_time_row: bool = True):
+        if len(new_rows) == 0:
+            return
+        
+        sid = self.select_table(system, algo, name, version)
+        
+        new_rows = list(map(lambda x: (sid, 'lxeuser', x[0], x[5], x[6], [x[1], x[2], x[3], x[4]]), new_rows))
+        
+        if handle_last_time_row:
+            last_written_row, _ = self.load_table(system, algo, name, version, num_last_rows = 1)
+            if len(last_written_row) > 0:
+                if last_written_row[0][5] > new_rows[0][3]:
+                    logging.info('Removing of overlapping written row')
+                    self.delete_row(sid = last_written_row[0][1], createdby = last_written_row[0][2], time = last_written_row[0][3])
+                
+        insert_query = """INSERT INTO clbrdata (sid, createdby, time, begintime, endtime, data) VALUES %s;"""
+        execute_values(self.cur, insert_query, new_rows, fetch=False)
+        logging.info(f"Inserted {len(new_rows)} new rows")
+        return
+        
+    def delete_row(self, sid: int, createdby: str, time: datetime):
+        delete_query = f"""DELETE FROM clbrdata 
+        WHERE sid = %s AND createdby = %s AND time = %s
+        """
+        self.cur.execute(delete_query, (sid, createdby, time))
+        logging.info(f"Deleted ({sid}, {createdby}, {time}) row")
+        return
+    
+    def remove_duplicates(self, system: str = "Misc", algo: str = "RunHeader", name: str = "Compton_run", version: str = 'Default'):
+        sid = self.select_table(system, algo, name, version)
+        remove_query = f"""
+            DELETE FROM clbrdata a
+            USING clbrdata b
+            WHERE
+                a.sid = {sid}
+                AND a.cid < b.cid
+                AND a.sid = b.sid
+                AND a.time = b.time                
+        """
+        self.cur.execute(remove_query)
+        return
+    
+    def commit(self):
+        logging.info("Changes commited")
+        self.conn.commit()
+        return
+
+    def rollback(self):
+        logging.info("Changes aborted")
+        self.conn.rollback()
+        return
+    
+    def __del__(self):
+        logging.info("del clbr class")
+        self.cur.close()
+        self.conn.close()
+    
+    
+def main():
+    log_format = '[%(asctime)s] %(levelname)s: %(message)s'
+    logging.basicConfig(filename="compton_filter.log", format=log_format, level=logging.INFO)
+    logging.info("Program started")
+    
+    parser = argparse.ArgumentParser(description = 'Filter compton energy measurements from slowdb')
+    parser.add_argument('--season', help = 'Name of compton measurement table from slowdb')
+    parser.add_argument('--config', help = 'Config file containing information for access to databases')
+    
+    args = parser.parse_args()
+    logging.info(f"Arguments: season: {args.season}, config {args.config}")
+
+    config = ConfigParser()
+    config.read(args.config)
+    logging.info("Config parsed")
+    
+    clbrdb = CalibrdbHandler(**config['clbrDB'])
+    last_written_row, _ = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = 1)
+    last_time = last_written_row[0][3] if len(last_written_row) > 0 else None
+    
+    compton_slowdb = SlowdbComptonHandler(**config['postgresql'])
+    res = compton_slowdb.load_tables([args.season], last_time)
+    
+    clbrdb.update(res)
+    clbrdb.commit()
+    del clbrdb
+    
+# python scripts/compton_filter.py --season cmd3_2021_2 --config database.ini
+if __name__ == "__main__":
+    main()
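
Both scripts pull their connection settings from one INI file passed via --config. A minimal database.ini sketch follows; the section names cmdruns, clbrDB and postgresql come from the code, while every host, database and credential value below is a placeholder, not the real deployment:

    [cmdruns]
    host = cmddb
    database = online
    user = ...
    password = ...

    [clbrDB]
    host = ...
    database = ...
    user = ...
    password = ...

    [postgresql]
    host = cmddb
    database = slowdb
    user = ...
    password = ...

Each section is splatted directly into the corresponding handler constructor (e.g. CalibrdbHandler(**config['clbrDB'])), so the keys must match the constructor keyword arguments exactly.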

+ 2 - 0
requirements.txt

@@ -0,0 +1,2 @@
+psycopg2==2.9.1
+mysql-connector-python==8.0.23
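+# compton_combiner.py additionally imports iminuit, matplotlib, numpy, pandas and tqdm,
+# which are not pinned here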