
Update source files and add checker file

compton 1 year ago
Parent
Commit
60739a73d2
4 changed files with 111 additions and 19 deletions
  1. compton_checker.py (+95 −0)
  2. compton_combiner.py (+7 −2)
  3. compton_filter.py (+1 −1)
  4. requirements.txt (+8 −16)

+ 95 - 0
compton_checker.py

@@ -0,0 +1,95 @@
+"""There is a set of checkers for handling of correctness of the calibration database"""
+
+import logging
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from compton_combiner import SEASONS
+from compton_combiner import RunsDBHandler, Combiner
+from compton_filter import SlowdbComptonHandler, CalibrdbHandler
+
+def check_season(season, parser):
+    """Check correctness of the average calibrations.
+    This function tests that all the points of the season are filled in the calibration database and 
+    this database has not duplicates.
+    
+    Parameters
+    ----------
+    season : str
+        the season to check (a name from `compton_combiner.SEASONS`)
+    parser : ConfigParser
+        parsed config file with the database credentials
+    """
+    
+    idx = SEASONS['name'].index(season)
+    start_run, stop_run = SEASONS['start_run'][idx], SEASONS['start_run'][idx + 1]
+    logging.info(f'Season {season}, runs from {start_run} to {stop_run}')
+    
+    rdb = RunsDBHandler(**parser['cmdruns'])
+    rdb_df = rdb.load_tables((start_run, stop_run))
+    trng = (rdb_df[0][-1][1], rdb_df[0][0][2])
+    logging.info(f'Runs table has been loaded. \n\
+First row is {rdb_df[0][-1][0]} run (at {rdb_df[0][-1][1].strftime("%Y-%m-%d")}), \
+last row is {rdb_df[0][0][0]} run (at {rdb_df[0][0][1].strftime("%Y-%m-%d")}). \n\
+Total rows {len(rdb_df[0])}, unique energies {sum(map(lambda x: x[0][3]!=x[1][3], zip(rdb_df[0][1:], rdb_df[0][:-1]))) + 1}' )
+    
+    rdb_df = pd.DataFrame(rdb_df[0], columns=rdb_df[1]).sort_values(by='run')
+    rdb_df['eprev'] = rdb_df.energy.shift(1).fillna(-1)
+    rdb_df['enext'] = rdb_df.energy.shift(-1).fillna(-1)
+    
+    # start_runs_df = rdb_df.drop_duplicates(subset=['energy'], keep='first')
+    # end_runs_df = rdb_df.drop_duplicates(subset=['energy'], keep='last')
+    
+    good_mask = np.invert(np.isclose(rdb_df['energy'], rdb_df['eprev']) & np.isclose(rdb_df['energy'], rdb_df['enext']))
+    rdb_df = rdb_df.loc[good_mask].drop(['eprev', 'enext'], axis=1)
+    rdb_df = pd.concat([rdb_df.iloc[::2].reset_index(drop=True).rename({'run': 'firstrun'}, axis=1)[['energy', 'firstrun']], 
+           rdb_df.iloc[1::2].reset_index(drop=True).rename({'run': 'lastrun'}, axis=1)[['lastrun']]], axis=1).assign(goods=1)
+    
+    clbrdb = CalibrdbHandler(**parser['clbrDB'])
+    # trng = (rdb_df.starttime.min(), rdb_df.stoptime.max())
+    logging.info(f'Time range from {trng[0]} to {trng[1]}')
+    # print(trng)
+    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows = None, timerange = trng)
+    clbr_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1]).drop(['cid', 'sid', 'createdby', 'parameters'], axis=1).sort_values(by='time')
+    clbr_df = clbr_df.apply(
+        lambda x: pd.Series([x['comment'], x['time'], x['data'][0], x['data'][1], x['data'][2], round(x['data'][3], 3)], 
+                            index=['pname', 'time', 'energy', 'firstrun', 'lastrun', 'emeas']), 
+        axis=1).reset_index(drop=True)
+    
+    concat_df = pd.merge(clbr_df, rdb_df, on=['firstrun', 'lastrun'], how='outer')
+    concat_df.loc[concat_df.isna().any(axis=1), 'goods'] = 0  # rows present in only one of the two tables
+    concat_df.goods = concat_df.goods.astype(int)
+    
+    nbads = sum(concat_df.goods==0)
+    if nbads > 0:
+        logging.warning(f'{nbads} bad rows found')
+    else:
+        logging.info('No bad rows found')
+    
+    
+    return concat_df
+
+if __name__=="__main__":
+    import argparse
+    import sys
+    from configparser import ConfigParser
+    
+    log_format = '[%(asctime)s] %(levelname)s: %(message)s'
+    logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO)  # or log to a file: filename='compton_combiner.log'
+    logging.info("compton_checker is started")
+    
+    parser = argparse.ArgumentParser(description='Checker of the energy measurement calibrations')
+    parser.add_argument('-s', '--season', help='Name of the season')
+    parser.add_argument('-c', '--config', help='Config file with the credentials for database access')
+    
+    args = parser.parse_args()
+    logging.info(f"""Arguments: season {args.season}, config {args.config}""")
+    
+    parser = ConfigParser()
+    parser.read(args.config)
+    
+    res_df = check_season(args.season, parser)
+    
+    with pd.option_context('display.max_rows', None):
+        print(res_df)
+    

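Note on the checker: the shift/isclose mask plus even/odd slicing above reduces the runs table to one (energy, firstrun, lastrun) row per energy point. A minimal sketch of the trick with toy data, assuming (as the even/odd pairing requires) that every energy point spans at least two runs:

    import numpy as np
    import pandas as pd

    # Toy runs table: three runs at 500 MeV, then two runs at 510 MeV.
    df = pd.DataFrame({'run': [1, 2, 3, 4, 5],
                       'energy': [500.0, 500.0, 500.0, 510.0, 510.0]})

    # Keep only rows whose energy differs from a neighbour: these are the
    # first and last runs of each constant-energy interval.
    df['eprev'] = df.energy.shift(1).fillna(-1)
    df['enext'] = df.energy.shift(-1).fillna(-1)
    edge_mask = ~(np.isclose(df.energy, df.eprev) & np.isclose(df.energy, df.enext))
    edges = df.loc[edge_mask, ['run', 'energy']].reset_index(drop=True)

    # Edge rows alternate first, last, first, last, ...: pair even and odd rows.
    intervals = pd.concat(
        [edges.iloc[::2].reset_index(drop=True).rename(columns={'run': 'firstrun'}),
         edges.iloc[1::2].reset_index(drop=True).rename(columns={'run': 'lastrun'})[['lastrun']]],
        axis=1)
    print(intervals)  # 500 MeV: runs 1-3; 510 MeV: runs 4-5

The outer merge then flags with goods=0 any interval present in only one of the two databases. The script itself is run as python compton_checker.py -s <season> -c <config>, where the config must provide the [cmdruns] and [clbrDB] sections used above.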
+ 7 - 2
compton_combiner.py

@@ -482,7 +482,8 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
         except Exception:
             continue
             
-        result_df = result_df.append(res_dict, ignore_index=True)
+        # result_df = result_df.append(res_dict, ignore_index=True)
+        result_df = pd.concat([result_df, pd.Series(res_dict).to_frame().T], ignore_index=True)
         
         if pics_folder is not None:
             plt_table = good_df.dropna()
@@ -509,6 +510,9 @@ def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_d
             plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png', transparent=True)
             plt.close()
     
+    types_dict = {ftype : float for ftype in ['energy_point', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum']}
+    types_dict.update({itype : int for itype in ['first_run', 'last_run']})
+    result_df = result_df.astype(types_dict)
     return result_df
 
 def final_table_to_clbrdb(df: pd.DataFrame, clbrdb: CalibrdbHandler, runs_df: pd.DataFrame, season: str):
@@ -533,7 +537,8 @@ def save_csv(df: pd.DataFrame, filepath: str, update_current: bool = True):
     
     if (os.path.isfile(filepath) and update_current):
         df_current = pd.read_csv(filepath)
-        df_current = df_current.append(df, ignore_index=True)
+        # df_current = df_current.append(df, ignore_index=True)
+        df_current = pd.concat([df, df_current], ignore_index=True)
         df_current = df_current.drop_duplicates(subset=['energy_point', 'first_run'], keep='last')
         df = df_current
     
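Context for the two replacements in this file: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so row appends go through pd.concat instead. A minimal sketch of the migration pattern, with toy values:

    import pandas as pd

    result_df = pd.DataFrame(columns=['first_run', 'energy_point'])
    res_dict = {'first_run': 1, 'energy_point': 500.0}  # toy row

    # Old, removed in pandas 2.0:
    #   result_df = result_df.append(res_dict, ignore_index=True)
    # New: wrap the dict in a one-row frame and concatenate.
    row = pd.Series(res_dict).to_frame().T  # mixed-type dict -> object dtype
    result_df = pd.concat([result_df, row], ignore_index=True)

    # Restore the numeric dtypes lost in the dict -> frame round trip,
    # which is what the added astype(types_dict) call does above.
    result_df = result_df.astype({'first_run': int, 'energy_point': float})

In save_csv, note that the operand order of pd.concat decides which duplicate survives: with keep='last' in the subsequent drop_duplicates, rows of the second operand win on conflict.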

+ 1 - 1
compton_filter.py

@@ -309,7 +309,7 @@ class CalibrdbHandler(PostgreSQLHandler):
                 a.sid = {sid}
                 AND a.cid < b.cid
                 AND a.sid = b.sid
-                AND a.begintime = b.begintime
+                AND (a.begintime = b.begintime OR a.endtime = b.endtime)
         """
         #AND a.comment = b.comment
         self.cur.execute(drop_query)
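The widened condition above makes the cleanup catch calibration rows duplicated on either end of their validity interval, not only on begintime. A self-contained toy illustration of the underlying PostgreSQL self-join delete (the table name and connection parameters here are illustrative assumptions, not the project's schema):

    import psycopg2

    # Among rows that share a sid and a begintime or an endtime, only the
    # row with the largest cid survives: a.cid < b.cid deletes every row
    # that has a newer twin.
    conn = psycopg2.connect(dbname='testdb')  # assumed connection parameters
    cur = conn.cursor()
    cur.execute("""
        DELETE FROM calib a
        USING calib b
        WHERE a.cid < b.cid
          AND a.sid = b.sid
          AND (a.begintime = b.begintime OR a.endtime = b.endtime)
    """)
    conn.commit()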

+ 8 - 16
requirements.txt

@@ -1,16 +1,8 @@
-cycler==0.11.0
-iminuit==2.9.0
-kiwisolver==1.3.1
-matplotlib==3.3.4
-mysql-connector-python==8.0.27
-numpy==1.19.5
-pandas==1.1.5
-Pillow==8.4.0
-protobuf==3.19.3
-psycopg2==2.7.7
-psycopg2-binary==2.9.3
-pyparsing==3.0.6
-python-dateutil==2.8.2
-pytz==2021.3
-six==1.16.0
-tqdm==4.62.3
+psycopg2==2.9.1
+
+matplotlib
+numpy
+pandas==1.3.3
+mysql-connector-python==8.0.23
+tqdm
+iminuit