123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- """Script to combine compton measurements with runs and process these data
- """
- import argparse
- from configparser import ConfigParser
- from datetime import datetime, timedelta
- import sqlite3
- from typing import Union, Tuple, Optional
- from compton_filter import CalibrdbHandler
- from iminuit import Minuit
- import matplotlib.dates as mdates
- import matplotlib.pyplot as plt
- from mysql.connector import connect, Error
- import numpy as np
- import pandas as pd
- from tqdm import tqdm
- SEASONS = {
- 'name': ['HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2021'],
- 'start_run': [36872, 48938, 70014, 85224, 89973, 98116, 107342, None],
- }
- class RunsDBHandler():
- def __init__(self, host: str = 'cmddb', database: str = 'online', user: str = None, password: str = None):
- self.conn = connect(host = host, database = database, user = user, password = password)
- self.cur = self.conn.cursor()
- self.cur.execute("SET time_zone = 'UTC';")
-
- @property
- def fields(self) -> list:
- """Returns a list of available columns in the RunsDB
- """
-
- self.cur.execute("""DESCRIBE Runlog""")
- return self.cur.fetchall()
-
- def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
-
- cond = ""
- if isinstance(range[0], int):
- cond = f" AND run >= {range[0]} "
- if range[1] is not None:
- cond += f" AND run <= {range[1]} "
- elif isinstance(range[0], datetime):
- cond = f" AND stoptime BETWEEN %s AND %s"
-
- sql_query = f"""
- SELECT
- run,
- starttime,
- stoptime,
- energy,
- luminosity
- FROM Runlog
- WHERE
- quality = "Y"
- {cond}
- AND luminosity > 0
- AND stoptime > starttime
- AND nevent > 0
- ORDER BY run DESC"""
-
- if isinstance(range[0], datetime):
- self.cur.execute(sql_query, range)
- else:
- self.cur.execute(sql_query)
-
- field_names = [i[0] for i in self.cur.description]
- res = self.cur.fetchall()
- return res, field_names
-
- def __del__(self):
- self.conn.close()
-
- class Combiner():
- """Combines a dataframe with runs and a dataframe with compton measurements together
- """
-
- def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
- """
- Parameters
- ----------
- runsdb : Tuple[list, list]
- table of runs (rows and field names)
- clbrdb : Tuple[list, list]
- table of compton measurements (rows and field names)
- """
-
- rdb_rows, r_fld = runsdb
- cdb_rows, c_fld = clbrdb
-
- self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
- self.cur = self.conn.cursor()
- self.cur.execute(f"CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
- self.cur.execute(f"CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
-
- run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
- x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
- x[r_fld.index("luminosity")]), rdb_rows)
- c_data_idx = c_fld.index("data")
- compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
- float(x[c_data_idx][0]), float(x[c_data_idx][1]),
- float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
-
- self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
- self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
-
- self.__create_combined_table()
-
- def __create_combined_table(self):
- create_combined_query = """
- CREATE TABLE combined_table AS
- SELECT
- runs.run AS run,
- runs.elabel AS elabel,
- runs.starttime as "run_start [timestamp]",
- runs.stoptime AS "run_stop [timestamp]",
- compton.begintime AS "compton_start [timestamp]",
- compton.endtime AS "compton_stop [timestamp]",
- runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
- FROM runs, compton
- WHERE
- (runs.starttime BETWEEN compton.begintime AND compton.endtime)
- OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
- OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
- OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
- """
- self.cur.execute(create_combined_query)
- return
-
- def combined_table(self) -> pd.DataFrame:
- """Returns combined dataframe
- """
-
- sql_query = """
- SELECT * FROM combined_table;
- """
- df = pd.read_sql(sql_query, self.conn)
- df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
- df['run_duration'] = df['run_stop'] - df['run_start']
- df['run_in_measurement'] = df['common_duration']/df['run_duration']
- df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
- df = df.drop(['run_duration', 'common_duration', 'run_start', 'run_stop'], axis=1)
- return df
-
- def __del__(self):
- self.conn.close()
-
- class Likelihood():
- """
- Likelihood function
- """
-
- def __init__(self, means: np.array, sigmas: np.array, weights: np.array):
- """
- Parameters
- ----------
- means : np.array
- array of means, [MeV]
- sigmas : np.array
- array of standard deviations, [MeV]
- weights : np.array
- array of luminosities
- """
-
- self.means = means
- self.sigmas = sigmas
- self.weights = weights/weights.mean()
-
- def __call__(self, mean: float, sigma: float):
- """
- Calls likelihood calculation
-
- Parameters
- ----------
- mean : float
- expected mean
- sigma : float
- expected standard deviation
- """
-
- sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
- ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
- return -ln_L
-
- def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> dict:
- """Calculates parameters of the energy (mean, std, spread) in this dataFrame
-
- Parameters
- ----------
- comb_df : pd.DataFrame
- table of the measurements linked with runs
- runs_df : pd.DataFrame
- table of the runs
- compton_df : pd.DataFrame
- table of the comptons
-
- Returns
- -------
- dict
- average parameters on this DataFrame
- """
-
- if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
- # no direct measurements of the compton during data runs
- # estimate energy by the nearest points
- # print('Hello')
- min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
- max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
-
- nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, 'nearest')]
- nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, 'nearest')]
-
- # regulatization
- nearest_row_before['data'][1] = max(nearest_row_before['data'][3], 1e-3)
- nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
- nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
- nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
-
- mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
- mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
- std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
- std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
- sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
-
-
- return {
- 'energy_point': comb_df.elabel.min(),
- 'first_run': comb_df.run_first.min(),
- 'last_run': comb_df.run_last.max(),
- 'mean_energy': mean_energy,
- 'mean_energy_stat_err': std_energy,
- 'mean_energy_sys_err': sys_energy,
- 'mean_spread': mean_spread,
- 'mean_spread_stat_err': std_spread,
- 'used_lum': 0,
- 'comment': 'indirect measurement',
- }, pd.DataFrame()
-
-
-
- df = comb_df.copy()
- df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
-
- df = df[df.e_std > 0]
- mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
- # std_energy = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.e_std**2))
-
- good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) < 5
- good_criterion = good_criterion
- df = df[good_criterion]
-
- m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std())
- m.errordef = 0.5
- m.limits['sigma'] = (0, None)
- m.migrad();
- sys_err = m.values['sigma']
- mean_en = m.values['mean']
-
- mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
- std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
-
- res_dict = {
- 'energy_point': comb_df.elabel.min(),
- 'first_run': comb_df.run_first.min(),
- 'last_run': comb_df.run_last.max(),
- 'mean_energy': mean_en,
- 'mean_energy_stat_err': m.errors['mean'],
- 'mean_energy_sys_err': sys_err,
- 'mean_spread': mean_spread,
- 'mean_spread_stat_err': std_spread,
- 'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(),
- 'comment': '',
- }
- return res_dict, df
- def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
-
- if pics_folder is not None:
- plt.ioff()
- plt.style.use('ggplot')
- locator = mdates.AutoDateLocator(minticks=5)
- formatter = mdates.ConciseDateFormatter(locator)
- formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
- formatter.zero_formats = [''] + formatter.formats[:-1]
- formatter.zero_formats[3] = '%d-%b'
- formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
-
- runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
- combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
- combined_df = combined_df.sort_values(by='run')
- combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
-
- combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
- combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
- elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
- run_first=('run', 'min'), run_last=('run', 'max'),
- luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
- compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
- e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
- e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
- spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
- spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
- ).reset_index().set_index('point_idx')
- # return combined_df
-
- result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
-
- for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
- res_dict, good_df = calculate_point(table, runs_df, compton_df)
- result_df = result_df.append(res_dict, ignore_index=True)
-
- if pics_folder is not None:
- plt_table = good_df.dropna()
- if len(plt_table) == 0:
- continue
-
- total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
- half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
- time = plt_table.compton_start + half_timedelta
- dlt0 = timedelta(days=1)
-
- fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
- ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
- ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
- ax.fill_between([plt_table.compton_stop.min(), plt_table.compton_stop.max()],
- [res_dict['mean_energy'] - total_error]*2,
- [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
- ax.tick_params(axis='x', labelrotation=45)
- ax.xaxis.set_major_locator(locator)
- ax.xaxis.set_major_formatter(formatter)
- ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV', xlabel='Time, UTC', ylabel='Energy, [MeV]',
- xlim=[plt_table.compton_stop.min(), plt_table.compton_stop.max()])
- plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png')
- plt.close()
-
- return result_df
-
- # python scripts/compton_combiner.py --season NNBAR2021 --config database.ini
- def main():
- parser = argparse.ArgumentParser(description = 'Mean compton energy measurements from clbrdb')
- parser.add_argument('--season', help = 'Name of the season')
- parser.add_argument('--config', help = 'Config file containing information for access to databases')
-
- args = parser.parse_args()
- # logging.info(f"Arguments: season: {args.season}, config {args.config}")
- parser = ConfigParser()
- parser.read(args.config);
-
- rdb = RunsDBHandler(**parser['cmdruns'])
-
- idx = SEASONS['name'].index(args.season)
- runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
- res_rdb = rdb.load_tables(runs_range)
- runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
-
- tdlt0 = timedelta(days=2)
- time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
- clbrdb = CalibrdbHandler(**parser['clbrDB'])
- res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
-
- cb = Combiner(res_rdb, res_clbrdb)
- comb_df = cb.combined_table()
-
- compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
-
- cdf = process_combined(comb_df, runs_df, compton_df, './pics')
- cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
- return
- if __name__ == "__main__":
- main()
|