@@ -0,0 +1,374 @@
+"""Script to combine compton measurements with runs and process the combined data
+"""
+
+import argparse
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+import sqlite3
+from typing import Union, Tuple, Optional
+
+from compton_filter import CalibrdbHandler
+from iminuit import Minuit
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+from mysql.connector import connect
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+SEASONS = {
+    'name': ['HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2021'],
+    'start_run': [36872, 48938, 70014, 85224, 89973, 98116, 107342, None],
+}
+
+class RunsDBHandler():
+    def __init__(self, host: str = 'cmddb', database: str = 'online', user: Optional[str] = None, password: Optional[str] = None):
+        self.conn = connect(host = host, database = database, user = user, password = password)
+        self.cur = self.conn.cursor()
+        self.cur.execute("SET time_zone = 'UTC';")
+
+    @property
+    def fields(self) -> list:
+        """Returns a list of available columns in the RunsDB
+        """
+
+        self.cur.execute("""DESCRIBE Runlog""")
+        return self.cur.fetchall()
+
+    def load_tables(self, rng: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]):
+        """Loads the good runs from the RunsDB in the given range
+
+        Parameters
+        ----------
+        rng : Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]
+            (first run, last run) or (start time, stop time); a None upper bound means no upper limit
+        """
+
+        cond = ""
+        if isinstance(rng[0], int):
+            cond = f" AND run >= {rng[0]} "
+            if rng[1] is not None:
+                cond += f" AND run <= {rng[1]} "
+        elif isinstance(rng[0], datetime):
+            cond = " AND stoptime BETWEEN %s AND %s"
+
+        sql_query = f"""
+        SELECT
+            run,
+            starttime,
+            stoptime,
+            energy,
+            luminosity
+        FROM Runlog
+        WHERE
+            quality = "Y"
+            {cond}
+            AND luminosity > 0
+            AND stoptime > starttime
+            AND nevent > 0
+        ORDER BY run DESC"""
+
+        if isinstance(rng[0], datetime):
+            self.cur.execute(sql_query, rng)
+        else:
+            self.cur.execute(sql_query)
+
+        field_names = [i[0] for i in self.cur.description]
+        res = self.cur.fetchall()
+        return res, field_names
+
+    def __del__(self):
+        self.conn.close()
+
+class Combiner():
+    """Combines a dataframe with runs and a dataframe with compton measurements
+    """
+
+    def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
+        """
+        Parameters
+        ----------
+        runsdb : Tuple[list, list]
+            table of runs (rows and field names)
+        clbrdb : Tuple[list, list]
+            table of compton measurements (rows and field names)
+        """
+
+        rdb_rows, r_fld = runsdb
+        cdb_rows, c_fld = clbrdb
+
+        self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
+        self.cur = self.conn.cursor()
+        self.cur.execute("CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
+        self.cur.execute("CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
+
+        run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
+                                           x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
+                                           x[r_fld.index("luminosity")]), rdb_rows)
+        c_data_idx = c_fld.index("data")
+        compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
+                                               float(x[c_data_idx][0]), float(x[c_data_idx][1]),
+                                               float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
+
+        self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
+        self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
+
+        self.__create_combined_table()
+
+    def __create_combined_table(self):
+        # a run and a compton measurement are linked if their time intervals overlap
+        create_combined_query = """
+        CREATE TABLE combined_table AS
+        SELECT
+            runs.run AS run,
+            runs.elabel AS elabel,
+            runs.starttime AS "run_start [timestamp]",
+            runs.stoptime AS "run_stop [timestamp]",
+            compton.begintime AS "compton_start [timestamp]",
+            compton.endtime AS "compton_stop [timestamp]",
+            runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
+        FROM runs, compton
+        WHERE
+            (runs.starttime BETWEEN compton.begintime AND compton.endtime)
+            OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
+            OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
+            OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
+        """
+        self.cur.execute(create_combined_query)
+
+    def combined_table(self) -> pd.DataFrame:
+        """Returns the combined dataframe; if a run overlaps several measurements,
+        the measurement covering the largest fraction of the run is kept
+        """
+
+        sql_query = """
+        SELECT * FROM combined_table;
+        """
+        df = pd.read_sql(sql_query, self.conn)
+        df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
+        df['run_duration'] = df['run_stop'] - df['run_start']
+        df['run_in_measurement'] = df['common_duration']/df['run_duration']
+        df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
+        df = df.drop(['run_duration', 'common_duration', 'run_start', 'run_stop'], axis=1)
+        return df
+
+    def __del__(self):
+        self.conn.close()
+
+class Likelihood():
+    """
+    Negative log-likelihood for the luminosity-weighted energy measurements
+    """
+
+    def __init__(self, means: np.ndarray, sigmas: np.ndarray, weights: np.ndarray):
+        """
+        Parameters
+        ----------
+        means : np.ndarray
+            array of means, [MeV]
+        sigmas : np.ndarray
+            array of standard deviations, [MeV]
+        weights : np.ndarray
+            array of luminosities
+        """
+
+        self.means = means
+        self.sigmas = sigmas
+        self.weights = weights/weights.mean()
+
+    def __call__(self, mean: float, sigma: float):
+        """
+        Computes the negative log-likelihood (to be minimized with errordef = 0.5)
+
+        Parameters
+        ----------
+        mean : float
+            expected mean
+        sigma : float
+            expected standard deviation
+        """
+
+        sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
+        ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
+        return -ln_L
+
+def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame) -> Tuple[dict, pd.DataFrame]:
+    """Calculates the energy parameters (mean, std, spread) for one energy point
+
+    Parameters
+    ----------
+    comb_df : pd.DataFrame
+        table of the measurements linked with runs
+    runs_df : pd.DataFrame
+        table of the runs
+    compton_df : pd.DataFrame
+        table of the compton measurements
+
+    Returns
+    -------
+    Tuple[dict, pd.DataFrame]
+        average parameters of this energy point and the dataframe of good measurements used
+    """
+
+    if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
+        # no direct compton measurements during the data runs:
+        # estimate the energy from the nearest measurements before and after
+        min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
+        max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
+
+        nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
+        nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
+
+        # regularization of the uncertainties (data = [e_mean, e_std, spread_mean, spread_std])
+        nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
+        nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
+        nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
+        nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
+
+        mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
+        mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
+        std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
+        std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
+        sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
+
+        return {
+            'energy_point': comb_df.elabel.min(),
+            'first_run': comb_df.run_first.min(),
+            'last_run': comb_df.run_last.max(),
+            'mean_energy': mean_energy,
+            'mean_energy_stat_err': std_energy,
+            'mean_energy_sys_err': sys_energy,
+            'mean_spread': mean_spread,
+            'mean_spread_stat_err': std_spread,
+            'used_lum': 0,
+            'comment': 'indirect measurement',
+        }, pd.DataFrame()
+
+    df = comb_df.copy()
+    df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
+
+    df = df[df.e_std > 0]
+    # luminosity-weighted mean energy: sum(E_i * L_i / sigma_i^2) / sum(L_i / sigma_i^2)
+    mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
+
+    # drop measurements more than 5 combined standard deviations away from the mean
+    good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) < 5
+    df = df[good_criterion]
+
+    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std())
+    m.errordef = 0.5
+    m.limits['sigma'] = (0, None)
+    m.migrad()
+    sys_err = m.values['sigma']
+    mean_en = m.values['mean']
+
+    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
+    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
+
+    res_dict = {
+        'energy_point': comb_df.elabel.min(),
+        'first_run': comb_df.run_first.min(),
+        'last_run': comb_df.run_last.max(),
+        'mean_energy': mean_en,
+        'mean_energy_stat_err': m.errors['mean'],
+        'mean_energy_sys_err': sys_err,
+        'mean_spread': mean_spread,
+        'mean_spread_stat_err': std_spread,
+        'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(),
+        'comment': '',
+    }
+    return res_dict, df
+
+def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None) -> pd.DataFrame:
+    """Averages the measurements over energy points and, optionally, saves a plot per point
+
+    Parameters
+    ----------
+    combined_df : pd.DataFrame
+        table of the measurements linked with runs
+    runs_df : pd.DataFrame
+        table of the runs
+    compton_df : pd.DataFrame
+        table of the compton measurements
+    pics_folder : Optional[str]
+        folder to save the per-point plots into (no plots if None)
+
+    Returns
+    -------
+    pd.DataFrame
+        table of averaged parameters, one row per energy point
+    """
+
+    if pics_folder is not None:
+        plt.ioff()
+        plt.style.use('ggplot')
+        locator = mdates.AutoDateLocator(minticks=5)
+        formatter = mdates.ConciseDateFormatter(locator)
+        formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
+        formatter.zero_formats = [''] + formatter.formats[:-1]
+        formatter.zero_formats[3] = '%d-%b'
+        formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
+
+    runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
+    combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
+    combined_df = combined_df.sort_values(by='run')
+    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
+
+    # a new energy point starts whenever the run energy label changes
+    combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
+    combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
+        elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
+        run_first=('run', 'min'), run_last=('run', 'max'),
+        luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
+        compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
+        e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
+        e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
+        spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
+        spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
+    ).reset_index().set_index('point_idx')
+
+    rows = []
+    for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
+        res_dict, good_df = calculate_point(table, runs_df, compton_df)
+        rows.append(res_dict)
+
+        if pics_folder is not None:
+            plt_table = good_df.dropna()
+            if len(plt_table) == 0:
+                continue
+
+            total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
+            half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
+            time = plt_table.compton_start + half_timedelta
+
+            fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
+            ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
+            ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
+            ax.fill_between([plt_table.compton_stop.min(), plt_table.compton_stop.max()],
+                            [res_dict['mean_energy'] - total_error]*2,
+                            [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
+            ax.tick_params(axis='x', labelrotation=45)
+            ax.xaxis.set_major_locator(locator)
+            ax.xaxis.set_major_formatter(formatter)
+            ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV', xlabel='Time, UTC', ylabel='Energy, [MeV]',
+                   xlim=[plt_table.compton_stop.min(), plt_table.compton_stop.max()])
+            plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png')
+            plt.close()
+
+    result_df = pd.DataFrame(rows, columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
+    return result_df
+
+# python scripts/compton_combiner.py --season NNBAR2021 --config database.ini
+def main():
+    parser = argparse.ArgumentParser(description = 'Averages compton energy measurements from the clbrdb')
+    parser.add_argument('--season', help = 'Name of the season')
+    parser.add_argument('--config', help = 'Config file containing information for access to databases')
+
+    args = parser.parse_args()
+
+    config = ConfigParser()
+    config.read(args.config)
+
+    rdb = RunsDBHandler(**config['cmdruns'])
+
+    idx = SEASONS['name'].index(args.season)
+    runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
+    res_rdb = rdb.load_tables(runs_range)
+    runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
+
+    # pad the time range so that compton measurements just outside the runs are also picked up
+    tdlt0 = timedelta(days=2)
+    time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
+    clbrdb = CalibrdbHandler(**config['clbrDB'])
+    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
+
+    cb = Combiner(res_rdb, res_clbrdb)
+    comb_df = cb.combined_table()
+
+    compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
+
+    cdf = process_combined(comb_df, runs_df, compton_df, './pics')
+    cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
+
+if __name__ == "__main__":
+    main()