- """Script to combine compton measurements with runs and process these data
- """
- import argparse
- from configparser import ConfigParser
- from datetime import datetime, timedelta
- import logging
- import os
- import sys
- import sqlite3
- from typing import Union, Tuple, Optional
- from compton_filter import CalibrdbHandler
- from iminuit import Minuit
- import matplotlib.dates as mdates
- import matplotlib.pyplot as plt
- from mysql.connector import connect, Error
- import numpy as np
- import pandas as pd
- from tqdm import tqdm
- SEASONS = {
- 'name': ['PHIOMEGA2012', 'RHO2013', 'BRK2013/16', 'HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2022', 'HIGH2023', 'PHI2024'],
- 'start_run': [17405, 18809, 32076, 36872, 48938, 70014, 85224, 89973, 98116, 107342, 131913, 163012, None],
- }
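- # NB: 'start_run' has one more entry than 'name': the trailing None marks the
- # open end of the last season, so SEASONS['start_run'][idx+1] is always valid.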
- MANUAL_RUNS_SPLIT = [0, 150159, np.inf] # run-number boundaries used to manually split points that share the same energy
- class RunsDBHandler:
- def __init__(self, host: str = 'cmddb', database: str = 'online', user: Optional[str] = None, password: Optional[str] = None):
- self.conn = connect(host = host, database = database, user = user, password = password)
- self.cur = self.conn.cursor()
- self.cur.execute("SET time_zone = '+07:00';")
-
- @property
- def fields(self) -> list:
- """Returns a list of available columns in the RunsDB
- """
-
- self.cur.execute("""DESCRIBE Runlog""")
- return self.cur.fetchall()
-
- def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False) -> Tuple[list, list]:
- """
- Returns a slice of the table with the following fields: run, starttime, stoptime, energy, luminosity
-
- Parameters
- ----------
- range : Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]
- selection range
- int range defines an interval in runs (first included, last excluded)
- datetime range defines a time interval (NSK: +7:00 time)
- energy_point : Optional[float]
- energy point value, MeV (default is None, i.e. no selection on energy)
- select_bad_runs : bool
- select only runs with quality labels other than "Y" (default is False)
- """
-
- cond = ""
- if isinstance(range[0], int):
- cond = f" AND run >= {range[0]} "
- if range[1] is not None:
- cond += f" AND run < {range[1]} "
- elif isinstance(range[0], datetime):
- cond = f" AND starttime >= %s "
- if range[1] is not None:
- cond += " AND stoptime <= %s"
- else:
- range = (range[0], )
-
- energy_cond = ""
- if energy_point is not None:
- energy_cond = f" AND energy = {energy_point}"
-
- quality_cond = ' quality = "Y" '
- if select_bad_runs:
- quality_cond = ' quality <> "Y" '
-
- sql_query = f"""
- SELECT
- run,
- starttime,
- stoptime,
- energy,
- luminosity
- FROM Runlog
- WHERE
- {quality_cond}
- {cond}
- {energy_cond}
- AND luminosity > 0
- AND stoptime > starttime
- AND nevent > 0
- ORDER BY run DESC"""
-
- if isinstance(range[0], datetime):
- self.cur.execute(sql_query, range)
- else:
- self.cur.execute(sql_query)
-
- field_names = [i[0] for i in self.cur.description]
- res = self.cur.fetchall()
- return res, field_names
-
- def __del__(self):
- self.conn.close()
-
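- # A usage sketch for RunsDBHandler (never called; the credentials and the
- # run/time ranges below are hypothetical placeholders):
- def _demo_runsdb_usage():
-     rdb = RunsDBHandler(host='cmddb', database='online', user='reader', password='secret')
-     rows, fields = rdb.load_tables((48938, 70014))  # run-number range: first included, last excluded
-     rows, fields = rdb.load_tables((datetime(2019, 3, 1), None))  # open-ended time range (NSK time, +07:00)
-     return rows, fields
-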
- class Combiner:
- """Combines a dataframe with runs and a dataframe with Compton measurements
- """
-
- def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
- """
- Parameters
- ----------
- runsdb : Tuple[list, list]
- table of runs (rows and field names)
- clbrdb : Tuple[list, list]
- table of compton measurements (rows and field names)
- """
-
- rdb_rows, r_fld = runsdb
- cdb_rows, c_fld = clbrdb
-
- self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
- self.cur = self.conn.cursor()
- self.cur.execute(f"CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
- self.cur.execute(f"CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
-
- run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
- x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
- x[r_fld.index("luminosity")]), rdb_rows)
- c_data_idx = c_fld.index("data")
- compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
- float(x[c_data_idx][0]), float(x[c_data_idx][1]),
- float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
-
- self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
- self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
-
- self.__create_combined_table()
-
- def __create_combined_table(self):
- create_combined_query = """
- CREATE TABLE combined_table AS
- SELECT
- runs.run AS run,
- runs.elabel AS elabel,
- runs.starttime as "run_start [timestamp]",
- runs.stoptime AS "run_stop [timestamp]",
- compton.begintime AS "compton_start [timestamp]",
- compton.endtime AS "compton_stop [timestamp]",
- runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
- FROM runs, compton
- WHERE
- (runs.starttime BETWEEN compton.begintime AND compton.endtime)
- OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
- OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
- OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
- """
- self.cur.execute(create_combined_query)
- return
-
- def combined_table(self) -> pd.DataFrame:
- """Returns combined dataframe
- """
-
- sql_query = """
- SELECT * FROM combined_table;
- """
- df = pd.read_sql(sql_query, self.conn)
- df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
- df['run_duration'] = df['run_stop'] - df['run_start']
- df['run_in_measurement'] = df['common_duration']/df['run_duration']
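- # for each run keep the Compton measurement covering the largest fraction of the run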
- df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
- df = df.drop(['run_duration', 'common_duration'], axis=1) #, 'run_start', 'run_stop'
- return df
-
- def __del__(self):
- self.conn.close()
-
- class Likelihood():
- """
- Likelihood function
- """
-
- def __init__(self, means: np.ndarray, sigmas: np.ndarray, weights: np.ndarray):
- """
- Parameters
- ----------
- means : np.ndarray
- array of means, [MeV]
- sigmas : np.ndarray
- array of standard deviations, [MeV]
- weights : np.ndarray
- array of luminosities
- """
-
- self.means = means
- self.sigmas = sigmas
- self.weights = weights/weights.mean()
- # w_norm = (weights**2).sum()/(weights.sum())
- # self.weights = weights/w_norm
-
- def __call__(self, mean: float, sigma: float):
- """
- Computes the weighted negative log-likelihood (the value Minuit minimizes)
-
- Parameters
- ----------
- mean : float
- expected mean
- sigma : float
- expected standard deviation
- """
-
- sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
- neg_ln_L = np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
- return neg_ln_L
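-
- # The value minimized above is the weighted negative log-likelihood
- #   NLL(mean, sigma) = sum_i w_i*( (mean - m_i)**2/(2*st_i**2) + ln(st_i) ),  st_i = sqrt(sigma**2 + s_i**2),
- # i.e. every measurement m_i +- s_i is treated as a Gaussian around the common
- # mean with an extra free spread sigma; w_i is the luminosity over its average.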
-
- def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
- # estimate energy by the nearest points
- min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
- max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
- nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
- nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
- # regularization: floor the energy and spread errors to avoid division by ~zero
- nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
- nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
- nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
- nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
- mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
- mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
- std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
- std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
- sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
- return {
- 'energy_point': comb_df.elabel.min(),
- 'first_run': comb_df.run_first.min(),
- 'last_run': comb_df.run_last.max(),
- 'mean_energy': mean_energy,
- 'mean_energy_stat_err': std_energy,
- 'mean_energy_sys_err': sys_energy,
- 'mean_spread': mean_spread,
- 'mean_spread_stat_err': std_spread,
- 'used_lum': 0,
- 'comment': 'indirect measurement #2',
- }, pd.DataFrame([])
- def averager_on_lums(df: pd.DataFrame) -> dict:
- """Averaging as <E> = \frac{\sum{L_i E_i}{\sum{L_i}},
- \deltaE^2 = \frac{\sum{(L_i \delta E_i)^2}}{(\sum L_i)^2}
-
- Attention: I think it's incorrect way of avaraging.
-
- Parameters
- ----------
- df : pd.DataFrame
- input dataframe containg means and spreads
-
- Returns
- -------
- dict
- averaged mean and spread
- """
-
- mean_en = (df.e_mean * df.luminosity).sum() / df.luminosity.sum()
- sys_err = df.e_mean.std()
- stat_err = np.sqrt( np.sum((df.luminosity * df.e_std)**2) ) / df.luminosity.sum()
-
- mean_spread = (df.spread_mean * df.luminosity).sum() / df.luminosity.sum()
- std_spread = np.sqrt( np.sum((df.luminosity * df.spread_std)**2) ) / df.luminosity.sum()
-
- return {
- 'mean_energy': mean_en,
- 'mean_energy_stat_err': stat_err,
- 'mean_energy_sys_err': sys_err,
- 'mean_spread': mean_spread,
- 'mean_spread_stat_err': std_spread,
- }
- def ultimate_averager(df: pd.DataFrame) -> dict:
- """Complete averager for estimation of mean energy and energy spread
-
- Parameters
- ----------
- df : pd.DataFrame
- input dataframe containing means and spreads
-
- Returns
- -------
- dict
- averaged mean and spread
- """
-
- m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
- m.errordef = 0.5
- m.limits['sigma'] = (0, None)
- m.migrad()
- # print(m.migrad())
- sys_err = m.values['sigma']
- mean_en = m.values['mean']
-
- mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
- std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
- return {
- 'mean_energy': mean_en,
- 'mean_energy_stat_err': round(m.errors['mean'], 5),
- 'mean_energy_sys_err': round(sys_err, 5),
- 'mean_spread': mean_spread,
- 'mean_spread_stat_err': round(std_spread, 5),
- }
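-
- # A minimal self-check sketch for the two averagers; the numbers are toy
- # values for illustration only (the function is defined but never called):
- def _demo_averagers():
-     toy = pd.DataFrame({
-         'e_mean': [508.42, 508.45, 508.40],  # hypothetical Compton mean energies, MeV
-         'e_std': [0.03, 0.04, 0.05],         # their statistical errors, MeV
-         'spread_mean': [0.30, 0.32, 0.29],   # energy spreads, MeV
-         'spread_std': [0.02, 0.02, 0.03],    # spread errors, MeV
-         'luminosity': [120.0, 80.0, 100.0],  # run luminosities used as weights
-     })
-     return averager_on_lums(toy), ultimate_averager(toy)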
-
- def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb, averager: callable = ultimate_averager) -> Tuple[dict, pd.DataFrame]:
- """Calculates the energy parameters (mean, std, spread) for one energy point
-
- Parameters
- ----------
- comb_df : pd.DataFrame
- table of the measurements linked with runs
- runs_df : pd.DataFrame
- table of the runs
- compton_df : pd.DataFrame
- table of the Compton measurements
- rdb : RunsDBHandler
- runs database handler (used to search for bad runs when no direct measurement exists)
- averager : callable
- function for averaging (ultimate_averager or averager_on_lums)
-
- Returns
- -------
- Tuple[dict, pd.DataFrame]
- averaged parameters of this energy point and the cleaned dataframe
- """
-
- if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
- # no direct measurements of the compton during data runs
-
- min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
- max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
- dlt0 = timedelta(days=1)
- # asymmetric time window because the energy can be stable only afterwards
- runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point = comb_df.iloc[0].at['elabel'], select_bad_runs = True)
-
- if len(runs_df_with_bads[0]) == 0:
- return __estimate_point_with_closest(comb_df, runs_df, compton_df)
-
- runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns = runs_df_with_bads[1])
- min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
-
- compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
-
- if len(compton_meas) == 0:
- # no compton measurements
- raise Exception("No measurement in this point. Pass it.")
-
-
- res_df = pd.DataFrame(list(map(lambda x: {
- 'compton_start': x[1]['begintime'],
- 'compton_stop': x[1]['endtime'],
- 'e_mean': float(x[1]['data'][0]),
- 'e_std': float(x[1]['data'][1]),
- 'spread_mean': float(x[1]['data'][2]),
- 'spread_std': float(x[1]['data'][3]),
- }, compton_meas.iterrows())))
-
- res_df = res_df.query(f'abs(e_mean -{comb_df.iloc[0].at["elabel"]})<5')
-
- if len(res_df) == 0:
- return __estimate_point_with_closest(comb_df, runs_df, compton_df)
-
- return {
- 'energy_point': comb_df.elabel.min(),
- 'first_run': comb_df.run_first.min(),
- 'last_run': comb_df.run_last.max(),
- 'mean_energy': res_df.e_mean.mean(),
- 'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)),
- 'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()),
- 'mean_spread': res_df.spread_mean.mean(),
- 'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
- 'used_lum': 0,
- 'comment': 'indirect measurement #1',
- }, res_df
-
-
- comb_df = comb_df.reset_index()
- df = comb_df.loc[~comb_df.compton_start.isna()].copy()
- df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
-
- df = df[df.e_std > 0]
- mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
-
- # reject measurements farther than 5 sigma (point scatter + stat. error) from the preliminary mean
- good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std(ddof=0)**2 + df.e_std**2)) < 5
- df = df[good_criterion]
- df['accepted'] = 1
-
- averages = averager(df)
-
- res_dict = {
- 'energy_point': comb_df.elabel.min(),
- 'first_run': comb_df.run_first.min(),
- 'last_run': comb_df.run_last.max(),
- 'mean_energy': averages['mean_energy'],
- 'mean_energy_stat_err': averages['mean_energy_stat_err'],
- 'mean_energy_sys_err': averages['mean_energy_sys_err'],
- 'mean_spread': averages['mean_spread'],
- 'mean_spread_stat_err': averages['mean_spread_stat_err'],
- 'used_lum': round(df.luminosity.sum()/comb_df.luminosity_total.sum(), 4),
- 'comment': '',
- }
-
- comb_df['accepted'] = 0
- comb_df.loc[df.index, 'accepted'] = 1
- return res_dict, comb_df.set_index('point_idx')
-
- def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
- """Replaces compton measurements writed on the border of two energy points on NaNs
- """
-
- energy_point_borders = combined_df[['point_idx', 'elabel', 'run_start', 'run_stop']].groupby(['point_idx'], dropna=True).agg(
- elabel_start_time=('run_start', 'min'), elabel_stop_time=('run_stop', 'max'),
- )
- df_comb = combined_df.set_index('point_idx').join(energy_point_borders, how='left')
-
- df_comb['comptonmeas_in_elabel'] = (df_comb[['elabel_stop_time', 'compton_stop']].min(axis=1) - df_comb[['elabel_start_time', 'compton_start']].max(axis=1))/(df_comb['compton_stop'] - df_comb['compton_start'])
-
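- # Compton measurements spending less than 70% of their duration inside one
- # energy point straddle a point border; they are blanked out (NaN) below.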
- df_comb = df_comb.query('comptonmeas_in_elabel < 0.7')
- border_comptons = df_comb.compton_start.values
-
- combined_df.loc[combined_df.compton_start.isin(border_comptons),
- ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'luminosity']] = np.nan
- return combined_df
- def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None, old_averager: bool = False, energy_point_csv_folder: Optional[str] = None) -> pd.DataFrame:
- """Splits the combined table into separate energy points, averages every point, and optionally saves plots and detailed per-point csv files
- """
-
- if pics_folder is not None:
- plt.ioff()
- plt.style.use('ggplot')
- locator = mdates.AutoDateLocator(minticks=5)
- formatter = mdates.ConciseDateFormatter(locator)
- formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
- formatter.zero_formats = [''] + formatter.formats[:-1]
- formatter.zero_formats[3] = '%d-%b'
- formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
-
- runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
-
- combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
- combined_df = combined_df.sort_values(by='run')
-
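- # a new point starts whenever the energy label changes or a MANUAL_RUNS_SPLIT boundary is crossed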
- run_bin_split = np.digitize(combined_df.run, MANUAL_RUNS_SPLIT)
- is_same_elabel = np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4)
- is_same_manualbin = np.isclose(run_bin_split, np.roll(run_bin_split, 1))
- combined_df['point_idx'] = np.cumsum(~(is_same_elabel & is_same_manualbin))
- # combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
-
- combined_df = process_intersected_compton_meas(combined_df)
- combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
-
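- # One row per (point_idx, Compton measurement); each *_test column duplicates
- # its partner with max instead of min so inconsistent groups can be spotted.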
- combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
- elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
- run_first=('run', 'min'), run_last=('run', 'max'),
- luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
- compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
- e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
- e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
- spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
- spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
- ).reset_index().set_index('point_idx')
- # return combined_df
-
- result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
-
- for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
- try:
- res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb, averager_on_lums if old_averager else ultimate_averager)
-
- if energy_point_csv_folder is not None:
- save_columns = ['elabel', 'run_first', 'run_last', 'luminosity', 'compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'accepted']
- save_csv(good_df[save_columns].dropna(), f'{energy_point_csv_folder}/{res_dict["energy_point"]}_{res_dict["first_run"]}.csv', update_current=False)
-
- good_df = good_df.query('accepted==1')
- except Exception as e:
- logging.warning(f'Point {i} is skipped: {e}')
- continue
-
- # result_df = result_df.append(res_dict, ignore_index=True)
- result_df = pd.concat([result_df, pd.Series(res_dict).to_frame().T], ignore_index=True)
-
- if pics_folder is not None:
- plt_table = good_df.dropna()
- if len(plt_table) == 0:
- continue
-
- total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
- half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
- time = plt_table.compton_start + half_timedelta
- total_time = plt_table.compton_stop.max() - plt_table.compton_start.min()
- timelim = [plt_table.compton_start.min() - 0.05*total_time, plt_table.compton_stop.max() + 0.05*total_time]
-
- fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
- ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
- ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
- ax.fill_between(timelim,
- [res_dict['mean_energy'] - total_error]*2,
- [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
- ax.tick_params(axis='x', labelrotation=45)
- ax.xaxis.set_major_locator(locator)
- ax.xaxis.set_major_formatter(formatter)
- ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV',
- xlabel='Time, NSK', ylabel='Energy, [MeV]', xlim=timelim)
- plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png', transparent=True)
- plt.close()
-
- types_dict = {ftype : float for ftype in ['energy_point', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum']}
- types_dict.update({itype : int for itype in ['first_run', 'last_run']})
- result_df = result_df.astype(types_dict)
- return result_df
- def final_table_to_clbrdb(df: pd.DataFrame, clbrdb: CalibrdbHandler, runs_df: pd.DataFrame, season: str):
- """Write good values from the averaged table into clbrdb
- """
-
- # keep direct measurements and those indirect ones that lie within 5 MeV of the energy point label
- good_values = (df.comment=='')|((df.comment!='')&((df.mean_energy.astype(float) - df.energy_point).abs()<5))
- df_clbrdb = df.loc[good_values].drop(['comment', 'used_lum'], axis=1)
-
- df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'starttime']], how='left', left_on='first_run', right_on='run').drop(['run'], axis=1)
- df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'stoptime']], how='left', left_on='last_run', right_on='run').drop(['run'], axis=1)
-
- df_clbrdb = df_clbrdb.assign(writetime=lambda df: df['stoptime'])
- df_clbrdb = df_clbrdb[['writetime', 'starttime', 'stoptime',
- 'energy_point', 'first_run', 'last_run', 'mean_energy',
- 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err']].values.tolist()
- clbrdb.insert(df_clbrdb, 'Misc', 'RunHeader', 'Compton_run_avg', 'Default', comment = season)
- clbrdb.commit()
-
- def save_csv(df: pd.DataFrame, filepath: str, update_current: bool = True):
- """Saves csv file. Updates current file in filepath if exists"""
-
- logging.info(f'Saving csv file: len(df)={len(df)}, filepath={filepath}, update_current={update_current}')
- if (os.path.isfile(filepath) and update_current):
- df_current = pd.read_csv(filepath)
- # df_current = df_current.append(df, ignore_index=True)
- df_current = pd.concat([df_current, df], ignore_index=True)
- df_current = df_current.drop_duplicates(subset=['energy_point', 'first_run'], keep='last')
- df = df_current
-
- df.to_csv(filepath, index=False, float_format='%g')
- return
- # Example: python scripts/compton_combiner.py -s NNBAR2022 -c database.ini --csv_dir . --clbrdb
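- # A sketch of the expected database.ini layout: the [cmdruns] keys mirror the
- # RunsDBHandler constructor, the [clbrDB] keys must match CalibrdbHandler from
- # compton_filter; all values below are placeholders:
- #   [cmdruns]
- #   host = cmddb
- #   database = online
- #   user = ...
- #   password = ...
- #
- #   [clbrDB]
- #   ...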
- def main():
- log_format = '[%(asctime)s] %(levelname)s: %(message)s'
- logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO) # pass filename='compton_combiner.log' instead of stream to log into a file
- logging.info("compton_combiner is started")
-
- parser = argparse.ArgumentParser(description = 'Averaging of Compton energy measurements from the clbrdb')
- parser.add_argument('-s', '--season', help = 'Name of the season')
- parser.add_argument('-c', '--config', help = 'Config file containing information for access to databases')
- parser.add_argument('--csv_dir', help = 'Directory for saving the csv file with results (if omitted, no csv is saved)')
- parser.add_argument('--clbrdb', action = 'store_true', help = 'Update Compton_run_avg clbrdb or not')
- parser.add_argument('--pics_folder', help = 'Path to the directory for saving the pictures')
- parser.add_argument('--energy_point_csv_folder', help = 'Path to the directory for saving the result in detail for each energy point')
- parser.add_argument('--only_last', action = 'store_true', help = 'Compute only the last point stored in the Compton_run_avg clbrdb and the newer points')
- parser.add_argument('--old_averaging', action = 'store_true', help = r'Use the old incomplete <E> = \frac{\sum L_i E_i}{\sum L_i} averaging')
-
- args = parser.parse_args()
- logging.info(f"""Arguments: season {args.season}, config {args.config}, csv_dir {args.csv_dir}, save_to_clbrdb {args.clbrdb},
- pics_folder {args.pics_folder}, detailed_csv_folder {args.energy_point_csv_folder}, only_last {args.only_last}, old_average: {args.old_averaging}""")
- config = ConfigParser()
- config.read(args.config)
-
- rdb = RunsDBHandler(**config['cmdruns'])
- clbrdb = CalibrdbHandler(**config['clbrDB'])
-
- idx = SEASONS['name'].index(args.season)
- runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx+1])
-
- if args.only_last:
- res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows = 1)
- if len(res_avg[0]) != 0:
- begintime = res_avg[0][0][res_avg[1].index("begintime")]
- runs_range = (begintime, None)
- logging.info(f"only_last flag enabled. Time range for runs database is used: {runs_range}")
-
- res_rdb = rdb.load_tables(runs_range)
- runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
-
- if args.only_last:
- # Keep only the studied season (important for the first averaging in a season when the only_last flag is on)
- start_season_run, stop_season_run = SEASONS['start_run'][idx], SEASONS['start_run'][idx+1] if SEASONS['start_run'][idx+1] else np.inf
- runs_df = runs_df.query('(run>=@start_season_run)&(run<@stop_season_run)')
-
- tdlt0 = timedelta(days=2)
- time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
-
- res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows = None, timerange = time_range)
-
- cb = Combiner(res_rdb, res_clbrdb)
- comb_df = cb.combined_table()
-
- compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
-
- cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb, args.old_averaging, args.energy_point_csv_folder)
-
- if args.csv_dir is not None:
- csv_path = os.path.join(args.csv_dir, f'{args.season}.csv')
- save_csv(cdf, csv_path)
- # cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
-
- if args.clbrdb:
- final_table_to_clbrdb(cdf, clbrdb, runs_df, args.season)
- return
- if __name__ == "__main__":
- main()