  1. """Script to combine compton measurements with runs and process these data
  2. """
  3. import argparse
  4. from configparser import ConfigParser
  5. from datetime import datetime, timedelta, timezone
  6. import logging
  7. import os
  8. import sqlite3
  9. from typing import Union, Tuple, Optional
  10. from compton_filter import CalibrdbHandler
  11. from iminuit import Minuit
  12. import matplotlib.dates as mdates
  13. import matplotlib.pyplot as plt
  14. from mysql.connector import connect, Error
  15. import numpy as np
  16. import pandas as pd
  17. from tqdm import tqdm
  18. SEASONS = {
  19. 'name': ['RHO2013', 'BRK2013/16', 'HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2021'],
  20. 'start_run': [18809, 32076, 36872, 48938, 70014, 85224, 89973, 98116, 107342, None],
  21. }
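
# Note: 'start_run' deliberately holds one more entry than 'name': the trailing
# None is an open upper bound, so SEASONS['start_run'][idx + 1] in main() gives
# the first run of the next season (or None for the ongoing one).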

class RunsDBHandler():
    def __init__(self, host: str = 'cmddb', database: str = 'online', user: str = None, password: str = None):
        self.conn = connect(host=host, database=database, user=user, password=password)
        self.cur = self.conn.cursor()
        # All times in the RunsDB are stored in Novosibirsk time (UTC+7).
        self.cur.execute("SET time_zone = '+07:00';")

    @property
    def fields(self) -> list:
        """Returns a list of available columns in the RunsDB
        """
        self.cur.execute("""DESCRIBE Runlog""")
        return self.cur.fetchall()

    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
        """
        Parameters
        ----------
        range : Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]
            selection range:
            an int range defines an interval in runs (the upper bound may be None),
            a datetime range defines a time interval (NSK: +7:00 time)
        energy_point : Optional[float]
            energy point name, MeV (default is None)
        select_bad_runs : bool
            if True, select only runs with quality labels other than "Y" (default is False)
        """
        cond = ""
        if isinstance(range[0], int):
            cond = f" AND run >= {range[0]} "
            if range[1] is not None:
                cond += f" AND run <= {range[1]} "
        elif isinstance(range[0], datetime):
            cond = " AND starttime >= %s "
            if range[1] is not None:
                cond += " AND stoptime <= %s"
            else:
                range = (range[0], )
        energy_cond = ""
        if energy_point is not None:
            energy_cond = f" AND energy = {energy_point}"
        quality_cond = ' quality = "Y" '
        if select_bad_runs:
            quality_cond = ' quality <> "Y" '
        sql_query = f"""
        SELECT
            run,
            starttime,
            stoptime,
            energy,
            luminosity
        FROM Runlog
        WHERE
            {quality_cond}
            {cond}
            {energy_cond}
            AND luminosity > 0
            AND stoptime > starttime
            AND nevent > 0
        ORDER BY run DESC"""
        if isinstance(range[0], datetime):
            self.cur.execute(sql_query, range)
        else:
            self.cur.execute(sql_query)
        field_names = [i[0] for i in self.cur.description]
        res = self.cur.fetchall()
        return res, field_names

    def __del__(self):
        self.conn.close()
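
# Usage sketch for RunsDBHandler, for illustration only (the credentials are
# placeholders; the run interval is the HIGH2020 season bounds from SEASONS):
#     rdb = RunsDBHandler(host='cmddb', database='online', user='...', password='...')
#     rows, fields = rdb.load_tables((89973, 98116))
#     runs_df = pd.DataFrame(rows, columns=fields)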

class Combiner():
    """Combines a dataframe with runs and a dataframe with compton measurements together
    """
    def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
        """
        Parameters
        ----------
        runsdb : Tuple[list, list]
            table of runs (rows and field names)
        clbrdb : Tuple[list, list]
            table of compton measurements (rows and field names)
        """
        rdb_rows, r_fld = runsdb
        cdb_rows, c_fld = clbrdb
        self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
        self.cur = self.conn.cursor()
        self.cur.execute("CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
        self.cur.execute("CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
        run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
                                           x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
                                           x[r_fld.index("luminosity")]), rdb_rows)
        c_data_idx = c_fld.index("data")
        compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
                                               float(x[c_data_idx][0]), float(x[c_data_idx][1]),
                                               float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
        self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
        self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
        self.__create_combined_table()

    def __create_combined_table(self):
        # A run and a compton measurement are matched if their time intervals overlap.
        create_combined_query = """
        CREATE TABLE combined_table AS
        SELECT
            runs.run AS run,
            runs.elabel AS elabel,
            runs.starttime AS "run_start [timestamp]",
            runs.stoptime AS "run_stop [timestamp]",
            compton.begintime AS "compton_start [timestamp]",
            compton.endtime AS "compton_stop [timestamp]",
            runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
        FROM runs, compton
        WHERE
            (runs.starttime BETWEEN compton.begintime AND compton.endtime)
            OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
            OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
            OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
        """
        self.cur.execute(create_combined_query)
        return

    def combined_table(self) -> pd.DataFrame:
        """Returns combined dataframe
        """
        sql_query = """
        SELECT * FROM combined_table;
        """
        df = pd.read_sql(sql_query, self.conn)
        df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
        df['run_duration'] = df['run_stop'] - df['run_start']
        df['run_in_measurement'] = df['common_duration']/df['run_duration']
        # Keep, for each run, the compton measurement with the largest time overlap.
        df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
        df = df.drop(['run_duration', 'common_duration'], axis=1)  # , 'run_start', 'run_stop'
        return df

    def __del__(self):
        self.conn.close()
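
# Usage sketch for Combiner, mirroring its use in main() below (runs_range and
# time_range are placeholders defined there):
#     res_rdb = rdb.load_tables(runs_range)
#     res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows=None, timerange=time_range)
#     comb_df = Combiner(res_rdb, res_clbrdb).combined_table()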

class Likelihood():
    """
    Likelihood function
    """
    def __init__(self, means: np.ndarray, sigmas: np.ndarray, weights: np.ndarray):
        """
        Parameters
        ----------
        means : np.ndarray
            array of means, [MeV]
        sigmas : np.ndarray
            array of standard deviations, [MeV]
        weights : np.ndarray
            array of luminosities
        """
        self.means = means
        self.sigmas = sigmas
        # Normalize the weights so that their average is 1.
        self.weights = weights/weights.mean()

    def __call__(self, mean: float, sigma: float):
        """
        Calculates the negative log-likelihood

        Parameters
        ----------
        mean : float
            expected mean
        sigma : float
            expected standard deviation
        """
        sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
        ln_L = -np.sum( self.weights*( ((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total) ) )
        return -ln_L
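
# The class above implements (up to an additive constant) the weighted negative
# log-likelihood of a Gaussian model with an extra free spread `sigma`:
#     -ln L(mean, sigma) = sum_i w_i * [ (mean - x_i)^2 / (2*(sigma^2 + s_i^2)) + ln sqrt(sigma^2 + s_i^2) ]
# Minimization sketch with iminuit (this is how calculate_point uses it below):
#     m = Minuit(Likelihood(means, sigmas, lums), mean=means.mean(), sigma=means.std(ddof=0))
#     m.errordef = 0.5  # negative log-likelihood convention
#     m.migrad()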

def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
    # Estimate the energy from the compton measurements nearest in time.
    min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
    max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
    nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
    nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
    # Regularization: floor the errors to avoid division by zero in the weights below.
    nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
    nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
    nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
    nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
    mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
    mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
    std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
    std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
    sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
    return {
        'energy_point': comb_df.elabel.min(),
        'first_run': comb_df.run_first.min(),
        'last_run': comb_df.run_last.max(),
        'mean_energy': mean_energy,
        'mean_energy_stat_err': std_energy,
        'mean_energy_sys_err': sys_energy,
        'mean_spread': mean_spread,
        'mean_spread_stat_err': std_spread,
        'used_lum': 0,
        'comment': 'indirect measurement #2',
    }, pd.DataFrame([])
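
# The statistical errors above follow the standard inverse-variance combination,
# std = (1/s_before^2 + 1/s_after^2)^(-1/2); for two equal errors s this reduces
# to s/sqrt(2). The systematic error is taken as the spread (np.std) of the two
# nearest energy values.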

def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb) -> Tuple[dict, pd.DataFrame]:
    """Calculates the energy parameters (mean, std, spread) in this dataframe

    Parameters
    ----------
    comb_df : pd.DataFrame
        table of the measurements linked with runs
    runs_df : pd.DataFrame
        table of the runs
    compton_df : pd.DataFrame
        table of the compton measurements
    rdb : RunsDBHandler
        handler for loading additional (bad) runs

    Returns
    -------
    Tuple[dict, pd.DataFrame]
        averaged parameters for this energy point and the cleaned dataframe
    """
    if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
        # No direct compton measurements during the data runs.
        min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
        max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
        dlt0 = timedelta(days=1)
        # Asymmetric time window because the energy can be stable only after the runs.
        runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point=comb_df.iloc[0].at['elabel'], select_bad_runs=True)
        if len(runs_df_with_bads[0]) == 0:
            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
        runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns=runs_df_with_bads[1])
        min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
        compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
        if len(compton_meas) == 0:
            # No compton measurements at this point at all.
            raise Exception("No measurement at this point. Skipping it.")
        res_df = pd.DataFrame(list(map(lambda x: {
            'compton_start': x[1]['begintime'],
            'compton_stop': x[1]['endtime'],
            'e_mean': float(x[1]['data'][0]),
            'e_std': float(x[1]['data'][1]),
            'spread_mean': float(x[1]['data'][2]),
            'spread_std': float(x[1]['data'][3]),
        }, compton_meas.iterrows())))
        res_df = res_df.query(f'abs(e_mean - {comb_df.iloc[0].at["elabel"]}) < 5')
        if len(res_df) == 0:
            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
        return {
            'energy_point': comb_df.elabel.min(),
            'first_run': comb_df.run_first.min(),
            'last_run': comb_df.run_last.max(),
            'mean_energy': res_df.e_mean.mean(),
            'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)),
            'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()),
            'mean_spread': res_df.spread_mean.mean(),
            'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
            'used_lum': 0,
            'comment': 'indirect measurement #1',
        }, res_df

    df = comb_df.loc[~comb_df.compton_start.isna()].copy()
    # df.spread_mean = np.where(df.spread_mean < 1e-3, 1e-3, df.spread_mean)
    df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
    df = df[df.e_std > 0]
    # Luminosity-weighted mean, used only for the outlier criterion below.
    mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
    # std_energy = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.e_std**2))
    good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std(ddof=0)**2 + df.e_std**2)) < 5
    # print('vals', np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std()**2 + df.e_std**2)) )
    # print(df[~good_criterion].elabel.value_counts())
    df = df[good_criterion]
    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
    m.errordef = 0.5
    m.limits['sigma'] = (0, None)
    m.migrad()
    # print(m.migrad())
    sys_err = m.values['sigma']
    mean_en = m.values['mean']
    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
    res_dict = {
        'energy_point': comb_df.elabel.min(),
        'first_run': comb_df.run_first.min(),
        'last_run': comb_df.run_last.max(),
        'mean_energy': mean_en,
        'mean_energy_stat_err': m.errors['mean'],
        'mean_energy_sys_err': sys_err,
        'mean_spread': mean_spread,
        'mean_spread_stat_err': std_spread,
        'used_lum': df.luminosity.sum()/comb_df.luminosity_total.sum(),
        'comment': '',
    }
    return res_dict, df
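
# Note on the averaging above: the spread is combined as a luminosity- and
# inverse-variance-weighted mean,
#     <spread> = sum_i(spread_i * L_i / s_i^2) / sum_i(L_i / s_i^2),
# while the mean energy and its errors come from the Likelihood fit, with the
# fitted `sigma` reported as the systematic error of the point.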

def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
    """Replaces compton measurements recorded on the border of two energy points with NaNs
    """
    energy_point_borders = combined_df[['point_idx', 'elabel', 'run_start', 'run_stop']].groupby(['point_idx'], dropna=True).agg(
        elabel_start_time=('run_start', 'min'), elabel_stop_time=('run_stop', 'max'),
    )
    df_comb = combined_df.set_index('point_idx').join(energy_point_borders, how='left')
    # Fraction of the compton measurement lying inside its energy point.
    df_comb['comptonmeas_in_elabel'] = (df_comb[['elabel_stop_time', 'compton_stop']].min(axis=1) - df_comb[['elabel_start_time', 'compton_start']].max(axis=1))/(df_comb['compton_stop'] - df_comb['compton_start'])
    # print(df_comb['comptonmeas_in_elabel'].dropna().sort_values())
    df_comb = df_comb.query('comptonmeas_in_elabel < 0.7')
    border_comptons = df_comb.compton_start.values
    # print(combined_df.compton_start.isin(border_comptons).sum())
    # print(combined_df.loc[combined_df.compton_start.isin(border_comptons)].elabel.value_counts())
    combined_df.loc[combined_df.compton_start.isin(border_comptons), ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std']] = np.nan
    return combined_df
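
# Worked example of the border criterion (hypothetical times): a compton
# measurement from 10:00 to 11:00 whose energy point ends at 10:30 has
# comptonmeas_in_elabel = 0.5 < 0.7, so its values are masked with NaN and the
# point falls back to the indirect estimates in calculate_point.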

def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None) -> pd.DataFrame:
    if pics_folder is not None:
        plt.ioff()
        plt.style.use('ggplot')
        locator = mdates.AutoDateLocator(minticks=5)
        formatter = mdates.ConciseDateFormatter(locator)
        formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
        formatter.zero_formats = [''] + formatter.formats[:-1]
        formatter.zero_formats[3] = '%d-%b'
        formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
    runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
    combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
    combined_df = combined_df.sort_values(by='run')
    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
    # A new energy point starts wherever the energy label changes.
    combined_df['point_idx'] = np.cumsum(~np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4))
    combined_df = process_intersected_compton_meas(combined_df)
    # The *_test columns duplicate each aggregation with max instead of min and
    # allow a consistency check of the grouping.
    combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
        elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
        run_first=('run', 'min'), run_last=('run', 'max'),
        luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
        compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
        e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
        e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
        spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
        spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
    ).reset_index().set_index('point_idx')
    # return combined_df
    result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
    for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
        try:
            res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb)
        except Exception:
            # Points without any compton measurements are skipped.
            continue
        result_df = pd.concat([result_df, pd.DataFrame([res_dict])], ignore_index=True)
        if pics_folder is not None:
            plt_table = good_df.dropna()
            if len(plt_table) == 0:
                continue
            total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
            half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
            time = plt_table.compton_start + half_timedelta
            dlt0, total_time = timedelta(days=1), plt_table.compton_stop.max() - plt_table.compton_stop.min()
            timelim = [plt_table.compton_start.min() - 0.05*total_time, plt_table.compton_stop.max() + 0.05*total_time]
            fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
            ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
            ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
            ax.fill_between(timelim,
                            [res_dict['mean_energy'] - total_error]*2,
                            [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
            ax.tick_params(axis='x', labelrotation=45)
            ax.xaxis.set_major_locator(locator)
            ax.xaxis.set_major_formatter(formatter)
            ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV',
                   xlabel='Time, NSK', ylabel='Energy, [MeV]', xlim=timelim)
            plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png', transparent=True)
            plt.close()
    return result_df

def final_table_to_clbrdb(df: pd.DataFrame, clbrdb: CalibrdbHandler, runs_df: pd.DataFrame, season: str):
    """Writes good values from the averaged table into the clbrdb
    """
    # Direct measurements are always good; indirect ones only if within 5 MeV of the point label.
    good_values = (df.comment == '') | ((df.comment != '') & ((df.mean_energy.astype(float) - df.energy_point).abs() < 5))
    df_clbrdb = df.loc[good_values].drop(['comment', 'used_lum'], axis=1)
    df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'starttime']], how='left', left_on='first_run', right_on='run').drop(['run'], axis=1)
    df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'stoptime']], how='left', left_on='last_run', right_on='run').drop(['run'], axis=1)
    df_clbrdb = df_clbrdb.assign(writetime=lambda df: df['stoptime'])
    df_clbrdb = df_clbrdb[['writetime', 'starttime', 'stoptime',
                           'energy_point', 'first_run', 'last_run', 'mean_energy',
                           'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err']].values.tolist()
    clbrdb.insert(df_clbrdb, 'Misc', 'RunHeader', 'Compton_run_avg', 'Default', comment=season)
    clbrdb.commit()

def save_csv(df: pd.DataFrame, filepath: str, update_current: bool = True):
    """Saves the csv file. Updates the current file in filepath if it exists"""
    if os.path.isfile(filepath) and update_current:
        df_current = pd.read_csv(filepath)
        df_current = pd.concat([df_current, df], ignore_index=True)
        df_current = df_current.drop_duplicates(subset=['energy_point', 'first_run'], keep='last')
        df = df_current
    df.to_csv(filepath, index=False, float_format='%g')
    return
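
# Note: save_csv merges new rows into an existing file and keeps the last
# occurrence per (energy_point, first_run) pair, so re-running a season updates
# previously computed points instead of duplicating them.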

# python scripts/compton_combiner.py -s NNBAR2021 -c database.ini --csv_dir . --clbrdb
def main():
    parser = argparse.ArgumentParser(description='Mean compton energy measurements from clbrdb')
    parser.add_argument('-s', '--season', help='Name of the season')
    parser.add_argument('-c', '--config', help='Config file with the information for access to the databases')
    parser.add_argument('--csv_dir', help='Directory for saving a csv file with the results (skip to disable)')
    parser.add_argument('--clbrdb', action='store_true', help='Update the Compton_run_avg clbrdb table')
    parser.add_argument('--pics_folder', help='Path to the directory for saving the pictures')
    parser.add_argument('--only_last', action='store_true', help='Compute only the last point (in the Compton_run_avg clbrdb) and the new points')
    args = parser.parse_args()
    # logging.info(f"Arguments: season: {args.season}, config {args.config}")

    config = ConfigParser()
    config.read(args.config)
    rdb = RunsDBHandler(**config['cmdruns'])
    clbrdb = CalibrdbHandler(**config['clbrDB'])

    idx = SEASONS['name'].index(args.season)
    runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx + 1])
    if args.only_last:
        res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows=1)
        if len(res_avg[0]) != 0:
            begintime = res_avg[0][0][res_avg[1].index("begintime")]
            runs_range = (begintime, None)
    res_rdb = rdb.load_tables(runs_range)
    runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])

    tdlt0 = timedelta(days=2)
    time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows=None, timerange=time_range)

    cb = Combiner(res_rdb, res_clbrdb)
    comb_df = cb.combined_table()
    compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb)

    if args.csv_dir is not None:
        csv_path = os.path.join(args.csv_dir, f'{args.season}.csv')
        save_csv(cdf, csv_path)
        # cdf.to_csv(f'{args.season}.csv', index=False, float_format='%g')
    if args.clbrdb:
        final_table_to_clbrdb(cdf, clbrdb, runs_df, args.season)
    return

if __name__ == "__main__":
    main()