# compton_combiner.py

  1. """Script to combine compton measurements with runs and process these data
  2. """
  3. import argparse
  4. from configparser import ConfigParser
  5. from datetime import datetime, timedelta, timezone
  6. import logging
  7. import os
  8. import sys
  9. import sqlite3
  10. from typing import Union, Tuple, Optional
  11. from compton_filter import CalibrdbHandler
  12. from iminuit import Minuit
  13. import matplotlib.dates as mdates
  14. import matplotlib.pyplot as plt
  15. from mysql.connector import connect, Error
  16. import numpy as np
  17. import pandas as pd
  18. from tqdm import tqdm
  19. SEASONS = {
  20. 'name': ['PHIOMEGA2012', 'RHO2013', 'BRK2013/16', 'HIGH2017', 'RHO2018', 'HIGH2019', 'LOW2020', 'HIGH2020', 'HIGH2021', 'NNBAR2022', 'HIGH2023', 'PHI2024'],
  21. 'start_run': [17405, 18809, 32076, 36872, 48938, 70014, 85224, 89973, 98116, 107342, 131913, 163012, None],
  22. }
  23. MANUAL_RUNS_SPLIT = [0, 150159, np.inf] # list[runs] to manually split point with the same energy
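
# A toy illustration (hypothetical run numbers) of how MANUAL_RUNS_SPLIT is applied
# with np.digitize in process_combined below: runs on different sides of the border
# run 150159 fall into different bins, so two points at the same energy stay separate.
#   np.digitize([150000, 150200], MANUAL_RUNS_SPLIT)  # -> array([1, 2])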

class RunsDBHandler():
    def __init__(self, host: str = 'cmddb', database: str = 'online', user: str = None, password: str = None):
        self.conn = connect(host=host, database=database, user=user, password=password)
        self.cur = self.conn.cursor()
        self.cur.execute("SET time_zone = '+07:00';")

    @property
    def fields(self) -> list:
        """Returns a list of the available columns in the RunsDB
        """
        self.cur.execute("""DESCRIBE Runlog""")
        return self.cur.fetchall()

    def load_tables(self, range: Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]], energy_point: Optional[float] = None, select_bad_runs: bool = False):
        """
        Returns a slice of the table with the following fields: run, starttime, stoptime, energy, luminosity

        Parameters
        ----------
        range : Union[Tuple[int, Optional[int]], Tuple[datetime, datetime]]
            selection range;
            an int range defines an interval in runs (first included, last excluded),
            a datetime range defines a time interval (NSK: +07:00 time)
        energy_point : Optional[float]
            energy point name, MeV (default is None)
        select_bad_runs : bool
            select runs with quality labels other than "Y" (default is False)
        """
        cond = ""
        if isinstance(range[0], int):
            cond = f" AND run >= {range[0]} "
            if range[1] is not None:
                cond += f" AND run < {range[1]} "
        elif isinstance(range[0], datetime):
            cond = " AND starttime >= %s "
            if range[1] is not None:
                cond += " AND stoptime <= %s"
            else:
                range = (range[0], )
        energy_cond = ""
        if energy_point is not None:
            energy_cond = f" AND energy = {energy_point}"
        quality_cond = ' quality = "Y" '
        if select_bad_runs:
            quality_cond = ' quality <> "Y" '
        sql_query = f"""
        SELECT
            run,
            starttime,
            stoptime,
            energy,
            luminosity
        FROM Runlog
        WHERE
            {quality_cond}
            {cond}
            {energy_cond}
            AND luminosity > 0
            AND stoptime > starttime
            AND nevent > 0
        ORDER BY run DESC"""
        if isinstance(range[0], datetime):
            self.cur.execute(sql_query, range)
        else:
            self.cur.execute(sql_query)
        field_names = [i[0] for i in self.cur.description]
        res = self.cur.fetchall()
        return res, field_names

    def __del__(self):
        self.conn.close()
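
# A minimal usage sketch (hypothetical credentials, not executed on import):
#   rdb = RunsDBHandler(host='cmddb', database='online', user='reader', password='...')
#   rows, fields = rdb.load_tables((89973, 98116))                 # run-number interval
#   rows, fields = rdb.load_tables((datetime(2021, 1, 1), None))   # open-ended time interval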

class Combiner():
    """Combines a dataframe with runs and a dataframe with compton measurements together
    """

    def __init__(self, runsdb: Tuple[list, list], clbrdb: Tuple[list, list]):
        """
        Parameters
        ----------
        runsdb : Tuple[list, list]
            table of runs (rows and field names)
        clbrdb : Tuple[list, list]
            table of compton measurements (rows and field names)
        """
        rdb_rows, r_fld = runsdb
        cdb_rows, c_fld = clbrdb
        self.conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
        self.cur = self.conn.cursor()
        self.cur.execute("CREATE table runs (run, elabel, starttime timestamp, stoptime timestamp, luminosity)")
        self.cur.execute("CREATE table compton (begintime timestamp, endtime timestamp, e_mean, e_std, spread_mean, spread_std)")
        run_row_generator = map(lambda x: (x[r_fld.index("run")], x[r_fld.index("energy")],
                                           x[r_fld.index("starttime")], x[r_fld.index("stoptime")],
                                           x[r_fld.index("luminosity")]), rdb_rows)
        c_data_idx = c_fld.index("data")
        compton_row_generator = map(lambda x: (x[c_fld.index("begintime")], x[c_fld.index("endtime")],
                                               float(x[c_data_idx][0]), float(x[c_data_idx][1]),
                                               float(x[c_data_idx][2]), float(x[c_data_idx][3])), cdb_rows)
        self.cur.executemany(f"""INSERT into runs VALUES ({','.join(['?']*5)})""", run_row_generator)
        self.cur.executemany(f"""INSERT into compton VALUES ({','.join(['?']*6)})""", compton_row_generator)
        self.__create_combined_table()

    def __create_combined_table(self):
        create_combined_query = """
        CREATE TABLE combined_table AS
        SELECT
            runs.run AS run,
            runs.elabel AS elabel,
            runs.starttime AS "run_start [timestamp]",
            runs.stoptime AS "run_stop [timestamp]",
            compton.begintime AS "compton_start [timestamp]",
            compton.endtime AS "compton_stop [timestamp]",
            runs.luminosity, compton.e_mean, compton.e_std, compton.spread_mean, compton.spread_std
        FROM runs, compton
        WHERE
            (runs.starttime BETWEEN compton.begintime AND compton.endtime)
            OR (runs.stoptime BETWEEN compton.begintime AND compton.endtime)
            OR (compton.begintime BETWEEN runs.starttime AND runs.stoptime)
            OR (compton.endtime BETWEEN runs.starttime AND runs.stoptime);
        """
        self.cur.execute(create_combined_query)
        return

    def combined_table(self) -> pd.DataFrame:
        """Returns the combined dataframe
        """
        sql_query = """
        SELECT * FROM combined_table;
        """
        df = pd.read_sql(sql_query, self.conn)
        df['common_duration'] = df[['run_stop', 'compton_stop']].min(axis=1) - df[['run_start', 'compton_start']].max(axis=1)
        df['run_duration'] = df['run_stop'] - df['run_start']
        df['run_in_measurement'] = df['common_duration']/df['run_duration']
        df = df.sort_values(by='run_in_measurement', ascending=False).drop_duplicates(subset='run').sort_values(by='run')
        df = df.drop(['run_duration', 'common_duration'], axis=1)  # , 'run_start', 'run_stop'
        return df

    def __del__(self):
        self.conn.close()
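
def _example_interval_overlap_join():
    """A minimal, self-contained sketch (synthetic rows, not real DB output) of the
    interval-overlap join performed by Combiner: a run and a compton measurement are
    matched whenever their time intervals intersect."""
    t0 = datetime(2021, 1, 1, 12, 0)
    runs = ([(1, 508.5, t0, t0 + timedelta(hours=1), 1e3)],
            ['run', 'energy', 'starttime', 'stoptime', 'luminosity'])
    # 'data' packs (e_mean, e_std, spread_mean, spread_std) as in the clbrdb rows
    comptons = ([(t0 - timedelta(minutes=10), t0 + timedelta(minutes=50),
                  (508.4, 0.05, 0.3, 0.02))],
                ['begintime', 'endtime', 'data'])
    return Combiner(runs, comptons).combined_table()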

class Likelihood():
    """
    Likelihood function
    """

    def __init__(self, means: np.array, sigmas: np.array, weights: np.array):
        """
        Parameters
        ----------
        means : np.array
            array of means, [MeV]
        sigmas : np.array
            array of standard deviations, [MeV]
        weights : np.array
            array of luminosities
        """
        self.means = means
        self.sigmas = sigmas
        self.weights = weights/weights.mean()
        # alternative normalization: weights/((weights**2).sum()/weights.sum())

    def __call__(self, mean: float, sigma: float):
        """
        Calculates the negative log-likelihood (the quantity minimized by Minuit)

        Parameters
        ----------
        mean : float
            expected mean
        sigma : float
            expected standard deviation
        """
        sigma_total = np.sqrt(sigma**2 + self.sigmas**2)
        ln_L = -np.sum(self.weights*(((mean - self.means)**2)/(2*(sigma_total**2)) + np.log(sigma_total)))
        return -ln_L
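
def _example_likelihood_fit():
    """A minimal sketch (synthetic numbers, illustration only): fit a common mean and
    an extra fluctuation term to three measurements with Likelihood and Minuit,
    mirroring the setup used in ultimate_averager below."""
    means = np.array([508.40, 508.45, 508.38])   # hypothetical energies, MeV
    sigmas = np.array([0.03, 0.04, 0.03])        # their statistical errors, MeV
    lums = np.array([1.0, 2.0, 1.5])             # luminosity weights
    m = Minuit(Likelihood(means, sigmas, lums), mean=means.mean(), sigma=means.std())
    m.errordef = 0.5                             # 0.5 for a negative log-likelihood
    m.limits['sigma'] = (0, None)
    m.migrad()
    return m.values['mean'], m.errors['mean'], m.values['sigma']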

def __estimate_point_with_closest(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame):
    # estimate the energy by the nearest compton measurements
    min_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
    max_run_time = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
    nearest_row_before = compton_df.iloc[pd.Index(compton_df.endtime).get_loc(min_run_time, method='nearest')]
    nearest_row_after = compton_df.iloc[pd.Index(compton_df.begintime).get_loc(max_run_time, method='nearest')]
    # regularization: keep the e_std (data[1]) and spread_std (data[3]) errors away from zero
    nearest_row_before['data'][1] = max(nearest_row_before['data'][1], 1e-3)
    nearest_row_after['data'][1] = max(nearest_row_after['data'][1], 1e-3)
    nearest_row_before['data'][3] = max(nearest_row_before['data'][3], 1e-3)
    nearest_row_after['data'][3] = max(nearest_row_after['data'][3], 1e-3)
    mean_energy = (nearest_row_before['data'][0] + nearest_row_after['data'][0])/2
    mean_spread = (nearest_row_before['data'][2] + nearest_row_after['data'][2])/2
    std_energy = np.sqrt(1/(1/(nearest_row_before['data'][1])**2 + 1/(nearest_row_after['data'][1])**2))
    std_spread = np.sqrt(1/(1/(nearest_row_before['data'][3])**2 + 1/(nearest_row_after['data'][3])**2))
    sys_energy = np.std([nearest_row_before['data'][0], nearest_row_after['data'][0]])
    return {
        'energy_point': comb_df.elabel.min(),
        'first_run': comb_df.run_first.min(),
        'last_run': comb_df.run_last.max(),
        'mean_energy': mean_energy,
        'mean_energy_stat_err': std_energy,
        'mean_energy_sys_err': sys_energy,
        'mean_spread': mean_spread,
        'mean_spread_stat_err': std_spread,
        'used_lum': 0,
        'comment': 'indirect measurement #2',
    }, pd.DataFrame([])

def averager_on_lums(df: pd.DataFrame) -> dict:
    r"""Averaging as <E> = \frac{\sum L_i E_i}{\sum L_i},
    \delta E^2 = \frac{\sum (L_i \delta E_i)^2}{(\sum L_i)^2}
    Attention: I think this is an incorrect way of averaging.

    Parameters
    ----------
    df : pd.DataFrame
        input dataframe containing means and spreads

    Returns
    -------
    dict
        averaged mean and spread
    """
    mean_en = (df.e_mean * df.luminosity).sum() / df.luminosity.sum()
    sys_err = df.e_mean.std()
    stat_err = np.sqrt(np.sum((df.luminosity * df.e_std)**2)) / df.luminosity.sum()
    mean_spread = (df.spread_mean * df.luminosity).sum() / df.luminosity.sum()
    std_spread = np.sqrt(np.sum((df.luminosity * df.spread_std)**2)) / df.luminosity.sum()
    return {
        'mean_energy': mean_en,
        'mean_energy_stat_err': stat_err,
        'mean_energy_sys_err': sys_err,
        'mean_spread': mean_spread,
        'mean_spread_stat_err': std_spread,
    }
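
# Worked toy example of the formula above (hypothetical numbers): with luminosities
# L = (1, 3) and energies E = (508.0, 508.4) MeV, <E> = (1*508.0 + 3*508.4)/4 = 508.3 MeV,
# i.e. the average leans toward the higher-luminosity measurement.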

def ultimate_averager(df: pd.DataFrame) -> dict:
    """Complete averager for the estimation of the mean energy and energy spread

    Parameters
    ----------
    df : pd.DataFrame
        input dataframe containing means and spreads

    Returns
    -------
    dict
        averaged mean and spread
    """
    m = Minuit(Likelihood(df.e_mean, df.e_std, df.luminosity), mean=df.e_mean.mean(), sigma=df.e_mean.std(ddof=0))
    m.errordef = 0.5
    m.limits['sigma'] = (0, None)
    m.migrad()
    sys_err = m.values['sigma']
    mean_en = m.values['mean']
    mean_spread = np.sum(df.spread_mean*df.luminosity/(df.spread_std**2))/np.sum(df.luminosity/(df.spread_std**2))
    std_spread = np.sqrt(1/np.sum((df.luminosity/df.luminosity.mean())/df.spread_std**2))
    return {
        'mean_energy': mean_en,
        'mean_energy_stat_err': round(m.errors['mean'], 5),
        'mean_energy_sys_err': round(sys_err, 5),
        'mean_spread': mean_spread,
        'mean_spread_stat_err': round(std_spread, 5),
    }

def calculate_point(comb_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, rdb, averager: callable = ultimate_averager) -> Tuple[dict, pd.DataFrame]:
    """Calculates the energy parameters (mean, std, spread) for one energy point

    Parameters
    ----------
    comb_df : pd.DataFrame
        table of the measurements linked with runs
    runs_df : pd.DataFrame
        table of the runs
    compton_df : pd.DataFrame
        table of the compton measurements
    rdb : RunsDBHandler
        handler used to load additional (bad) runs
    averager : callable
        function for averaging (ultimate_averager or averager_on_lums)

    Returns
    -------
    Tuple[dict, pd.DataFrame]
        averaged parameters of this point, cleaned dataframe
    """
    if (len(comb_df) == 1) and pd.isnull(comb_df.iloc[0].at['compton_start']):
        # no direct compton measurements during the data runs
        min_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_first']].iloc[0].at['starttime']
        max_Yruntime = runs_df[runs_df.run == comb_df.iloc[0].at['run_last']].iloc[0].at['stoptime']
        dlt0 = timedelta(days=1)
        # asymmetric time window because the energy can be stable only afterwards
        runs_df_with_bads = rdb.load_tables((min_Yruntime, max_Yruntime + dlt0), energy_point=comb_df.iloc[0].at['elabel'], select_bad_runs=True)
        if len(runs_df_with_bads[0]) == 0:
            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
        runs_df_with_bads_df = pd.DataFrame(runs_df_with_bads[0], columns=runs_df_with_bads[1])
        min_run_time, max_run_time = min(min_Yruntime, runs_df_with_bads_df.starttime.min()), max(max_Yruntime, runs_df_with_bads_df.stoptime.max())
        compton_meas = compton_df.query('((begintime>=@min_run_time)&(begintime<=@max_run_time))|((endtime>=@min_run_time)&(endtime<=@max_run_time))').copy()
        if len(compton_meas) == 0:
            # no compton measurements at all
            raise Exception("No measurement in this point. Pass it.")
        res_df = pd.DataFrame(list(map(lambda x: {
            'compton_start': x[1]['begintime'],
            'compton_stop': x[1]['endtime'],
            'e_mean': float(x[1]['data'][0]),
            'e_std': float(x[1]['data'][1]),
            'spread_mean': float(x[1]['data'][2]),
            'spread_std': float(x[1]['data'][3]),
        }, compton_meas.iterrows())))
        res_df = res_df.query(f'abs(e_mean - {comb_df.iloc[0].at["elabel"]}) < 5')
        if len(res_df) == 0:
            return __estimate_point_with_closest(comb_df, runs_df, compton_df)
        return {
            'energy_point': comb_df.elabel.min(),
            'first_run': comb_df.run_first.min(),
            'last_run': comb_df.run_last.max(),
            'mean_energy': res_df.e_mean.mean(),
            'mean_energy_stat_err': np.sqrt(1/np.sum(1/(res_df.e_std)**2)),
            'mean_energy_sys_err': np.abs(comb_df.iloc[0].at['elabel'] - res_df.e_mean.mean()),
            'mean_spread': res_df.spread_mean.mean(),
            'mean_spread_stat_err': np.sqrt(1/np.sum(1/(res_df.spread_std)**2)),
            'used_lum': 0,
            'comment': 'indirect measurement #1',
        }, res_df
    comb_df = comb_df.reset_index()
    df = comb_df.loc[~comb_df.compton_start.isna()].copy()
    df.spread_std = np.where(df.spread_std < 1e-4, 1e-4, df.spread_std)
    df = df[df.e_std > 0]
    mean_energy = np.sum(df.e_mean*df.luminosity/(df.e_std**2))/np.sum(df.luminosity/(df.e_std**2))
    good_criterion = np.abs((df.e_mean - mean_energy)/np.sqrt(df.e_mean.std(ddof=0)**2 + df.e_std**2)) < 5
    df = df[good_criterion]
    df['accepted'] = 1
    averages = averager(df)
    res_dict = {
        'energy_point': comb_df.elabel.min(),
        'first_run': comb_df.run_first.min(),
        'last_run': comb_df.run_last.max(),
        'mean_energy': averages['mean_energy'],
        'mean_energy_stat_err': averages['mean_energy_stat_err'],
        'mean_energy_sys_err': averages['mean_energy_sys_err'],
        'mean_spread': averages['mean_spread'],
        'mean_spread_stat_err': averages['mean_spread_stat_err'],
        'used_lum': round(df.luminosity.sum()/comb_df.luminosity_total.sum(), 4),
        'comment': '',
    }
    comb_df['accepted'] = 0
    comb_df.loc[df.index, 'accepted'] = 1
    return res_dict, comb_df.set_index('point_idx')

def process_intersected_compton_meas(combined_df: pd.DataFrame) -> pd.DataFrame:
    """Replaces with NaNs the compton measurements written on the border of two energy points
    """
    energy_point_borders = combined_df[['point_idx', 'elabel', 'run_start', 'run_stop']].groupby(['point_idx'], dropna=True).agg(
        elabel_start_time=('run_start', 'min'), elabel_stop_time=('run_stop', 'max'),
    )
    df_comb = combined_df.set_index('point_idx').join(energy_point_borders, how='left')
    df_comb['comptonmeas_in_elabel'] = (df_comb[['elabel_stop_time', 'compton_stop']].min(axis=1) - df_comb[['elabel_start_time', 'compton_start']].max(axis=1))/(df_comb['compton_stop'] - df_comb['compton_start'])
    df_comb = df_comb.query('comptonmeas_in_elabel < 0.7')
    border_comptons = df_comb.compton_start.values
    combined_df.loc[combined_df.compton_start.isin(border_comptons),
                    ['compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'luminosity']] = np.nan
    return combined_df
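
# Toy illustration of the fraction above (hypothetical times): a compton measurement
# spanning 10:00-12:00 against an energy point spanning 11:30-15:00 shares 0.5 h of
# its 2 h duration, so comptonmeas_in_elabel = 0.25 < 0.7 and the measurement is
# masked as a border one.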

def process_combined(combined_df: pd.DataFrame, runs_df: pd.DataFrame, compton_df: pd.DataFrame, pics_folder: Optional[str] = None, rdb: Optional[RunsDBHandler] = None, old_averager: bool = False, energy_point_csv_folder: Optional[str] = None) -> pd.DataFrame:
    if pics_folder is not None:
        plt.ioff()
        plt.style.use('ggplot')
        locator = mdates.AutoDateLocator(minticks=5)
        formatter = mdates.ConciseDateFormatter(locator)
        formatter.formats = ['%y', '%b', '%d', '%H:%M', '%H:%M', '%S.%f', ]
        formatter.zero_formats = [''] + formatter.formats[:-1]
        formatter.zero_formats[3] = '%d-%b'
        formatter.offset_formats = ['', '%Y', '%b %Y', '%d %b %Y', '%d %b %Y', '%d %b %Y %H:%M', ]
    runs_df = runs_df.rename({'luminosity': 'luminosity_full', 'energy': 'elabel'}, axis=1)
    combined_df = pd.merge(combined_df.drop(['elabel'], axis=1), runs_df[['run', 'elabel', 'luminosity_full']], how='outer')
    combined_df = combined_df.sort_values(by='run')
    run_bin_split = np.digitize(combined_df.run, MANUAL_RUNS_SPLIT)
    is_same_elabel = np.isclose(combined_df.elabel, combined_df.elabel.shift(1), atol=1e-4)
    is_same_manualbin = np.isclose(run_bin_split, np.roll(run_bin_split, 1))
    combined_df['point_idx'] = np.cumsum(~(is_same_elabel & is_same_manualbin))
    combined_df = process_intersected_compton_meas(combined_df)
    combined_df['luminosity'] = combined_df['luminosity'].fillna(0)
    combined_df = combined_df.groupby(['point_idx', 'compton_start'], dropna=False).agg(
        elabel=('elabel', 'min'), elabel_test=('elabel', 'max'),
        run_first=('run', 'min'), run_last=('run', 'max'),
        luminosity=('luminosity', 'sum'), luminosity_total=('luminosity_full', 'sum'),
        compton_stop=('compton_stop', 'min'), compton_stop_test=('compton_stop', 'max'),
        e_mean=('e_mean', 'min'), e_mean_test=('e_mean', 'max'),
        e_std=('e_std', 'min'), e_std_test=('e_std', 'max'),
        spread_mean=('spread_mean', 'min'), spread_mean_test=('spread_mean', 'max'),
        spread_std=('spread_std', 'min'), spread_std_test=('spread_std', 'max'),
    ).reset_index().set_index('point_idx')
    result_df = pd.DataFrame(columns=['energy_point', 'first_run', 'last_run', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum', 'comment'])
    for i, table in tqdm(combined_df.groupby('point_idx', dropna=False)):
        try:
            res_dict, good_df = calculate_point(table, runs_df, compton_df, rdb, averager_on_lums if old_averager else ultimate_averager)
            if energy_point_csv_folder is not None:
                save_columns = ['elabel', 'run_first', 'run_last', 'luminosity', 'compton_start', 'compton_stop', 'e_mean', 'e_std', 'spread_mean', 'spread_std', 'accepted']
                save_csv(good_df[save_columns].dropna(), f'{energy_point_csv_folder}/{res_dict["energy_point"]}_{res_dict["first_run"]}.csv', update_current=False)
            good_df = good_df.query('accepted==1')
        except Exception:
            logging.exception(f'Point {i} is skipped')
            continue
        result_df = pd.concat([result_df, pd.Series(res_dict).to_frame().T], ignore_index=True)
        if pics_folder is not None:
            plt_table = good_df.dropna()
            if len(plt_table) == 0:
                continue
            total_error = np.sqrt(res_dict["mean_energy_stat_err"]**2 + res_dict["mean_energy_sys_err"]**2)
            half_timedelta = (plt_table.compton_stop - plt_table.compton_start)/2
            time = plt_table.compton_start + half_timedelta
            dlt0, total_time = timedelta(days=1), plt_table.compton_stop.max() - plt_table.compton_stop.min()
            timelim = [plt_table.compton_start.min() - 0.05*total_time, plt_table.compton_stop.max() + 0.05*total_time]
            fig, ax = plt.subplots(1, 1, dpi=120, tight_layout=True)
            ax.errorbar(time, plt_table.e_mean, xerr=half_timedelta, yerr=plt_table.e_std, fmt='.')
            ax.axhline(res_dict['mean_energy'], color='black', zorder=3, label='Mean')
            ax.fill_between(timelim,
                            [res_dict['mean_energy'] - total_error]*2,
                            [res_dict['mean_energy'] + total_error]*2, color='green', zorder=1, alpha=0.4)
            ax.tick_params(axis='x', labelrotation=45)
            ax.xaxis.set_major_locator(locator)
            ax.xaxis.set_major_formatter(formatter)
            ax.set(title=f'{res_dict["energy_point"]}, E = {res_dict["mean_energy"]:.3f} ± {res_dict["mean_energy_stat_err"]:.3f} ± {res_dict["mean_energy_sys_err"]:.3f} MeV',
                   xlabel='Time, NSK', ylabel='Energy, [MeV]', xlim=timelim)
            plt.savefig(f'{pics_folder}/{res_dict["first_run"]}_{res_dict["energy_point"]}.png', transparent=True)
            plt.close()
    types_dict = {ftype: float for ftype in ['energy_point', 'mean_energy', 'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err', 'used_lum']}
    types_dict.update({itype: int for itype in ['first_run', 'last_run']})
    result_df = result_df.astype(types_dict)
    return result_df

def final_table_to_clbrdb(df: pd.DataFrame, clbrdb: CalibrdbHandler, runs_df: pd.DataFrame, season: str):
    """Writes the good values from the averaged table into the clbrdb
    """
    good_values = (df.comment == '') | ((df.comment != '') & ((df.mean_energy.astype(float) - df.energy_point).abs() < 5))
    df_clbrdb = df.loc[good_values].drop(['comment', 'used_lum'], axis=1)
    df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'starttime']], how='left', left_on='first_run', right_on='run').drop(['run'], axis=1)
    df_clbrdb = pd.merge(df_clbrdb, runs_df[['run', 'stoptime']], how='left', left_on='last_run', right_on='run').drop(['run'], axis=1)
    df_clbrdb = df_clbrdb.assign(writetime=lambda df: df['stoptime'])
    df_clbrdb = df_clbrdb[['writetime', 'starttime', 'stoptime',
                           'energy_point', 'first_run', 'last_run', 'mean_energy',
                           'mean_energy_stat_err', 'mean_energy_sys_err', 'mean_spread', 'mean_spread_stat_err']].values.tolist()
    clbrdb.insert(df_clbrdb, 'Misc', 'RunHeader', 'Compton_run_avg', 'Default', comment=season)
    clbrdb.commit()

def save_csv(df: pd.DataFrame, filepath: str, update_current: bool = True):
    """Saves a csv file. Updates the current file in filepath if it exists"""
    logging.info(f'Saving csv file: len(df)={len(df)}, filepath={filepath}, update_current={update_current}')
    if os.path.isfile(filepath) and update_current:
        df_current = pd.read_csv(filepath)
        df_current = pd.concat([df_current, df], ignore_index=True)
        df_current = df_current.drop_duplicates(subset=['energy_point', 'first_run'], keep='last')
        df = df_current
    df.to_csv(filepath, index=False, float_format='%g')
    return

# python scripts/compton_combiner.py -s NNBAR2021 -c database.ini --csv_dir . --clbrdb
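# A possible database.ini layout (hypothetical values; each section is passed verbatim
# as keyword arguments to RunsDBHandler and CalibrdbHandler respectively):
#   [cmdruns]
#   host = cmddb
#   database = online
#   user = ...
#   password = ...
#   [clbrDB]
#   ...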

def main():
    log_format = '[%(asctime)s] %(levelname)s: %(message)s'
    logging.basicConfig(stream=sys.stdout, format=log_format, level=logging.INFO)  # or filename="compton_combiner.log"
    logging.info("compton_combiner is started")
    parser = argparse.ArgumentParser(description='Mean compton energy measurements from clbrdb')
    parser.add_argument('-s', '--season', help='Name of the season')
    parser.add_argument('-c', '--config', help='Config file containing information for access to the databases')
    parser.add_argument('--csv_dir', help='Save a csv file with the data in this folder (skipped if omitted)')
    parser.add_argument('--clbrdb', action='store_true', help='Update the Compton_run_avg clbrdb')
    parser.add_argument('--pics_folder', help='Path to the directory for saving the pictures')
    parser.add_argument('--energy_point_csv_folder', help='Path to the directory for saving the detailed results for each energy point')
    parser.add_argument('--only_last', action='store_true', help='Compute values of the last (in the Compton_run_avg clbrdb) and new points only')
    parser.add_argument('--old_averaging', action='store_true', help=r'Use the old incomplete <E> = \frac{\sum L_i E_i}{\sum L_i} averaging')
    args = parser.parse_args()
    logging.info(f"""Arguments: season {args.season}, config {args.config}, csv_dir {args.csv_dir}, save_to_clbrdb {args.clbrdb},
    pics_folder {args.pics_folder}, detailed_csv_folder {args.energy_point_csv_folder}, only_last {args.only_last}, old_average: {args.old_averaging}""")
    parser = ConfigParser()
    parser.read(args.config)
    rdb = RunsDBHandler(**parser['cmdruns'])
    clbrdb = CalibrdbHandler(**parser['clbrDB'])
    idx = SEASONS['name'].index(args.season)
    runs_range = (SEASONS['start_run'][idx], SEASONS['start_run'][idx + 1])
    if args.only_last:
        res_avg = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run_avg', num_last_rows=1)
        if len(res_avg[0]) != 0:
            begintime = res_avg[0][0][res_avg[1].index("begintime")]
            runs_range = (begintime, None)
            logging.info(f"only_last flag enabled. Time range for runs database is used: {runs_range}")
    res_rdb = rdb.load_tables(runs_range)
    runs_df = pd.DataFrame(res_rdb[0], columns=res_rdb[1])
    if args.only_last:
        # keep only the studied season (important for the first averaging in a season with the only_last flag)
        start_season_run, stop_season_run = SEASONS['start_run'][idx], SEASONS['start_run'][idx + 1] if SEASONS['start_run'][idx + 1] else np.inf
        runs_df = runs_df.query('(run>=@start_season_run)&(run<@stop_season_run)')
    tdlt0 = timedelta(days=2)
    time_range = (runs_df.starttime.min() - tdlt0, runs_df.stoptime.max() + tdlt0)
    res_clbrdb = clbrdb.load_table('Misc', 'RunHeader', 'Compton_run', num_last_rows=None, timerange=time_range)
    cb = Combiner(res_rdb, res_clbrdb)
    comb_df = cb.combined_table()
    compton_df = pd.DataFrame(res_clbrdb[0], columns=res_clbrdb[1])
    cdf = process_combined(comb_df, runs_df, compton_df, args.pics_folder, rdb, args.old_averaging, args.energy_point_csv_folder)
    if args.csv_dir is not None:
        csv_path = os.path.join(args.csv_dir, f'{args.season}.csv')
        save_csv(cdf, csv_path)
    if args.clbrdb:
        final_table_to_clbrdb(cdf, clbrdb, runs_df, args.season)
    return

if __name__ == "__main__":
    main()