Browse Source

Fix elabelizer.py

* Switch to using the Gogs repository
* Add .gitignore
nikita-p 4 months ago
parent
commit
98ad719b77
2 changed files with 101 additions and 44 deletions
  1. 2 0
      .gitignore
  2. 99 44
      src/avg/elabelizer.py

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+__pycache__
+*.ini

+ 99 - 44
src/avg/elabelizer.py

@@ -9,18 +9,47 @@ If this script is not working properly, try to setup environment firstly with th
 This script is inspired by listoffdata.py script (/sl/cmd3/cc7-64/Cmd3Off/scripts/listoffdata.py)
 """
 
+from datetime import datetime
+from typing import Dict, List, Tuple, NamedTuple, Optional
+from pathlib import Path
+from urllib.request import urlopen
+
+import json
 import os 
-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings_read_all')
 
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings_read_all')
+API_KEY = "9e3b3d9b7a57098bb1bc0e5c5053f2084251cd00"
 
 import django
 django.setup()
-
 from cmdweb.apps.cmd3off.energypoints.models import EnergyPoint, Param, ParamData
-from urllib.request import urlopen
 
+class RowEPoint(NamedTuple):
+    """Structure of the energy point metadata"""
+    elabel: str
+    first_run: int
+    last_run: int
+    nominal_energy: float
+    start_time: datetime
+    stop_time: datetime
+    season: str
+
+class AvgEPoint(NamedTuple):
+    """Structure of the averaged beam energy point"""
+    energy_mean: float
+    first_run: int
+    last_run: int
+    nominal_energy: float
+    season: str
 
-def retrieve_elabels(startrun, stoprun):
+class CombinedEPoint(NamedTuple):
+    """Structure of the combined energy point"""
+    season: str
+    elabel: Optional[str]
+    energy_mean: Optional[float]
+
+
+def retrieve_elabels(startrun: int, stoprun: int) -> List[RowEPoint]:
     """Retrieves elabels from database
     
     Parameters:
@@ -32,7 +61,7 @@ def retrieve_elabels(startrun, stoprun):
     
     Returns:
     --------
-    List[Tuple[str, int, int, float, datetime, datetime]
+    List[RowEPoint]
         list of elabels and information about them
         (elabel, firstrun, lastrun, nominal energy, starttime, stoptime, season)
     """
@@ -48,78 +77,106 @@ def retrieve_elabels(startrun, stoprun):
     for e in EnergyPoint.objects.filter(startrun__gte=startrun, endrun__lte=stoprun):
         while((e.startrun < seasons[season_idx][1]) or (e.endrun > seasons[season_idx][2])):
             season_idx+=1
-        info = (e.name, e.startrun, e.endrun, e.energy, e.starttime, e.endtime, seasons[season_idx][0])
+        info = RowEPoint(
+            elabel = e.name, 
+            first_run = e.startrun, 
+            last_run = e.endrun, 
+            nominal_energy = e.energy, 
+            start_time = e.starttime, 
+            stop_time = e.endtime, 
+            season = seasons[season_idx][0]
+        )
         if not(e.name.startswith("Season")):
             points.append(info)
     return points
     
-def get_available_compton_seasons():
-    tables_url = 'https://cmd.inp.nsk.su/~compton/gitlist/compton_tables/raw/dev/tables/'
+def get_available_compton_seasons() -> Dict[str, str]:
+    """Returns a dictionary containing all available 
+    average energy season tables
+    
+    Returns
+    -------
+    Dict[str, str]
+        keys are season names, values are download URLs of the tables
+    """
+
+    tables_url = f'https://cmd.inp.nsk.su/~compton/gogs/api/v1/repos/compton/tables/contents/?token={API_KEY}'
     with urlopen(tables_url) as f:
-        data = f.read().decode('utf-8')
-        datarows = list(map(lambda x: x[:-4], filter(lambda x: x.endswith('.csv'), data.split('\n'))))
+        data_str = f.read().decode('utf-8')
+        data = json.loads(data_str)
+        datarows = {Path(row['name']).stem: row['download_url'] for row in
+        filter(lambda x: (x['type'] == 'file') and x['name'].endswith('.csv'), data)}
     return datarows
 
-def retrieve_comptons(seasons=None):
+def retrieve_comptons(season_names: Optional[List[str]] = None) -> List[AvgEPoint]:
     """Retrieves compton measurements from tables
     
     Parameters:
     -----------
-    seasons: Optional[List[str]]
+    season_names: Optional[List[str]]
         list of seasons for retrieving
         (default is None, it means retrieving all available seasons)
     
     Returns:
     --------
-    List[Tuple[str, float, int, int, float]]
+    List[AvgEPoint]
         list of compton means measurements
         (season, energy_point, first_run, last_run, mean_energy)
         first_run and last_run can be different from the corresponding energy points from elabels database
     """
     
     available_seasons = get_available_compton_seasons()
-    if seasons is not None:
-        for s in seasons:
-            if s not in available_seasons:
-                raise ValueError(f"Season {s} is not found. List of available seasons: {available_seasons}")
+    seasons = dict()
+    if season_names is not None:
+        for s in season_names:
+            try:
+                seasons[s] = available_seasons[s]
+            except KeyError:
+                raise ValueError(f"Season {s} is not found. List of available seasons: {available_seasons.keys()}")
     else:
         seasons = available_seasons
         
-    def parse_compton_row(row, season):
+    def parse_compton_row(row: str, season: str) -> AvgEPoint:
         items = row.split(',')
-        return (season, float(items[0]), int(items[1]), int(items[2]), float(items[3]))
-    
-    def retrieve_compton_one_season(season):
-        with urlopen(f'https://cmd.inp.nsk.su/~compton/gitlist/compton_tables/raw/dev/tables/{season}.csv') as f:
+        return AvgEPoint(
+            season=season, 
+            nominal_energy=float(items[0]), 
+            first_run=int(items[1]), 
+            last_run=int(items[2]), 
+            energy_mean=float(items[3])
+        )
+    
+    def retrieve_compton_one_season(season_name: str, season_table_url: str) -> List[AvgEPoint]:
+        with urlopen(season_table_url) as f:
             r = f.read().decode('utf-8').strip()
-            rows = list(map(lambda x: parse_compton_row(x, season), r.split('\n')[1:]))
+            rows = list(map(lambda x: parse_compton_row(x, s_name), r.split('\n')[1:]))
         return rows
                     
     datarows = []
-    for s in seasons:
-        seasonrows = retrieve_compton_one_season(s)
+    for s_name, s_table in seasons.items():
+        seasonrows = retrieve_compton_one_season(s_name, s_table)
         datarows.extend(seasonrows)
     
     return datarows
         
-def combine_compton_elabel(epoints, comptonpoints):
+def combine_compton_elabel(epoints: List[RowEPoint], comptonpoints: List[AvgEPoint]) -> List[CombinedEPoint]:
     """Combines compton energy points and elabels from database
     
     Parameters:
     -----------
-    epoints : List[Tuple[...]]
+    epoints : List[RowEPoint]
         list of energy points
-    comptonpoints : List[Tuple[...]]
-        list of compton points
+    comptonpoints : List[AvgEPoint]
+        list of compton averaged points
     
     Returns:
     --------
-    combpoints : List[Tuple[...]]
+    combpoints : List[CombinedEPoint]
         list of combined points (outer join)
     """
     
-    epoints_sorted = sorted(epoints, key=lambda x: x[1])
-    cpoints_sorted = sorted(comptonpoints, key=lambda x: x[2])
+    epoints_sorted = sorted(epoints, key=lambda x: x.first_run)
+    cpoints_sorted = sorted(comptonpoints, key=lambda x: x.first_run)
     
     combined_data = []
     
@@ -128,42 +185,40 @@ def combine_compton_elabel(epoints, comptonpoints):
         erow = epoints_sorted[eidx]
         crow = cpoints_sorted[cidx]
         
-        cstart, cstop, cenergy, cseason = crow[2], crow[3], crow[4], crow[0]
-        estart, estop, elabel, eseason = erow[1], erow[2], erow[0], erow[6]
+        cstart, cstop, cenergy, cseason = crow.first_run, crow.last_run, crow.energy_mean, crow.season
+        estart, estop, elabel, eseason = erow.first_run, erow.last_run, erow.elabel, erow.season
         
         if (cstart >= estart) and (cstop <= estop):
-            combrow = (eseason, elabel, cenergy)
+            combrow = CombinedEPoint(eseason, elabel, cenergy)
             eidx += 1
             cidx += 1
             #print(combrow)
         elif (cstart > estop):
-            combrow = (eseason, elabel, None)
+            combrow = CombinedEPoint(eseason, elabel, None)
             eidx += 1
         elif (estart > cstop):
-            combrow = (cseason, None, cenergy)
+            combrow = CombinedEPoint(cseason, None, cenergy)
             cidx += 1
         else:
             raise Exception("Something wrong")
         
-        #print(combrow)
+        # print(combrow)
         combined_data.append(combrow)
     
     for i in range(eidx, len(epoints_sorted)):
         erow = epoints_sorted[i]
-        elabel = erow[0]
-        combrow = (None, elabel, None)
+        combrow = CombinedEPoint(erow.season, erow.elabel, None)
         combined_data.append(combrow)
         
     for i in range(cidx, len(cpoints_sorted)):
         crow = cpoints_sorted[i]
-        cstart, cstop, cenergy, cseason = crow[2], crow[3], crow[4], crow[0]
-        combrow = (cseason, None, cenergy)
+        combrow = CombinedEPoint(crow.season, None, crow.energy_mean)
         combined_data.append(combrow)
     
     return combined_data
 
 
-def elabelize():
+def elabelize() -> List[CombinedEPoint]:
     RUNLIMITS = (17405, 200000)
     SEASONS = None
     epoints = retrieve_elabels(*RUNLIMITS)
@@ -175,7 +230,7 @@ def elabelize():
 def main():
     combdata = elabelize()
     for c in combdata:
-        season, elabel, energy = c
+        season, elabel, energy = c.season, c.elabel, c.energy_mean
         if season is None:
             season = ''
         if elabel is None: