Coverage for rta_reconstruction/dl1_reader.py: 80%

51 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-02 09:59 +0000

1import warnings 

2from pathlib import Path 

3 

4import astropy.units as u 

5import numpy as np 

6import pandas as pd 

7from astropy.coordinates import Angle 

8 

9from rta_reconstruction.utils.optics import OpticsDescription 

10 

11 

12# TODO: put key in configuration, not hard-coded 

13# TODO: make sure likelihood fit is only use in source dependent analysis, therefore really not needed, otherwise need to implement it here 

14# TODO: do we need to create the "delta_t" column (lstchain does it from dragon time, which we don't have) 

15def read_dl1(dl1_file_path: Path, dl1_param_key: str = "/dl1/event/telescope/parameters/LST_LSTCam") -> pd.DataFrame: 

16 return pd.read_hdf(dl1_file_path, key=dl1_param_key) 

17 

18 

19# TODO: is this required ? in alt-az dl1_maker we just drop the missing events ? 

20# TODO: throw error instead of warning if repeated event id ? (could happen for small runs following each other in lst) 

21# TODO: profile cause this looks slow 

22# TODO: refactor to use DataFrame.fillna ? 

23def interpolate_missing_alt_az(dl1_df: pd.DataFrame): 

24 if ( 

25 "alt_tel" in dl1_df.columns 

26 and "az_tel" in dl1_df.columns 

27 and (np.isnan(dl1_df.alt_tel).any() or np.isnan(dl1_df.az_tel).any()) 

28 ): 

29 # make sure there is at least one good pointing value to interp from. 

30 if np.isfinite(dl1_df.alt_tel).any() and np.isfinite(dl1_df.az_tel).any(): 

31 if len(set(dl1_df.event_id)) != len(dl1_df.event_id): 

32 warnings.warn( 

33 "Beware, the data has been resorted by `event_id` to interpolate invalid pointing values but there are " 

34 "several events with the same `event_id` in the data, thus probably leading to unexpected behaviour", 

35 UserWarning, 

36 ) 

37 dl1_df = dl1_df.sort_values(by="event_id") 

38 for df_col in dl1_df["alt_tel", "az_tel"]: 

39 x = np.arange(len(df_col)) 

40 mask_missing = np.isnan(df_col.values) 

41 df_col = np.interp(x[mask_missing], x[~mask_missing], df_col.values[~mask_missing]) 

42 else: 

43 dl1_df.alt_tel = -np.pi / 2.0 

44 dl1_df.az_tel = -np.pi / 2.0 

45 

46 

47def convert_az_and_sin_az_angle_to_degree(dl1_df: pd.DataFrame): 

48 # Normalize all azimuth angles to the range [0, 360) degrees 

49 dl1_df.az_tel = Angle(dl1_df.az_tel, u.rad).wrap_at(360 * u.deg).rad 

50 

51 # Dealing with `sin_az_tel` missing data because of the former version of lstchain 

52 # TODO: this is copy paster from lstchain, but np.sin(degree) is not working 

53 if "sin_az_tel" not in dl1_df.columns: 

54 dl1_df["sin_az_tel"] = np.sin(dl1_df.az_tel) 

55 

56 

57# TODO: is this required ? 

58# TODO: make key configurable 

59# TODO: make sure we get the right optic per tel ID 

60# TODO: make this able to work for other tel ID 

61def read_telescope_optics(dl1_file_path: Path, optics_table_key: str = "/configuration/instrument/telescope/optics"): 

62 optics_df = pd.read_hdf(dl1_file_path, key=optics_table_key) 

63 layout_df = pd.read_hdf(dl1_file_path, key="/configuration/instrument/subarray/layout") 

64 lst1_layout_row = layout_df.loc[layout_df["tel_id"] == 1].iloc[0] 

65 optics_row = optics_df.iloc[lst1_layout_row["optics_index"]] 

66 return OpticsDescription( 

67 name=optics_row.name, 

68 size_type=optics_row.size_type, 

69 n_mirrors=optics_row.n_mirrors, 

70 equivalent_focal_length=optics_row.equivalent_focal_length * u.meter, # TODO: read units from file ? 

71 effective_focal_length=optics_row.effective_focal_length * u.meter, 

72 mirror_area=optics_row.mirror_area * u.meter * u.meter, 

73 n_mirror_tiles=optics_row.n_mirror_tiles, 

74 reflector_shape=optics_row.reflector_shape, 

75 ) 

76 

77 

78# TODO: configuration object for filtering ranges 

79# TODO: re-name finite_params 

80def filter_dl1( 

81 dl1_df, 

82 filters=None, 

83 finite_params=None, 

84): 

85 """ 

86 Apply data filtering to a pandas dataframe. 

87 Each filtering range is applied if the column name exists in the DataFrame so that 

88 `(events >= range[0]) & (events <= range[1])` 

89 

90 Parameters 

91 ---------- 

92 events: `pandas.DataFrame` 

93 filters: dict containing events features names and their filtering range 

94 example : dict(intensity=[0, np.inf], width=[0, np.inf], r=[0, np.inf]) 

95 finite_params: optional, None or list of strings 

96 Column names: any row with a non-finite value in one of those columns will be discarded 

97 Returns 

98 ------- 

99 `pandas.DataFrame` 

100 """ 

101 

102 filter = np.ones(len(dl1_df), dtype=bool) 

103 filters = {} if filters is None else filters 

104 

105 for col, (lower_limit, upper_limit) in filters.items(): 

106 filter &= (dl1_df[col] >= lower_limit) & (dl1_df[col] <= upper_limit) 

107 

108 if finite_params is not None: 

109 # TODO: a usefull warning could be: a filter column is not present ?... 

110 _finite_params = list(set(finite_params).intersection(list(dl1_df.columns))) 

111 # TODO: replace inf by nans, then run isna ?? 

112 dl1_df[_finite_params] = dl1_df[_finite_params].replace([np.inf, -np.inf], np.nan) 

113 not_finite_mask = dl1_df[_finite_params].isna() 

114 filter &= ~(not_finite_mask.any(axis=1)) 

115 # TODO: make this a useful warning: 1 per file ? / single warning entry 

116 not_finite_counts = (not_finite_mask).sum(axis=0)[_finite_params] 

117 if (not_finite_counts > 0).any(): 

118 warnings.warn("Data contains not-predictable events.") 

119 warnings.warn("Column | Number of non finite values") 

120 for k, v in not_finite_counts.items(): 

121 if v > 0: 

122 warnings.warn(f"{k} : {v}") 

123 

124 # if pandas DataFrame or Series, transforms to numpy 

125 # TODO: why convert to numpy ? 

126 filter = filter.to_numpy() if hasattr(filter, "to_numpy") else filter 

127 dl1_df = dl1_df[filter] 

128 

129 return dl1_df