Coverage for rta_reconstruction/dl1_to_dl2.py: 74%

103 statements  

« prev     ^ index     » next       coverage.py v7.6.7, created at 2024-11-16 09:59 +0000

1"""DL1 to DL2 processing script.""" 

2 

3import argparse 

4import json 

5from pathlib import Path 

6from typing import Dict 

7 

8import astropy.units as u 

9import joblib 

10import numpy as np 

11import pandas as pd 

12from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor 

13 

14from rta_reconstruction.dl1_reader import ( 

15 convert_az_and_sin_az_angle_to_degree, 

16 filter_dl1, 

17 interpolate_missing_alt_az, 

18 read_dl1, 

19 read_telescope_optics, 

20) 

21from rta_reconstruction.dl2_io import init_dl2_file, write_dl2_df 

22from rta_reconstruction.image_displacement import ( 

23 disp_to_pos, 

24 disp_vector_pol2cart, 

25 update_disp_with_effective_focal_length, 

26) 

27from rta_reconstruction.utils.coordinates import camera_to_altaz, camera_to_shower_coordinates 

28from rta_reconstruction.utils.logging import init_logging 

29 

30 

31# TODO: move somewhere else ? 

def reco_source_position_sky(cog_x, cog_y, disp_dx, disp_dy, focal_length, pointing_alt, pointing_az):
    """
    Reconstruct the source position in the sky from image parameters.

    The source position in the camera is obtained from the image centre of
    gravity and the displacement vector with `disp_to_pos`, and the result is
    then handed to `camera_to_altaz` together with the focal length and the
    telescope pointing.

    Parameters
    ----------
    cog_x: `astropy.units.Quantity`
    cog_y: `astropy.units.Quantity`
    disp_dx: `astropy.units.Quantity`
    disp_dy: `astropy.units.Quantity`
    focal_length: `astropy.units.Quantity`
    pointing_alt: `astropy.units.Quantity`
    pointing_az: `astropy.units.Quantity`

    Returns
    -------
    sky frame: `astropy.coordinates.sky_coordinate.SkyCoord`
    """
    # Step 1: centre of gravity + displacement -> source position in the camera.
    reco_x, reco_y = disp_to_pos(disp_dx, disp_dy, cog_x, cog_y)
    # Step 2: camera position -> alt/az, using the telescope pointing.
    sky_position = camera_to_altaz(reco_x, reco_y, focal_length, pointing_alt, pointing_az)
    return sky_position

52 

53 

54# TODO: use pydantic configuration instead of json to Dict 

55# TODO: use more generic type hints than specific class (sklearn regressor/classifier maybe ?) 

56# TODO: refactor in several steps to isolate the "predict" calls, to make it simpler to implement training 

def dl1_to_dl2(
    dl1_df: pd.DataFrame,
    dl1_dl2_config: Dict,
    effective_focal_length,
    energy_regressor: RandomForestRegressor,
    gamma_classifier: RandomForestClassifier,
    image_displacement_vector_regressor: RandomForestRegressor | None = None,
    disp_norm_regressor: RandomForestRegressor | None = None,
    disp_sign_classifier: RandomForestClassifier | None = None,
) -> pd.DataFrame:
    """
    Apply the trained models to DL1 parameters to produce the DL2 quantities.

    This operates IN PLACE: no copy of `dl1_df` is made, the DL2 columns are
    added to it and the very same object is returned.

    Columns added: ``log_reco_energy``, ``reco_energy``, ``reco_disp_dx``,
    ``reco_disp_dy``, ``reco_src_x``, ``reco_src_y``, ``signed_time_gradient``,
    ``signed_skewness``, ``reco_alt``, ``reco_az``, ``gammaness`` and
    ``reco_type``. When the norm/sign models are used instead of the vector
    regressor, ``reco_disp_norm``, ``reco_disp_sign`` and
    ``reco_disp_sign_proba`` are added as well.

    Parameters
    ----------
    dl1_df: `pandas.DataFrame`
        DL1 parameters. Must contain the feature columns listed in
        `dl1_dl2_config` as well as ``x``, ``y``, ``psi``, ``time_gradient``
        and ``skewness``.
    dl1_dl2_config: Dict
        Configuration holding the feature lists: ``energy_regressor_features``,
        ``gamma_classifier_features`` and either
        ``disp_vector_regressor_features`` or both
        ``disp_norm_regressor_features`` and ``disp_sign_classifier_features``.
    effective_focal_length:
        Focal length forwarded to `reco_source_position_sky`.
    energy_regressor: RandomForestRegressor
        Model predicting the log10 of the energy.
    gamma_classifier: RandomForestClassifier
        Gamma/hadron classifier; its ``classes_`` must contain 0 (gamma).
    image_displacement_vector_regressor: RandomForestRegressor, optional
        Model predicting the (dx, dy) displacement vector. Takes precedence
        over the norm/sign models when provided.
    disp_norm_regressor: RandomForestRegressor, optional
        Model predicting the displacement norm (used with `disp_sign_classifier`).
    disp_sign_classifier: RandomForestClassifier, optional
        Model predicting the displacement sign; its ``classes_`` must contain 1.

    Returns
    -------
    `pandas.DataFrame`
        The input dataframe, with the DL2 columns added.

    Raises
    ------
    ValueError
        If neither `image_displacement_vector_regressor` nor both
        `disp_norm_regressor` and `disp_sign_classifier` are provided.
    """
    # Validate the model combination before touching the dataframe, so a bad call
    # fails with a clear message instead of leaving a half-updated dataframe.
    if image_displacement_vector_regressor is None and (
        disp_norm_regressor is None or disp_sign_classifier is None
    ):
        raise ValueError(
            "Either `image_displacement_vector_regressor`, or both `disp_norm_regressor` and "
            "`disp_sign_classifier` must be provided."
        )

    # TODO: make very clear in doc that this is "inplace" operations
    # (this doesn't copy, simply we will add column to the df)
    dl2_df = dl1_df

    # TODO: refactor model computation to individual functions (if surrounding operations are really required)

    # TODO: is log energy or energy used by disp vector, disp norm or disp sign ? then need concatenate
    # to avoid copy ? Can't use "out" since predict doesn't support that API, could copy only vector of
    # size (#samples) instead of everything though
    # what could do, since fortran indexing may be best (TEST!?)
    # load everything as a fortran order rec-array (pre-allocated with dl2 columns.)
    # that way we can in with column names, and avoid pandas copy
    # pytables read has an "out" parameters, but it doesn't check the types between out and datadisk (only copies)
    dl2_df["log_reco_energy"] = energy_regressor.predict(dl2_df.loc[:, dl1_dl2_config["energy_regressor_features"]])
    dl2_df["reco_energy"] = 10 ** dl2_df["log_reco_energy"]

    if image_displacement_vector_regressor is not None:
        disp_vector = image_displacement_vector_regressor.predict(
            dl2_df.loc[:, dl1_dl2_config["disp_vector_regressor_features"]]
        )
    else:
        dl2_df["reco_disp_norm"] = disp_norm_regressor.predict(
            dl2_df.loc[:, dl1_dl2_config["disp_norm_regressor_features"]]
        )
        # Column selection must go through `.loc`: `df[:, columns]` is not valid DataFrame indexing.
        disp_sign_proba = disp_sign_classifier.predict_proba(
            dl2_df.loc[:, dl1_dl2_config["disp_sign_classifier_features"]]
        )
        # TODO: since we only care about something been gamma or not, 1 probability should be enough
        # we need to change the models for it to predict "gamma" or "not-gamma" with single proba.
        col = list(disp_sign_classifier.classes_).index(1)
        disp_sign = np.where(disp_sign_proba[:, col] > 0.5, 1, -1)
        dl2_df["reco_disp_sign"] = disp_sign
        # NOTE(review): this stores the probability of the first class in `classes_`, which is not
        # necessarily the class (1) used for the sign decision above - confirm this is intended.
        dl2_df["reco_disp_sign_proba"] = disp_sign_proba[:, 0]

        disp_vector = disp_vector_pol2cart(dl2_df["reco_disp_norm"], dl2_df["psi"], disp_sign)

    dl2_df["reco_disp_dx"] = disp_vector[:, 0]
    dl2_df["reco_disp_dy"] = disp_vector[:, 1]
    dl2_df["reco_src_x"], dl2_df["reco_src_y"] = disp_to_pos(
        dl2_df.reco_disp_dx,
        dl2_df.reco_disp_dy,
        dl2_df.x,
        dl2_df.y,
    )

    # TODO: stack coordinates and project in // as in pyhiperta ?
    longi, _ = camera_to_shower_coordinates(
        dl2_df["reco_src_x"], dl2_df["reco_src_y"], dl2_df["x"], dl2_df["y"], dl2_df["psi"]
    )

    # TODO: check sign of longitudinal coordinate with HiPeRTA
    # TODO: is this required ?
    # Obtain the time gradient with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined positive if light arrival times increase with distance to it. Negative otherwise:
    dl2_df["signed_time_gradient"] = -1 * np.sign(longi) * dl2_df["time_gradient"]
    # Obtain skewness with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined on the major image axis; sign is such that it is typically positive for gammas:
    dl2_df["signed_skewness"] = -1 * np.sign(longi) * dl2_df["skewness"]

    # TODO: what if we use simulations but still want to use the reconstructed alt az ?
    # TODO: better default value (nan ?)
    # Pointing: prefer the simulated pointing, then the measured one, else a -pi/2 placeholder.
    if "mc_alt_tel" in dl2_df.columns:
        alt_tel = dl2_df["mc_alt_tel"].values
        az_tel = dl2_df["mc_az_tel"].values
    elif "alt_tel" in dl2_df.columns:
        alt_tel = dl2_df["alt_tel"].values
        az_tel = dl2_df["az_tel"].values
    else:
        alt_tel = -np.pi / 2.0 * np.ones(len(dl2_df))
        az_tel = -np.pi / 2.0 * np.ones(len(dl2_df))

    # TODO: this calls astropy coordinates changing routines, can it be optimized ?
    src_pos_reco = reco_source_position_sky(
        dl2_df.x.values * u.m,
        dl2_df.y.values * u.m,
        dl2_df.reco_disp_dx.values * u.m,
        dl2_df.reco_disp_dy.values * u.m,
        effective_focal_length,
        alt_tel * u.rad,
        az_tel * u.rad,
    )
    dl2_df["reco_alt"] = src_pos_reco.alt.rad
    dl2_df["reco_az"] = src_pos_reco.az.rad

    gammaness = gamma_classifier.predict_proba(dl2_df.loc[:, dl1_dl2_config["gamma_classifier_features"]])

    # TODO: replace this with a single proba predictor like disp sign, so no hardcoded class values!
    # gammaness is the prediction probability for the class 0 (proton: class 101)
    mc_type_gamma, mc_type_proton = 0, 101
    col = list(gamma_classifier.classes_).index(mc_type_gamma)
    dl2_df["gammaness"] = gammaness[:, col]
    dl2_df["reco_type"] = np.where(gammaness[:, col] > 0.5, mc_type_gamma, mc_type_proton)

    return dl2_df

161 

162 

def main():
    """
    Command line entry point: process DL1 file(s) to DL2 file(s).

    Parses the command line, loads the JSON configuration and the models with
    joblib, then for each (DL1 input, DL2 output) pair: reads and filters the
    DL1 table, runs `dl1_to_dl2` and writes the result to the DL2 file.

    The image displacement must be reconstructed either with the vector
    regressor alone, or with the norm regressor and the sign classifier
    together.

    Raises
    ------
    argparse.ArgumentTypeError
        If the number of input and output files differ, or if the combination
        of image displacement model arguments is invalid.
    """
    # TODO: init logging with log files passed as argument or in config
    init_logging(log_filename="dl1_to_dl2.log")

    parser = argparse.ArgumentParser(
        description="Stand-alone DL1 to DL2 processing from ctapipe-lstchain for Real Time Analysis",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--config",
        "-c",
        action="store",
        type=Path,
        dest="config_path",
        help="Path to the configuration file",
        required=True,
    )
    parser.add_argument(
        "--input_dl1",
        "-i",
        type=Path,
        nargs="+",
        dest="dl1_file_paths",
        help="Path(s) to DL1 file(s) to process to DL2.",
        required=True,
    )
    parser.add_argument(
        "--energy_regressor",
        "-e",
        action="store",
        type=Path,
        required=True,
        dest="energy_regressor_model_path",
        help="Path to the energy regressor model (.sav)",
    )
    parser.add_argument(
        "--gamma_classifier",
        "-g",
        action="store",
        type=Path,
        required=True,
        dest="gamma_classifier_path",
        help="Path to the gamma/hadron classifier model (.sav)",
    )
    parser.add_argument(
        "--image_displacement_vector_regressor",
        "-v",
        action="store",
        type=Path,
        default=None,
        dest="disp_vector_regressor_model_path",
        help="Path to the image displacement vector regressor model (.sav). "
        "Either this argument must be specified, or `image_displacement_norm_regressor` and "
        "`image_displacement_sign_classifier`",
    )
    parser.add_argument(
        "--image_displacement_norm_regressor",
        "-n",
        action="store",
        type=Path,
        default=None,
        dest="disp_norm_regressor_model_path",
        help="Path to the image displacement norm regressor model (.sav). "
        "Either this argument and `image_displacement_sign_classifier` must be specified, or "
        "`image_displacement_vector_regressor`",
    )
    parser.add_argument(
        "--image_displacement_sign_classifier",
        "-s",
        action="store",
        type=Path,
        default=None,
        dest="disp_sign_classifier_model_path",
        help="Path to the image displacement sign classifier model (.sav). "
        "Either this argument and `image_displacement_norm_regressor` must be specified, or "
        "`image_displacement_vector_regressor`",
    )
    parser.add_argument(
        "--output_dl2",
        "-o",
        type=Path,
        nargs="+",
        dest="dl2_file_paths",
        help="Path(s) to DL2 file(s) to write the output.",
        required=True,
    )

    args = parser.parse_args()

    # Inputs and outputs are paired one-to-one below, so their counts must match.
    if len(args.dl1_file_paths) != len(args.dl2_file_paths):
        raise argparse.ArgumentTypeError(
            "The number {} of input dl1 files must match the number {} of output DL2 files.".format(
                len(args.dl1_file_paths), len(args.dl2_file_paths)
            )
        )

    # The vector regressor is exclusive with the norm regressor / sign classifier pair.
    if args.disp_vector_regressor_model_path is not None and args.disp_norm_regressor_model_path is not None:
        raise argparse.ArgumentTypeError(
            "`image_displacement_vector_regressor` and `image_displacement_norm_regressor` are mutually exclusive arguments. "
            "Got {} and {}".format(args.disp_vector_regressor_model_path, args.disp_norm_regressor_model_path)
        )
    if args.disp_vector_regressor_model_path is not None and args.disp_sign_classifier_model_path is not None:
        raise argparse.ArgumentTypeError(
            "`image_displacement_vector_regressor` and `image_displacement_sign_classifier` are mutually exclusive arguments. "
            "Got {} and {}".format(args.disp_vector_regressor_model_path, args.disp_sign_classifier_model_path)
        )
    if (
        args.disp_vector_regressor_model_path is None
        and args.disp_norm_regressor_model_path is None
        and args.disp_sign_classifier_model_path is None
    ):
        raise argparse.ArgumentTypeError(
            "Either `image_displacement_vector_regressor` or `image_displacement_norm_regressor` and "
            "`image_displacement_sign_classifier` must be specified, got none of them."
        )
    # The norm regressor and the sign classifier only work as a pair.
    if (args.disp_norm_regressor_model_path is not None and args.disp_sign_classifier_model_path is None) or (
        args.disp_norm_regressor_model_path is None and args.disp_sign_classifier_model_path is not None
    ):
        # `argparse.ArgumentError` needs an (argument, message) pair, so use the
        # same exception type as the other validation checks.
        raise argparse.ArgumentTypeError(
            "`image_displacement_norm_regressor` and `image_displacement_sign_classifier` must both be specified if used. Got {} and {}".format(
                args.disp_norm_regressor_model_path, args.disp_sign_classifier_model_path
            )
        )

    with open(args.config_path, "r") as config_f:
        config = json.load(config_f)

    energy_regressor = joblib.load(args.energy_regressor_model_path)
    gamma_classifier = joblib.load(args.gamma_classifier_path)
    # Every feature used by any model: events with non-finite values in these are filtered out below.
    all_features = config["energy_regressor_features"] + config["gamma_classifier_features"]

    using_disp_vector = args.disp_vector_regressor_model_path is not None
    if using_disp_vector:
        disp_vector_regressor = joblib.load(args.disp_vector_regressor_model_path)
        disp_norm_regressor = None
        disp_sign_classifier = None
        all_features += config["disp_vector_regressor_features"]
    else:
        disp_vector_regressor = None
        disp_norm_regressor = joblib.load(args.disp_norm_regressor_model_path)
        disp_sign_classifier = joblib.load(args.disp_sign_classifier_model_path)
        all_features += config["disp_norm_regressor_features"] + config["disp_sign_classifier_features"]

    for dl1_file_path, dl2_file_path in zip(args.dl1_file_paths, args.dl2_file_paths):
        # TODO: read dl1 straight in a numpy array (avoid pandas to avoid copy before passing to sklearn)
        # should be Fortran order ? -> benchmark
        dl1_df = read_dl1(dl1_file_path)  # TODO: make table "key" configurable
        interpolate_missing_alt_az(dl1_df)  # TODO: required after dl1 alt-az maker ? what about simple dropna
        tel_optics = read_telescope_optics(dl1_file_path)  # TODO: make sure correct
        convert_az_and_sin_az_angle_to_degree(
            dl1_df
        )  # TODO: is this required ? (angles come from arctan2, and sin is not used ?)
        dl1_df = filter_dl1(
            dl1_df, filters=config["events_filters"], finite_params=all_features
        )  # TODO: config ? (filter events based on column names, filters, ~isnan, etc.)

        # TODO: is this required ?
        # Update parameters related to target direction on camera frame for MC data
        # taking into account of the aberration effect using effective focal length
        if "disp_norm" in dl1_df.columns:
            update_disp_with_effective_focal_length(dl1_df, effective_focal_length=tel_optics.effective_focal_length)

        # TODO: make dl1_to_dl2 not return anything - inplace operations in the df
        dl2_df = dl1_to_dl2(
            dl1_df,
            config,
            tel_optics.effective_focal_length,
            energy_regressor,
            gamma_classifier,
            disp_vector_regressor,
            disp_norm_regressor,
            disp_sign_classifier,
        )

        init_dl2_file(dl2_file_path, dl1_file_path)
        write_dl2_df(
            dl2_file_path,
            dl2_df,
            attributes={"config": config},
        )

343 

344 

# Run the command line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()