Source code for rta_reconstruction.dl1_reader

import warnings
from pathlib import Path

import astropy.units as u
import numpy as np
import pandas as pd
from astropy.coordinates import Angle

from rta_reconstruction.utils.optics import OpticsDescription


# TODO: put key in configuration, not hard-coded
# TODO: make sure likelihood fit is only use in source dependent analysis, therefore really not needed, otherwise need to implement it here
# TODO: do we need to create the "delta_t" column (lstchain does it from dragon time, which we don't have)

[docs]
def read_dl1(dl1_file_path: Path, dl1_param_key: str = "/dl1/event/telescope/parameters/LST_LSTCam") -> pd.DataFrame:
    return pd.read_hdf(dl1_file_path, key=dl1_param_key)



# TODO: is this required ? in alt-az dl1_maker we just drop the missing events ?
# TODO: throw error instead of warning if repeated event id ? (could happen for small runs following each other in lst)
# TODO: profile cause this looks slow
# TODO: refactor to use DataFrame.fillna ?

[docs]
def interpolate_missing_alt_az(dl1_df: pd.DataFrame):
    if (
        "alt_tel" in dl1_df.columns
        and "az_tel" in dl1_df.columns
        and (np.isnan(dl1_df.alt_tel).any() or np.isnan(dl1_df.az_tel).any())
    ):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(dl1_df.alt_tel).any() and np.isfinite(dl1_df.az_tel).any():
            if len(set(dl1_df.event_id)) != len(dl1_df.event_id):
                warnings.warn(
                    "Beware, the data has been resorted by `event_id` to interpolate invalid pointing values but there are "
                    "several events with the same `event_id` in the data, thus probably leading to unexpected behaviour",
                    UserWarning,
                )
            dl1_df = dl1_df.sort_values(by="event_id")
            for df_col in dl1_df["alt_tel", "az_tel"]:
                x = np.arange(len(df_col))
                mask_missing = np.isnan(df_col.values)
                df_col = np.interp(x[mask_missing], x[~mask_missing], df_col.values[~mask_missing])
        else:
            dl1_df.alt_tel = -np.pi / 2.0
            dl1_df.az_tel = -np.pi / 2.0




[docs]
def convert_az_and_sin_az_angle_to_degree(dl1_df: pd.DataFrame):
    # Normalize all azimuth angles to the range [0, 360) degrees
    dl1_df.az_tel = Angle(dl1_df.az_tel, u.rad).wrap_at(360 * u.deg).rad

    # Dealing with `sin_az_tel` missing data because of the former version of lstchain
    # TODO: this is copy paster from lstchain, but np.sin(degree) is not working
    if "sin_az_tel" not in dl1_df.columns:
        dl1_df["sin_az_tel"] = np.sin(dl1_df.az_tel)



# TODO: is this required ?
# TODO: make key configurable
# TODO: make sure we get the right optic per tel ID
# TODO: make this able to work for other tel ID

[docs]
def read_telescope_optics(dl1_file_path: Path, optics_table_key: str = "/configuration/instrument/telescope/optics"):
    optics_df = pd.read_hdf(dl1_file_path, key=optics_table_key)
    layout_df = pd.read_hdf(dl1_file_path, key="/configuration/instrument/subarray/layout")
    lst1_layout_row = layout_df.loc[layout_df["tel_id"] == 1].iloc[0]
    optics_row = optics_df.iloc[lst1_layout_row["optics_index"]]
    return OpticsDescription(
        name=optics_row.name,
        size_type=optics_row.size_type,
        n_mirrors=optics_row.n_mirrors,
        equivalent_focal_length=optics_row.equivalent_focal_length * u.meter,  # TODO: read units from file ?
        effective_focal_length=optics_row.effective_focal_length * u.meter,
        mirror_area=optics_row.mirror_area * u.meter * u.meter,
        n_mirror_tiles=optics_row.n_mirror_tiles,
        reflector_shape=optics_row.reflector_shape,
    )



# TODO: configuration object for filtering ranges
# TODO: re-name finite_params

[docs]
def filter_dl1(
    dl1_df,
    filters=None,
    finite_params=None,
):
    """
    Apply data filtering to a pandas dataframe.
    Each filtering range is applied if the column name exists in the DataFrame so that
    `(events >= range[0]) & (events <= range[1])`

    Parameters
    ----------
    events: `pandas.DataFrame`
    filters: dict containing events features names and their filtering range
        example : dict(intensity=[0, np.inf], width=[0, np.inf], r=[0, np.inf])
    finite_params: optional, None or list of strings
        Column names: any row with a non-finite value in one of those columns will be discarded
    Returns
    -------
    `pandas.DataFrame`
    """

    filter = np.ones(len(dl1_df), dtype=bool)
    filters = {} if filters is None else filters

    for col, (lower_limit, upper_limit) in filters.items():
        filter &= (dl1_df[col] >= lower_limit) & (dl1_df[col] <= upper_limit)

    if finite_params is not None:
        # TODO: a usefull warning could be: a filter column is not present ?...
        _finite_params = list(set(finite_params).intersection(list(dl1_df.columns)))
        # TODO: replace inf by nans, then run isna ??
        dl1_df[_finite_params] = dl1_df[_finite_params].replace([np.inf, -np.inf], np.nan)
        not_finite_mask = dl1_df[_finite_params].isna()
        filter &= ~(not_finite_mask.any(axis=1))
        # TODO: make this a useful warning: 1 per file ? / single warning entry
        not_finite_counts = (not_finite_mask).sum(axis=0)[_finite_params]
        if (not_finite_counts > 0).any():
            warnings.warn("Data contains not-predictable events.")
            warnings.warn("Column | Number of non finite values")
            for k, v in not_finite_counts.items():
                if v > 0:
                    warnings.warn(f"{k} : {v}")

    # if pandas DataFrame or Series, transforms to numpy
    # TODO: why convert to numpy ?
    filter = filter.to_numpy() if hasattr(filter, "to_numpy") else filter
    dl1_df = dl1_df[filter]

    return dl1_df