diff --git a/docs/changes/3004.feature.rst b/docs/changes/3004.feature.rst new file mode 100644 index 00000000000..462994d42ca --- /dev/null +++ b/docs/changes/3004.feature.rst @@ -0,0 +1,10 @@ +Adds a new ``FeatureSet`` to `~ctapipe.io.EventPreprocessor` for DL2 to DL3 +processing to generate GADF-like event lists. This adds the ability to transform +coordinates from Alt/AZ to ICRS, and supports applying pre-optimized +gamma-hadron cuts. + +Also added a ``mode=MARK`` option that keeps all events, but adds boolean +columns per ``QualityCriterion`` showing which ones pass. This is useful for +debugging. The default mode is still ``DROP``, which filters events. A +corresponding API function `~ctapipe.core.QualityQuery.add_table_mask_columns` +was also added to support this. diff --git a/src/ctapipe/coordinates/__init__.py b/src/ctapipe/coordinates/__init__.py index 7d6ed4ea864..2a3f2e6ad8a 100644 --- a/src/ctapipe/coordinates/__init__.py +++ b/src/ctapipe/coordinates/__init__.py @@ -22,6 +22,7 @@ from .nominal_frame import NominalFrame from .telescope_frame import TelescopeFrame from .utils import ( + altaz_to_icrs, altaz_to_nominal, altaz_to_righthanded_cartesian, get_point_on_shower_axis, @@ -42,6 +43,7 @@ "shower_impact_distance", "get_point_on_shower_axis", "altaz_to_nominal", + "altaz_to_icrs", ] diff --git a/src/ctapipe/coordinates/utils.py b/src/ctapipe/coordinates/utils.py index 52a993357ff..181727f170a 100644 --- a/src/ctapipe/coordinates/utils.py +++ b/src/ctapipe/coordinates/utils.py @@ -11,6 +11,7 @@ "altaz_to_righthanded_cartesian", "get_point_on_shower_axis", "altaz_to_nominal", + "altaz_to_icrs", ] @@ -103,3 +104,23 @@ def altaz_to_nominal(az, alt, pointing_az, pointing_alt) -> u.Quantity: return u.Quantity( np.column_stack((nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg ) + + +def altaz_to_icrs(az, alt, obstime, location) -> u.Quantity: + """ + Compute nominal (FOV) coordinates from alt/az coordinates. + + This can be used in a FeatureGenerator or ExpressionEngine to get a single + column with fov_lon, fov_lat coordinates. + + Returns + ------- + u.Quantity: + 2D array of coordinates with 2 columns: ra, dec + """ + event_coord = SkyCoord( + az=az, alt=alt, frame="altaz", obstime=obstime, location=location + ) + icrs_coord = event_coord.transform_to("icrs") + + return u.Quantity(np.column_stack((icrs_coord.ra.deg, icrs_coord.dec.deg)), u.deg) diff --git a/src/ctapipe/core/qualityquery.py b/src/ctapipe/core/qualityquery.py index 50020a2b9b4..6a3adef87b8 100644 --- a/src/ctapipe/core/qualityquery.py +++ b/src/ctapipe/core/qualityquery.py @@ -137,3 +137,32 @@ def get_table_mask(self, table): self._counts += np.count_nonzero(result, axis=1) self._cumulative_counts += np.count_nonzero(np.cumprod(result, axis=0), axis=1) return np.all(result, axis=0) + + def add_table_mask_columns(self, table): + """ + Return original table with boolean columns for each criterion. + + Parameters + ---------- + table : `~astropy.table.Table` + Table with columns matching the expressions used in the + `QualityQuery.quality_criteria`. + + Returns + ------- + Table : + Original table plus new columns per criterion. + """ + table = table.copy() + n_criteria = len(self.quality_criteria) + 1 + result = np.ones((n_criteria, len(table)), dtype=bool) + + for i, res in enumerate(self.engine(table)): + name = self.criteria_names[i] + table[name] = res.data + result[i + 1] = res.data + + self._counts += np.count_nonzero(result, axis=1) + self._cumulative_counts += np.count_nonzero(np.cumprod(result, axis=0), axis=1) + + return table diff --git a/src/ctapipe/core/tests/test_qualityquery.py b/src/ctapipe/core/tests/test_qualityquery.py index 6696c4aefdf..18b029db55f 100644 --- a/src/ctapipe/core/tests/test_qualityquery.py +++ b/src/ctapipe/core/tests/test_qualityquery.py @@ -146,3 +146,22 @@ def test_setup(): # 3-tuple fails with pytest.raises(TraitError): QualityQuery(quality_criteria=[("1", "2", "3")]) + + +def test_add_columns(): + """Test that we can add columns.""" + + query = QualityQuery( + quality_criteria=[ + ("low", "x>0"), + ("high", "x < 10"), + ], + ) + + table = Table({"x": [-10, 1, 2, 3, 5, 11, 5]}) + + table_with_cols = query.add_table_mask_columns(table) + + assert "low" in table_with_cols.colnames + assert "high" in table_with_cols.colnames + assert table_with_cols["low"].sum() == 6 diff --git a/src/ctapipe/io/event_preprocessor.py b/src/ctapipe/io/event_preprocessor.py index d311035815d..bb602c6cd03 100644 --- a/src/ctapipe/io/event_preprocessor.py +++ b/src/ctapipe/io/event_preprocessor.py @@ -1,8 +1,10 @@ """Module containing classes related to event loading and preprocessing""" +from enum import Enum, auto + from astropy.coordinates import angular_separation -from ..coordinates import altaz_to_nominal +from ..coordinates import altaz_to_icrs, altaz_to_nominal from ..core import ( Component, FeatureGenerator, @@ -123,6 +125,68 @@ def _dl2_irf_config(preprocessor): } +@FeatureSetRegistry.register("dl2_to_dl3") +def _dl2_to_dl3_config(preprocessor: "EventPreprocessor"): + """Creates a DL3/Event table that conforms to GADF/VODF column naming.""" + return { + "features_to_generate": [ + ("EVENT_ID", "event_id"), + ("TIME", "time"), + ("ALT", f"{preprocessor.geometry_reconstructor}_alt"), + ("AZ", f"{preprocessor.geometry_reconstructor}_az"), + ( + "_reco_fov_coord", + "altaz_to_nominal(AZ, ALT, subarray_pointing_lon, subarray_pointing_lat)", + ), + ("FOV_LON", "_reco_fov_coord[:,0]"), + ("FOV_LAT", "_reco_fov_coord[:,1]"), + ( + "_reco_icrs_coord", + "altaz_to_icrs(AZ, ALT, TIME, subarray.reference_location)", + ), + ("RA", "_reco_icrs_coord[:,0]"), + ("DEC", "_reco_icrs_coord[:,1]"), + ("ENERGY", f"{preprocessor.energy_reconstructor}_energy"), + ("GAMMANESS", f"{preprocessor.gammaness_reconstructor}_prediction"), + ( + "MULTIP", + f"subarray.multiplicity({preprocessor.geometry_reconstructor}_telescopes)", + ), + ("HMAX", f"{preprocessor.geometry_reconstructor}_h_max"), + ("_passed_gh", "gammaness_cut_function(GAMMANESS, ENERGY)"), + ("_passed_multip", "multiplicity_cut_function(MULTIP, ENERGY)"), + ], + "quality_criteria": [ + ("VALID_RECO", f"{preprocessor.geometry_reconstructor}_is_valid"), + ("VALID_ENERGY", f"{preprocessor.energy_reconstructor}_is_valid"), + ("VALID_GAMMANESS", f"{preprocessor.gammaness_reconstructor}_is_valid"), + ("PASSED_GH", "_passed_gh"), + ("PASSED_MULTIP", "_passed_multip"), + ], + "output_features": [ + "EVENT_ID", + "TIME", + "RA", + "DEC", + "ENERGY", + "ALT", + "AZ", + "FOV_LON", + "FOV_LAT", + "GAMMANESS", + "MULTIP", + "HMAX", + ], + } + + +class EventPreprocessorMode(Enum): + """Mode of output of EventPreprocessor.""" + + DROP = auto() #: drop events that do not pass + MARK = auto() #: only mark evens as not passing, adding boolean columns + + class EventPreprocessor(Component): """ Selects or generates features and filters tables of events. @@ -140,6 +204,16 @@ class with the columns you to retain in the output table. - `~ctapipe.coordinates.altaz_to_nominal` """ + mode = traits.UseEnum( + EventPreprocessorMode, + default_value=EventPreprocessorMode.DROP, + help=( + "If 'DROP', removes events that do not pass quality cuts. " + "If 'MARK', generates a new boolean column for each quality criteria, " + "but keeps all events." + ), + ).tag(config=True) + energy_reconstructor = traits.Unicode( default_value="RandomForestRegressor", help="Prefix of the reco `_energy` column", @@ -176,8 +250,9 @@ class with the columns you to retain in the output table. ), ).tag(config=True) - def __init__(self, config=None, parent=None, **kwargs): + def __init__(self, config=None, parent=None, subarray=None, **kwargs): super().__init__(config=config, parent=parent, **kwargs) + self.subarray = subarray if self.feature_set == "custom": self.feature_generator = FeatureGenerator(parent=self) self.quality_query = QualityQuery(parent=self) @@ -198,20 +273,38 @@ def __init__(self, config=None, parent=None, **kwargs): "of features in the configuration (DL2EventPreprocessor.features)." ) - def __call__(self, table): - """Return new table with only the columns in features.""" + def __call__(self, table, **other_attributes): + """ + Return new table with only the columns in features. + + Parameters + ---------- + table: Table + Table to process + **other_attributes: Any + Other functions or objects that the FeatureGenerator should have + access to, in addition to the default ones. + """ # generate new features, which includes renaming columns: generated = self.feature_generator( table, angular_separation=angular_separation, altaz_to_nominal=altaz_to_nominal, + altaz_to_icrs=altaz_to_icrs, + subarray=self.subarray, + **other_attributes, ) # apply event selection on the resulting table - selected_mask = self.quality_query.get_table_mask(generated) - - # return only the columns specified in `self.features`, and rows in - # `selected_mask` - return generated[self.features][selected_mask] + if self.mode == EventPreprocessorMode.DROP: + # return only the columns specified in `self.features`, and rows in + # `selected_mask` + selected_mask = self.quality_query.get_table_mask(generated) + return generated[self.features][selected_mask] + elif self.mode == EventPreprocessorMode.MARK: + generated = self.quality_query.add_table_mask_columns(generated) + return generated[self.features + self.quality_query.criteria_names] + else: + raise ValueError("Unsupported mode: {self.mode}") diff --git a/src/ctapipe/io/tests/test_event_preprocessor.py b/src/ctapipe/io/tests/test_event_preprocessor.py index 378e0d4ac65..2c1217b2ebf 100644 --- a/src/ctapipe/io/tests/test_event_preprocessor.py +++ b/src/ctapipe/io/tests/test_event_preprocessor.py @@ -4,8 +4,9 @@ import pytest from astropy import units as u from astropy.table import QTable +from astropy.time import Time -from ctapipe.io.event_preprocessor import FeatureSetRegistry +from ctapipe.io.event_preprocessor import EventPreprocessorMode, FeatureSetRegistry @pytest.fixture(scope="function") @@ -16,11 +17,14 @@ def minimal_dl2_table(): obs_id=[10, 10, 10, 10], event_id=[1, 2, 3, 4], true_energy=[100.0, 50.0, 2.0, 30.0] * u.TeV, + time=Time([0, 0, 0, 0], format="unix", scale="utc"), RandomForestRegressor_energy=[100.1, 49.2, 2.6, 40.0] * u.TeV, RandomForestRegressor_is_valid=[True, True, True, True], HillasReconstructor_az=[271.0, 271.6, 271.4, 268.1] * u.deg, HillasReconstructor_alt=[70.1, 68.2, 69.3, 70.8] * u.deg, + HillasReconstructor_h_max=[2000.0, 2500.0, 1980.0, 1000.0] * u.m, HillasReconstructor_is_valid=[True, True, True, False], + HillasReconstructor_telescopes=np.ones(shape=(4, 13), dtype=bool), RandomForestClassifier_prediction=[0.9, 0.5, 0.1, 0.3], RandomForestClassifier_is_valid=[True, True, True, False], true_alt=[70.0, 70.0, 70.0, 70.0] * u.deg, @@ -39,8 +43,11 @@ def minimal_dl2_table(): ) +@pytest.mark.parametrize("mode", list(EventPreprocessorMode)) @pytest.mark.parametrize("feature_set", FeatureSetRegistry.list_available()) -def test_event_preprocessing(feature_set, minimal_dl2_table): +def test_event_preprocessing( + feature_set, mode, minimal_dl2_table, subarray_prod5_paranal +): from traitlets.config import Config from ctapipe.io import EventPreprocessor @@ -52,14 +59,33 @@ def test_event_preprocessing(feature_set, minimal_dl2_table): table = minimal_dl2_table # process the table: - preprocess = EventPreprocessor(config=custom_config, feature_set=feature_set) - table_processed = preprocess(table) + preprocess = EventPreprocessor( + config=custom_config, + feature_set=feature_set, + subarray=subarray_prod5_paranal, + mode=mode, + ) + table_processed = preprocess( + table, + gammaness_cut_function=lambda g, e: True, + multiplicity_cut_function=lambda m, e: m >= 4, + ) for feature in preprocess.features: assert feature in table_processed.columns - # check that the qualityquery worked - assert len(table_processed) <= len(table) + if mode == EventPreprocessorMode.DROP: + # check that the qualityquery worked + assert len(table_processed) <= len(table) + + if mode == EventPreprocessorMode.MARK: + # check that we have all the original events in KEEP mode + assert len(table_processed) == len(table) + + # check that new columns were added: + set(table_processed.colnames) - set(table.colnames) == set( + preprocess.quality_query.criteria_names + ) def test_no_output():