Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/changes/3004.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Adds a new ``FeatureSet`` to `~ctapipe.io.EventPreprocessor` for DL2 to DL3
processing to generate GADF-like event lists. This adds the ability to transform
coordinates from Alt/AZ to ICRS, and supports applying pre-optimized
gamma-hadron cuts.

Also added a ``mode=MARK`` option that keeps all events, but adds boolean
columns per ``QualityCriterion`` showing which ones pass. This is useful for
debugging. The default mode is still ``DROP``, which filters events. A
corresponding API function `~ctapipe.core.QualityQuery.add_table_mask_columns`
was also added to support this.
2 changes: 2 additions & 0 deletions src/ctapipe/coordinates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .nominal_frame import NominalFrame
from .telescope_frame import TelescopeFrame
from .utils import (
altaz_to_icrs,
altaz_to_nominal,
altaz_to_righthanded_cartesian,
get_point_on_shower_axis,
Expand All @@ -42,6 +43,7 @@
"shower_impact_distance",
"get_point_on_shower_axis",
"altaz_to_nominal",
"altaz_to_icrs",
]


Expand Down
21 changes: 21 additions & 0 deletions src/ctapipe/coordinates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"altaz_to_righthanded_cartesian",
"get_point_on_shower_axis",
"altaz_to_nominal",
"altaz_to_icrs",
]


Expand Down Expand Up @@ -103,3 +104,23 @@ def altaz_to_nominal(az, alt, pointing_az, pointing_alt) -> u.Quantity:
return u.Quantity(
np.column_stack((nominal_coord.fov_lon.deg, nominal_coord.fov_lat.deg)), u.deg
)


def altaz_to_icrs(az, alt, obstime, location) -> u.Quantity:
"""
Compute nominal (FOV) coordinates from alt/az coordinates.

This can be used in a FeatureGenerator or ExpressionEngine to get a single
column with fov_lon, fov_lat coordinates.

Returns
-------
u.Quantity:
2D array of coordinates with 2 columns: ra, dec
"""
event_coord = SkyCoord(
az=az, alt=alt, frame="altaz", obstime=obstime, location=location
)
icrs_coord = event_coord.transform_to("icrs")

return u.Quantity(np.column_stack((icrs_coord.ra.deg, icrs_coord.dec.deg)), u.deg)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to return the SkyCoord object and then do coord.ra.to(u.deg) in the transform isntead of filling a 2d array and then indexing into that?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was just to output a simple 2D array, not a SkyCoord: astropy.table supports writing columns of SkyCoords, but our io.write_table function doesn't.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But looking at the examples and the DL3 config, the goal seems to anyway be to get two separate columns? The 2d array is never actually used.

@kosack kosack Jun 8, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is used: it's a bit confusing maybe, but there is a 2D feature genereated with this function, and then the following two features extract the single columns. THat was the most efficient way to do this without calculating twice.

e.g.:

            (
                "reco_fov_coord",
                "altaz_to_nominal(AZ, ALT, subarray_pointing_lon, subarray_pointing_lat)",
            ),  # makes the 2D coordinate
            ("FOV_LON", "reco_fov_coord[:,0]"), # makes single column 
            ("FOV_LAT", "reco_fov_coord[:,1]"),  # makes single column

The 2D column (reco_fov_coord) is then not included in the EventPreprocessor output, so it's temporary only.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what I wrote above, right?

and then indexing into that

You can just leave the skycoord and then in the feature generator you access its fields instead of indexing into the array.

29 changes: 29 additions & 0 deletions src/ctapipe/core/qualityquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,32 @@ def get_table_mask(self, table):
self._counts += np.count_nonzero(result, axis=1)
self._cumulative_counts += np.count_nonzero(np.cumprod(result, axis=0), axis=1)
return np.all(result, axis=0)

def add_table_mask_columns(self, table):
"""
Return original table with boolean columns for each criterion.

Parameters
----------
table : `~astropy.table.Table`
Table with columns matching the expressions used in the
`QualityQuery.quality_criteria`.

Returns
-------
Table :
Original table plus new columns per criterion.
"""
table = table.copy()
n_criteria = len(self.quality_criteria) + 1
result = np.ones((n_criteria, len(table)), dtype=bool)

for i, res in enumerate(self.engine(table)):
name = self.criteria_names[i]
table[name] = res.data
result[i + 1] = res.data

self._counts += np.count_nonzero(result, axis=1)
self._cumulative_counts += np.count_nonzero(np.cumprod(result, axis=0), axis=1)

return table
19 changes: 19 additions & 0 deletions src/ctapipe/core/tests/test_qualityquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,22 @@ def test_setup():
# 3-tuple fails
with pytest.raises(TraitError):
QualityQuery(quality_criteria=[("1", "2", "3")])


def test_add_columns():
"""Test that we can add columns."""

query = QualityQuery(
quality_criteria=[
("low", "x>0"),
("high", "x < 10"),
],
)

table = Table({"x": [-10, 1, 2, 3, 5, 11, 5]})

table_with_cols = query.add_table_mask_columns(table)

assert "low" in table_with_cols.colnames
assert "high" in table_with_cols.colnames
assert table_with_cols["low"].sum() == 6
111 changes: 102 additions & 9 deletions src/ctapipe/io/event_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Module containing classes related to event loading and preprocessing"""

from enum import Enum, auto

from astropy.coordinates import angular_separation

from ..coordinates import altaz_to_nominal
from ..coordinates import altaz_to_icrs, altaz_to_nominal
from ..core import (
Component,
FeatureGenerator,
Expand Down Expand Up @@ -123,6 +125,68 @@ def _dl2_irf_config(preprocessor):
}


@FeatureSetRegistry.register("dl2_to_dl3")
def _dl2_to_dl3_config(preprocessor: "EventPreprocessor"):
"""Creates a DL3/Event table that conforms to GADF/VODF column naming."""
return {
"features_to_generate": [
("EVENT_ID", "event_id"),
("TIME", "time"),
("ALT", f"{preprocessor.geometry_reconstructor}_alt"),
("AZ", f"{preprocessor.geometry_reconstructor}_az"),
(
"_reco_fov_coord",
"altaz_to_nominal(AZ, ALT, subarray_pointing_lon, subarray_pointing_lat)",
),
("FOV_LON", "_reco_fov_coord[:,0]"),
("FOV_LAT", "_reco_fov_coord[:,1]"),
(
"_reco_icrs_coord",
"altaz_to_icrs(AZ, ALT, TIME, subarray.reference_location)",
),
("RA", "_reco_icrs_coord[:,0]"),
("DEC", "_reco_icrs_coord[:,1]"),
("ENERGY", f"{preprocessor.energy_reconstructor}_energy"),
("GAMMANESS", f"{preprocessor.gammaness_reconstructor}_prediction"),
(
"MULTIP",
f"subarray.multiplicity({preprocessor.geometry_reconstructor}_telescopes)",
),
("HMAX", f"{preprocessor.geometry_reconstructor}_h_max"),
("_passed_gh", "gammaness_cut_function(GAMMANESS, ENERGY)"),
("_passed_multip", "multiplicity_cut_function(MULTIP, ENERGY)"),
],
"quality_criteria": [
("VALID_RECO", f"{preprocessor.geometry_reconstructor}_is_valid"),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As quality criteria are independant for each reconstructor, I think it would be safer to perform an overall "and" on reconstructor quality cut.

@kosack kosack Jun 4, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I will add criteria also for energy and gammaness. It's true they could be different, and it's useful to track

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also realized I forgot the MULTIP cut, so I added a similar function hook where we can add a function of f(multiplicity, energy) that applies it. That should also come out of the OptimizationResult.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking more about it, but perhaps for the future after we refactor the optimization code to use this as well, would be to just copy over the OptimizationResult.quality_query directly to the QualityQuery here, rather than allowing the user to attach them - it's already stored in the output. That way we are sure to have the same selection applied.

I have to think what would be the cleanest way to do that, while keeping this class a bit generic. I think one option is to expand the OptimizationResult class to be a bit smarter: it should also handle the actual cutting, and then we can just pass it in like the Subarray, rather than require gammaness_cut_function and multiplicity_cut_function to be passed into the EventPreprocessor on reconstruction.

In that scenario, I would just have:

("PASSED_GH", "optimization_result.pass_gh(GAMMANESS, ENERGY)"),
("PASSED_MULTIP", "optimization_result.pass_multip(MULTIP, ENERGY)"

("VALID_ENERGY", f"{preprocessor.energy_reconstructor}_is_valid"),
("VALID_GAMMANESS", f"{preprocessor.gammaness_reconstructor}_is_valid"),
("PASSED_GH", "_passed_gh"),
("PASSED_MULTIP", "_passed_multip"),
],
"output_features": [
"EVENT_ID",
"TIME",
"RA",
"DEC",
"ENERGY",
"ALT",
"AZ",
"FOV_LON",
"FOV_LAT",
"GAMMANESS",
"MULTIP",
"HMAX",
],
}


class EventPreprocessorMode(Enum):
"""Mode of output of EventPreprocessor."""

DROP = auto() #: drop events that do not pass
MARK = auto() #: only mark evens as not passing, adding boolean columns


class EventPreprocessor(Component):
"""
Selects or generates features and filters tables of events.
Expand All @@ -140,6 +204,16 @@ class with the columns you to retain in the output table.
- `~ctapipe.coordinates.altaz_to_nominal`
"""

mode = traits.UseEnum(
EventPreprocessorMode,
default_value=EventPreprocessorMode.DROP,
help=(
"If 'DROP', removes events that do not pass quality cuts. "
"If 'MARK', generates a new boolean column for each quality criteria, "
"but keeps all events."
),
).tag(config=True)

energy_reconstructor = traits.Unicode(
default_value="RandomForestRegressor",
help="Prefix of the reco `_energy` column",
Expand Down Expand Up @@ -176,8 +250,9 @@ class with the columns you to retain in the output table.
),
).tag(config=True)

def __init__(self, config=None, parent=None, **kwargs):
def __init__(self, config=None, parent=None, subarray=None, **kwargs):
super().__init__(config=config, parent=parent, **kwargs)
self.subarray = subarray
if self.feature_set == "custom":
self.feature_generator = FeatureGenerator(parent=self)
self.quality_query = QualityQuery(parent=self)
Expand All @@ -198,20 +273,38 @@ def __init__(self, config=None, parent=None, **kwargs):
"of features in the configuration (DL2EventPreprocessor.features)."
)

def __call__(self, table):
"""Return new table with only the columns in features."""
def __call__(self, table, **other_attributes):
"""
Return new table with only the columns in features.

Parameters
----------
table: Table
Table to process
**other_attributes: Any
Other functions or objects that the FeatureGenerator should have
access to, in addition to the default ones.
"""

# generate new features, which includes renaming columns:
generated = self.feature_generator(
table,
angular_separation=angular_separation,
altaz_to_nominal=altaz_to_nominal,
altaz_to_icrs=altaz_to_icrs,
subarray=self.subarray,
**other_attributes,
)

# apply event selection on the resulting table

selected_mask = self.quality_query.get_table_mask(generated)

# return only the columns specified in `self.features`, and rows in
# `selected_mask`
return generated[self.features][selected_mask]
if self.mode == EventPreprocessorMode.DROP:
# return only the columns specified in `self.features`, and rows in
# `selected_mask`
selected_mask = self.quality_query.get_table_mask(generated)
return generated[self.features][selected_mask]
elif self.mode == EventPreprocessorMode.MARK:
generated = self.quality_query.add_table_mask_columns(generated)
return generated[self.features + self.quality_query.criteria_names]
else:
raise ValueError("Unsupported mode: {self.mode}")
38 changes: 32 additions & 6 deletions src/ctapipe/io/tests/test_event_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import pytest
from astropy import units as u
from astropy.table import QTable
from astropy.time import Time

from ctapipe.io.event_preprocessor import FeatureSetRegistry
from ctapipe.io.event_preprocessor import EventPreprocessorMode, FeatureSetRegistry


@pytest.fixture(scope="function")
Expand All @@ -16,11 +17,14 @@ def minimal_dl2_table():
obs_id=[10, 10, 10, 10],
event_id=[1, 2, 3, 4],
true_energy=[100.0, 50.0, 2.0, 30.0] * u.TeV,
time=Time([0, 0, 0, 0], format="unix", scale="utc"),
RandomForestRegressor_energy=[100.1, 49.2, 2.6, 40.0] * u.TeV,
RandomForestRegressor_is_valid=[True, True, True, True],
HillasReconstructor_az=[271.0, 271.6, 271.4, 268.1] * u.deg,
HillasReconstructor_alt=[70.1, 68.2, 69.3, 70.8] * u.deg,
HillasReconstructor_h_max=[2000.0, 2500.0, 1980.0, 1000.0] * u.m,
HillasReconstructor_is_valid=[True, True, True, False],
HillasReconstructor_telescopes=np.ones(shape=(4, 13), dtype=bool),
RandomForestClassifier_prediction=[0.9, 0.5, 0.1, 0.3],
RandomForestClassifier_is_valid=[True, True, True, False],
true_alt=[70.0, 70.0, 70.0, 70.0] * u.deg,
Expand All @@ -39,8 +43,11 @@ def minimal_dl2_table():
)


@pytest.mark.parametrize("mode", list(EventPreprocessorMode))
@pytest.mark.parametrize("feature_set", FeatureSetRegistry.list_available())
def test_event_preprocessing(feature_set, minimal_dl2_table):
def test_event_preprocessing(
feature_set, mode, minimal_dl2_table, subarray_prod5_paranal
):
from traitlets.config import Config

from ctapipe.io import EventPreprocessor
Expand All @@ -52,14 +59,33 @@ def test_event_preprocessing(feature_set, minimal_dl2_table):
table = minimal_dl2_table

# process the table:
preprocess = EventPreprocessor(config=custom_config, feature_set=feature_set)
table_processed = preprocess(table)
preprocess = EventPreprocessor(
config=custom_config,
feature_set=feature_set,
subarray=subarray_prod5_paranal,
mode=mode,
)
table_processed = preprocess(
table,
gammaness_cut_function=lambda g, e: True,
multiplicity_cut_function=lambda m, e: m >= 4,
)

for feature in preprocess.features:
assert feature in table_processed.columns

# check that the qualityquery worked
assert len(table_processed) <= len(table)
if mode == EventPreprocessorMode.DROP:
# check that the qualityquery worked
assert len(table_processed) <= len(table)

if mode == EventPreprocessorMode.MARK:
# check that we have all the original events in KEEP mode
assert len(table_processed) == len(table)

# check that new columns were added:
set(table_processed.colnames) - set(table.colnames) == set(
preprocess.quality_query.criteria_names
)


def test_no_output():
Expand Down
Loading