diff --git a/python/lsst/ap/association/__init__.py b/python/lsst/ap/association/__init__.py index a999c1d4..f91a0347 100644 --- a/python/lsst/ap/association/__init__.py +++ b/python/lsst/ap/association/__init__.py @@ -20,6 +20,7 @@ # along with this program. If not, see . from .version import * +from .trailedSourceFilter import * from .association import * from .diaForcedSource import * from .loadDiaCatalogs import * diff --git a/python/lsst/ap/association/association.py b/python/lsst/ap/association/association.py index 8df3daaa..a3b46991 100644 --- a/python/lsst/ap/association/association.py +++ b/python/lsst/ap/association/association.py @@ -32,6 +32,7 @@ import lsst.pex.config as pexConfig import lsst.pipe.base as pipeBase from lsst.utils.timer import timeMethod +from .trailedSourceFilter import TrailedSourceFilterTask # Enforce an error for unsafe column/array value setting in pandas. pd.options.mode.chained_assignment = 'raise' @@ -47,6 +48,18 @@ class AssociationConfig(pexConfig.Config): default=1.0, ) + trailedSourceFilter = pexConfig.ConfigurableField( + target=TrailedSourceFilterTask, + doc="Subtask to filter artifact candidates based on morphological " + "criteria, i.g. those that appear to be streaks.", + ) + + doTrailedSourceFilter = pexConfig.Field( + doc="Set flag for trailed source filter subtask to run.", + dtype=bool, + default=True, + ) + class AssociationTask(pipeBase.Task): """Associate DIAOSources into existing DIAObjects. @@ -60,10 +73,16 @@ class AssociationTask(pipeBase.Task): ConfigClass = AssociationConfig _DefaultName = "association" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self.config.doTrailedSourceFilter: + self.makeSubtask("trailedSourceFilter") + @timeMethod def run(self, diaSources, - diaObjects): + diaObjects, + diffIm=None): """Associate the new DiaSources with existing DiaObjects. Parameters @@ -72,6 +91,8 @@ def run(self, New DIASources to be associated with existing DIAObjects. diaObjects : `pandas.DataFrame` Existing diaObjects from the Apdb. + diffIm : `lsst.afw.image.ExposureF` optional + Difference image on which the DiaSources were detected. Returns ------- @@ -98,7 +119,13 @@ def run(self, nUpdatedDiaObjects=0, nUnassociatedDiaObjects=0) - matchResult = self.associate_sources(diaObjects, diaSources) + if self.config.doTrailedSourceFilter: + diaTrailedResult = self.trailedSourceFilter.run(diaSources, diffIm) + + matchResult = self.associate_sources(diaObjects, diaTrailedResult.diaSources) + + else: + matchResult = self.associate_sources(diaObjects, diaSources) mask = matchResult.diaSources["diaObjectId"] != 0 diff --git a/python/lsst/ap/association/diaPipe.py b/python/lsst/ap/association/diaPipe.py index 1d3eaa47..9a48d449 100644 --- a/python/lsst/ap/association/diaPipe.py +++ b/python/lsst/ap/association/diaPipe.py @@ -28,6 +28,10 @@ Currently loads directly from the Apdb rather than pre-loading. """ +__all__ = ("DiaPipelineConfig", + "DiaPipelineTask", + "DiaPipelineConnections") + import pandas as pd import lsst.dax.apdb as daxApdb @@ -44,10 +48,6 @@ PackageAlertsTask) from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask -__all__ = ("DiaPipelineConfig", - "DiaPipelineTask", - "DiaPipelineConnections") - class DiaPipelineConnections( pipeBase.PipelineTaskConnections, @@ -368,7 +368,7 @@ def run(self, # Associate new DiaSources with existing DiaObjects. assocResults = self.associator.run(diaSourceTable, - loaderResult.diaObjects) + loaderResult.diaObjects, exposure=diffIm) if self.config.doSolarSystemAssociation: ssoAssocResult = self.solarSystemAssociator.run( assocResults.unAssocDiaSources, diff --git a/python/lsst/ap/association/trailedSourceFilter.py b/python/lsst/ap/association/trailedSourceFilter.py new file mode 100644 index 00000000..786af356 --- /dev/null +++ b/python/lsst/ap/association/trailedSourceFilter.py @@ -0,0 +1,106 @@ +# This file is part of ap_association. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +__all__ = ("TrailedSourceFilterTask", "TrailedSourceFilterConfig") + +import lsst.pex.config as pexConfig +import lsst.pipe.base as pipeBase +from lsst.utils.timer import timeMethod + + +class TrailedSourceFilterConfig(pexConfig.Config): + """Config class for TrailedSourceFilterTask. + """ + maxTrailLength = pexConfig.Field( + dtype=float, + doc='Maximum trail length permitted is less than 10 degrees/day. ' + 'This is a rate of 0.416 arcseconds per second. As trail length' + ' is measured in arcseconds, it is dependant on the length of ' + 'the exposure.', + default=0.416, + ) + + +class TrailedSourceFilterTask(pipeBase.Task): + """Find trailed sources in DIASources. This task checks the length of + trailLength in the DIASource catalog using a given + arcsecond/second rate from maxTrailLength and the exposure time. The + two values are used to calculate the maximum allowed trail length and + filters out any trail longer than the maximum. + + """ + + ConfigClass = TrailedSourceFilterConfig + _DefaultName = "trailedSourceFilter" + + @timeMethod + def run(self, dia_sources, diffIm): + """Find trailed sources which do not meet requirements and + will not be included in the diaSource catalog. + + Parameters + ---------- + dia_sources : `pandas.DataFrame` + New DIASources to be checked for trailed sources. + diffIm : `lsst.afw.image.ExposureF` + Difference image on which the DIASources were detected. + + Returns + ------- + result : `lsst.pipe.base.Struct` + Results struct with components. + + - ``dia_sources`` : DIASource table that is free from unwanted + trailed sources. (`pandas.DataFrame`) + + - ``trailed_dia_sources`` : DIASources that have trailed more + than 0.416 arcseconds/second*exposure_time. (`pandas.DataFrame`) + """ + + trail_mask = self.check_dia_source_trail(dia_sources, diffIm) + + return pipeBase.Struct( + diaSources=dia_sources[~trail_mask].reset_index(drop=True), + trailedDiaSources=dia_sources[trail_mask].reset_index(drop=True)) + + def check_dia_source_trail(self, dia_sources, diffIm): + """Find DiaSources that have long trails. + + Creates a mask for sources with lengths greater than 0.416 + arcseconds/second multiplied by the exposure time. + + Parameters + ---------- + dia_sources : `pandas.DataFrame` + Input DIASources to check for trail lengths. + diffIm : `pandas.DataFrame` + Difference image on which the DIASources were detected. + + Returns + ------- + trail_mask : `pandas.DataFrame` + Boolean mask for DIASources which are greater than the + cutoff length. + """ + diffIm_time = diffIm.getInfo().getVisitInfo().getExposureTime() + trail_mask = (dia_sources.loc[:, "trailLength"].values[:] >= self.config.maxTrailLength*diffIm_time) + + return trail_mask diff --git a/tests/test_association_task.py b/tests/test_association_task.py index 0a10af81..a5c18aa8 100644 --- a/tests/test_association_task.py +++ b/tests/test_association_task.py @@ -22,7 +22,7 @@ import numpy as np import pandas as pd import unittest - +import lsst.afw.image as afwImage import lsst.geom as geom import lsst.utils.tests @@ -46,20 +46,30 @@ def setUp(self): self.diaSources = pd.DataFrame(data=[ {"ra": 0.04*idx + scatter*rng.uniform(-1, 1), "dec": 0.04*idx + scatter*rng.uniform(-1, 1), - "diaSourceId": idx + 1 + self.nObjects, "diaObjectId": 0} + "diaSourceId": idx + 1 + self.nObjects, "diaObjectId": 0, "trailLength": 5.5*idx} for idx in range(self.nSources)]) self.diaSourceZeroScatter = pd.DataFrame(data=[ {"ra": 0.04*idx, "dec": 0.04*idx, - "diaSourceId": idx + 1 + self.nObjects, "diaObjectId": 0} + "diaSourceId": idx + 1 + self.nObjects, "diaObjectId": 0, "trailLength": 5.5*idx} for idx in range(self.nSources)]) + visitInfo = afwImage.VisitInfo(exposureTime=30.0, + ) + exposureInfo = afwImage.ExposureInfo() + exposureInfo.setVisitInfo(visitInfo) + maskedImage = afwImage.MaskedImageF(lsst.geom.Extent2I(64, 64)) + self.exposure = afwImage.ExposureF(maskedImage, exposureInfo) def test_run(self): """Test the full task by associating a set of diaSources to existing diaObjects. """ - assocTask = AssociationTask() - results = assocTask.run(self.diaSources, self.diaObjects) + + config = AssociationTask.ConfigClass() + config.doTrailedSourceFilter = False + assocTask = AssociationTask(config=config) + + results = assocTask.run(self.diaSources, self.diaObjects, self.exposure) self.assertEqual(results.nUpdatedDiaObjects, len(self.diaObjects) - 1) self.assertEqual(results.nUnassociatedDiaObjects, 1) @@ -75,13 +85,36 @@ def test_run(self): [0]): self.assertEqual(test_obj_id, expected_obj_id) + def test_run_trailed_sources(self): + """Test the full task by associating a set of diaSources to + existing diaObjects when trailed sources are filtered. + """ + + assocTask = AssociationTask() + + results = assocTask.run(self.diaSources, self.diaObjects, self.exposure) + + self.assertEqual(results.nUpdatedDiaObjects, len(self.diaObjects) - 3) + self.assertEqual(results.nUnassociatedDiaObjects, 3) + self.assertEqual(len(results.matchedDiaSources), + len(self.diaObjects) - 3) + self.assertEqual(len(results.unAssocDiaSources), 1) + for test_obj_id, expected_obj_id in zip( + results.matchedDiaSources["diaObjectId"].to_numpy(), + [1, 2, 3, 4]): + self.assertEqual(test_obj_id, expected_obj_id) + for test_obj_id, expected_obj_id in zip( + results.unAssocDiaSources["diaObjectId"].to_numpy(), + [0]): + self.assertEqual(test_obj_id, expected_obj_id) + def test_run_no_existing_objects(self): """Test the run method with a completely empty database. """ assocTask = AssociationTask() results = assocTask.run( self.diaSources, - pd.DataFrame(columns=["ra", "dec", "diaObjectId"])) + pd.DataFrame(columns=["ra", "dec", "diaObjectId", "trailLength"]), self.exposure) self.assertEqual(results.nUpdatedDiaObjects, 0) self.assertEqual(results.nUnassociatedDiaObjects, 0) self.assertEqual(len(results.matchedDiaSources), 0) diff --git a/tests/test_diaPipe.py b/tests/test_diaPipe.py index 5236e42c..5759a191 100644 --- a/tests/test_diaPipe.py +++ b/tests/test_diaPipe.py @@ -121,7 +121,7 @@ def solarSystemAssociator_run(self, unAssocDiaSources, solarSystemObjectTable, d unAssocDiaSources=MagicMock(spec=pd.DataFrame())) @lsst.utils.timer.timeMethod - def associator_run(self, table, diaObjects): + def associator_run(self, table, diaObjects, exposure): return lsst.pipe.base.Struct(nUpdatedDiaObjects=2, nUnassociatedDiaObjects=3, matchedDiaSources=MagicMock(spec=pd.DataFrame()), unAssocDiaSources=MagicMock(spec=pd.DataFrame())) diff --git a/tests/test_trailedSourceFilter.py b/tests/test_trailedSourceFilter.py new file mode 100644 index 00000000..19972c68 --- /dev/null +++ b/tests/test_trailedSourceFilter.py @@ -0,0 +1,85 @@ +import unittest +from lsst.ap.association import TrailedSourceFilterTask +import numpy as np +import pandas as pd +import lsst.afw.image as afwImage +import lsst.utils.tests + + +class TestTrailedSourceFilterTask(unittest.TestCase): + + def setUp(self): + """Create sets of diaSources and diaObjects. + """ + rng = np.random.default_rng(1234) + scatter = 0.1 / 3600 + self.nSources = 5 + self.diaSources = pd.DataFrame(data=[ + {"ra": 0.04*idx + scatter*rng.uniform(-1, 1), + "dec": 0.04*idx + scatter*rng.uniform(-1, 1), + "diaSourceId": idx, "diaObjectId": 0, "trailLength": 5.5*idx} + for idx in range(self.nSources)]) + visitInfo = afwImage.VisitInfo(exposureTime=30.0, + ) + exposureInfo = afwImage.ExposureInfo() + exposureInfo.setVisitInfo(visitInfo) + maskedImage = afwImage.MaskedImageF(lsst.geom.Extent2I(64, 64)) + self.exposure = afwImage.ExposureF(maskedImage, exposureInfo) + + def test_run(self): + # Run with default trail length + + trailedSourceFilterTask = TrailedSourceFilterTask() + + results = trailedSourceFilterTask.run(self.diaSources, self.exposure) + + self.assertEqual(len(results.diaSources), + 3) + + def test_run_short_max_trail(self): + # Run with low trail rate cutoff to test config settings. + + config = TrailedSourceFilterTask.ConfigClass() + config.maxTrailLength = 0.01 + trailedSourceFilterTask = TrailedSourceFilterTask(config=config) + + results = trailedSourceFilterTask.run(self.diaSources, self.exposure) + + self.assertEqual(len(results.diaSources), + 1) + + def test_run_no_trails(self): + # Run with long trail length rate so that no sources are filtered. + + config = TrailedSourceFilterTask.ConfigClass() + config.maxTrailLength = 10.00 + trailedSourceFilterTask = TrailedSourceFilterTask(config=config) + + results = trailedSourceFilterTask.run(self.diaSources, self.exposure) + + self.assertEqual(len(results.diaSources), + 5) + self.assertEqual(len(results.trailedDiaSources), + 0) + + def test_check_dia_source_trail(self): + # Check that the task outputs the correct masks. + + trailedSourceFilterTask = TrailedSourceFilterTask() + mask = trailedSourceFilterTask.check_dia_source_trail(self.diaSources, self.exposure) + + comparison = mask == [False, False, False, True, True] + self.assertTrue(comparison.all()) + + +class MemoryTester(lsst.utils.tests.MemoryTestCase): + pass + + +def setup_module(module): + lsst.utils.tests.init() + + +if __name__ == "__main__": + lsst.utils.tests.init() + unittest.main()