Skip to content

Commit

Permalink
Merge pull request #223 from lsst/tickets/DM-44850
Browse files Browse the repository at this point in the history
DM-44850: Add utility to create empty tables
  • Loading branch information
isullivan committed Jun 19, 2024
2 parents 4ec8bf2 + 899a360 commit 639f7d8
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 12 deletions.
5 changes: 3 additions & 2 deletions python/lsst/ap/association/diaPipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
LoadDiaCatalogsTask,
PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask
from lsst.ap.association.utils import convertTableToSdmSchema, readSchemaFromApdb, dropEmptyColumns
from lsst.ap.association.utils import convertTableToSdmSchema, readSchemaFromApdb, dropEmptyColumns, \
make_empty_catalog
from lsst.daf.base import DateTime
from lsst.meas.base import DetectorVisitIdGeneratorConfig, \
DiaObjectCalculationTask
Expand Down Expand Up @@ -658,7 +659,7 @@ def createNewDiaObjects(self, unAssocDiaSources):
- ``nNewDiaObjects`` : Number of newly created diaObjects.(`int`)
"""
if len(unAssocDiaSources) == 0:
newDiaObjects = self.apdb._make_empty_catalog(daxApdb.ApdbTables.DiaObject)
newDiaObjects = make_empty_catalog(self.schema, tableName="DiaObject")
else:
unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
newDiaObjects = convertTableToSdmSchema(self.schema, unAssocDiaSources,
Expand Down
43 changes: 33 additions & 10 deletions python/lsst/ap/association/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@

"""Utilities for working with the APDB.
"""
__all__ = ("convertTableToSdmSchema", "readSdmSchemaFile", "readSchemaFromApdb", "dropEmptyColumns")
__all__ = ("convertTableToSdmSchema", "readSdmSchemaFile", "readSchemaFromApdb",
"dropEmptyColumns", "make_empty_catalog")

from collections.abc import Mapping
import os

from lsst.dax.apdb import Apdb, ApdbTables, schema_model
import felis.datamodel
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -89,7 +91,7 @@ def readSdmSchemaFile(schemaFile: str,
Returns
-------
schemaTable : 'dict' of `lsst.dax.apdb.apdbSchema.ApdbSchema`
schemaTable : dict[str, schema_model.Table]
A dict of the schemas in the given table defined in the specified file.
Raises
Expand All @@ -114,7 +116,7 @@ def readSdmSchemaFile(schemaFile: str,
return schemaTable


def readSchemaFromApdb(apdb):
def readSchemaFromApdb(apdb: Apdb) -> dict[str, schema_model.Table | None]:
"""Extract the schema from an APDB instance.
Parameters
Expand All @@ -124,13 +126,10 @@ def readSchemaFromApdb(apdb):
Returns
-------
schemaTable : 'dict' of `lsst.dax.apdb.apdbSchema.ApdbSchema`
schemaTable : dict[str, schema_model.Table | None]
A dict of the schemas in the given table defined in the specified file.
"""
schemaTable = {}
for singleTable in apdb._schema.tableSchemas:
schemaTable[singleTable.name] = apdb._schema.tableSchemas[singleTable]
return schemaTable
return {table.table_name(): apdb.tableDef(table) for table in ApdbTables}


def convertTableToSdmSchema(apdbSchema, sourceTable, tableName):
Expand All @@ -141,7 +140,7 @@ def convertTableToSdmSchema(apdbSchema, sourceTable, tableName):
Parameters
----------
apdbSchema : `lsst.dax.apdb.apdbSchema.ApdbSchema`
apdbSchema : `dict` [`str`, `lsst.dax.apdb.schema_model.Table`]
Schema from ``sdm_schemas`` containing the table definition to use.
sourceTable : `pandas.DataFrame`
The input table to convert.
Expand Down Expand Up @@ -182,7 +181,7 @@ def dropEmptyColumns(apdbSchema, sourceTable, tableName):
Parameters
----------
apdbSchema : `lsst.dax.apdb.apdbSchema.ApdbSchema`
apdbSchema : `dict` [`str`, `lsst.dax.apdb.schema_model.Table`]
Schema from ``sdm_schemas`` containing the table definition to use.
sourceTable : `pandas.DataFrame`
The input table to remove missing data columns from.
Expand All @@ -196,3 +195,27 @@ def dropEmptyColumns(apdbSchema, sourceTable, tableName):
nullColNames = nullColumns[nullColumns].index.tolist()
dropColumns = list(set(nullColNames) & set(nullableList))
return sourceTable.drop(columns=dropColumns)


def make_empty_catalog(apdbSchema, tableName):
    """Make an empty catalog for a table with a given name.

    Parameters
    ----------
    apdbSchema : `dict` [`str`, `lsst.dax.apdb.schema_model.Table`]
        Schema from ``sdm_schemas`` containing the table definition to use.
    tableName : `str`
        Name of the table in the schema to use.

    Returns
    -------
    catalog : `pandas.DataFrame`
        An empty (zero-row) catalog with one column per column definition
        of the requested table, each typed by ``column_dtype``.
    """
    table = apdbSchema[tableName]

    # The dtype has to be given by keyword: the first positional argument of
    # ``pd.Series`` is ``data``, so a positional dtype would be stored as a
    # value and the resulting catalog would not be empty.
    data = {
        columnDef.name: pd.Series(dtype=column_dtype(columnDef.datatype))
        for columnDef in table.columns
    }
    return pd.DataFrame(data)
50 changes: 50 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# This file is part of ap_association.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import unittest

import pandas as pd

from lsst.ap.association.utils import readSdmSchemaFile, make_empty_catalog, convertTableToSdmSchema


class TestUtils(unittest.TestCase):
    """Unit tests for the APDB schema utilities in ``lsst.ap.association.utils``.
    """

    def test_make_empty_catalog(self):
        """Check that an empty catalog has the correct format.
        """
        # NOTE(review): the environment variable is left unexpanded here;
        # presumably readSdmSchemaFile expands it -- confirm.
        schemaFile = os.path.join("${SDM_SCHEMAS_DIR}", "yml", "apdb.yaml")
        schemaName = "ApdbSchema"
        schema = readSdmSchemaFile(schemaFile, schemaName)

        tableNames = ["DiaObject", "DiaSource", "DiaForcedSource"]
        for tableName in tableNames:
            # subTest reports which table failed and keeps checking the rest.
            with self.subTest(tableName=tableName):
                emptyCatalog = make_empty_catalog(schema, tableName=tableName)

                # An "empty" catalog must have no rows at all.
                self.assertEqual(len(emptyCatalog), 0)

                emptyColumns = set(emptyCatalog.columns)
                self.assertIn("ra", emptyColumns)
                self.assertIn("dec", emptyColumns)
                self.assertIn("diaObjectId", emptyColumns)

                # Converting a minimal empty table to the same schema must
                # yield exactly the same set of columns.
                emptyDf = pd.DataFrame(columns=["diaObjectId",])
                convertedEmptyCatalog = convertTableToSdmSchema(schema, emptyDf, tableName=tableName)
                self.assertEqual(set(convertedEmptyCatalog.columns), emptyColumns)

0 comments on commit 639f7d8

Please sign in to comment.