running test sandag example through trip dest sample

RSGInc · Nov 7, 2024 · 8a4b281 · 8a4b281
1 parent 0a1bd5c
commit 8a4b281
Show file tree

Hide file tree

Showing 6 changed files with 144 additions and 40 deletions.
diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
@@ -402,7 +402,7 @@ def location_presample(
 
     # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
     maz_choices = tour_destination.choose_MAZ_for_TAZ(
-        state, taz_sample, MAZ_size_terms, trace_label
+        state, taz_sample, MAZ_size_terms, trace_label, model_settings
     )
 
     assert DEST_MAZ in maz_choices

diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py
@@ -293,6 +293,7 @@ def choose_MAZ_for_TAZ(
     network_los,
     alt_dest_col_name,
     trace_label,
+    model_settings,
 ):
     """
     Convert taz_sample table with TAZ zone sample choices to a table with a MAZ zone chosen for each TAZ
@@ -532,6 +533,52 @@ def choose_MAZ_for_TAZ(
             transpose=False,
         )
 
+    if estimation.manager.enabled and (model_settings.ESTIMATION_SAMPLE_SIZE > 0):
+        # want to ensure the override choice is in the choice set
+        survey_choices = estimation.manager.get_survey_destination_choices(
+            state, chooser_df, trace_label
+        )
+        if survey_choices is not None:
+            assert (
+                chooser_df.index == survey_choices.index
+            ).all(), "survey_choices index should match chooser_df index"
+            survey_choices.name = DEST_MAZ
+            survey_choices = survey_choices.dropna().astype(taz_choices[DEST_MAZ].dtype)
+            # merge maz_sizes onto survey choices
+            maz_sizes["MAZ_prob"] = maz_sizes.groupby(DEST_TAZ)["size_term"].transform(
+                lambda x: x / x.sum()
+            )
+            survey_choices = pd.merge(
+                survey_choices.reset_index(),
+                maz_sizes[[DEST_MAZ, DEST_TAZ, "MAZ_prob"]],
+                on=[DEST_MAZ],
+                how="left",
+            )
+            # merge TAZ_prob from taz_choices onto survey choices
+            survey_choices = pd.merge(
+                survey_choices,
+                # dropping duplicates to avoid duplicate rows as the same TAZ can be chosen multiple times
+                taz_choices[[chooser_id_col, DEST_TAZ, "TAZ_prob"]].drop_duplicates(
+                    subset=[chooser_id_col, DEST_TAZ]
+                ),
+                on=[chooser_id_col, DEST_TAZ],
+                how="left",
+            )
+            survey_choices["prob"] = (
+                survey_choices["TAZ_prob"] * survey_choices["MAZ_prob"]
+            )
+
+            # Don't care about getting dest_TAZ correct as it gets dropped later
+            survey_choices.fillna(0, inplace=True)
+
+            # append the survey choices to taz_choices and re-sort by chooser
+            taz_choices = pd.concat(
+                [taz_choices, survey_choices[taz_choices.columns]], ignore_index=True
+            )
+            taz_choices.sort_values(
+                by=[chooser_id_col, DEST_TAZ], inplace=True, ignore_index=True
+            )
+
     taz_choices = taz_choices.drop(columns=["TAZ_prob", "MAZ_prob"])
     taz_choices = taz_choices.groupby([chooser_id_col, DEST_MAZ]).agg(
         prob=("prob", "max"), pick_count=("prob", "count")
@@ -605,6 +652,7 @@ def destination_presample(
         network_los,
         alt_dest_col_name,
         trace_label,
+        model_settings,
     )
 
     assert alt_dest_col_name in maz_sample

diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py
@@ -969,6 +969,7 @@ def household_activity_choices(
             spec,
             choosers,
             trace_label=trace_label,
+            have_trace_targets=(trace_hh_id in choosers.index),
             chunk_sizer=chunk_sizer,
             compute_settings=compute_settings,
         )
@@ -984,14 +985,15 @@ def household_activity_choices(
             interaction_coefficients,
             hhsize,
             trace_spec=(trace_hh_id in choosers.index),
-            trace_label=trace_label,
+            trace_label=tracing.extend_trace_label(trace_label, "joint"),
         )
 
         joint_tour_utils = simulate.eval_utilities(
             state,
             joint_tour_spec,
             choosers,
-            trace_label=trace_label,
+            trace_label=tracing.extend_trace_label(trace_label, "joint"),
+            have_trace_targets=(trace_hh_id in choosers.index),
             chunk_sizer=chunk_sizer,
             compute_settings=compute_settings,
         )

diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py
@@ -234,7 +234,9 @@ def aggregate_size_terms(dest_size_terms, network_los):
     return MAZ_size_terms, TAZ_size_terms
 
 
-def choose_MAZ_for_TAZ(state: workflow.State, taz_sample, MAZ_size_terms, trace_label):
+def choose_MAZ_for_TAZ(
+    state: workflow.State, taz_sample, MAZ_size_terms, trace_label, model_settings
+):
     """
     Convert taz_sample table with TAZ zone sample choices to a table with a MAZ zone chosen for each TAZ
     choose MAZ probabilistically (proportionally by size_term) from set of MAZ zones in parent TAZ
@@ -311,8 +313,6 @@ def choose_MAZ_for_TAZ(state: workflow.State, taz_sample, MAZ_size_terms, trace_
     # taz_choices index values should be contiguous
     assert (
         (taz_choices[chooser_id_col] == np.repeat(chooser_df.index, taz_sample_size))
-        # can get one extra if sampling in estimation mode
-        | (taz_choices[chooser_id_col] == np.repeat(chooser_df.index, taz_sample_size + 1))
     ).all()
 
     # we need to choose a MAZ for each DEST_TAZ choice
@@ -457,6 +457,54 @@ def choose_MAZ_for_TAZ(state: workflow.State, taz_sample, MAZ_size_terms, trace_
             transpose=False,
         )
 
+    if estimation.manager.enabled and (model_settings.ESTIMATION_SAMPLE_SIZE > 0):
+        # want to ensure the override choice is in the choice set
+        survey_choices = estimation.manager.get_survey_destination_choices(
+            state, chooser_df, trace_label
+        )
+
+        if survey_choices is not None:
+            assert (
+                chooser_df.index == survey_choices.index
+            ).all(), "survey_choices index should match chooser_df index"
+            survey_choices.name = DEST_MAZ
+            survey_choices = survey_choices.dropna().astype(taz_choices[DEST_MAZ].dtype)
+            # merge maz_sizes onto survey choices
+            MAZ_size_terms["MAZ_prob"] = MAZ_size_terms.groupby("dest_TAZ")[
+                "size_term"
+            ].transform(lambda x: x / x.sum())
+            survey_choices = pd.merge(
+                survey_choices.reset_index(),
+                MAZ_size_terms.rename(columns={"zone_id": DEST_MAZ}),
+                on=[DEST_MAZ],
+                how="left",
+            )
+            # merge TAZ_prob from taz_choices onto survey choices
+            survey_choices = pd.merge(
+                survey_choices,
+                # dropping duplicates to avoid duplicate rows as the same TAZ can be chosen multiple times
+                taz_choices[[chooser_id_col, "dest_TAZ", "TAZ_prob"]].drop_duplicates(
+                    subset=[chooser_id_col, "dest_TAZ"]
+                ),
+                on=[chooser_id_col, "dest_TAZ"],
+                how="left",
+            )
+
+            survey_choices["prob"] = (
+                survey_choices["TAZ_prob"] * survey_choices["MAZ_prob"]
+            )
+
+            # Don't care about getting dest_TAZ correct as it gets dropped later
+            survey_choices.fillna(0, inplace=True)
+
+            # append the survey choices to taz_choices and re-sort by chooser
+            taz_choices = pd.concat(
+                [taz_choices, survey_choices[taz_choices.columns]], ignore_index=True
+            )
+            taz_choices.sort_values(
+                by=[chooser_id_col, "dest_TAZ"], inplace=True, ignore_index=True
+            )
+
     taz_choices = taz_choices.drop(columns=["TAZ_prob", "MAZ_prob"])
     taz_choices = taz_choices.groupby([chooser_id_col, DEST_MAZ]).agg(
         prob=("prob", "max"), pick_count=("prob", "count")
@@ -515,7 +563,9 @@ def destination_presample(
     )
 
     # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total
-    maz_choices = choose_MAZ_for_TAZ(state, taz_sample, MAZ_size_terms, trace_label)
+    maz_choices = choose_MAZ_for_TAZ(
+        state, taz_sample, MAZ_size_terms, trace_label, model_settings
+    )
 
     assert DEST_MAZ in maz_choices
     maz_choices = maz_choices.rename(columns={DEST_MAZ: alt_dest_col_name})

diff --git a/activitysim/core/estimation.py b/activitysim/core/estimation.py
@@ -934,7 +934,7 @@ def get_survey_values(self, model_values, table_name, column_names):
 
         return values[column_name] if column_name else values
 
-    def get_survey_destination_chocies(self, state, choosers, trace_label):
+    def get_survey_destination_choices(self, state, choosers, trace_label):
         """
         Returning the survey choices for the destination choice model.
         This gets called from inside interaction_sample and is used to
@@ -953,7 +953,7 @@ def get_survey_destination_chocies(self, state, choosers, trace_label):
         if "accessibilities" in trace_label:
             # accessibilities models do not have survey values
             return None
-        
+
         model = trace_label.split(".")[0]
         if model == "school_location":
             survey_choices = manager.get_survey_values(
@@ -994,6 +994,20 @@ def get_survey_destination_chocies(self, state, choosers, trace_label):
             # not implemented models include scheduling models and tour_od_choice
             logger.debug(f"Not grabbing survey choices for {model}.")
             return None
+
+        if "presample.interaction_sample" in trace_label:
+            # presampling happens for destination choice of two-zone systems.
+            # They are pre-sampling TAZs but the survey value destination is MAZs.
+            land_use = state.get_table("land_use")
+            TAZ_col = "TAZ" if "TAZ" in land_use.columns else "taz"
+            assert (
+                TAZ_col in land_use.columns
+            ), "Cannot find TAZ column in land_use table."
+            maz_to_taz_map = land_use[TAZ_col].to_dict()
+            # keep a -1 survey destination as -1 instead of letting map() turn it into NaN
+            maz_to_taz_map[-1] = -1
+            survey_choices = survey_choices.map(maz_to_taz_map)
+
         return survey_choices
 
 

diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py
@@ -504,48 +504,38 @@ def _interaction_sample(
 
     if estimation.manager.enabled and sample_size > 0:
         # we need to ensure chosen alternative is included in the sample
-        survey_choices = estimation.manager.get_survey_destination_chocies(
+        survey_choices = estimation.manager.get_survey_destination_choices(
             state, choosers, trace_label
         )
         if survey_choices is not None:
+            assert (
+                survey_choices.index == choosers.index
+            ).all(), "survey_choices and choosers must have the same index"
             survey_choices.name = alt_col_name
             survey_choices = survey_choices.dropna().astype(
                 choices_df[alt_col_name].dtype
             )
-            comparison = pd.merge(
+
+            # merge all survey choices onto choices_df
+            probs_df = probs.reset_index().melt(
+                id_vars=[choosers.index.name],
+                var_name=alt_col_name,
+                value_name="prob",
+            )
+            # probs are numbered 0..n-1 so we need to map back to alt ids
+            zone_map = pd.Series(alternatives.index).to_dict()
+            probs_df[alt_col_name] = probs_df[alt_col_name].map(zone_map)
+
+            survey_choices = pd.merge(
                 survey_choices,
-                choices_df,
+                probs_df,
                 on=[choosers.index.name, alt_col_name],
                 how="left",
-                indicator=True,
             )
-            missing_choices = comparison[comparison["_merge"] == "left_only"]
-            # need to get prob of missing choices and add them to choices_df
-            if not missing_choices.empty:
-                probs_df = probs.reset_index().melt(
-                    id_vars=[choosers.index.name],
-                    var_name=alt_col_name,
-                    value_name="prob",
-                )
-                # probs are numbered 0..n-1 so we need to map back to alt ids
-                zone_map = pd.Series(alternatives.index).to_dict()
-                probs_df[alt_col_name] = probs_df[alt_col_name].map(zone_map)
-                # merge the probs onto the missing chocies
-                missing_choices = pd.merge(
-                    missing_choices.drop(columns=["prob", "_merge"]),
-                    probs_df,
-                    on=[choosers.index.name, alt_col_name],
-                    how="left",
-                )
-                if missing_choices.prob.isna().sum() > 0:
-                    logger.warning(f"Survey choices with no probs:\n {missing_choices[missing_choices.prob.isna()]}")
-                del probs_df
-                missing_choices['prob'].fillna(0, inplace=True)
-                # random number is not important, filling with 0
-                missing_choices["rand"] = 0
-                # merge survey choices back into choices_df and sort by chooser
-                choices_df = pd.concat([choices_df, missing_choices], ignore_index=True)
-                choices_df.sort_values(by=[choosers.index.name], inplace=True)
+            survey_choices["rand"] = 0
+            survey_choices["prob"].fillna(0, inplace=True)
+            choices_df = pd.concat([choices_df, survey_choices], ignore_index=True)
+            choices_df.sort_values(by=[choosers.index.name], inplace=True)
 
     del probs
     chunk_sizer.log_df(trace_label, "probs", None)