From e757b138293e1b1aa1888f6f6bbfb71129a8a0cc Mon Sep 17 00:00:00 2001
From: Jake Bowers <jake@jakebowers.org>
Date: Tue, 6 Aug 2024 10:03:53 -0500
Subject: [PATCH] Added notes about unequally weighted obs

---
 DESCRIPTION                |  2 +-
 NAMESPACE                  |  2 +
 R/find_odds.R              |  2 +-
 R/p_binary.R               | 48 +++++++++++++++++++++-
 R/sim_urn.R                | 24 +++++++++++
 man/sens_analysis.Rd       | 14 +++----
 tests/testthat/test_thep.R | 83 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 164 insertions(+), 11 deletions(-)
 create mode 100644 R/sim_urn.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 9424f53..af9f2d0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -7,7 +7,7 @@ Description: What the package does (one paragraph).
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2.9000
 Depends:
     BiasedUrn,
     dplyr
diff --git a/NAMESPACE b/NAMESPACE
index 4eac6ae..0d3a020 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+export(find_p_two_types)
 export(sens_analysis)
+export(sim_urn)
 importFrom(BiasedUrn,dFNCHypergeo)
 importFrom(stats,uniroot)
diff --git a/R/find_odds.R b/R/find_odds.R
index 769aa09..38ad684 100644
--- a/R/find_odds.R
+++ b/R/find_odds.R
@@ -7,7 +7,7 @@
 ##' @param obs_support An integer representing the number of observations in favor of the working hypothesis. Must be less than or equal to the total.
 ##' @param total_obs An integer representing the total number of observations
 ##' @param thep The p-value threshold
-##' @return
+##' @return The odds required for the p-value to be equal to that specified in thep
 ##' @importFrom BiasedUrn dFNCHypergeo
 ##' @importFrom stats uniroot
 ##' @export
diff --git a/R/p_binary.R b/R/p_binary.R
index 8a9bc52..58686f6 100644
--- a/R/p_binary.R
+++ b/R/p_binary.R
@@ -1,7 +1,7 @@
 #' Find a p-value given a certain number of observations in favor of the
 #' working hypothesis among a total number of observations
 #'
-#'
+#' TODO description
 #'
 #' @param obs_support An integer representing the number of observations in favor of the working hypothesis. Must be less than or equal to the total.
 #' @param total_obs An integer representing the total number of observations
@@ -36,3 +36,49 @@ find_p_two_types <- function(obs_support, total_obs, odds = 1, interpretation =
 #' # Unequal probability, 2 kinds of evidence with interpretation printed
 #' find_p_two_types(obs_support = 7, total_obs = 10, interpretation = TRUE, odds = .5)
 #' find_p_two_types(obs_support = 7, total_obs = 10, interpretation = TRUE, odds = 2)
+
+#' Find a p-value given differently weighted pieces of evidence
+#'
+#' TODO Description
+#'
+#' @param obs_support An integer representing the number of observations in favor of the working hypothesis. Must be less than or equal to the total.
+#' @param total_obs An integer representing the total number of observations
+#' @param odds The odds of seeing
+#' @param interpretation TRUE if the function returns text helping to interpret the result, FALSE (default option) to return just the p-value
+#' @return Either a p-value (numeric, scalar) or a list containing the p-value and text containing an interpretation
+#' @export
+
+#' @details Given two kinds of information --- information supporting the rival
+#' and information supporting the working hypothesis -- where each type can
+#' have a different overall odds of observation --- and where each **piece** of
+#' working theory supporting information can have different weight, what is the
+#' probability of observing a given amount of working theory supporting
+#' information. We convert this problem into a many items or multivariate problem:
+
+find_p_two_types_w <- function(obs_support, total_obs, odds = 1, weights, interpretation = FALSE) {
+  ## Test to make sure that obs_support is less than or equal to total_obs
+  stopifnot("The number of observations in favor of the working hypothesis must be less than or equal to the total number of observations" = obs_support <= total_obs)
+  obs_oppose <- obs_support + 1
+  stopifnot("Observations are already compatible with the null. The number of observations in favor of the working hypothesis must be greater than or equal to half of the total number of observations" = obs_support >= (total_obs / 2))
+  ## We assume odds=1 here
+  thep <- dFNCHypergeo(
+    x = obs_support, m1 = obs_support, m2 = obs_oppose,
+    n = total_obs, odds = odds
+  )
+  if (!interpretation) {
+    return(thep)
+  } else {
+    interp <- paste0("The probability of drawing ", obs_support, " observations which support the working theory from an urn model supporting a rival theory, where the odds of observing working theory information is odds=", odds, ", is p=", round(thep, 4))
+    message(interp)
+    return(list(thep = thep, interp = interp))
+  }
+}
+#' @examples
+#' ...
+#' # Equal probability, 2 kinds of evidence
+#' find_p_two_types(obs_support = 7, total_obs = 10)
+#' # Equal probability, 2 kinds of evidence with interpretation printed
+#' find_p_two_types(obs_support = 7, total_obs = 10, interpretation = TRUE)
+#' # Unequal probability, 2 kinds of evidence with interpretation printed
+#' find_p_two_types(obs_support = 7, total_obs = 10, interpretation = TRUE, odds = .5)
+#' find_p_two_types(obs_support = 7, total_obs = 10, interpretation = TRUE, odds = 2)
diff --git a/R/sim_urn.R b/R/sim_urn.R
new file mode 100644
index 0000000..d734514
--- /dev/null
+++ b/R/sim_urn.R
@@ -0,0 +1,24 @@
+# Functions for drawing from urn models
+
+
+#' Simulate an urn model
+#'
+#' A function that allows the researcher to simulate draws from urn models that
+#' do not have nice analytical expressions. For example, urn models where each
+#' ball has a different probability of being drawn. Given a set of draws, we
+#' can calculate probabilities.
+#'
+#'
+#' @title Simulate draws from an urn model
+#' @param urn_size Total number of items in the urn
+#' @param total_obs An integer representing the total number of observations
+#' @param thep The p-value threshold
+#' @return A vector of length n
+#' @importFrom BiasedUrn dFNCHypergeo
+#' @importFrom stats uniroot
+#' @export
+
+sim_urn <- function(urn_size, total_obs, item_probs, thep) {
+  res <- sample(x = distinct_items, prob = item_probs, replace = FALSE)
+  return(the_draw)
+}
diff --git a/man/sens_analysis.Rd b/man/sens_analysis.Rd
index 999da57..e3ee1c2 100644
--- a/man/sens_analysis.Rd
+++ b/man/sens_analysis.Rd
@@ -4,23 +4,21 @@
 \alias{sens_analysis}
 \title{Find odds given ideas about unequally easy evidence}
 \usage{
-sens_analysis(m1, m2, n, thep)
+sens_analysis(obs_support, obs_oppose, total_obs, thep = 0.05)
 }
 \arguments{
-\item{m1}{TODO}
+\item{obs_support}{An integer representing the number of observations in favor of the working hypothesis. Must be less than or equal to the total.}
 
-\item{m2}{TODO}
+\item{total_obs}{An integer representing the total number of observations}
 
-\item{n}{TODO}
-
-\item{thep}{TODO}
+\item{thep}{The p-value threshold}
 }
 \value{
-A vector representing TODO
+The odds required for the p-value to be equal to that specified in thep
 }
 \description{
 Find odds
 }
 \details{
-TODO Description
+A function that allows the researcher to input the likelihood of data or to find how mush bias in datacollection would be nescessary to obtain a p>0.05 or p>0.10 result using the same data.
 }
diff --git a/tests/testthat/test_thep.R b/tests/testthat/test_thep.R
index b81d357..ece9589 100644
--- a/tests/testthat/test_thep.R
+++ b/tests/testthat/test_thep.R
@@ -32,3 +32,86 @@ test_that("Warnings work", {
   expect_error(find_p_two_types(obs_support = 2, total_obs = 10))
 })
 
+library(partitions)
+# Define the urn composition
+urn <- c(2, 3) # 2 red, 3 black
+# Enumerate possible draws without replacement
+draws <- S(urn, 2) # Draw 2 balls
+print(draws)
+
+
+obs_working <- 3
+obs_sampled <- 4
+rival_obs <- 1
+
+obs_weights <- c(10, 1, 1)
+
+## An observation of working obs 1 is worth 10 of either obs 2 or obs 3 in
+## regards evidence against the rival. It is as if if we draw working obs 1
+## (w1) we are extra surprised to see it under the rival theory. Like the
+## number of rival items is **larger** than we thought. So, maybe if our set of
+## 4 sampled includes w2 or w3 then the rival size is still 4, but if the set
+## of 4 sampled includes w1, then the urn model includes more rival --- like
+## max(c(sum(obs_weights),obs_working+1,rival_obs)). So, here it would have 12
+## obs rather than 7.
+
+## So we can imagine this model this as two steps: First draw from an urn representing whether
+## you will draw from an urn containing w1 versus not w1. And second draw from
+## the chosen urn with rival=7 or 12 depending on whether or not it contains w1
+## or not respectively. An issue with the two step idea: If you are asking probabability of seeing 3 and you
+## actually observed 3, then you have to have the possibility of sampling 3
+## from the urn. You cannot exclude any from the urn. If you choose an urn
+## containing w1: then you can sample 4 from that urn If you choose an urn
+## without w1: then you can never see 3 working theory supporting obs.
+
+## I think it is easier to think of obs_weights as saying that seeing all of these
+## three including w1 would be more surprising than if they were all equally
+## weighted.
+
+## I think it is ok to have many pieces of information that have weights less
+## than 1, but I think we don't want rival size to be less than working size
+## otherwise it doesn't feel like a test of the rival. So, we can print out a
+## warning for such "straw in the wind" style of situations. Really, the
+## ability to have multiple items with less than 1 weight is to counteract one
+## or a few observations with very high evidentiary weight.
+
+stopifnot(length(obs_weights) == obs_working)
+
+## We are twice as likely to observe working theory supporting information
+working_odds <- 2
+
+rival_obs <- max(rival_obs, obs_working + 1)
+total_obs <- obs_working + rival_obs
+
+## samples of size 4 from total 7 where 3 are working obs but where working obs
+## 1 has weight 10. All working theory obs are twice as easy to observe working
+## theory supporting evidence compared to the rival: equally easy to observe
+## any working theory item compared to any rival theory item in this binary
+## example. To change the odds I'd suggest we focus on "item type" in the
+## multivariate model so each type gets an odds.
+
+## So is this:
+
+## (1) an Urn with 10+1+1 working theory items and 4 rival items? I
+## don't think so. It is not a test of the rival in this case --- it will be
+## very probable to see 3 working theory items in a set of 4 sampled.
+
+
+dFNCHypergeo(
+  x = obs_working, m1 = obs_working, m2 = rival_obs,
+  n = obs_sampled, odds = working_odds
+)
+dFNCHypergeo(
+  x = obs_working, m1 = obs_working, m2 = sum(obs_weights),
+  n = obs_sampled, odds = working_odds
+)
+
+## Compare when odds=1
+dFNCHypergeo(
+  x = obs_working, m1 = obs_working, m2 = rival_obs,
+  n = obs_sampled, odds = 1
+)
+dFNCHypergeo(
+  x = obs_working, m1 = obs_working, m2 = sum(obs_weights),
+  n = obs_sampled, odds = 1
+)