P-value for Sheffe-test from pooled summary data (means and standard deviation)

Question

I’m stuck, how do I calculate p-values for the Scheffé test from pooled data (means and standard deviations) like “sheffeTest” from library(DescTools)? I didn’t find any documentation. The contrasts I’m considering are all possible pairwise comparisons.

I got the following data called Data_sum:

Location  n       mean       sd
<fctr>    <dbl>   <dbl>      <dbl>
TM    5   0.08016    0.0145622
NP    5   0.07550    0.0091643
PB    5   0.10434    0.0195800

and my contrast table looks like this (which is the same as using emmeans(model, pairwise ~ Location, adjust=”scheffe”):

 Contrast.Name     TM   NP    PB
 TMvsNP           -1    1     0
 TMvsPB           -1    0     1
 NPvsPB            0   -1     1

I calculated an Anova using anovaMean() from library(HH). I can calculate Sheffe by hand given the values from Anova, but im not able to calulate p-values. Any clues?

Edit: solved, see code below

Caclulating Anova from summary statistics from Data_sum using library(HH)

library(HH)
anova_summary = anovaMean(Data_sum$Location, Data_sum$n, Data_sum$mean, Data_sum$sd)
anova_summary

Creating a contrast table (or Dataframe)

note: grouping variable doesnt has to be a factor, order is generated by unique()

# Setup
df       <- Data_sum
iv       <- "Location" # Column containing the explanatory variable in df
mean_col <- "mean"     # Column containing the means in df
obs_col  <- "n"        # Column containing the number of observations in df

# Initialize the contrasts data frame outside the loop
contrasts.df <- data.frame(
  contrast         = character(),
  diff             = numeric(),
  n_precalc        = numeric(),
  stringsAsFactors = FALSE
)

# Function to calculate contrasts and differences
calculate_contrasts <- function(level1, level2) {
  # Means
  m <- function(level) mean(df[[mean_col]][df[[iv]] == level])
  # Number of observations
  n <- function(level) sum(df[[obs_col]][df[[iv]] == level])
  # cbind results
  data.frame(contrast  = paste(level1, " - ", level2, sep = ""),
             diff      = m(level1) - m(level2),
             # Precalculated part of t-statistic
             n_precalc = 1 / n(level1) + 1 / n(level2))
}

# Loop for calculating contrasts and differences
for (i in 1:(length(unique(df[[iv]])) - 1)) {
  for (j in (i + 1):length(unique(df[[iv]]))) {
    level1 <- unique(df[[iv]])[i]  # i-th level of iv
    level2 <- unique(df[[iv]])[j]  # j-th level of iv
    # Calculation of contrasts and differences for each pair of levels
    contrasts.df <- rbind(contrasts.df, calculate_contrasts(level1, level2))
  }
}

print(contrasts.df)

Doing the math as advised from PBulls

note: since pooled_n = mean(contr$n_precalc), only works in balanced design with equal number of observations

#setup
df    = Data_sum
iv    = "Location"        #column with explanatory variable in df
obs   = "n"               #column with number of observations in df
alpha = 0.05              #Set the significance level
anova = anova_summary     #anovaMean object (library(HH))
contr = contrasts.df      #name of the contrasts dataframe


n     = sum(df[[obs]])    #total number of observations
k     = length(unique(df[[iv]])) #Number of groups/levels of explanatory variable
rank  = k-1               #rank of the test

#statistics derived from anovaMean
mse   = anova$`Mean Sq`[2] #mean squared error

#values derived from contrasts.df
pooled_means   = contr$diff            #vector of means
if (sd(contr$n_precalc) != 0) warning("Balanced design required!")
pooled_n       = mean(contr$n_precalc) #cave balanced design

#scheffe t-statistics
se       = sqrt(mse*pooled_n)                            #pooled standard error
t.ratio  = pooled_means/se                               #tratio
p.values = pf(t.ratio**2/rank, rank, n-k, lower.tail = F) #p-value
t.crit   = sqrt(rank*qf(1-alpha, rank, n-k))             #critical t-value

#confidence intervals
lwr.ci = pooled_means - t.crit * se
upr.ci = pooled_means + t.crit * se

sheffe.df = data.frame(contrast = contr$contrast,
                       estimate = contr$diff,
                       SE       = se,
                       df       = n-k,
                       t.ratio  = t.ratio,
                       p.value  = p.values,
                       lwr.ci   = lwr.ci,
                       upr.ci   = upr.ci)

print(paste0("Confidence level used: ",1-alpha))
print(paste0("Conf-level adjustment: sheffe method with rank ",k-1))

sheffe.df

the result is a dataframe with exact results as “sheffeTest” from library(DescTools), but using summary statistics instead

Leave a Comment Cancel reply