# License ----------------------------------------------------------------- # Copyright (c) 2020 Jacob Lahne # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # Setup ------------------------------------------------------------------- # Data analysis for Berry CLT Paper # Comparing the efficacy of different scales for affective research within subjects # Includes code for bootstrapping results (nonparametric separation efficiency) library(tidyverse) library(readxl) library(ez) # Import data files ------------------------------------------------------- load(file = "berry scale paper data - for distribution.RData") berry_liking_complete_long # dataframe with all liking data in long form by subject, sample, and scale berry_cata_censored # long form CATA data for all berries # Berry ANOVA ----------------------------------------------------------- library(afex) library(emmeans) library(patchwork) berry_anova <- function(type_of_berry, type_of_scale){ berry_liking_complete_long %>% filter(berry == type_of_berry, str_detect(scale, type_of_scale)) %>% drop_na() %>% mutate(subject = as.factor(subject), berry = as.factor(berry), sample = as.factor(sample), scale = as.factor(scale)) -> filtered_liking aov_results <- aov_ez(id = "subject", dv = "rating", within = "sample", data = filtered_liking) return(list( type_of_berry = type_of_berry, type_of_scale = type_of_scale, data = filtered_liking, aov_results = aov_results )) } berry_anova_2_way <- function(type_of_berry){ berry_liking_complete_long %>% filter(berry == type_of_berry) %>% unite(berry, sample, scale, col = "scale", sep = " ") %>% pivot_wider(names_from = "scale", values_from = "rating") %>% drop_na() %>% pivot_longer(names_to = "scale", values_to = "rating", -subject) %>% separate(scale, into = c("berry", "sample", "scale"), sep = " ") %>% mutate(subject = as.factor(subject), berry = as.factor(berry), sample = as.factor(sample), scale = as.factor(scale), rating = case_when(str_detect(scale, "9pt") ~ rating, str_detect(scale, "lms") ~ (rating + 100) * (8 / 200) + 1, str_detect(scale, "us") ~ (rating + 0) * (8 / 15) + 1)) -> filtered_liking aov_results <- aov_ez(dv = "rating", id = "subject", within = c("sample", "scale"), data = filtered_liking) ez::ezPlot(filtered_liking, dv = rating, wid = subject, within = c(sample, scale), x = sample, split = scale, type = 3) + theme_bw() + scale_color_viridis_d(option = "magma", end = 0.9, direction = -1, name = "scale", labels = c("9-pt", "LAM", "VAS")) + labs(x = paste0(type_of_berry, " variety samples"), y = "liking (scaled to 9-pt range)") + scale_linetype_discrete(name = "scale", labels = c("9-pt", "LAM", "VAS")) + scale_shape_discrete(name = "scale", labels = c("9-pt", "LAM", "VAS")) + coord_cartesian(ylim = c(4, 7)) -> interaction_plot return(list( n = filtered_liking %>% pull(subject) %>% unique %>% length, type_of_berry = type_of_berry, data = filtered_liking, aov_results = aov_results, interaction_plot = interaction_plot )) } (berry_anova_2_way("raspberry")$interaction_plot + berry_anova_2_way("strawberry")$interaction_plot) / (berry_anova_2_way("blackberry")$interaction_plot + berry_anova_2_way("blueberry")$interaction_plot) # Bootstrap: panelists with all scales for a berry ------------------------ # The following code makes sure to only draw subjects for each berry who have # rated those samples on all 3 scales. Furthermore, the bootstrapping approach # draws "complete" records: for each subject who is pulled for each berry, all # three of their responses (using each scale) are pulled. # Please note that all code below has been commented because it will take ~1-2 # hours to to run. I recommend instead starting at the load() command below to # load the set of simulations I already ran. However, for full replicability # this process should be run. # bootstrap_berry_sample_with_complete_observations <- function(type_of_berry, number_of_subjects, seed = 1234){ # # set.seed(seed) # # berry_liking_complete_long %>% # filter(berry == type_of_berry) %>% # unite(berry, sample, scale, col = "scale", sep = " ") %>% # pivot_wider(names_from = "scale", values_from = "rating") %>% # drop_na() %>% # sample_n(size = number_of_subjects, replace = T, ) %>% # mutate(id = 1:number_of_subjects) %>% # select(-subject) %>% # pivot_longer(names_to = "scale", values_to = "rating", -id) %>% # separate(scale, into = c("berry", "sample", "scale"), sep = " ") %>% # return(.) # # } # # # run_bootstrap_anova_with_complete_observations <- function(type_of_berry, number_of_subjects){ # # bootstrap_berry_sample_with_complete_observations(type_of_berry, number_of_subjects) -> # current_bootstrap_sample # # current_results <- tibble( # type_of_berry = character(), # type_of_scale = character(), # number_of_subjects = numeric(), # f_stat = numeric(), # p_value = numeric(), # pairwise_proportion = numeric() # ) # # for(type_of_scale in c("9pt", "us", "lms")){ # # current_bootstrap_sample %>% # filter(str_detect(scale, type_of_scale)) %>% # aov_car(formula = rating ~ sample + Error(id / sample), data = .) -> # aov_results # # tibble( # type_of_berry = type_of_berry, # type_of_scale = type_of_scale, # number_of_subjects = number_of_subjects, # f_stat = aov_results$anova_table$F, # p_value = aov_results$anova_table$`Pr(>F)`, # pairwise_proportion = aov_results %>% # emmeans(specs = "sample") %>% # pairs %>% # as.data.frame %>% # summarize(sum(p.value < 0.05) / nrow(.)) %>% # unlist() # ) %>% # bind_rows(current_results, .) -> # current_results # } # # return(current_results) # # } # # complete_bootstrap_anova_simulations <- tibble( # type_of_berry = character(), # type_of_scale = character(), # number_of_subjects = numeric(), # f_stat = numeric(), # p_value = numeric(), # pairwise_proportion = numeric() # ) # # pb <- progress_bar$new(total = 4 * 13 * 100) # # for(type_of_berry in c("raspberry", "strawberry", "blackberry", "blueberry")){ # # for(number_of_subjects in seq(20, 80, 5)){ # # for(i in 1:100){ # # run_bootstrap_anova_with_complete_observations(type_of_berry, number_of_subjects) %>% # bind_rows(complete_bootstrap_anova_simulations, .) -> # complete_bootstrap_anova_simulations # pb$tick() # # } # } # } # # complete_bootstrap_anova_simulations %>% # mutate(bootstrap_id = rep(1:(4*13*100), each = 3)) -> # complete_bootstrap_anova_simulations load("Complete Bootstrapped ANOVAs.RData") complete_bootstrap_anova_simulations %>% mutate(type_of_berry = fct_relevel(type_of_berry, "raspberry", "strawberry", "blackberry", "blueberry")) %>% ggplot(aes(x = number_of_subjects, y = 1-p_value, color = type_of_scale, linetype = type_of_scale)) + geom_hline(yintercept = 0.95, linetype = "dotted") + geom_smooth(method = "loess", fill = "grey") + #lims(x = c(20, 75)) + facet_wrap(~type_of_berry) + scale_color_viridis_d(name = "scale", labels = c("9-pt", "LAM", "VAS"), option = "magma", end = 0.9) + scale_linetype_discrete(name = "scale", labels = c("9-pt", "LAM", "VAS")) + theme_bw() + labs(x = "number of subjects", y = "proportion of significant ANOVAs (in 100 simulations)") complete_bootstrap_anova_simulations %>% mutate(type_of_berry = fct_relevel(type_of_berry, "raspberry", "strawberry", "blackberry", "blueberry")) %>% ggplot(aes(x = number_of_subjects, y = pairwise_proportion, color = type_of_scale, linetype = type_of_scale)) + geom_smooth(method = "loess", fill = "grey") + facet_wrap(~type_of_berry) + scale_color_viridis_d(name = "scale", labels = c("9-pt", "LAM", "VAS"), option = "magma", end = 0.9) + scale_linetype_discrete(name = "scale", labels = c("9-pt", "LAM", "VAS")) + theme_bw() + labs(x = "number of subjects", y = "proportion of possible pairwise distinctions (in 100 simulations)") # Use CATA (private) descriptors to find patterns of berries -------------- ca_for_berry_cata <- function(type_of_berry){ berry_cata_censored %>% filter(str_detect(sample_name, type_of_berry)) %>% drop_na() %>% pivot_wider(names_from = "attribute", values_from = "count") %>% column_to_rownames("sample_name") %>% FactoMineR::CA(graph = F) -> berry_CATA_CA berry_CATA_CA$row$coord %>% as_tibble(rownames = "sample") %>% left_join(berry_anova(type_of_berry = type_of_berry, type_of_scale = "9pt")$data %>% group_by(berry, sample) %>% summarize(average_rating = mean(rating)) %>% ungroup() %>% unite(berry, sample, col = "sample", sep = " ")) %>% ggplot(aes(x = `Dim 1`, y = `Dim 2`, color = average_rating)) + geom_hline(yintercept = 0, linetype = 2, color = "darkgrey") + geom_vline(xintercept = 0, linetype = 2, color = "darkgrey") + geom_point() + ggrepel::geom_text_repel(aes(label = sample)) + theme_bw() + theme(panel.grid = element_blank(), legend.title.align = 0.5) + #coord_fixed() + scale_color_viridis_c(option = "inferno", end = 0.8) + labs(x = paste0("Dimension 1\n(", berry_CATA_CA$eig[1,2] %>% round(2), "% Variance)"), y = paste0("Dimension 2\n(", berry_CATA_CA$eig[2,2] %>% round(2), "% Variance)"), color = "rated\nliking") -> berry_CATA_plot list( plot = berry_CATA_plot, ca_data = berry_CATA_CA ) %>% return(.) } (ca_for_berry_cata("raspberry")$plot + ca_for_berry_cata("strawberry")$plot) / (ca_for_berry_cata("blackberry")$plot + ca_for_berry_cata("blueberry")$plot) # Check the "categorical" behavior of LAM/gLMS ---------------------------- # The limit of +/- 2 pts from an anchor position is from Hayes et al (2013) for categorical behavior. is_categorical_mark <- function(rating){ mark_boundaries <- tibble( mark = c(-100, -76, -56, -32, -10, 0, 12, 36, 56, 74, 100), lower = case_when(mark < 0 ~ mark + 2, mark >= 0 ~ mark - 2), upper = case_when(mark < 0 ~ mark - 2, mark >= 0 ~ mark + 2) ) is_categorical <- FALSE if(rating < 0){ is_categorical <- any(rating <= mark_boundaries$lower & rating >= mark_boundaries$upper) } else{ is_categorical <- any(rating >= mark_boundaries$lower & rating <= mark_boundaries$upper) } return(is_categorical) } berry_liking_complete_long %>% select(subject, berry, scale, rating) %>% filter(str_detect(scale, "lms")) %>% mutate(is_categorical = map_lgl(rating, ~is_categorical_mark(.))) %>% group_by(berry, is_categorical) %>% count(is_categorical) %>% ungroup %>% xtabs(n ~ berry + is_categorical, data = .) -> categorical_use_of_lms_table categorical_use_of_lms_table %>% chisq.test() categorical_use_of_lms_table %>% prop.table(margin = 1) categorical_use_of_lms_table %>% as_tibble %>% arrange(berry) %>% group_by(berry) %>% mutate(proportion = n / sum(n) * 100) %>% ungroup() %>% mutate(berry = factor(berry, levels = c("raspberry", "strawberry", "blackberry", "blueberry")), categorical = ifelse(is_categorical, "categorical", "full")) %>% ggplot(aes(x = berry, y = proportion, group = is_categorical, fill = berry)) + geom_col(position = "dodge", color = "white") + geom_label(aes(label = proportion %>% round(1) %>% as.character %>% paste0(., "%")), fill = "white", position = position_dodge(width = 0.9), show.legend = F) + geom_text(aes(label = categorical, y = 2), position = position_dodge(width = 0.9), color = "white") + labs(x = "Berry (in order of test presentation)", y = "% Scale Usage Pattern") + scale_fill_viridis_d(end = 0.8, option = "plasma", direction = -1) + theme_bw() # ANOVA IIR Check --------------------------------------------------------- plot_fitted_residuals <- function(berry_anova_results){ current_anova_results <- berry_anova_results$aov_results$lm current_anova_results %>% fitted() %>% as_tibble(rownames = "subject") %>% pivot_longer(names_to = "sample", values_to = "fitted", -subject) %>% left_join(current_anova_results %>% residuals() %>% as_tibble(rownames = "subject") %>% pivot_longer(names_to = "sample", values_to = "residuals", -subject), by = c("subject", "sample")) %>% ggplot(aes(x = fitted, y = residuals)) + geom_jitter(aes(color = sample %>% as.factor), alpha = 0.5, show.legend = F) + theme_bw() + scale_color_viridis_d(end = 0.8, option = "plasma", direction = -1) + labs(title = paste0(berry_anova_results$type_of_berry, ", ", berry_anova_results$type_of_scale, " - fitted vs. residuals plot")) } # Example use berry_anova("raspberry", "lms") %>% plot_fitted_residuals()