# License ----------------------------------------------------------------- # Copyright (c) 2020 Jacob Lahne # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # README ------------------------------------------------------------------ # The following code is written by Jacob Lahne, 2019-2020, to accompany Lahne, # J. (2020). Sorting Backbone Analysis: A network-based method of extracting key # actionable information from free-sorting task results. Food Quality and # Preference, 103870. https://doi.org/10.1016/j.foodqual.2020.103870, as well as # the presentation of this material at Sensometrics 2020. Please contact # jlahne@vt.edu and/or cite the publication if you wish to reuse this code. # Setup ------------------------------------------------------------------- library(tidyverse) library(DistatisR) library(igraph) library(tidygraph) library(ggraph) library(cowplot) library(partitions) load(file = "sorting_data.RData") # Utility functions ------------------------------------------------------- sort_to_graph <- function(sort){ sort %>% DistanceFromSort() %>% apply(1:2, sum) %>% apply(1:2, function(x){max(.) - x}) %>% graph_from_adjacency_matrix(mode = "directed", diag = F, weighted = T) %>% as_tbl_graph() %>% arrange(name) } disparity_filter <- function(directed_graph, alpha = 0.05){ directed_graph %>% mutate(out_degree = centrality_degree(mode = "out")) %>% # calculation of disparity starts here activate(edges) %>% group_by(from) %>% mutate(normalized_out_weight = weight / sum(weight), sum_out_weight = sum(weight), local_degree = .N()$out_degree[from], p = (1 - normalized_out_weight) ^ (.N()$out_degree[from] - 1), maximum_link = (p < alpha | p == min(p))) %>% # calculation of disparity ends here ungroup() %>% filter(maximum_link) %>% activate(nodes) %>% mutate(group = group_walktrap(steps = 4, weights = weight)) %>% mutate(group = as.factor(group)) } plot_SBA <- function(sort, alpha = 0.05, seed = 1234, layout = "kk", textsize = 5){ set.seed(seed) sort %>% sort_to_graph() %>% disparity_filter(alpha = alpha) %>% ggraph(layout = layout) + geom_edge_fan(aes(end_cap = circle(2, "mm"), alpha = weight), arrow = arrow(type = "closed", length = unit(0.1, "inches")), edge_width = 1, show.legend = F) + geom_node_point(aes(color = as.factor(group)), shape = 21, fill = "white", size = 4, show.legend = F) + geom_node_point(aes(color = as.factor(group)), shape = 16, size = 2, show.legend = F) + geom_node_text(aes(label = name, color = as.factor(group)), show.legend = F, repel = T, position = position_jitter(seed = seed), size = textsize) + scale_color_viridis_d() + theme_graph() + coord_fixed() } plot_distatis_results <- function(sort, dimensions = c(1,2), confidence_ellipses = F, point_color = NULL, text_size = 4){ sort[sort(row.names(sort)), ] %>% DistanceFromSort() %>% distatis(nfact2keep = nrow(sort)) -> distatis_results # NB: We are using the BootFactorScores() function which is biased for pure # convenience. This could be rewritten to use a true bootstrap. if(length(point_color) != 0 & length(point_color) != nrow(distatis_results$res4Splus$F)){ break("'point_color' must be either NULL or be a vector of the same length as the number of objects being sorted .") } if(is.null(point_color)){ point_color <- 1 } distatis_results$res4Splus$F %>% .[,dimensions] %>% as_tibble(rownames = NA) %>% rename(x = 1, y = 2) %>% mutate(sample = row.names(.), color = as.factor(point_color)) -> plotting_points plotting_points %>% ggplot(aes(x = x, y = y)) + geom_hline(yintercept = 0, size = 1, color = "darkgrey", linetype = "dashed") + geom_vline(xintercept = 0, size = 1, color = "darkgrey", linetype = "dashed") + geom_point(aes(color = color), size = 2, show.legend = F) + ggrepel::geom_text_repel(aes(label = sample), size = text_size) + cowplot::theme_cowplot() + scale_color_viridis_d() + labs(x = paste0("Dimension ", dimensions[1]), y = paste0("Dimension ", dimensions[2])) + coord_fixed() -> point_plot if(confidence_ellipses == T){ BootFactorScores(distatis_results$res4Splus$PartialF) %>% .[,dimensions,] -> boot_coords ellipse_data <- matrix(0, nrow=0, ncol = 2) if(length(point_color) != 1){ ellipse_colors <- rep(point_color, dim(boot_coords)[3]) }else{ ellipse_colors <- 1 } for(i in 1:dim(boot_coords)[3]) ellipse_data <- rbind(ellipse_data, boot_coords[,,i]) ellipse_data %>% as_tibble(rownames = NA) %>% rename(x = 1, y = 2) %>% mutate(sample = row.names(.), color = as.factor(ellipse_colors)) %>% stat_ellipse(data = ., geom = "polygon", aes(group = sample, fill = color), alpha = 0.3, show.legend = F) -> point_ellipses plotting_points %>% ggplot(aes(x = x, y = y)) + geom_hline(yintercept = 0, size = 1, color = "darkgrey", linetype = "dashed") + geom_vline(xintercept = 0, size = 1, color = "darkgrey", linetype = "dashed") + geom_point(aes(color = color), size = 2, show.legend = F) + point_ellipses + ggrepel::geom_text_repel(aes(label = sample), size = text_size) + cowplot::theme_cowplot() + scale_color_viridis_d() + scale_fill_viridis_d() + labs(x = paste0("Dimension ", dimensions[1]), y = paste0("Dimension ", dimensions[2])) + coord_fixed() -> point_plot point_plot }else{ point_plot } } simulate_sorting <- function(objects, subjects, seed = 1234){ set.seed(seed) matrix(0, nrow = objects, ncol = subjects, dimnames = list(paste0("O", 1:objects), paste0("S", 1:subjects))) -> data for(i in 1:subjects){ sample(2:(objects-1), 1) %>% partitions::restrictedparts(objects, ., include.zero = F) -> possible_partitions possible_partitions %>% .[, sample(1:ncol(possible_partitions), 1)] %>% rep(1:length(.), times = .) %>% sample(replace = F) -> data[, i] } return(data) } # Showing the degenerate case --------------------------------------------- sim_sort <- simulate_sorting(13, 25) sim_sort[11,] <- matrix(14, nrow = 1, ncol = 25) sim_sort[12,] <- matrix(15, nrow = 1, ncol = 25) sim_sort[13,] <- matrix(16, nrow = 1, ncol = 25) plot_SBA(sim_sort) plot_distatis_results(sim_sort, confidence_ellipses = T, point_color = sim_sort %>% sort_to_graph() %>% disparity_filter() %>% as_tibble() %>% pull(group)) # Demos ------------------------------------------------------------------- # Beer data plot_SBA(beer_sort, layout = "fr", seed = 12421445) plot_distatis_results(beer_sort, confidence_ellipses = T, point_color = beer_sort %>% sort_to_graph() %>% disparity_filter() %>% as_tibble() %>% pull(group)) # Chocolate data plot_SBA(choc_sorting, layout = "kk") plot_SBA(choc_sorting, layout = "drl") plot_distatis_results(choc_sorting, confidence_ellipses = T, point_color = choc_sorting %>% sort_to_graph() %>% disparity_filter() %>% as_tibble() %>% arrange(name) %>% pull(group)) # Amari data plot_SBA(amari_sorting, layout = "stress") plot_distatis_results(amari_sorting, confidence_ellipses = T, point_color = amari_sorting %>% sort_to_graph() %>% disparity_filter() %>% as_tibble() %>% arrange(name) %>% pull(group)) # Spice data plot_SBA(spice_sort, layout = "graphopt") plot_distatis_results(spice_sort, confidence_ellipses = T, point_color = spice_sort %>% sort_to_graph() %>% disparity_filter() %>% as_tibble() %>% arrange(name) %>% pull(group))