library(readxl)
library(purrr)
library(dplyr)
library(DT)
library(gt)
library(htmltools)
# ── Load all result tables from the exported Excel workbook ──────────────────
xlsx_path <- "figs/R/PanelCheck_results.xlsx"

table_names <- c(
  "descriptive_stats",
  "descriptive_stats_overall",
  "Table5_perf_counts",
  "Table6_anova_Fratios",
  "panel_performance",
  "assessor_discrimination",
  "assessor_agreement",
  "assessor_flag_summary",
  "panel_anova",
  "repeatability_anova",
  "mixed_model_fixed",
  "mixed_model_random",
  "tukey_contrasts",
  "tukey_cld",
  "tukey_emmeans",
  "pca_sample_coords",
  "pca_variable_coords",
  "vc_flags"
)

tables <- map(
  table_names,
  \(t) tryCatch(read_xlsx(xlsx_path, sheet = t), error = \(e) NULL)
)
names(tables) <- table_names

1 Introduction

This report presents the outputs of the PanelCheck R translation — a clean, modular reimplementation of the PanelCheck sensory panel analysis pipeline in R. All analyses were executed via R/run_all.R on the included example dataset (Data_Bread.xlsx), which comprises ratings from 8 assessors across 5 bread samples, collected over 2 replicates and 10 sensory attributes.

The analyses cover four broad areas:

Area Scripts
Panel performance & ANOVA 01_panel_performance.R
Profile visualisation 02_profile_plots.R, 05_fvalue_overview.R
Mixed model & post-hoc testing 03_mixed_model.R
Consensus PCA 04_pca.R

Tabular results are loaded directly from figs/R/PanelCheck_results.xlsx; figures are embedded from figs/R/.


2 Descriptive Statistics

Descriptive statistics summarise the distribution of sensory scores across all attributes. The by-sample table breaks scores down per product, whilst the overall table collapses across samples to give panel-wide summaries.

2.1 By Sample

Each row represents one attribute–sample combination. Columns report the minimum, maximum, mean, standard deviation (SD), standard error of the mean (SEM), and coefficient of variation (CV%).

.dt_plain(tables[["descriptive_stats"]],
          caption = "Table 1. Descriptive statistics per attribute and sample.")

2.2 Overall

Panel-wide descriptive statistics collapsed across all samples and replicates.

.dt_plain(tables[["descriptive_stats_overall"]],
          caption = "Table 2. Overall descriptive statistics per attribute.")

3 Panel Performance

Panel performance is evaluated per assessor and attribute across three complementary domains: discrimination (ability to distinguish between samples), repeatability (consistency across replicates), and agreement (correlation with the panel mean). Row highlighting in the domain tabs reflects automated QC flags — 🟢 PASS · 🟡 MONITOR · 🔴 CONCERN — based on widely used thresholds in sensory science.

3.1 Summary Table

The panel performance table combines all three metrics into a single summary per assessor and attribute. Higher discrimination F-values and agreement correlations, combined with lower repeatability CVs, indicate a well-performing assessor.

.dt_plain(tables[["panel_performance"]],
          caption = "Table 3. Combined panel performance metrics per assessor and attribute.")

3.2 Performance Counts

The counts table summarises how many attributes each assessor performs well, moderately, or poorly on — providing a quick overview of overall panel reliability.

tables[["Table5_perf_counts"]] |>
  gt() |>
  tab_header(
    title    = "Table 4. Panel Performance Count Summary",
    subtitle = "Number of attributes per assessor meeting performance thresholds"
  ) |>
  tab_spanner_delim(delim = "_") |>
  .gt_style()
Table 4. Panel Performance Count Summary
Number of attributes per assessor meeting performance thresholds
Metric AS1 AS2 AS3 AS4 AS5 AS6 AS7 AS8
Discrimination 2.00 7.00 5.00 2.00 4.00 4.00 4.00 7.00
Repeatability 10.00 9.00 9.00 8.00 7.00 10.00 8.00 6.00
No interaction 5.00 5.00 4.00 8.00 6.00 5.00 7.00 9.00
Total 17.00 21.00 18.00 18.00 17.00 19.00 19.00 22.00

3.3 Mean Score Plot

The panel mean score plot displays the average rating for each sample across all attributes, providing an at-a-glance view of product differentiation at the panel level.

knitr::include_graphics("figs/R/panel_mean_scores.png")

3.4 Assessor Flag Summary

A concise QC summary — one row per assessor — aggregating performance across all attributes. Each domain (agreement, repeatability, discrimination) is flagged independently; no single composite score is produced. 🟢 PASS · 🟡 MONITOR · 🔴 CONCERN · ⬜ Insufficient data.

if (!is.null(tables[["assessor_flag_summary"]])) {
  dt <- tables[["assessor_flag_summary"]] |>
    mutate(across(where(is.numeric), \(x) round(x, 2))) |>
    datatable(
      filter    = "top",
      rownames  = FALSE,
      extensions = "Buttons",
      options   = list(
        dom        = "Bfrtip",
        buttons    = c("csv", "excel"),
        pageLength = 20,
        scrollX    = TRUE
      ),
      caption = htmltools::tags$caption(
        style = "caption-side: top; text-align: left;",
        "Table 4a. Assessor flag summary — per-domain QC flags aggregated across attributes."
      )
    )
  # Apply background colours to all flag columns (columns ending in _flag)
  flag_cols <- grep("_flag$", names(tables[["assessor_flag_summary"]]), value = TRUE)
  for (fc in flag_cols) dt <- .dt_flag_colours(dt, fc)
  dt
} else {
  cat("*Assessor flag summary table not available — re-run `run_all.R` to generate.*")
}

⚠️ Note: Performance flags are based on widely used guidelines in sensory science. Thresholds may be adjusted to reflect panel context, product category, and institutional SOPs.

3.5 Discrimination

One-way ANOVA per assessor per attribute. The F-value indicates how well each assessor distinguishes between samples on that attribute — a higher F-value reflects stronger discrimination.

.dt_flag_rows(
  tables[["assessor_discrimination"]],
  caption     = "Table 5. One-way ANOVA F-values per assessor and attribute.",
  page_length = 15
)

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05 · Row colour: 🟢 p ≤ 0.05 · 🟡 p ≤ 0.10 · 🔴 p > 0.10

⚠️ Note: Performance flags are based on widely used guidelines in sensory science. Thresholds may be adjusted to reflect panel context, product category, and institutional SOPs.

3.6 Discrimination Bars

Each bar represents an assessor’s F-value for a given attribute. Bar colour reflects the significance of the one-way ANOVA for that assessor–attribute combination.

knitr::include_graphics("figs/R/discrimination_bars.png")

3.7 Repeatability ANOVA

Two-way additive fixed-effects ANOVA (value ~ Sample + Replicate, Type III SS) fitted per assessor × attribute. No Sample × Replicate interaction is included — with typically only two replicates this term cannot be estimated reliably. A significant Sample effect confirms the assessor discriminates between products; a significant Replicate effect indicates session-to-session inconsistency.

.dt_flag_rows(
  tables[["repeatability_anova"]],
  caption     = "Table 8. Repeatability ANOVA results per assessor and attribute.",
  page_length = 15
)

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05 · Sample rows: 🟢 p ≤ 0.05 · 🔴 p > 0.10  ·  Replicate rows: 🟢 p > 0.10 (consistent) · 🔴 p ≤ 0.05 (session inconsistency)

⚠️ Note: Performance flags are based on widely used guidelines in sensory science. Thresholds may be adjusted to reflect panel context, product category, and institutional SOPs.

3.8 Assessor Agreement

Pearson correlation of each assessor’s scores with the panel mean per attribute. Values approaching 1 indicate the assessor aligns closely with the panel consensus; values near 0 or negative indicate systematic disagreement or scale reversal.

.dt_flag_rows(
  tables[["assessor_agreement"]],
  caption     = "Table 9. Assessor agreement (Pearson r with panel mean) per assessor and attribute.",
  page_length = 15
)

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05 · Row colour: 🟢 r ≥ 0.80 · 🟡 r ≥ 0.60 · 🔴 r < 0.60 or negative

⚠️ Note: Performance flags are based on widely used guidelines in sensory science. Thresholds may be adjusted to reflect panel context, product category, and institutional SOPs.


4 Panel ANOVA

A three-way fixed-effects ANOVA is run per attribute at the panel level using lm() with sum-to-zero contrasts and car::Anova() (Type III sums of squares). All three structural terms — Sample, Assessor, and Replicate — are treated as fixed effects (contrast with the mixed model below, where Assessor is random). Type III SS with sum-to-zero contrasts ensures correct marginal F-tests for unbalanced designs. This decomposes variance into the following effects:

  • Sample — significant product differences (the primary goal of profiling)
  • Assessor — systematic differences in scale use between panellists
  • Replicate — variation between repeat sessions; a significant effect indicates poor panel repeatability
  • Sample × Assessor — interaction indicating disagreement in how assessors rank samples relative to one another (included only when all assessor × sample cells are observed; dropped automatically for incomplete designs)

F-ratios are computed as the marginal mean square for each effect divided by the residual mean square. Significance is evaluated against an F-distribution with the corresponding degrees of freedom.

4.1 ANOVA F-Ratios

.dt_plain(tables[["Table6_anova_Fratios"]],
          caption = "Table 6. Three-way fixed-effects ANOVA F-ratios per attribute (wide format). Each cell shows the rounded F-ratio and significance stars for the Sample, Panellist, and Replicate main effects. See Table 7 for full SS, df, MS, and interaction term detail.")

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05

4.2 Panel ANOVA Detail

.dt_plain(tables[["panel_anova"]],
          caption = "Table 7. Three-way fixed-effects ANOVA results in long format — one row per effect, reporting SS, df, MS, F, and p per attribute. Includes the Sample × Assessor interaction where design is complete.")

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05

4.3 F-Value Heatmap

The heatmap displays F-values for each effect (rows) across all attributes (columns). Warm colours indicate higher F-values and stronger effects.

knitr::include_graphics("figs/R/fvalue_heatmap.png")

4.4 F-Value Dotplot

The dotplot provides an alternative view of the same F-values, ordered by magnitude per effect, making it easier to identify the most discriminating attributes.

knitr::include_graphics("figs/R/fvalue_dotplot.png")

4.5 P-Value Heatmap

The p-value heatmap highlights which attribute–effect combinations are statistically significant. Each cell is labelled with a significance symbol derived from the three-way fixed-effects ANOVA p-value for that attribute × effect combination.

knitr::include_graphics("figs/R/pvalue_heatmap.png")

4.6 Panel ANOVA F-Values Plot

knitr::include_graphics("figs/R/panel_anova_fvalues.png")


5 Profile Plots

Profile plots show how each assessor scores samples across all attributes relative to the panel mean (bold line). Deviations from the panel mean reveal individual biases in scale use or perception.

Note: One representative assessor profile (AS1) is shown below. Profiles for all eight assessors are available in figs/R/ and were generated using 02_profile_plots.R.

5.1 Assessor Profile (AS1)

knitr::include_graphics("figs/R/profile_AS1.png")

5.2 All Assessors Overview

The combined overview plot displays all assessors on a single figure, facilitating rapid identification of outliers or systematic scale deviations.

knitr::include_graphics("figs/R/profile_all_assessors.png")

5.3 Spider Plot

The spider (radar) chart displays panel mean scores for each sample across all attributes simultaneously, making it straightforward to compare overall sensory profiles between products.

knitr::include_graphics("figs/R/spider_plot.png")


6 Mixed Model ANOVA

A linear mixed model is fitted per attribute using lmerTest, with Sample as a fixed effect and Assessor, Assessor:Replicate, and Assessor:Sample as random effects. This approach accounts for the hierarchical structure of panel data and provides more accurate inference than a standard ANOVA.

Model formula:

score ~ Sample + (1 | Assessor) + (1 | Assessor:Replicate) + (1 | Assessor:Sample)

6.1 Fixed Effects

The fixed effects table reports the F-statistic and p-value for the overall Sample effect per attribute, derived from the mixed model using Satterthwaite denominator df approximation (lmerTest). This is an omnibus test of whether any sample differences exist — it does not identify which specific samples differ (see the Tukey HSD section for pairwise comparisons). A significant result confirms the panel reliably distinguishes between products on that attribute.

tables[["mixed_model_fixed"]] |>
  gt() |>
  tab_header(
    title    = "Table 10. Mixed Model Fixed Effects",
    subtitle = "F-test statistics for the Sample effect per attribute (Satterthwaite df)"
  ) |>
  .gt_style()
Table 10. Mixed Model Fixed Effects
F-test statistics for the Sample effect per attribute (Satterthwaite df)
Attribute F_value df_num df_den p_value sig
Bread-od 0.97 4.00 28.00 0.44 ns
Yeast-od 8.54 4.00 28.00 0.00 ***
Off-flav 5.51 4.00 28.00 0.00 **
Colour 15.60 4.00 60.00 0.00 ***
Moisture 27.98 4.00 28.00 0.00 ***
Tough 44.40 4.00 28.00 0.00 ***
Salt-t 57.11 4.00 28.00 0.00 ***
Sweet-t 22.30 4.00 28.00 0.00 ***
Yeast-t 9.60 4.00 28.00 0.00 ***
Other-t 23.12 4.00 28.00 0.00 ***

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05

6.2 Random Effects

Variance components quantify each random term’s share of total variability. % of total shows each component’s proportional contribution. The Assessor:Sample ratio (Assessor:Sample ÷ Residual) is the primary diagnostic for panel consensus — values below 1.0 indicate residual noise exceeds the signal; values above 2.0 indicate high disagreement on sample ordering. Row colour reflects the QC flag for each component.

if (!is.null(tables[["vc_flags"]])) {
  .dt_flag_rows(
    tables[["vc_flags"]],
    caption     = "Table 11. Mixed model random effects — variance components with QC flags per attribute.",
    page_length = 20
  )
} else {
  tables[["mixed_model_random"]] |>
    mutate(across(where(is.numeric), \(x) round(x, 2))) |>
    datatable(
      filter    = "top",
      rownames  = FALSE,
      extensions = "Buttons",
      options   = list(
        dom        = "Bfrtip",
        buttons    = c("csv", "excel"),
        pageLength = 15,
        scrollX    = TRUE
      ),
      caption = "Table 11. Mixed model random effects variance components per attribute."
    )
}

⚠️ Note: Performance flags are based on widely used guidelines in sensory science. Thresholds may be adjusted to reflect panel context, product category, and institutional SOPs.

6.3 Variance Breakdown

The stacked bar chart partitions total variance per attribute into four sources: Assessor (individual scale use), Assessor:Replicate (session consistency), Assessor:Sample (disagreement on sample ordering), and Residual (unexplained error). A large Assessor:Sample proportion relative to Residual indicates the panel disagrees on how samples are ranked.

knitr::include_graphics("figs/R/variance_components.png")


7 Post-hoc Analysis — Tukey HSD

Where the mixed model identifies a significant Sample effect, a Tukey HSD post-hoc test is applied to determine which specific sample pairs differ. Compact letter display (CLD) groups are assigned such that samples sharing a letter do not differ significantly at α = 0.05.

7.1 Estimated Marginal Means

Estimated marginal means (EMMs) are model-adjusted sample means that account for imbalance and random effects. They provide the best estimates of true sample differences per attribute.

.dt_plain(tables[["tukey_emmeans"]],
          caption = "Table 12. Estimated marginal means (EMMs) per sample and attribute.")

7.2 Compact Letter Display

CLD group letter per sample–attribute combination. Samples sharing a letter do not differ significantly at α = 0.05.

.dt_plain(tables[["tukey_cld"]],
          caption = "Table 13. Compact letter display (CLD) groups from Tukey HSD.")

7.3 Pairwise Contrasts

Full pairwise contrast table showing estimated differences, standard errors, t-ratios, and adjusted p-values for every sample pair per attribute.

.dt_plain(tables[["tukey_contrasts"]],
          caption = "Table 14. Tukey HSD pairwise contrasts per attribute.")

Significance key: *** p < 0.001  ·  ** p < 0.01  ·  * p < 0.05  ·  ns p ≥ 0.05

7.4 Means & CLD Plot

Points represent estimated marginal means ± 95% confidence intervals. CLD letters appear above each point; samples sharing a letter are not significantly different.

knitr::include_graphics("figs/R/tukey_means_cld.png")

7.5 Pairwise P-value Heatmap

The heatmap displays Tukey-adjusted p-values for all pairwise sample comparisons per attribute. Each cell is labelled with a significance symbol; only the lower triangle is shown as comparisons are symmetric.

knitr::include_graphics("figs/R/tukey_heatmap.png")


8 Consensus PCA

Principal Component Analysis (PCA) is performed on the consensus matrix — the grand mean of all assessor scores per sample and attribute, averaged across replicates. This provides a low-dimensional representation of sample and attribute relationships as perceived by the panel as a whole.

8.1 Biplot

The biplot overlays sample scores (points) and attribute loadings (arrows) in the space defined by the first two principal components. Samples close together are perceived as similar; attributes pointing in the same direction are positively correlated.

knitr::include_graphics("figs/R/pca_biplot.png")

8.2 Scree Plot

The scree plot shows the percentage of variance explained by each principal component, along with a cumulative variance line. Components to the left of the elbow are typically retained for interpretation.

knitr::include_graphics("figs/R/pca_scree.png")

8.3 Variable Loadings

Loading bars show the contribution of each sensory attribute to the first two principal components. Longer bars indicate attributes that drive the most variation in the consensus space.

knitr::include_graphics("figs/R/pca_loadings.png")

8.4 Sample Coordinates

The sample coordinate table reports the PCA scores (position on each component) for every sample. These values correspond to the point positions in the biplot.

tables[["pca_sample_coords"]] |>
  gt() |>
  tab_header(
    title    = "Table 15. PCA Sample Coordinates",
    subtitle = "Consensus matrix scores projected onto principal components"
  ) |>
  .gt_style()
Table 15. PCA Sample Coordinates
Consensus matrix scores projected onto principal components
Sample x y Dim.3 Dim.4
Bread1 −4.64 0.63 0.19 0.03
Bread2 0.80 −2.74 0.52 0.47
Bread3 0.73 −1.20 −0.03 −0.76
Bread4 2.07 2.46 1.15 0.07
Bread5 1.03 0.85 −1.84 0.19

8.5 Variable Coordinates

The variable coordinate table reports the PCA loadings (contribution of each attribute to each component), corresponding to the arrow endpoints in the biplot.

tables[["pca_variable_coords"]] |>
  gt() |>
  tab_header(
    title    = "Table 16. PCA Variable Coordinates (Loadings)",
    subtitle = "Sensory attribute contributions to each principal component"
  ) |>
  .gt_style()
Table 16. PCA Variable Coordinates (Loadings)
Sensory attribute contributions to each principal component
variable Dim.1 Dim.2 x y
Yeast-t 0.26 −0.94 0.96 −3.53
Yeast-od 0.42 −0.90 1.55 −3.35
Bread-od 0.57 0.82 2.14 3.07
Salt-t 0.74 0.61 2.77 2.29
Tough 0.85 0.48 3.16 1.78
Other-t −0.93 0.32 −3.49 1.18
Off-flav −0.87 0.40 −3.24 1.49
Sweet-t −0.98 0.03 −3.66 0.12
Moisture 0.97 −0.05 3.65 −0.17
Colour 0.51 0.01 1.90 0.04

9 Session Information

sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.3.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Australia/Brisbane
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] htmltools_0.5.9 gt_1.3.0        DT_0.34.0       dplyr_1.2.0    
## [5] purrr_1.2.1     readxl_1.4.5   
## 
## loaded via a namespace (and not attached):
##  [1] jsonlite_2.0.0    compiler_4.5.2    tidyselect_1.2.1  xml2_1.5.2       
##  [5] jquerylib_0.1.4   yaml_2.3.12       fastmap_1.2.0     R6_2.6.1         
##  [9] generics_0.1.4    knitr_1.51        htmlwidgets_1.6.4 tibble_3.3.1     
## [13] bslib_0.10.0      pillar_1.11.1     rlang_1.1.7       cachem_1.1.0     
## [17] xfun_0.56         fs_1.6.6          sass_0.4.10       otel_0.2.0       
## [21] cli_3.6.5         withr_3.0.2       magrittr_2.0.4    crosstalk_1.2.2  
## [25] digest_0.6.39     rstudioapi_0.18.0 lifecycle_1.0.5   vctrs_0.7.1      
## [29] evaluate_1.0.5    glue_1.8.0        cellranger_1.1.0  rmarkdown_2.30   
## [33] tools_4.5.2       pkgconfig_2.0.3