Mixed model in R using quadratic function

BB1464 · December 23, 2024, 3:48am

Hi all, I am currently working on a report and I have been struggling on how to extract estimate from a quadratic model and plot them on ggplot2 just as the plot in which I attached below. I will really appreciate it if someone can help me out. I have attached my sample data and code in the reprex script, along with a screenshot of expected output. Also, if I have new data, how do I get the predicted yield using the predict function? I keep running into issues here when I try. Thank you very much!

Data

data <- structure(
list(
Rep = structure(
c(
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L,
4L
),
levels = c("1", "2", "3", "4"),
class = "factor"
),
History = structure(
c(
1L,
1L,
1L,
1L,
1L,
3L,
3L,
3L,
3L,
3L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
2L,
1L,
1L,
1L,
1L,
1L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
3L,
2L,
2L,
2L,
2L,
2L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
1L,
3L,
3L,
3L,
3L,
3L,
2L,
2L,
2L,
2L,
2L
),
levels = c("corn", "fallow", "soybean"),
class = "factor"
),
N rate = c(
160,
240,
80,
0,
40,
240,
80,
160,
0,
40,
0,
160,
240,
40,
80,
0,
40,
160,
80,
240,
0,
80,
40,
160,
240,
80,
240,
160,
40,
0,
240,
0,
40,
160,
80,
160,
80,
0,
40,
240,
40,
240,
80,
0,
160,
80,
240,
0,
40,
160,
80,
240,
0,
160,
40,
40,
240,
0,
160,
80
),
Yield (lbs/ac) = c(
10985.3700627973,
10626.7366933948,
5547.72357794582,
2442.96833756322,
4535.35536257199,
11068.7231625113,
10676.0333370982,
11455.3286642235,
6842.05778552118,
9439.31658996695,
6966.3414993091,
10999.1589642535,
12438.2086896966,
6791.7839379994,
9862.08653890057,
4306.59412484229,
6975.16764818264,
9980.26897398618,
9312.70761429859,
11279.3297691799,
3174.336,
9235.65661712226,
6444.90877885251,
9436.78767774106,
11519.0999459297,
10207.4488963653,
11491.8371124061,
11931.3766680685,
10039.1662716732,
8351.84656869931,
11844.5646019826,
7700.01994208471,
8499.14824343647,
11444.3585086212,
10659.7688434965,
11939.8919170922,
9799.12857386602,
6272.4515758486,
7126.201929228,
12908.662288495,
7787.12424007209,
11913.2103752478,
9404.56765539201,
5692.32596936017,
10993.155180775,
9330.03591132472,
11105.6148984079,
4222.47887149294,
7673.86524241514,
10543.104,
10496.7546405527,
11668.0546778011,
8400.19000444578,
11625.6989111445,
10079.0619453289,
7634.65065545209,
11433.044589967,
6326.05256881946,
11177.1853380595,
9732.41297446681
)
),
row.names = c(NA, -60L),
class = c("tbl_df", "tbl", "data.frame")
)

Packages

library(tidyverse)
library(emmeans)
library(ggpmisc)

Convert History and Rep to factor

data$History <- factor(data$History)
data$Rep <- data$Rep

Model fitting

mod3_quad <- lmer(
Yield (lbs/ac) ~ History * N rate + History * I(N rate^2) +
(1 | History:N rate) + (1 | Rep),
data = data
)

Summary

summary(mod3_quad)
anova(mod3_quad)

Post Hoc Test

emmeans::emmeans(object = mod3_quad,specs = ~History) %>% multcomp::cld(Letters=letters,reverse=T)

Augmenting and adding pearson standardized residuals

mod3_quad_aug <- augment(mod3_quad) %>%
mutate(.stdresid = resid(mod3_quad,
type = "pearson",
scaled = T))

mod3_quad_aug

Final Plot

ggplot(mod3_quad_aug,
aes(x = N rate,
y = Yield (lbs/ac)))+
geom_point(size = 3, alpha = .7)+
geom_line(aes(y = .fixed),
color = "red",linewidth=0.7)+
facet_grid(~History)+
labs(y='Yield (lb/ac)',x='N rate (lb/ac)',title = '2024 Missouri-Columbia')+
#stat_poly_line(formula = formula)+
#facet_grid(~History)+
#stat_poly_eq(use_label('R2','P'),label.x = 'right',label.y = 'bottom',size=6)+
#stat_smooth(method = "lm", col = "blue",se=F,formula = y~poly(x,2))+ # This add polynomial curve to the graph
theme_bw()+
coord_cartesian(clip = 'off')+
theme(panel.grid = element_blank(),
strip.text.x = element_text(family = 'serif',face = 'bold',colour = 'black',size = 16),
axis.text = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
axis.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
plot.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 18),
plot.title.position = 'plot')

Expected output

jrkrideau · December 23, 2024, 2:31pm

I don't know if I can help with the actual question but your sample data is not reading in. You have some spaces and illegal characters in the variable names.

I made some quick edits and this should work with the new variable names

data <- structure(list(Rep = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L), levels = c("1", "2", "3", "4"), class = "factor"), 
    History = structure(c(1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
    1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L), levels = c("corn", "fallow", "soybean"
    ), class = "factor"), N_rate = c(160, 240, 80, 0, 40, 240, 
    80, 160, 0, 40, 0, 160, 240, 40, 80, 0, 40, 160, 80, 240, 
    0, 80, 40, 160, 240, 80, 240, 160, 40, 0, 240, 0, 40, 160, 
    80, 160, 80, 0, 40, 240, 40, 240, 80, 0, 160, 80, 240, 0, 
    40, 160, 80, 240, 0, 160, 40, 40, 240, 0, 160, 80), Yield_lbs_ac = c(10985.3700627973, 
    10626.7366933948, 5547.72357794582, 2442.96833756322, 4535.35536257199, 
    11068.7231625113, 10676.0333370982, 11455.3286642235, 6842.05778552118, 
    9439.31658996695, 6966.3414993091, 10999.1589642535, 12438.2086896966, 
    6791.7839379994, 9862.08653890057, 4306.59412484229, 6975.16764818264, 
    9980.26897398618, 9312.70761429859, 11279.3297691799, 3174.336, 
    9235.65661712226, 6444.90877885251, 9436.78767774106, 11519.0999459297, 
    10207.4488963653, 11491.8371124061, 11931.3766680685, 10039.1662716732, 
    8351.84656869931, 11844.5646019826, 7700.01994208471, 8499.14824343647, 
    11444.3585086212, 10659.7688434965, 11939.8919170922, 9799.12857386602, 
    6272.4515758486, 7126.201929228, 12908.662288495, 7787.12424007209, 
    11913.2103752478, 9404.56765539201, 5692.32596936017, 10993.155180775, 
    9330.03591132472, 11105.6148984079, 4222.47887149294, 7673.86524241514, 
    10543.104, 10496.7546405527, 11668.0546778011, 8400.19000444578, 
    11625.6989111445, 10079.0619453289, 7634.65065545209, 11433.044589967, 
    6326.05256881946, 11177.1853380595, 9732.41297446681)), row.names = c(NA, 
-60L), class = c("tbl_df", "tbl", "data.frame"))

jrkrideau · December 23, 2024, 3:41pm

There seems to be some inconsistencies in your code. You say

# Convert History and Rep to factor ---------------------------------------
data$History <- factor(data$History)
data$Rep <- data$Rep

but you are not converting Rep to a factor.

You are making a call to the lmer() function which appears to be in the {lme4} package but you are not loading {lme4}.

mod3_quad <- lmer(Yield_lbs_ac ~ Hi_rate + (1 | Rep), data = data )

You have a variable Hi_rate in the above regression equation that is not defined.

BB1464 · December 23, 2024, 3:52pm

Thank you very much for formatting the data. I am still hoping that I will get help on how to go about this.

BB1464 · December 26, 2024, 1:45pm

Good morning all, I have been able to figure this out to a certain point, but I have one question: why is the model estimate (intercept) from the quadratic model obtained with the summary(mod3_quad) different from what is been shown in ggplot2 with stat_regline_equation ?. I would be grateful if someone can help me with this.

#### Data

data <- structure(list(N_rate = c(160, 240, 80, 0, 40, 240, 80, 160,
0, 40, 0, 160, 240, 40, 80, 0, 40, 160, 80, 240, 0, 80, 40, 160,
240, 80, 240, 160, 40, 0, 240, 0, 40, 160, 80, 160, 80, 0, 40,
240, 40, 240, 80, 0, 160, 80, 240, 0, 40, 160, 80, 240, 0, 160,
40, 40, 240, 0, 160, 80), Rep = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), levels = c("1", "2", "3", "4"), class = "factor"),
History = c("corn", "corn", "corn", "corn", "corn", "soybean",
"soybean", "soybean", "soybean", "soybean", "fallow", "fallow",
"fallow", "fallow", "fallow", "fallow", "fallow", "fallow",
"fallow", "fallow", "corn", "corn", "corn", "corn", "corn",
"soybean", "soybean", "soybean", "soybean", "soybean", "soybean",
"soybean", "soybean", "soybean", "soybean", "fallow", "fallow",
"fallow", "fallow", "fallow", "corn", "corn", "corn", "corn",
"corn", "corn", "corn", "corn", "corn", "corn", "soybean",
"soybean", "soybean", "soybean", "soybean", "fallow", "fallow",
"fallow", "fallow", "fallow"), Yield_lbs_ac = c(11024.0401420118,
10589.3268071006, 5612.81495857988, 2494.55818224852, 4577.92683550296,
10993.541112426, 10791.4714508876, 11404.4138414201, 6948.15507692308,
9563.53590532544, 6976.26217278106, 10937.3606627219, 12324.522887574,
6944.91584378698, 9980.29661538461, 4398.64053017751, 7091.51363786982,
10029.6218130178, 9446.18868639053, 11202.717216568, 3268.25126627219,
9433.05367100592, 6590.21981538462, 9450.22648047337, 11332.6764497041,
10341.7766627219, 11265.3780071006, 11808.3169704142, 10124.1556828402,
8530.35362840237, 11569.4328994083, 7765.20664615385, 8680.80363550296,
11406.9257467456, 10724.9921893491, 11886.8233846154, 9962.57831005917,
6435.95133727811, 7311.95582485207, 12623.9701207101, 7853.04835029586,
11888.2274272189, 9539.36537751479, 5793.95332544379, 10970.1016615385,
9452.81711715976, 10899.8058035503, 4292.90993609467, 7828.87782248521,
10483.8680236686, 10573.2993514793, 11492.9191384615, 8491.01981538462,
11683.1884686391, 10199.8727573964, 7833.6579408284, 11368.8085207101,
6461.26159526627, 11193.1026177515, 9871.90977514793)), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))



### Packages

library(tidyverse)
library(ggpmisc)
#> Loading required package: ggpp
#> Registered S3 methods overwritten by 'ggpp':
#>   method                  from   
#>   heightDetails.titleGrob ggplot2
#>   widthDetails.titleGrob  ggplot2
#> 
#> Attaching package: 'ggpp'
#> The following object is masked from 'package:ggplot2':
#> 
#>     annotate
library(lmerTest)
#> Loading required package: lme4
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#> 
#>     expand, pack, unpack
#> 
#> Attaching package: 'lmerTest'
#> The following object is masked from 'package:lme4':
#> 
#>     lmer
#> The following object is masked from 'package:stats':
#> 
#>     step
library(broom.mixed)
library(nlraa)
library(performance)
library(ggpubr)
#> 
#> Attaching package: 'ggpubr'
#> The following objects are masked from 'package:ggpp':
#> 
#>     as_npc, as_npcx, as_npcy


## Convert history to factor

data$History <- factor(data$History)




# Model fitting

mod3_quad <- lmer(Yield_lbs_ac ~ History*N_rate+History*I(N_rate^2)+(1|History:N_rate)+(1|Rep), data = data)
#> Warning: Some predictor variables are on very different scales: consider
#> rescaling
#> boundary (singular) fit: see help('isSingular')
#> Warning: Some predictor variables are on very different scales: consider
#> rescaling


summary(mod3_quad) 
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: Yield_lbs_ac ~ History * N_rate + History * I(N_rate^2) + (1 |  
#>     History:N_rate) + (1 | Rep)
#>    Data: data
#> 
#> REML criterion at convergence: 940.7
#> 
#> Scaled residuals: 
#>     Min      1Q  Median      3Q     Max 
#> -2.9317 -0.5454  0.0419  0.5803  1.6380 
#> 
#> Random effects:
#>  Groups         Name        Variance Std.Dev.
#>  History:N_rate (Intercept)      0     0.0   
#>  Rep            (Intercept) 128933   359.1   
#>  Residual                   699369   836.3   
#> Number of obs: 60, groups:  History:N_rate, 15; Rep, 4
#> 
#> Fixed effects:
#>                              Estimate Std. Error         df t value Pr(>|t|)
#> (Intercept)                4127.30958  412.28885   29.68559  10.011 5.00e-11
#> Historyfallow              1814.04280  524.87864   48.00000   3.456  0.00116
#> Historysoybean             3902.04380  524.87864   48.00000   7.434 1.59e-09
#> N_rate                       65.59938    8.18101   48.00000   8.018 2.06e-10
#> I(N_rate^2)                  -0.15246    0.03249   48.00000  -4.692 2.28e-05
#> Historyfallow:N_rate        -15.24262   11.56969   48.00000  -1.317  0.19394
#> Historysoybean:N_rate       -24.30378   11.56969   48.00000  -2.101  0.04095
#> Historyfallow:I(N_rate^2)     0.04476    0.04595   48.00000   0.974  0.33487
#> Historysoybean:I(N_rate^2)    0.03699    0.04595   48.00000   0.805  0.42481
#>                               
#> (Intercept)                ***
#> Historyfallow              ** 
#> Historysoybean             ***
#> N_rate                     ***
#> I(N_rate^2)                ***
#> Historyfallow:N_rate          
#> Historysoybean:N_rate      *  
#> Historyfallow:I(N_rate^2)     
#> Historysoybean:I(N_rate^2)    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Correlation of Fixed Effects:
#>                (Intr) Hstryf Hstrys N_rate I(N_^2 Hstryf:N_ Hstrys:N_
#> Historyfllw    -0.637                                                
#> Historysybn    -0.637  0.500                                         
#> N_rate         -0.678  0.532  0.532                                  
#> I(N_rate^2)     0.552 -0.434 -0.434 -0.964                           
#> Hstryfll:N_     0.479 -0.753 -0.376 -0.707  0.682                    
#> Hstrysyb:N_     0.479 -0.376 -0.753 -0.707  0.682  0.500             
#> Hstryf:I(N_^2) -0.390  0.613  0.307  0.682 -0.707 -0.964    -0.482   
#> Hstrys:I(N_^2) -0.390  0.307  0.613  0.682 -0.707 -0.482    -0.964   
#>                Hstryf:I(N_^2)
#> Historyfllw                  
#> Historysybn                  
#> N_rate                       
#> I(N_rate^2)                  
#> Hstryfll:N_                  
#> Hstrysyb:N_                  
#> Hstryf:I(N_^2)               
#> Hstrys:I(N_^2)  0.500        
#> fit warnings:
#> Some predictor variables are on very different scales: consider rescaling
#> optimizer (nloptwrap) convergence code: 0 (OK)
#> boundary (singular) fit: see help('isSingular')





### Visualization

data %>% 
  ggplot(aes(N_rate, Yield_lbs_ac)) +
  geom_point(size = 4,
             alpha = 0.5) +
  geom_line(stat="smooth",
            method = "nls",
            formula = y ~ SSquadp3xs(x, a, b, jp),
            se = FALSE,
            color = "#CC0000",linewidth=1.2) +
  stat_regline_equation(
    aes(label = paste(after_stat(eq.label), after_stat(adj.rr.label), sep = "~~~~")),
    formula = y~poly(x,degree = 2,raw = T),
    label.y=1000,
    label.x = 70,
    size=5
  )+
  facet_grid(~History)+
  labs(title = paste(
    "2024 Missouri-Columbia"),
    y = expression('Yield at @15.5%'~'lb acre'^{"-1"}),
    x = expression(Nitrogen~'lb acre'^{"-1"}))+
  theme_bw()+
  coord_cartesian(clip = 'off')+
  theme(panel.grid = element_blank(),
        strip.text.x = element_text(family = 'serif',face = 'bold',colour = 'black',size = 16),
        axis.text = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
        axis.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
        plot.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 18),
        plot.title.position = 'plot')

^{Created on 2024-12-26 with reprex v2.1.1}

danomene · December 27, 2024, 9:05pm

Unless I'm missing something, the issue is that the regline is only displaying two significant figures. This could be adjusted with options(digits = 4), but it isn't clear if that accuracy on that variable is justified.

BB1464 · December 27, 2024, 9:21pm

@danomene, thank you very much, I just tried that now and stat_regline_equation does not accept the use of options(digits=4). Can you show me how to specify it ?

BB1464 · December 28, 2024, 1:47pm

I have finally figured this out today with the code below

data %>% 
  ggplot(aes(N_rate, Yield_lbs_ac)) +
  geom_point(size = 4,
             alpha = 0.5) +
  geom_line(stat="smooth",
            method = "nls",
            formula = y ~ SSquadp3xs(x, a, b, jp),
            se = FALSE,
            color = "#CC0000",linewidth=1.2) +
  stat_poly_eq(use_label('eq','adj.rr.label','P'),label.x = 'right',label.y = 'bottom',size=3,formula = y~poly(x,2,raw = T,coefs = F),coef.digits = 4)+ # This fixed the issue for me but I am still figuring out how to round the slope and quadratic term
  facet_grid(~History)+
  labs(title = paste(
    "2024 Missouri-Columbia"),
    y = expression('Yield at @15.5%'~'lb acre'^{"-1"}),
    x = expression(Nitrogen~'lb acre'^{"-1"}))+
  theme_bw()+
  coord_cartesian(clip = 'off')+
  theme(panel.grid = element_blank(),
        strip.text.x = element_text(family = 'serif',face = 'bold',colour = 'black',size = 16),
        axis.text = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
        axis.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 13),
        plot.title = element_text(family = 'serif',face = 'bold',colour = 'black',size = 18),
        plot.title.position = 'plot')

system · January 4, 2025, 1:48pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.