Here is some example data:

```
exdf <- structure(list(TENURE = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100),
GrowthRate = c(0.522068407541269,
0.285714748576455, 0.185508515947955, 0.197014234790025, 0.0955682840302288,
0.104550241818405, 0.0729165843177029, 0.052710172057008, 0.111205299930434,
0.0842167291103291, 0.0435357167369297, 0.0342561790820088, 0.0697015114811705,
0.0567551900498326, 0.0367956747574549, 0.0300708018467208, 0.0351762377682974,
0.0355785502062602, 0.0212760525934605, 0.0363614825795171, 0.0283660359906612,
0.0276822491674977, 0.0280297636181288, 0.0176973848620943, 0.0269962225046196,
0.0232995719858735, 0.0132348057981257, 0.0231117576016402, 0.0200135248674158,
0.0138992243554679, 0.0204646137008204, 0.0147834886226441, 0.0177097949074607,
0.014717031805624, 0.0132285475095113, 0.0212633514283631, 0.00842835296173483,
0.0145954105765114, 0.0101416096952409, 0.0134118780421613, 0.00938878674984167,
0.0129743252759109, 0.0109985830231452, 0.0283401588275556, 0.0204205991521889,
0.0093976137766294, 0.00910728837933128, 0.00916694752719849,
0.00741753979164272, 0.00881249103330362, 0.0111578966749217,
0.0192836374335688, 0.0120252563211682, 0.00586430387931181,
0.0143027002879901, 0.00370704967795099, 0.00682983045111385,
0.0141476105093492, 0.00482637041824496, 0.00420690319208639,
0.00376569470107668, 0.0109937747749598, 0.0140867803731126,
0.00371637851743323, 0.00717512598946612, 0.00768842197231479,
0.00251049125312619, 0.00187703000356798, 0.0048259385225542,
0.0100417595068638, 0.00374772165540982, 0.0171614859902469,
0.0149367579256126, 0.0115188216576438, 0.00490666943969309,
0.00530930735473234, 0.00723074299963677, 0.00592049387249816,
0.00484467566849744, 0.0124063039986577, 0.00573257723329412,
0.00427949785279758, 0.0030963281113916, 0.00252914919374447,
0.00635090782429515, 0.00210452127168104, 0.00804075169058649,
0.00436670408574535, 0.0035060879778257, 0.00713604840396798,
0.0023981009089411, 0.00175704532860088, 0.000779919174233257,
0.00340671636283396, 0.00460636923636137, 0.00120733393667471,
0.00933279440430645, 0.00305741675516025, 0.00304857402624847,
0.641110630691566, 0.281083708481502, 0.21664831342118, 0.142526508746734,
0.129455004633227, 0.102003156382356, 0.0774235271374781, 0.0549670789691081,
0.0809027507369802, 0.0460263351173751, 0.0653262071979981, 0.047154349769059,
0.0385947285426713, 0.0430288757773969, 0.0406360525819185, 0.0430116335897406,
0.0588492430703393, 0.0532880434533727, 0.0428311618518791, 0.0318427099482772,
0.028791324553973, 0.023804027169529, 0.0201765096165349, 0.0333877840776626,
0.02423781438746, 0.0209281238862644, 0.0152334901166924, 0.0206101546399822,
0.0192309106927642, 0.0217159111858543, 0.0204661728941318, 0.0147045919062503,
0.00963324607321603, 0.0168095825286532, 0.00772585884807775,
0.0141943686223769, 0.0105122199425551, 0.00730692991230875,
0.01505280063156, 0.00993250009012492, 0.0190685740493759, 0.0138696114197483,
0.00991543096798608, 0.0140457461257082, 0.0238644668564838,
0.014335212464454, 0.016231440976755, 0.0136006335528513, 0.010508195169141,
0.0108223681012323, 0.00724634873393271, 0.0114878660557896,
0.010768179227167, 0.0125569796266074, 0.00746257416521345, 0.00541761544241481,
0.00915133561343495, 0.0128090265857459, 0.0128834975364693,
0.00616011529066718, 0.0124072342143222, 0.00813729534453778,
0.00918992619169501, 0.00776227156683262, 0.00770248250797501,
0.00588355687767006, 0.00690140085420765, 0.00508911144051716,
0.00911620445070582, 0.00830466702459098, 0.00520036975577831,
0.00716886970824682, 0.00303064818576892, 0.0053193981576598,
0.00327658374593653, 0.00321651271471524, 0.00513516303969652,
0.00823984725674798, 0.00417642468764257, 0.014131171763804,
0.00787422067561749, 0.00692890013640657, 0.0121753072926065,
0.0070034266847312, 0.00540091988640867, 0.00689103254072876,
0.00506455733404643, 0.00598304422775975, 0.00661625550389644,
0.00732959383078224, 0.00720982160244255, 0.00564679063948148,
0.00435863414775284, 0.00556842035294203, 0.00603506069392346,
0.00548744139395829, 0.00660639356662429, 0.0045943028618467,
0.00490921014265311, 0.713632131494036, 0.316109107238324, 0.19955189191854,
0.149362726365638, 0.140527584352695, 0.122967115856381, 0.0629710542708235,
0.0794423836726601, 0.0723283480042465, 0.048411305500041, 0.0878323397155594,
0.0552740672330323, 0.0540408087274855, 0.0711265884999275, 0.0461176344041601,
0.0421688829740532, 0.0408229960327535, 0.0337191263188075, 0.0365627089380531,
0.0343486264915729, 0.0369672518324808, 0.0324027127903612, 0.0335280505737625,
0.0241231062680551, 0.0315868501489174, 0.0260876281368159, 0.0358190565406051,
0.031770274327048, 0.0307189115217597, 0.0204031908865456, 0.0304622366438121,
0.0488170927658, 0.0395661396435187, 0.0234030909209384, 0.0225458543480528,
0.0312754025113176, 0.0135110740643416, 0.0187184090181205, 0.013568316123143,
0.0170345821576348, 0.0184100113763801, 0.019219623166352, 0.0137349197436514,
0.0131633552715194, 0.0157758351970561, 0.0117732814253362, 0.0111181879788198,
0.0193989282426799, 0.0121149862839278, 0.0136437991248339, 0.0111608014682734,
0.011828405955594, 0.00944009280512503, 0.0123120446979428, 0.0175349768061963,
0.0243703580512893, 0.00775361404112473, 0.0115755699642719,
0.00869789602322868, 0.0230748185953917, 0.011388761136665, 0.0144514590777103,
0.0114791528596783, 0.0121005997846328, 0.00959154057558642,
0.0125793490404558, 0.0106768149670486, 0.00824512497201191,
0.0076573705531775, 0.0104520640098951, 0.0115961878675463, 0.0132315972263175,
0.00453620536761079, 0.00996315028677053, 0.00699906839894915,
0.0100602309904119, 0.00881275005589721, 0.00840590467502622,
0.00410243169790725, 0.00393864651260856, 0.0027632056756115,
0.00604623344076316, 0.00644190206596029, 0.0073296018951492,
0.0062144137386646, 0.00779570731803325, 0.00465780201362875,
0.0141815826452891, 0.0121929551749531, 0.00573235434597308,
0.00512844095153575, 0.0122186637666211, 0.00354975243105926,
0.0158151971904505, 0.006405119131486, 0.00504244314036839, 0.00623419419033411,
0.00664127009174464, 0.00428700468190435)), row.names = c(NA,
-297L), class = c("tbl_df", "tbl", "data.frame"))
```

I would like to model GrowthRate as a function of Tenure. Here are some plots of the relationship between these variables:

```
exdf |> ggplot(aes(x = TENURE, y = GrowthRate)) +
geom_point()
```

### Observation from above plot: GrowthRate decays over time.

There is also a linear relationship when I take the log of both variables:

```
exdf |>
ggplot(aes(x = log(TENURE), y = log(GrowthRate))) +
geom_point()
```

### observation from above plot: There's a linear relationship with the log of both variables

I have two approaches to my goal of modeling `GrowthRate ~ Tenure`

:

- A linear model of
`log(GrowthRate) ~ log(TENURE)`

- A power law model using nls with
`GrowthRate ~ I(TENURE^power)`

Here are both models:

Linear model:

```
mod.lm <- lm(log(GrowthRate) ~ log(TENURE), data = exdf)
mod.lm |> summary()
Call:
lm(formula = log(GrowthRate) ~ log(TENURE), data = exdf)
Residuals:
Min 1Q Median 3Q Max
-1.90334 -0.22186 0.01676 0.24309 1.11874
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.13426 0.10617 1.265 0.207
log(TENURE) -1.18576 0.02815 -42.125 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.4134 on 295 degrees of freedom
Multiple R-squared: 0.8575, Adjusted R-squared: 0.857
F-statistic: 1775 on 1 and 295 DF, p-value: < 2.2e-16
```

And here is the nls power law model:

```
mod.nls <- nls(GrowthRate ~ I(TENURE^power), data = exdf, start = list(power = 1), trace = T)
mod.nls |> summary()
Formula: GrowthRate ~ I(TENURE^power)
Parameters:
Estimate Std. Error t value Pr(>|t|)
power -1.08564 0.01093 -99.28 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.02001 on 296 degrees of freedom
Number of iterations to convergence: 13
Achieved convergence tolerance: 1.417e-06
```

A plot of how the nls power law model fits:

```
exdf$PowerFit <- predict(mod.nls)
exdf |> ggplot(aes(x = TENURE, y = GrowthRate)) +
geom_point() +
geom_point(aes(x = TENURE, y = PowerFit), color = 'blue')
```

A plot of how the lm fits:

```
exdf$LmFit <- predict(mod.lm) |> exp()
exdf |> ggplot(aes(x = TENURE, y = GrowthRate)) +
geom_point() +
geom_point(aes(x = TENURE, y = LmFit), color = 'blue')
```

Also, here's a residuals plot of the lm, notice there exists hetroskedasticity:

A few observations and then a few questions:

## observations & questions:

- Both plots of the fit look pretty good, the blue through the black.
- I notice the coefficient in the linear model -1.18576 is somewhat similar to the power estimate in nls -1.08564. Is that just coincidence? Logs and powers always trip me up.
- Without testing on out of sample new data for prediction, is there a way I can compare both models and judge which fits best? With the
`lm()`

I can look at`R^2`

. But both have RSE however since the target variable of the models are on different scales I presumably cannot compare them? Could I just`exp()`

the RSE on the log model and then compare and decide which model is best based on smallest RSE? - the lm model has hetroskedasticity. I know that with hetroskedasticity my standard errors of lm might be unreliable. I could attempt to calculate robust standard errors. Or I could just use the nls approach... would that negate this concern? Non hetroskedasticity is a
`lm()`

assumption, does it matter for the nls power law approach?

Open ended question: Which approach is 'better'? How should I compare the models and decide? Have I any blind spots that I should consider?