I can't figure out why the number of outliers is always 0 when the number of non outliers is not zero. It makes no difference in the size of the dataset df2 the end result is always the same??? Am I reading this wrong?

I am following an example:

```
# create a new dataframe that contains only those rows
# that have a z-score of below 3
new_data <- subset(data, data$zscore < 3)
```

My data sets:

dput(head(df2))

structure(list(id = 1:6, id2 = 0:5, z = c(0L, 0L, 0L, 0L, 0L,

0L), x1 = c(7.4, 7.8, 7.8, 11.2, 7.4, 7.4), x2 = c(0.7, 0.88,

0.76, 0.28, 0.7, 0.66), x3 = c(0, 0, 0.04, 0.56, 0, 0), x4 = c(1.9,

2.6, 2.3, 1.9, 1.9, 1.8), x5 = c(0.076, 0.098, 0.092, 0.075,

0.076, 0.075), x6 = c(11, 25, 15, 17, 11, 13), x7 = c(34, 67,

54, 60, 34, 40), x8 = c(0.9978, 0.9968, 0.997, 0.998, 0.9978,

0.9978), x9 = c(3.51, 3.2, 3.26, 3.16, 3.51, 3.51), x10 = c(0.56,

0.68, 0.65, 0.58, 0.56, 0.56), x11 = c(9.4, 9.8, 9.8, 9.8, 9.4,

9.4), y = c(5L, 5L, 5L, 6L, 5L, 5L), y2 = c(0L, 0L, 0L, 1L, 0L,

0L), y3 = c(1L, 1L, 1L, 2L, 1L, 1L)), row.names = c(NA, 6L), class = "data.frame")

dput(head(df3))

structure(list(z = c(-1.75005514316603, -1.75005514316603, -1.75005514316603,

-1.75005514316603, -1.75005514316603, -1.75005514316603), x1 = c(0.142462300205994,

0.451001010798382, 0.451001010798382, 3.07358005083368, 0.142462300205994,

0.142462300205994), x2 = c(2.18866446400268, 3.28198233904062,

2.55310375568199, -0.362410577752516, 2.18866446400268, 1.94570493621647

), x3 = c(-2.19266375510471, -2.19266375510471, -1.91740510037435,

1.6609574111204, -2.19266375510471, -2.19266375510471), x4 = c(-0.744720785192258,

-0.597594077620892, -0.660648380865763, -0.744720785192258, -0.744720785192258,

-0.765738886273882), x5 = c(0.569913952190335, 1.19788250632519,

1.0266183551975, 0.541369927002388, 0.569913952190335, 0.541369927002388

), x6 = c(-1.10005519223097, -0.311296125454904, -0.87469545886638,

-0.762015592184085, -1.10005519223097, -0.987375325548675), x7 = c(-1.44624721020492,

-0.862402248309921, -1.09240177875341, -0.986248149317952, -1.44624721020492,

-1.34009358076947), x8 = c(1.03491316497404, 0.701432322361402,

0.768128490883923, 1.10160933349656, 1.03491316497404, 1.03491316497404

), x9 = c(1.81294997139708, -0.11506417365602, 0.258099854418771,

-0.36384019237255, 1.81294997139708, 1.81294997139708), x10 = c(0.193081910246498,

0.999501691167798, 0.797896745937473, 0.327485207066714, 0.193081910246498,

0.193081910246498), x11 = c(-0.915393708652846, -0.58002349000728,

-0.58002349000728, -0.58002349000728, -0.915393708652846, -0.915393708652846

), y = c(-0.937157483579359, -0.937157483579359, -0.937157483579359,

0.207983041305932, -0.937157483579359, -0.937157483579359)), row.names = c(NA,

6L), class = "data.frame")

I have 11 variables which I am checking one at a time for outliers. I am repeating this type of statement.

```
df <- read.csv("df.csv",header=TRUE)
df2 <- read.csv("df2.csv",header=TRUE)
df3 <- read.csv("df3.csv",header=TRUE)
data <- df2
names(data)
data$x1z <- df3$x1
data$x2z <- df3$x2
data$x3z <- df3$x3
data$x4z <- df3$x4
data$x5z <- df3$x5
data$x6z <- df3$x6
data$x7z <- df3$x7
data$x8z <- df3$x8
data$x9z <- df3$x9
data$x10z <- df3$x10
data$x11z <- df3$x11
names(data)
dim(data)
# keep only rows with no outliers
datakeep <- data
datakeep <- subset(datakeep,abs(datakeep$x1z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x2z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x3z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x4z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x5z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x6z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x7z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x8z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x9z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x10z)<= 3)
datakeep <- subset(datakeep,abs(datakeep$x11z)<= 3)
dim(datakeep) #6009 x 28 or 938 outliers
names(datakeep)
```

# create a new dataframe that contains only those rows

# that have a z-score > 3

```
df <- read.csv("df.csv",header=TRUE)
df2 <- read.csv("df2.csv",header=TRUE)
df3 <- read.csv("df3.csv",header=TRUE)
data <- df2
names(data)
data$x1z <- df3$x1
data$x2z <- df3$x2
data$x3z <- df3$x3
data$x4z <- df3$x4
data$x5z <- df3$x5
data$x6z <- df3$x6
data$x7z <- df3$x7
data$x8z <- df3$x8
data$x9z <- df3$x9
data$x10z <- df3$x10
data$x11z <- df3$x11
names(data)
dim(data)
# keep only rows with outliers
datakeep2 <- data
datakeep2 <- subset(datakeep2,abs(datakeep2$x1z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x2z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x3z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x4z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x5z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x6z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x7z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x8z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x9z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x10z)>3)
datakeep2 <- subset(datakeep2,abs(datakeep2$x11z)>3)
dim(datakeep2) # 0 x 28
names(datakeep2)
head(datakeep2,25)
```