Systematic Sampling

Navien · November 24, 2019, 8:13am

Hi,
I have an issue with sampling code below. Iam trying to use it with my CSV file but iam getting error. I read The csv file and save it as frame then name it df, but its not working ? The csv file are composit of ID, N1, N2 where ID are string the N1, N2 , real numbers.

Please any help ?
############### Sampling method. ##################

Systematic sampling method on a data.frame.

rw.sample <- function(df = NULL, n = NULL) {

N <- nrow(df)
int <- ceiling(N / n)

Creating an `id` column.

id.creator <- function(df = NULL) {
print("Creating 'id' columns -- using a highly inefficient method.")

# Create progress bar.
pb <- txtProgressBar(min = 0, max = N, style = 3)

a <- data.frame(1)
for (i in 1:nrow(df)) {

  # Update progress bar.
  setTxtProgressBar(pb, i)

  x <- i
  a <- rbind(a, x)
}
close(pb)
a <- data.frame(a[1:nrow(a) - 1,])
df <- cbind(a, df)
colnames(df)[1] <- "id"
return(df)

}
df <- id.creator(df = df)

Function for sampling.

sampler <- function(df = NULL) {
print("Sampling.")

# Create progress bar.
pb <- txtProgressBar(min = 0, max = n, style = 3)

a <- data.frame()
x <- as.numeric(df$id[sample(nrow(df), 1)])
for (i in 1:n) {

  # Update progress bar.
  setTxtProgressBar(pb, i)

  if (((x + int) > nrow(df)) == FALSE) {
    x <- x + int
  }
  if (((x + int) > nrow(df)) == TRUE) {
    x <- ((x + int) - nrow(df))
  }
  a <- rbind(a, x)
}
close(pb)
a <- data.frame(a)
colnames(a)[1] <- "id"
a <- merge(df, a)
return(a)

}
a <- sampler(df = df)
print("Done.")
return(a)
}

raytong · November 24, 2019, 2:53pm

Hi @Navien. I ran your code and it work well. So, what is the error?

Navien · November 27, 2019, 2:54pm

I used CSV file and I saved it as frame then i sign it to df and its not running ?

Please could yu send me your data format ?

Navien · November 28, 2019, 1:04pm

the Image above is my data, which i would like to do the sampling for.

raytong · November 29, 2019, 2:39am

@Navien. The follow are the reprex of my run. Can you indicate what error you got?

rw.sample <- function(df = NULL, n = NULL) {
  
  N <- nrow(df)
  int <- ceiling(N / n)
  
  #Creating an id column.
  id.creator <- function(df = NULL) {
    print("Creating 'id' columns -- using a highly inefficient method.")
    
    # Create progress bar.
    pb <- txtProgressBar(min = 0, max = N, style = 3)
    
    a <- data.frame(1)
    for (i in 1:nrow(df)) {
      
      # Update progress bar.
      setTxtProgressBar(pb, i)
      
      x <- i
      a <- rbind(a, x)
    }
    close(pb)
    a <- data.frame(a[1:nrow(a) - 1,])
    df <- cbind(a, df)
    colnames(df)[1] <- "id"
    return(df)
  }
  df <- id.creator(df = df)
  
  #Function for sampling.
  sampler <- function(df = NULL) {
    print("Sampling.")
    
    # Create progress bar.
    pb <- txtProgressBar(min = 0, max = n, style = 3)
    
    a <- data.frame()
    x <- as.numeric(df$id[sample(nrow(df), 1)])
    for (i in 1:n) {
      
      # Update progress bar.
      setTxtProgressBar(pb, i)
      
      if (((x + int) > nrow(df)) == FALSE) {
        x <- x + int
      }
      if (((x + int) > nrow(df)) == TRUE) {
        x <- ((x + int) - nrow(df))
      }
      a <- rbind(a, x)
    }
    close(pb)
    a <- data.frame(a)
    colnames(a)[1] <- "id"
    a <- merge(df, a)
    return(a)
  }
  a <- sampler(df = df)
  print("Done.")
  return(a)
}

df <- data.frame(ID = LETTERS[1:6], N1 = rnorm(6), N2 = rnorm(6))

rw.sample(df, 4)
#> [1] "Creating 'id' columns -- using a highly inefficient method."
#> 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=================================================================| 100%
#> [1] "Sampling."
#> 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================================| 100%
#> [1] "Done."
#>   id ID         N1         N2
#> 1  1  A -0.7393022  1.1040326
#> 2  1  A -0.7393022  1.1040326
#> 3  1  B  1.4119348 -0.2605365
#> 4  1  B  1.4119348 -0.2605365
#> 5  3  D  0.7794347 -1.6395992
#> 6  3  D  0.7794347 -1.6395992

^{Created on 2019-11-29 by the reprex package (v0.3.0)}

system · December 20, 2019, 2:39am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.