Problems with creating loop structure

Hey there!

I have some errors when I create the loop structure for the file. As a result, it gives data for only 1 row and also omits some columns in the output file.

I was looking through the script but I am unsure where I made an error.

Can anyone help, please?

This code was adapted from an already existing code labphon-9152-carignan.html

Script:

file_list <- unique(data$filename)
mfccs <- paste0("mfcc",1:14)

Loop through all files in file_list

for (file in file_list) {

Process each file's data

fdat <- data[data$filename == file, ]
}

Check if there’s data for the file

if (nrow(fdat) > 0) {
print(paste("Processing file:", file)) # Debugging print statement
}

Read the audio file

audio_dir <- "/Desktop/audio/"
wav_files <- list.files(audio_dir, pattern = "\.wav$", full.names = FALSE)
wav_file_name <- paste0(file, ".wav")

if (wav_file_name %in% wav_files) {
audio <- tuneR::readWave(paste0(audio_dir, wav_file_name))
}

total duration (in seconds)

totaldur <- length(audio@left)/audio@samp.rate

extract audio and de-mean to remove DC offset

snd <- audio@left - mean(audio@left)

audio pre-emphasis

for (n in 2:length(snd)) {
snd[n] <- snd[n] - 0.97*snd[n-1]
}

replace the wave object audio samples

audio@left <- snd

calculate MFCCs

melcs <- tuneR::melfcc(audio,
sr = audio@samp.rate,
numcep = length(mfccs),
wintime = 0.01,
hoptime = 0.005)

get the actual time step (may be slightly different than "hoptime")

timestep <- totaldur/nrow(melcs)

get the MFCCs samples nearest to the time points

mfsamps <- round(fdat$point_time/timestep)

add the MFCCs to the file data - corrected to as.list

fdat[,mfccs] <- as.list(melcs[mfsamps,])

create spectrogram

spec <- signal::specgram(x = audio@left,
n = 1024,
Fs = audio@samp.rate,
window = 256,
overlap = 128
)

get spectra

P <- abs(spec$S)

convert to dB

P <- 20*log10(P)

get the spectral time step

timestep <- diff(spec$t[1:2])

get the spectral samples nearest to the time points

specsamps <- round(fdat$point_time/timestep)

get first four spectral moments

moments <- c()
for (samp in 1:length(specsamps)) {
moments <- rbind(moments, emuR::moments(P[,samp]))
}
colnames(moments) <- c("COG", "variance", "skew", "kurtosis")

add the moments to the file data

fdat[,colnames(moments)] <- moments

nasal murmur (low/high ratio, 0-320 Hz : 320-5360 Hz) bands

thresh1 <- which.min(abs(spec$f-320))
thresh2 <- which.min(abs(spec$f-5360))

get the spectral amplitude means within the two frequency bands

print(specsamps)
specsamps <- 1:101
low <- colMeans(P[1:thresh1,specsamps])
high <- colMeans(P[thresh1:thresh2,specsamps])

calculate the murmur ratio and add to the file data

print(length(low))
print(length(high))
fdat$murmur <- rep(low / high, length.out = nrow(fdat))
fdat$murmur <- list(low/high)

add the file data to the combined data frame

alldata <- rbind.data.frame(alldata,fdat)

saveRDS(alldata,"alldata.Rda")

It's quite hard to offer you advice because, unfortunately, when you paste R code into this forum's post creation box, it is interpreted as markdown, so the R comments have shown up as markdown titles, etc.

For readability therefore, you can edit your post.

There is a button for inserting preformatted text, it looks like </> in the tool bar, and has short cut key of Ctrl + E

type or paste code here

I've looked briefly over what you shared, and I think you may simply not have put the majority of the code you wish to loop over inside the loop. It seems that you open a loop quite early and close it straight away.

for (file in file_list) {
#Process each file's data
fdat <- data[data$filename == file, ]
}

At this point you will have looped over the contents of your file_list and replaced fdat once per file; your script then continues from there with only the last fdat value.

1 Like
file_list <- unique(data$filename)
mfccs <- paste0("mfcc",1:14)

# Loop through all files in file_list
for (file in unique(data$filename)) {
  # Process each file's data
  fdat <- data[data$filename == file, ]
  print(paste("File:", file, "has", nrow(fdat), "rows"))
  # NOTE(review): `alldata` is not created anywhere in this snippet before it
  # is indexed here - presumably it was initialised earlier; verify.
  alldata[[file]] <- fdat
} 
# NOTE(review): the per-file loop ends on the closing brace above. Everything
# from here on runs exactly once, with `file` and `fdat` still holding the
# values from the last iteration. The line below repeats the assignment the
# loop already made for that last file.
alldata[[file]] <- fdat

  # Read the audio file
audio_dir <- "/Users/Desktop/audio/"
wav_files <- list.files(audio_dir, pattern = "\\.wav$", full.names = FALSE)
wav_file_name <- paste0(file, ".wav")

# NOTE(review): there is no else branch - when the wav file is not in the
# directory, `audio` is never assigned by this snippet and the code below
# uses whatever `audio` (if any) already exists in the workspace.
if (wav_file_name %in% wav_files) {
  audio <- tuneR::readWave(paste0(audio_dir, wav_file_name))
}

  # total duration (in seconds)
  totaldur <- length(audio@left)/audio@samp.rate
  
  # extract audio and de-mean to remove DC offset
  snd <- audio@left - mean(audio@left)
  
  # audio pre-emphasis
  # (in place: each step reads the already-updated previous sample)
  for (n in 2:length(snd)) {
    snd[n] <- snd[n] - 0.97*snd[n-1]
  }
  
  # replace the wave object audio samples
  audio@left <- snd
  
  # calculate MFCCs
  melcs <- tuneR::melfcc(audio, 
                         sr = audio@samp.rate, 
                         numcep = length(mfccs), 
                         wintime = 0.01, 
                         hoptime = 0.005)
  
  # get the actual time step (may be slightly different than "hoptime")
  timestep <- totaldur/nrow(melcs)
  
  # get the MFCCs samples nearest to the time points
  mfsamps <- round(fdat$point_time/timestep)
  
  # add the MFCCs to the file data - corrected to as.list
  # NOTE(review): as.list() on a matrix yields one list element per cell, not
  # one per column, so with more than one selected row this does not line the
  # MFCC columns up with the rows of fdat.
  fdat[,mfccs] <- as.list(melcs[mfsamps,])
  
  # create spectrogram
  spec <- signal::specgram(x = audio@left,
                           n = 1024,
                           Fs = audio@samp.rate,
                           window = 256,
                           overlap = 128
  )
  
  # get spectra
  P <- abs(spec$S)
  
  # convert to dB
  P <- 20*log10(P)
  
  # get the spectral time step
  timestep <- diff(spec$t[1:2])
  
  # get the spectral samples nearest to the time points
  specsamps <- round(fdat$point_time/timestep)
  
  # get first four spectral moments
  # NOTE(review): the loop indexes P by the counter `samp` (1, 2, 3, ...), not
  # by specsamps[samp], so the frames computed above are not the ones used.
  moments <- c()
  for (samp in 1:length(specsamps)) {
    moments <- rbind(moments, emuR::moments(P[,samp]))
  }
  colnames(moments) <- c("COG", "variance", "skew", "kurtosis")
  
  # add the moments to the file data
  fdat[,colnames(moments)] <- moments
  
  # nasal murmur (low/high ratio, 0-320 Hz : 320-5360 Hz) bands
  thresh1 <- which.min(abs(spec$f-320))
  thresh2 <- which.min(abs(spec$f-5360))
  
  # get the spectral amplitude means within the two frequency bands
  # NOTE(review): the next assignment discards the time-point frames computed
  # above and always uses spectrogram columns 1 to 101.
  print(specsamps)
  specsamps <- 1:101
  low   <- colMeans(P[1:thresh1,specsamps])
  high  <- colMeans(P[thresh1:thresh2,specsamps])
  
  # calculate the murmur ratio and add to the file data
  print(length(low))
  print(length(high))
  fdat$murmur <- rep(low / high, length.out = nrow(fdat))
  # NOTE(review): this second assignment replaces the column created on the
  # previous line with a list holding the whole low/high vector.
  fdat$murmur <- list(low/high)
  
  # add the file data to the combined data frame
  # NOTE(review): `alldata` was filled with [[file]] <- fdat above and is
  # row-bound here; the two uses treat it as different kinds of object.
  alldata <- rbind.data.frame(alldata,fdat)



saveRDS(alldata,"alldata.Rda")

Thank you very much for the advice! Here is the full code.
I think I already have the line of code that you have suggested.

I was trying to point out, if you want to repeatedly do things for each file from a list of files , you stop too early

for (file in unique(data$filename)) {
  # Process each file's data
  fdat <- data[data$filename == file, ]
  print(paste("File:", file, "has", nrow(fdat), "rows"))
  alldata[[file]] <- fdat
} 

<<- This is your loop; it ends here, and there is no more looping after this point.
That implies that everything else you do subsequently is done to either: 1) just the last file, or 2) all the files at once.
It seems to me that you have written the rest as though it would run per file, but having closed the loop, you will not be processing every file; you will be processing only the last file.

Further down you have a couple of small for loops, but presumably those, along with almost everything else, should be nested within the original loop, which you closed too early (as far as I understand your code).

1 Like

Yes, exactly, this is my problem: I want to process all files, but ultimately it gives me data for one file only. What is the best way to make sure that it will loop through all the files and not only one?

close your loop later ?

But how can I write it in a code? Sorry, I am just starting in R.

Can you follow this logic ? try running it and thinking about what is happening

# Demonstration: work placed after a loop's closing brace runs once, using the
# last value of the loop variable; work placed inside the braces runs for
# every element.
(things_to_do <- 1:4)

results <- c()
for(t in things_to_do ){
  results[t] <- t + 1  # add 1 to the current item and store it at position t of the vector
} # within the above loop, t is 1, then 2, then 3, then 4; they all get processed / incremented and stored away

# out here, what is t? Answer: it is the last t, which is 4

results[t] <- results[t] *10 # so this multiplies only the last entry (5) by 10

results
# 2 3 4 50



results_2 <- c()
for(t in things_to_do ){
 interim_value  <- t + 1  # add 1 to the current item and keep it in a temporary variable
 results_2[t] <- interim_value * 10  # still inside the loop: multiply by 10 and store at position t
  
} # within the above loop, t is 1, then 2, then 3, then 4; both steps are applied to every item


results_2
# 20 30 40 50

I think I have partially understood you. I have closed the loop after the audio files. However, this still does not solve the issue. Here is the modified code:

# Third attempt: the file loop now also reads the audio, but still closes
# before any of the signal processing.
for (file in unique(data$filename)) {
  
  # Process each file's data
  fdat <- data[data$filename == file, ]
  print(paste("File:", file, "has", nrow(fdat), "rows"))
  
  # Read the audio file
  audio_dir <- "/Users/Desktop/audio/"
  wav_files <- list.files(audio_dir, pattern = "\\.wav$", full.names = FALSE)
  wav_file_name <- paste0(file, ".wav")
  
  audio <- tuneR::readWave(paste0(audio_dir, wav_file_name))
  }   
# NOTE(review): the brace above ends the per-file loop. `fdat` and `audio` are
# overwritten on every iteration, so the code below runs once and only sees
# the last file's values.

  # total duration (in seconds)
  totaldur <- length(audio@left)/audio@samp.rate

  # extract audio and de-mean to remove DC offset
  snd <- audio@left - mean(audio@left)

  # audio pre-emphasis
  for (n in 2:length(snd)) {
  snd[n] <- snd[n] - 0.97*snd[n-1]
 }

It seems like the system is still relying on data from one row to calculate the data for the remaining rows. Just for reference: filename is a column in an Excel file, which contains the names of the respective files which match the names of the audio files I am trying to process.

What can I do now? I know that the issue is with the coding of the loop structure, but I can´t wrap my head around it.

Why did you choose that place?
Is the audio processing something that should be done separately for every file? Is it independent of the files altogether? Or is it something they all have in common?
If it is per file, then it should also be in the same loop that processes a given file.

It's probably the case that almost everything should go inside the for loop that runs over each file to process.

My goal is to loop through all the files: first, to get the file data in the Excel and then the audio, because they are connected.
As I mentioned, when I try to close the loop after the audio file, the code gives the same results.
Also, when I posted the loop even further:

# Loop through all files in file_list
# NOTE(review): in this version every statement is inside the loop, but each
# iteration overwrites fdat, audio, totaldur and snd, and nothing is appended
# to a combined result within the loop body shown here.
for (file in unique(data$filename)) {
  
  # Process each file's data
  fdat <- data[data$filename == file, ]
  print(paste("File:", file, "has", nrow(fdat), "rows"))
  
  # Read the audio file
  audio_dir <- "/Users/Desktop/audio/"
  wav_files <- list.files(audio_dir, pattern = "\\.wav$", full.names = FALSE)
  wav_file_name <- paste0(file, ".wav")
  
  audio <- tuneR::readWave(paste0(audio_dir, wav_file_name))
  

  # total duration (in seconds)
  totaldur <- length(audio@left)/audio@samp.rate

  # extract audio and de-mean to remove DC offset
  snd <- audio@left - mean(audio@left)
} 

It now gave me the results for all the files, however, it only calculated the values for 1. Meaning, that values in all the columns are the same.

This is a guess, because I don't have your data to test against, but this approach might work.

#' Extract acoustic measures at the annotated time points of one recording
#'
#' Subsets `data` to the rows belonging to `file`, reads the matching
#' `<file>.wav`, and appends to those rows the MFCCs, the first four spectral
#' moments and a low/high band "murmur" ratio, each taken at the analysis
#' frame nearest to the row's `point_time`.
#'
#' @param data Data frame with at least the columns `filename` and
#'   `point_time`. `point_time` is divided by frame steps expressed in
#'   seconds, so it is expected to be in seconds from the start of the file.
#' @param file Value of `data$filename` to process; also the wav file's
#'   base name (without the ".wav" extension).
#' @param mfccs Character vector of column names for the MFCCs; its length
#'   sets the number of cepstral coefficients that are computed.
#' @param audio_dir Directory that holds the wav files.
#' @return The rows of `data` for `file`, with the `mfccs` columns plus `COG`,
#'   `variance`, `skew`, `kurtosis` and `murmur` added. If no row matches
#'   `file`, the empty subset is returned unchanged.
process_a_file <- function(data, file, mfccs,
                           audio_dir = "/Users/Desktop/audio/") {
  fdat <- data[data$filename == file, , drop = FALSE]
  message("File: ", file, " has ", nrow(fdat), " rows")
  if (nrow(fdat) == 0L) {
    return(fdat)
  }

  # Fail loudly when the recording is missing. Falling through would either
  # crash on an undefined `audio` or, worse, silently reuse an `audio` object
  # left over in the calling environment.
  wav_path <- file.path(audio_dir, paste0(file, ".wav"))
  if (!file.exists(wav_path)) {
    stop("Audio file not found: ", wav_path, call. = FALSE)
  }
  audio <- tuneR::readWave(wav_path)

  # total duration (in seconds)
  totaldur <- length(audio@left) / audio@samp.rate

  # extract audio and de-mean to remove DC offset
  snd <- audio@left - mean(audio@left)

  # audio pre-emphasis
  # NOTE(review): the update is in place, so each step uses the already
  # filtered previous sample. That behaviour is kept as-is so results match
  # the script this was adapted from; confirm it is the intended filter.
  # NOTE(review): tuneR::melfcc has its own `preemph` argument - check whether
  # pre-emphasis ends up being applied twice for the MFCCs.
  if (length(snd) > 1L) {
    for (n in 2:length(snd)) {
      snd[n] <- snd[n] - 0.97 * snd[n - 1]
    }
  }

  # replace the wave object audio samples
  audio@left <- snd

  # calculate MFCCs (one row per analysis frame, one column per coefficient)
  melcs <- tuneR::melfcc(audio,
                         sr = audio@samp.rate,
                         numcep = length(mfccs),
                         wintime = 0.01,
                         hoptime = 0.005)

  # actual time step between frames (may differ slightly from "hoptime")
  mfcc_step <- totaldur / nrow(melcs)

  # frame nearest to each time point, clamped to valid rows so that a time
  # point near 0 or near the end of the file cannot produce index 0 or an
  # out-of-range index
  mfsamps <- pmin(pmax(round(fdat$point_time / mfcc_step), 1), nrow(melcs))

  # One row of coefficients per row of fdat. drop = FALSE keeps the matrix
  # shape for a single row; flattening with as.list() would spread individual
  # cells across the columns and give every row the same values.
  mfcc_vals <- as.data.frame(melcs[mfsamps, , drop = FALSE])
  names(mfcc_vals) <- mfccs
  fdat[mfccs] <- mfcc_vals

  # create spectrogram
  spec <- signal::specgram(x = audio@left,
                           n = 1024,
                           Fs = audio@samp.rate,
                           window = 256,
                           overlap = 128)

  # magnitude spectra in dB (rows = frequency bins, columns = time frames)
  P <- 20 * log10(abs(spec$S))

  # spectral time step and the frame nearest to each time point
  spec_step <- diff(spec$t[1:2])
  specsamps <- pmin(pmax(round(fdat$point_time / spec_step), 1), ncol(P))

  # first four spectral moments of the frame at each time point
  # (index with the frame number itself, not with the loop counter)
  # NOTE(review): no frequency axis is passed to emuR::moments(), so the
  # moments are presumably expressed in bin indices rather than Hz - confirm.
  moments <- do.call(
    rbind,
    lapply(specsamps, function(frame) emuR::moments(P[, frame]))
  )
  colnames(moments) <- c("COG", "variance", "skew", "kurtosis")
  fdat[colnames(moments)] <- as.data.frame(moments)

  # nasal murmur (low/high ratio, 0-320 Hz : 320-5360 Hz) bands
  thresh1 <- which.min(abs(spec$f - 320))
  thresh2 <- which.min(abs(spec$f - 5360))

  # mean spectral amplitude within the two bands at each time point's frame;
  # drop = FALSE keeps colMeans() working when there is a single row or frame
  low <- colMeans(P[1:thresh1, specsamps, drop = FALSE])
  high <- colMeans(P[thresh1:thresh2, specsamps, drop = FALSE])

  # one murmur ratio per row of fdat
  fdat$murmur <- low / high

  fdat
}


# Run the per-file extraction for every distinct filename and stack the
# resulting data frames into one.
file_list <- unique(data$filename)
mfccs <- paste0("mfcc", 1:14)

# Collect one data frame per file first, then bind once; this avoids
# re-copying the growing result on every iteration.
per_file <- lapply(file_list, function(f) {
  process_a_file(data = data,
                 file = f,
                 mfccs = mfccs)
})

# add the file data to the combined data frame
alldata <- do.call(rbind.data.frame, per_file)

alldata

# saveRDS(alldata,"alldata.Rda")

I did not use exactly the code you wrote, but it helped me understand where I did wrong.

Thank you very much!