Writing Custom Layer (3D RoI Pooling) using Keras in R

I am seeking help to solve a Keras custom layer problem demonstrated with attached Data and below code .
Vox_6_Ch_NotNorm.txt

I am using a voxelised 3D point cloud (LiDAR) on a 3D Region of Interest (RoI) max pooling custom layer. The 3D_RoI_MaxPool custom layer works on the attached dataset by first cropping a 6X6X6 voxelised space into a 4X4X4 RoI and then max pools the cropped layer to generate a 2X2X2 output layer of interest. Above is a reproducible example dataset and below is the code.

First I import the data

library(keras)
library(tfdatasets)
library(tfautograph)
library(reticulate)
library(tensorflow)
library(rlist)

#EXAMPLE OF INPUT
Vox1_CSV <- read.csv("D:/DATA/TEST_RoI_Pool/Vox_6_Ch_NotNorm.csv")

CNN_Array_Vox <- array(0, dim = c(1,6,6,6,1)) # THERE ARE TWO SAMPLES
CNN_Array_Vox[1,,,,]<- array(as.matrix(Vox1_CSV$Channel), dim = c(6,6,6,1))

# THIS ARRAY SLICES OFF ONE VOXEL AT EACH END OF X, Y, Z AXIS RESULTS IN RoI OF 4X4X4 with two samples and one channel
CNN_Array_RoI <- array(0, dim = c(3, 1, 10)) # THERE ARE TWO SAMPLES, AND TWO RoI 
CNN_Array_RoI[1,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)
CNN_Array_RoI[2,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)
CNN_Array_RoI[3,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)

# Output after RoI max pooling 
output_dim <- c(3, 2, 2, 2, 1) # RoIs, X, Y, Z, Channel (THE BATCH AXIS SHOULD APPEAR AS NONE AFTER  layer_3D_ROI_pooled )

##########################
# INPUT TENSORS WITH VALUES
##########################
Vox_Tensor <- tf$convert_to_tensor(CNN_Array_Vox, dtype= "float32")
RoI_Tensor <- tf$convert_to_tensor(CNN_Array_RoI, dtype= "float32")

Then I import the custom layer:

ROI_3D_pooled_Layer <- R6::R6Class("KerasLayer",
                                   
                                   inherit = KerasLayer,
                                   
                                   public = list(
                                     output_dim = NULL, 
                                     
                                     initialize = function( output_dim) {
                                       self$output_dim = output_dim
                                     },
                                     
                                     call = function(x, mask = NULL) {   
                                       
                                       # INPUT LAYERS
                                       Input_Vox <- x[[1]] # ORIGINAL PLOT AREA (IN VOXELS)
                                       Input_RoI <- x[[2]] # START AND END INDEX OF EACH DIMENSION FOR CROPPING RoI
                                       output_dim <- self$output_dim # DESIRED DIMENSION AFTER POOLING

                                       ####################################
                                       # RoI Function
                                       ####################################
                                       ROI_Pool_Fn <- function(Input_Vox, Input_RoI, output_dim){
                                         
                                         # PUT THE OUTPUT DIMENSION INTO A TENSOR FORMAT
                                         output_dim_T <- tf$convert_to_tensor(matrix(output_dim, nrow = 5, byrow = TRUE))

                                         # EMPTY OUTPUT ARRAY FOR Max_Pool... IS THIS APPROPRIATE APPROACH
                                         output_Array = list() 
                                         
                                         # UNSTACK EACH ROI AND LOOPING OVER EACH
                                         RoI_unpacked = tf$unstack(Input_RoI, axis=0L)
                                         for(r in 1:length(RoI_unpacked)){ 
                                           one_RoI <- RoI_unpacked[[r]]                                            # Tensor("3D_RoI_Pooled_1/3D_RoI_Pooled/unstack:0", shape=(1, 10), dtype=float32)
                                           # browser()
                                           # CROP THE VOXEL SPACE TO ONLY INCLUDE THE RoI
                                           Start_RoI <- tf$cast(one_RoI[,c(1, 3, 5, 7, 9), drop=FALSE], "int32") 
                                           Start_RoI <- k_squeeze(Start_RoI, 1)                                    # Tensor("Squeeze:0", shape=(5,), dtype=int32)
                                           Finish_RoI <- tf$cast(one_RoI[,c(2, 4, 6, 8, 10), drop=FALSE], "int32")
                                           Finish_RoI <- k_squeeze(Finish_RoI, 1)                                  # Tensor("Squeeze_1:0", shape=(5,), dtype=int32)
                                           #browser()
                                           RoI_Cropped <- tf$slice(Input_Vox, Start_RoI, Finish_RoI)               #Tensor("3D_RoI_Pooled_1/3D_RoI_Pooled/Slice:0", shape=(1, 4, 4, 4, 1), dtype=float32)

                                           # GET RoI RANGE FOR EACH OF THREE DIMENSIONS (i.e. NUMBER OF VOXELS AFTER CROPPING)
                                           XStart <-tf$cast(one_RoI[,3, drop=TRUE], "float32")                     # Tensor("strided_slice_2:0", shape=(1,), dtype=float32)
                                           XFinish <-tf$cast(one_RoI[,4, drop=TRUE], "float32")                    # Tensor("strided_slice_3:0", shape=(1,), dtype=float32)
                                           XFinish <-tf$math$add(XFinish, tf$constant(1, dtype="float32"))
                                           Length_RoI_X <- layer_subtract(list(XFinish,XStart))                       # Tensor("subtract/Identity:0", shape=(1,), dtype=float32)
                                           #
                                           YStart <-tf$cast(one_RoI[,5, drop=TRUE], "float32")
                                           YFinish <-tf$cast(one_RoI[,6, drop=TRUE], "float32")
                                           YFinish <-tf$math$add(YFinish, tf$constant(1, dtype="float32"))
                                           Length_RoI_Y <- layer_subtract(list(YFinish,YStart))
                                           #
                                           ZStart <-tf$cast(one_RoI[,7, drop=TRUE], "float32")
                                           ZFinish <-tf$cast(one_RoI[,8, drop=TRUE], "float32")
                                           ZFinish <-tf$math$add(ZFinish, tf$constant(1, dtype="float32"))
                                           Length_RoI_Z <- layer_subtract(list(ZFinish,ZStart))
                                           
                                           # MAKE OUTPUT DIMENSION TENSOR A FLOAT TO ALLOW FOR COMPUTING STEPS USED TO POOL VOXELS
                                           RoI_X_F <- tf$cast(output_dim_T[[1]], "float32") # SUBSETTING STARTS FROM INDEX 0 
                                           RoI_Y_F <-tf$cast(output_dim_T[[2]], "float32")  
                                           RoI_Z_F <-tf$cast(output_dim_T[[3]], "float32")  
                                           
                                           # STEP USED TO POOL VOXELS GIVEN DESIRED OUTPUT DIMENSION AND NUMBER OF VOXELS AFTER CROPPING
                                           X_step = tf$math$divide(Length_RoI_X, RoI_X_F)
                                           Y_step = tf$math$divide(Length_RoI_Y, RoI_Y_F)
                                           Z_step = tf$math$divide(Length_RoI_Z, RoI_Z_F)
                                           
                                           # # LOOP THROUGH EACH CHANNEL AND THEN ALL THREE DIMENSIONS XYZ
                                           # for(ch in tf$range(tf$constant(output_dim[5]+1))) { # LOOP CHANNEL....  SUBSETTING STARTS FROM INDEX 0
                                           #   for (k in tf$range(tf$constant(output_dim[4]+1))) { # LOOP Z....  SUBSETTING STARTS FROM INDEX 0
                                           #     for (j in tf$range(tf$constant(output_dim[3]+1))) { # LOOP Y....  SUBSETTING STARTS FROM INDEX 0
                                           #       for (i in tf$range(tf$constant(output_dim[2]+1))) { # LOOP X....  SUBSETTING STARTS FROM INDEX 0
                                           # LOOP THROUGH EACH CHANNEL AND THEN ALL THREE DIMENSIONS XYZ
                                           for(ch in 1:output_dim[5]) { # LOOP CHANNEL....  SUBSETTING STARTS FROM INDEX 0
                                             for (kk in 1:output_dim[4]) { # LOOP Z....  SUBSETTING STARTS FROM INDEX 0
                                               for (jj in 1:output_dim[3]) { # LOOP Y....  SUBSETTING STARTS FROM INDEX 0
                                                 for (ii in 1:output_dim[2]) { # LOOP X....  SUBSETTING STARTS FROM INDEX 0      
                                                   
                                                   # UNDERTAKING ELEMENT-WISE POOLING
                                                   # FOR X Y & Z GET START AND END INDEX FOR POOLING
                                                   
                                                   #####
                                                   # X 
                                                   #####
                                                   i <- tf$constant(ii, dtype="float32")
                                                   j <- tf$constant(jj, dtype="float32")
                                                   k <- tf$constant(kk, dtype="float32")
                                                   
                                                   # browser()
                                                   
                                                   Index_Xstart <-k_round((i-k_constant(1.0))*X_step) #+k_constant(1)
                                                   Index_Xstart <- tf$cast(Index_Xstart, dtype="int32")
                                                   #browser()
                                                   # USING k_Switch instead of ifelse
                                                   condition <- i+k_constant(1) <= Length_RoI_X
                                                   then_expression <- k_round((i)*X_step)
                                                   else_expression <- Length_RoI_X
                                                   Index_Xend <- k_switch(condition, then_expression, else_expression)
                                                   Index_Xend <- tf$cast(Index_Xend, dtype="int32")
                                                   #Index_Xend <-tf$math$add(Index_Xend, tf$constant(1L, dtype="int32"))
                                                   Length_RoI_X_Ele <- layer_subtract(list(Index_Xend,Index_Xstart)) 
                                                   #####
                                                   # Y  
                                                   #####
                                                   Index_Ystart <-k_round((j-k_constant(1))*Y_step) #+k_constant(1)
                                                   Index_Ystart <- tf$cast(Index_Ystart, dtype="int32")
                                                   #
                                                   condition <- j+k_constant(1) <= Length_RoI_Y
                                                   then_expression <- k_round((j)*Y_step)
                                                   else_expression <- Length_RoI_Y
                                                   Index_Yend <- k_switch(condition, then_expression, else_expression)
                                                   Index_Yend <- tf$cast(Index_Yend, dtype="int32")
                                                   #Index_Yend <-tf$math$add(Index_Yend, tf$constant(1L, dtype="int32"))
                                                   Length_RoI_Y_Ele <- layer_subtract(list(Index_Yend,Index_Ystart)) 
                                                   #
                                                   #####
                                                   # Z 
                                                   #####    
                                                   Index_Zstart <-k_round((k-k_constant(1))*Z_step) #+k_constant(1)
                                                   Index_Zstart <- tf$cast(Index_Zstart, dtype="int32")
                                                   #
                                                   condition <- k+k_constant(1) <= Length_RoI_Z
                                                   then_expression <- k_round((k)*Z_step)
                                                   else_expression <- Length_RoI_Z
                                                   Index_Zend <- k_switch(condition, then_expression, else_expression)
                                                   Index_Zend <- tf$cast(Index_Zend, dtype="int32")
                                                   #Index_Zend <-tf$math$add(Index_Zend, tf$constant(1L, dtype="int32"))
                                                   Length_RoI_Z_Ele <- layer_subtract(list(Index_Zend,Index_Zstart)) 
                                                   
                                                   # browser()
                                                   
                                                   # STACK THE XYZ INDICES FOR tf$slice FORMAT
                                                   Zero_Tensor <- k_zeros(c(1),dtype="int32") # INDEX ZERO IS USED SO RoI and Channel AXIS AREN'T CROPPED 
                                                   one_Tensor <-tf$math$add(Zero_Tensor, tf$constant(1L, dtype="int32"))
                                                   #one_Tensor <- k_constant(1L,dtype="int32")
                                                   Start_maxPool <-k_stack(list(Zero_Tensor,Index_Xstart,Index_Ystart,Index_Zstart, Zero_Tensor), axis = 0)
                                                   Start_maxPool <-k_squeeze(Start_maxPool, 1)
                                                   #browser()
                                                   Length_maxPool <-k_stack(list(one_Tensor, Length_RoI_X_Ele, Length_RoI_Y_Ele, Length_RoI_Z_Ele, one_Tensor), axis = 0)
                                                   Length_maxPool <-k_squeeze(Length_maxPool, 1)
                                                   
                                                   # CROP THE VOXELS THAT WILL BE POOLED (AND GET MAX POOL VALUE)
                                                   RoI_Cropped_Element <- tf$slice(RoI_Cropped, Start_maxPool, Length_maxPool) 
                                                   Max_Pool_Value <-k_max(RoI_Cropped_Element)
                                                   print(Max_Pool_Value)
                                                   # APPEND EACH RoI_Pooled element into a list
                                                   output_Array <- list.append(output_Array, Max_Pool_Value) ### DOM DOM DOM !!! NOT TENSOR BASED
   
                                                 }# i LOOP
                                               } # j LOOP 
                                             } # k Loop
                                           } #Ch LOOP 
                                           print(paste("Doing RoI:", r, "output of total RoIs: ", length(RoI_unpacked))) ### DOM DOM DOM !!! NOT TENSOR BASED
                                         } # r LOOP (ROI)
                                         
                                         # STACK THE OUTPUT LIST AND RESHAPE TO THE DESIRED OUTPUT SIZE 
                                         print(output_Array)
                                         output_Stack <- k_stack(output_Array, axis = 1) 
                                         #browser()
                                         feature_map_ROIpooled <- k_reshape(output_Stack, shape = c(NULL, length(RoI_unpacked), output_dim[2], 
                                                                                                    output_dim[3], 
                                                                                                    output_dim[4], 
                                                                                                    output_dim[5])) 
                                         
                                       } 
                                       # End RoI Function
                                       ####################################
                                       
                                       # DECORATE RoI FUNCTION IN AUTOGRAPH 
                                       ROI_Pool_Autograph <- autograph(ROI_Pool_Fn)
                                       # ROI_Pool_Autograph_TF <-tf_function(ROI_Pool_Autograph) # I GET ERROR WHEN I USE tf_function
                                       
                                       # RUN THE FUNCTION
                                       RoI_Output <- ROI_Pool_Autograph(Input_Vox, Input_RoI, output_dim)
                                       
                                     },
                                     compute_output_shape = function(Vox_shape = dim(Input_Vox)) {
                                       return(self$output_dim)
                                     }
                                   )
)
# Create layer wrapper function
layer_3D_ROI_pooled <- function(object,  output_dim, name) {
  create_layer(ROI_3D_pooled_Layer, object, list(output_dim = as.integer(output_dim),
                                                 name = "3D_RoI_Pooled"))}

Running the custom layer with data works as expected:

Final_3dPool_RoI_Output1 <- layer_3D_ROI_pooled(list(Vox_Tensor, RoI_Tensor), output_dim= output_dim, name="3D_RoI_Pooled")

My problem is that I am not able to integrate this custom layer code into a CNN model that runs on batch data. I would like the code to generate an output with dimension shape (None, 2,2,2,1), where None represents a symbolic batch axis and 1 is the channel axis (Count number of points in voxel). A reproducible code (Custom layer) below demonstrates that by integrating the layer into a compiler using symbolic tensors the input initialises with “None” for batch axis, but output loses this batch axis.

To demonstrate this, first I generate the input layers (symbolic representation of data) for the custom layer:

# Shape of voxel input and define voxel input layer
Vox_Shape <- c(6, 6, 6, 1) # X, Y, Z, Channel
input_layer_Vox <- layer_input(shape = Vox_Shape, name = "Input_Layer_Vox")

# Shape of RoI input and define RoI input layer
RoI_Shape <- c(3, 1, 10) # RoIs, 2 Indices for each dimension to crop Input Voxel space (i.e. the number of voxels to remove from each end)
input_layer_RoI <- layer_input(shape = RoI_Shape, name = "Input_Layer_RoI")

The input layers are used in the custom layer:

Final_3dPool_RoI_Output2 <- layer_3D_ROI_pooled(list(input_layer_Vox, input_layer_RoI), output_dim= output_dim, name="3D_RoI_Pooled")

The resulting output has shape (3, 2,2,2,1) (i.e. RoIs, X,Y,Z,Channel) but it should have a batch axis "None" as well. I think my problem has something to do with the way I use the autograph function and nested “for loops” in the custom layer. Is my approach correct?

Am I misunderstanding how tensor ops are coded in a custom layer using R? I decorated the function in “call” with autograph because I found it difficult to replace the nested “for loop” with the tf$while_loop. I am also not sure if I should be using the list.append function to form a list of Max_Pool_Values for each output element. I wonder if batch axis “None” is lost because of the way the final list of output elements is stacked and reshaped.

Is anyone able to help me understand why the Custom Layer output has no Batch axis?

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.