I am seeking help to solve a Keras custom layer problem demonstrated with attached Data and below code .
Vox_6_Ch_NotNorm.txt
I am using a voxelised 3D point cloud (LiDAR) on a 3D Region of Interest (RoI) max pooling custom layer. The 3D_RoI_MaxPool custom layer works on the attached dataset by first cropping a 6X6X6 voxelised space into a 4X4X4 RoI and then max pools the cropped layer to generate a 2X2X2 output layer of interest. Above is a reproducible example dataset and below is the code.
First I import the data
library(keras)
library(tfdatasets)
library(tfautograph)
library(reticulate)
library(tensorflow)
library(rlist)
#EXAMPLE OF INPUT
Vox1_CSV <- read.csv("D:/DATA/TEST_RoI_Pool/Vox_6_Ch_NotNorm.csv")
CNN_Array_Vox <- array(0, dim = c(1,6,6,6,1)) # THERE ARE TWO SAMPLES
CNN_Array_Vox[1,,,,]<- array(as.matrix(Vox1_CSV$Channel), dim = c(6,6,6,1))
# THIS ARRAY SLICES OFF ONE VOXEL AT EACH END OF X, Y, Z AXIS RESULTS IN RoI OF 4X4X4 with two samples and one channel
CNN_Array_RoI <- array(0, dim = c(3, 1, 10)) # THERE ARE TWO SAMPLES, AND TWO RoI
CNN_Array_RoI[1,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)
CNN_Array_RoI[2,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)
CNN_Array_RoI[3,,]<- array(as.matrix(c(0,1,1,4,1,4,1,4,0,1)), dim=c(1,10)) # START INDEX (0,1,1,1,0) AND LENGTH OF OUTPUT (1,4,4,4,0)
# Output after RoI max pooling
output_dim <- c(3, 2, 2, 2, 1) # RoIs, X, Y, Z, Channel (THE BATCH AXIS SHOULD APPEAR AS NONE AFTER layer_3D_ROI_pooled )
##########################
# INPUT TENSORS WITH VALUES
##########################
Vox_Tensor <- tf$convert_to_tensor(CNN_Array_Vox, dtype= "float32")
RoI_Tensor <- tf$convert_to_tensor(CNN_Array_RoI, dtype= "float32")
Then I import the custom layer:
ROI_3D_pooled_Layer <- R6::R6Class("KerasLayer",
inherit = KerasLayer,
public = list(
output_dim = NULL,
initialize = function( output_dim) {
self$output_dim = output_dim
},
call = function(x, mask = NULL) {
# INPUT LAYERS
Input_Vox <- x[[1]] # ORIGINAL PLOT AREA (IN VOXELS)
Input_RoI <- x[[2]] # START AND END INDEX OF EACH DIMENSION FOR CROPPING RoI
output_dim <- self$output_dim # DESIRED DIMENSION AFTER POOLING
####################################
# RoI Function
####################################
ROI_Pool_Fn <- function(Input_Vox, Input_RoI, output_dim){
# PUT THE OUTPUT DIMENSION INTO A TENSOR FORMAT
output_dim_T <- tf$convert_to_tensor(matrix(output_dim, nrow = 5, byrow = TRUE))
# EMPTY OUTPUT ARRAY FOR Max_Pool... IS THIS APPROPRIATE APPROACH
output_Array = list()
# UNSTACK EACH ROI AND LOOPING OVER EACH
RoI_unpacked = tf$unstack(Input_RoI, axis=0L)
for(r in 1:length(RoI_unpacked)){
one_RoI <- RoI_unpacked[[r]] # Tensor("3D_RoI_Pooled_1/3D_RoI_Pooled/unstack:0", shape=(1, 10), dtype=float32)
# browser()
# CROP THE VOXEL SPACE TO ONLY INCLUDE THE RoI
Start_RoI <- tf$cast(one_RoI[,c(1, 3, 5, 7, 9), drop=FALSE], "int32")
Start_RoI <- k_squeeze(Start_RoI, 1) # Tensor("Squeeze:0", shape=(5,), dtype=int32)
Finish_RoI <- tf$cast(one_RoI[,c(2, 4, 6, 8, 10), drop=FALSE], "int32")
Finish_RoI <- k_squeeze(Finish_RoI, 1) # Tensor("Squeeze_1:0", shape=(5,), dtype=int32)
#browser()
RoI_Cropped <- tf$slice(Input_Vox, Start_RoI, Finish_RoI) #Tensor("3D_RoI_Pooled_1/3D_RoI_Pooled/Slice:0", shape=(1, 4, 4, 4, 1), dtype=float32)
# GET RoI RANGE FOR EACH OF THREE DIMENSIONS (i.e. NUMBER OF VOXELS AFTER CROPPING)
XStart <-tf$cast(one_RoI[,3, drop=TRUE], "float32") # Tensor("strided_slice_2:0", shape=(1,), dtype=float32)
XFinish <-tf$cast(one_RoI[,4, drop=TRUE], "float32") # Tensor("strided_slice_3:0", shape=(1,), dtype=float32)
XFinish <-tf$math$add(XFinish, tf$constant(1, dtype="float32"))
Length_RoI_X <- layer_subtract(list(XFinish,XStart)) # Tensor("subtract/Identity:0", shape=(1,), dtype=float32)
#
YStart <-tf$cast(one_RoI[,5, drop=TRUE], "float32")
YFinish <-tf$cast(one_RoI[,6, drop=TRUE], "float32")
YFinish <-tf$math$add(YFinish, tf$constant(1, dtype="float32"))
Length_RoI_Y <- layer_subtract(list(YFinish,YStart))
#
ZStart <-tf$cast(one_RoI[,7, drop=TRUE], "float32")
ZFinish <-tf$cast(one_RoI[,8, drop=TRUE], "float32")
ZFinish <-tf$math$add(ZFinish, tf$constant(1, dtype="float32"))
Length_RoI_Z <- layer_subtract(list(ZFinish,ZStart))
# MAKE OUTPUT DIMENSION TENSOR A FLOAT TO ALLOW FOR COMPUTING STEPS USED TO POOL VOXELS
RoI_X_F <- tf$cast(output_dim_T[[1]], "float32") # SUBSETTING STARTS FROM INDEX 0
RoI_Y_F <-tf$cast(output_dim_T[[2]], "float32")
RoI_Z_F <-tf$cast(output_dim_T[[3]], "float32")
# STEP USED TO POOL VOXELS GIVEN DESIRED OUTPUT DIMENSION AND NUMBER OF VOXELS AFTER CROPPING
X_step = tf$math$divide(Length_RoI_X, RoI_X_F)
Y_step = tf$math$divide(Length_RoI_Y, RoI_Y_F)
Z_step = tf$math$divide(Length_RoI_Z, RoI_Z_F)
# # LOOP THROUGH EACH CHANNEL AND THEN ALL THREE DIMENSIONS XYZ
# for(ch in tf$range(tf$constant(output_dim[5]+1))) { # LOOP CHANNEL.... SUBSETTING STARTS FROM INDEX 0
# for (k in tf$range(tf$constant(output_dim[4]+1))) { # LOOP Z.... SUBSETTING STARTS FROM INDEX 0
# for (j in tf$range(tf$constant(output_dim[3]+1))) { # LOOP Y.... SUBSETTING STARTS FROM INDEX 0
# for (i in tf$range(tf$constant(output_dim[2]+1))) { # LOOP X.... SUBSETTING STARTS FROM INDEX 0
# LOOP THROUGH EACH CHANNEL AND THEN ALL THREE DIMENSIONS XYZ
for(ch in 1:output_dim[5]) { # LOOP CHANNEL.... SUBSETTING STARTS FROM INDEX 0
for (kk in 1:output_dim[4]) { # LOOP Z.... SUBSETTING STARTS FROM INDEX 0
for (jj in 1:output_dim[3]) { # LOOP Y.... SUBSETTING STARTS FROM INDEX 0
for (ii in 1:output_dim[2]) { # LOOP X.... SUBSETTING STARTS FROM INDEX 0
# UNDERTAKING ELEMENT-WISE POOLING
# FOR X Y & Z GET START AND END INDEX FOR POOLING
#####
# X
#####
i <- tf$constant(ii, dtype="float32")
j <- tf$constant(jj, dtype="float32")
k <- tf$constant(kk, dtype="float32")
# browser()
Index_Xstart <-k_round((i-k_constant(1.0))*X_step) #+k_constant(1)
Index_Xstart <- tf$cast(Index_Xstart, dtype="int32")
#browser()
# USING k_Switch instead of ifelse
condition <- i+k_constant(1) <= Length_RoI_X
then_expression <- k_round((i)*X_step)
else_expression <- Length_RoI_X
Index_Xend <- k_switch(condition, then_expression, else_expression)
Index_Xend <- tf$cast(Index_Xend, dtype="int32")
#Index_Xend <-tf$math$add(Index_Xend, tf$constant(1L, dtype="int32"))
Length_RoI_X_Ele <- layer_subtract(list(Index_Xend,Index_Xstart))
#####
# Y
#####
Index_Ystart <-k_round((j-k_constant(1))*Y_step) #+k_constant(1)
Index_Ystart <- tf$cast(Index_Ystart, dtype="int32")
#
condition <- j+k_constant(1) <= Length_RoI_Y
then_expression <- k_round((j)*Y_step)
else_expression <- Length_RoI_Y
Index_Yend <- k_switch(condition, then_expression, else_expression)
Index_Yend <- tf$cast(Index_Yend, dtype="int32")
#Index_Yend <-tf$math$add(Index_Yend, tf$constant(1L, dtype="int32"))
Length_RoI_Y_Ele <- layer_subtract(list(Index_Yend,Index_Ystart))
#
#####
# Z
#####
Index_Zstart <-k_round((k-k_constant(1))*Z_step) #+k_constant(1)
Index_Zstart <- tf$cast(Index_Zstart, dtype="int32")
#
condition <- k+k_constant(1) <= Length_RoI_Z
then_expression <- k_round((k)*Z_step)
else_expression <- Length_RoI_Z
Index_Zend <- k_switch(condition, then_expression, else_expression)
Index_Zend <- tf$cast(Index_Zend, dtype="int32")
#Index_Zend <-tf$math$add(Index_Zend, tf$constant(1L, dtype="int32"))
Length_RoI_Z_Ele <- layer_subtract(list(Index_Zend,Index_Zstart))
# browser()
# STACK THE XYZ INDICES FOR tf$slice FORMAT
Zero_Tensor <- k_zeros(c(1),dtype="int32") # INDEX ZERO IS USED SO RoI and Channel AXIS AREN'T CROPPED
one_Tensor <-tf$math$add(Zero_Tensor, tf$constant(1L, dtype="int32"))
#one_Tensor <- k_constant(1L,dtype="int32")
Start_maxPool <-k_stack(list(Zero_Tensor,Index_Xstart,Index_Ystart,Index_Zstart, Zero_Tensor), axis = 0)
Start_maxPool <-k_squeeze(Start_maxPool, 1)
#browser()
Length_maxPool <-k_stack(list(one_Tensor, Length_RoI_X_Ele, Length_RoI_Y_Ele, Length_RoI_Z_Ele, one_Tensor), axis = 0)
Length_maxPool <-k_squeeze(Length_maxPool, 1)
# CROP THE VOXELS THAT WILL BE POOLED (AND GET MAX POOL VALUE)
RoI_Cropped_Element <- tf$slice(RoI_Cropped, Start_maxPool, Length_maxPool)
Max_Pool_Value <-k_max(RoI_Cropped_Element)
print(Max_Pool_Value)
# APPEND EACH RoI_Pooled element into a list
output_Array <- list.append(output_Array, Max_Pool_Value) ### DOM DOM DOM !!! NOT TENSOR BASED
}# i LOOP
} # j LOOP
} # k Loop
} #Ch LOOP
print(paste("Doing RoI:", r, "output of total RoIs: ", length(RoI_unpacked))) ### DOM DOM DOM !!! NOT TENSOR BASED
} # r LOOP (ROI)
# STACK THE OUTPUT LIST AND RESHAPE TO THE DESIRED OUTPUT SIZE
print(output_Array)
output_Stack <- k_stack(output_Array, axis = 1)
#browser()
feature_map_ROIpooled <- k_reshape(output_Stack, shape = c(NULL, length(RoI_unpacked), output_dim[2],
output_dim[3],
output_dim[4],
output_dim[5]))
}
# End RoI Function
####################################
# DECORATE RoI FUNCTION IN AUTOGRAPH
ROI_Pool_Autograph <- autograph(ROI_Pool_Fn)
# ROI_Pool_Autograph_TF <-tf_function(ROI_Pool_Autograph) # I GET ERROR WHEN I USE tf_function
# RUN THE FUNCTION
RoI_Output <- ROI_Pool_Autograph(Input_Vox, Input_RoI, output_dim)
},
compute_output_shape = function(Vox_shape = dim(Input_Vox)) {
return(self$output_dim)
}
)
)
# Create layer wrapper function
layer_3D_ROI_pooled <- function(object, output_dim, name) {
create_layer(ROI_3D_pooled_Layer, object, list(output_dim = as.integer(output_dim),
name = "3D_RoI_Pooled"))}
Running the custom layer with data works as expected:
Final_3dPool_RoI_Output1 <- layer_3D_ROI_pooled(list(Vox_Tensor, RoI_Tensor), output_dim= output_dim, name="3D_RoI_Pooled")
My problem is that I am not able to integrate this custom layer code into a CNN model that runs on batch data. I would like the code to generate an output with dimension shape (None, 2,2,2,1), where None represents a symbolic batch axis and 1 is the channel axis (Count number of points in voxel). A reproducible code (Custom layer) below demonstrates that by integrating the layer into a compiler using symbolic tensors the input initialises with “None” for batch axis, but output loses this batch axis.
To demonstrate this, first I generate the input layers (symbolic representation of data) for the custom layer:
# Shape of voxel input and define voxel input layer
Vox_Shape <- c(6, 6, 6, 1) # X, Y, Z, Channel
input_layer_Vox <- layer_input(shape = Vox_Shape, name = "Input_Layer_Vox")
# Shape of RoI input and define RoI input layer
RoI_Shape <- c(3, 1, 10) # RoIs, 2 Indices for each dimension to crop Input Voxel space (i.e. the number of voxels to remove from each end)
input_layer_RoI <- layer_input(shape = RoI_Shape, name = "Input_Layer_RoI")
The input layers are used in the custom layer:
Final_3dPool_RoI_Output2 <- layer_3D_ROI_pooled(list(input_layer_Vox, input_layer_RoI), output_dim= output_dim, name="3D_RoI_Pooled")
The resulting output has shape (3, 2,2,2,1) (i.e. RoIs, X,Y,Z,Channel) but it should have a batch axis "None" as well. I think my problem has something to do with the way I use the autograph function and nested “for loops” in the custom layer. Is my approach correct?
Am I misunderstanding how tensor ops are coded in a custom layer using R? I decorated the function in “call” with autograph because I found it difficult to replace the nested “for loop” with the tf$while_loop. I am also not sure if I should be using the list.append function to form a list of Max_Pool_Values for each output element. I wonder if batch axis “None” is lost because of the way the final list of output elements is stacked and reshaped.
Is anyone able to help me understand why the Custom Layer output has no Batch axis?