Hi there,
I'm new to R and have been following a document to help develop a basic Markov model.
I am running into an issue: when I build the transition matrix, the channel names are dropped and replaced by numbers. At every stage / line of code up until the transition matrix, the channel names show correctly. Has anyone faced this issue before? I'm guessing it's a settings issue, since I'm following the code from this document line for line:
https://rstudio-pubs-static.s3.amazonaws.com/389809_ddda109511bd49c1a8aaaa014197c7bf.html
Code I've used so far:
library(tidyverse)
library(reshape2)
library(ggthemes)
library(ggrepel)
library(RColorBrewer)
library(ChannelAttribution)
library(markovchain)
library(visNetwork)
library(expm)
library(stringr)
library(readr)
library(purrrlyr)
# simulating the "real" data
# Fix the RNG seed so the randomly drawn conversion flags are reproducible.
set.seed(454)

# Read the CSV (the `id` column becomes the row names), then attach a random
# 0/1 `conversion` flag to every row, drawn per customer with a 2.5% chance
# of being 1.
df_raw <- read.table("/Documents/Demand Generation/2021/markov_data_c.csv",
                     header = TRUE, sep = ",", row.names = "id") %>%
  group_by(customer_id) %>%
  mutate(conversion = sample(c(0, 1), n(), prob = c(0.975, 0.025), replace = TRUE)) %>%
  ungroup() %>%
  # convert columns 1 and 3 to character
  dmap_at(c(1, 3), as.character) %>%
  arrange(customer_id, date)

# quick inspection of the prepared data
head(df_raw)
str(df_raw)
summary(df_raw)
library(dplyr)
# splitting paths
# path_no = 1 + number of conversions seen on the customer's earlier rows, so
# a new path starts on the row after each conversion. `default = 0` covers the
# first row of each customer, where lag() has no previous value.
df_paths <- df_raw %>%
  group_by(customer_id) %>%
  mutate(path_no = lag(cumsum(conversion), default = 0) + 1) %>%
  ungroup()

# keep only each customer's first path
df_paths_1 <- df_paths %>%
  filter(path_no == 1) %>%
  select(-path_no)
# cleaning the first paths: drop touchpoints that have no channel recorded
df_path_1_clean <- df_paths_1 %>%
  # removing NAs
  filter(!is.na(channel))
# one- and multi-channel paths
# Tag every row with whether its customer's path uses exactly one distinct
# channel; the comparison already yields TRUE/FALSE, so no ifelse() is needed.
df_path_1_clean <- df_path_1_clean %>%
  group_by(customer_id) %>%
  mutate(uniq_channel_tag = length(unique(channel)) == 1) %>%
  ungroup()

# customers whose path touches a single channel
df_path_1_clean_uniq <- df_path_1_clean %>%
  filter(uniq_channel_tag) %>%
  select(-uniq_channel_tag)

# customers whose path touches more than one channel
df_path_1_clean_multi <- df_path_1_clean %>%
  filter(!uniq_channel_tag) %>%
  select(-uniq_channel_tag)
# Collapse each customer's touchpoints into a single ' > '-separated path
# string, total the conversion flags, and keep the paths whose total is 1.
df_all_paths <- df_path_1_clean %>%
  group_by(customer_id) %>%
  summarise(
    path = paste(channel, collapse = ' > '),
    conversion = sum(conversion)
  ) %>%
  ungroup() %>%
  filter(conversion == 1)
tail(df_all_paths, 50)

# Markov attribution over all converting paths; out_more = TRUE is what makes
# `removal_effects` available alongside `result` below.
mod_attrib <- markov_model(
  df_all_paths,
  var_path = 'path',
  var_conv = 'conversion',
  out_more = TRUE
)
mod_attrib$removal_effects
mod_attrib$result

# plain data.frame copy of the per-channel attribution results
d_all <- data.frame(mod_attrib$result)
# Same path construction as for df_all_paths, restricted to customers whose
# path touches more than one channel.
df_multi_paths <- df_path_1_clean_multi %>%
  group_by(customer_id) %>%
  summarise(
    path = paste(channel, collapse = ' > '),
    conversion = sum(conversion)
  ) %>%
  ungroup() %>%
  filter(conversion == 1)

# Markov attribution fitted on the multi-channel paths only.
mod_attrib_alt <- markov_model(
  df_multi_paths,
  var_path = 'path',
  var_conv = 'conversion',
  out_more = TRUE
)
mod_attrib_alt$removal_effects
mod_attrib_alt$result
# adding unique paths
# Conversions that came from single-channel paths, counted per channel.
df_uniq_paths <- df_path_1_clean_uniq %>%
  filter(conversion == 1) %>%
  group_by(channel) %>%
  summarise(conversions = sum(conversion)) %>%
  ungroup()

d_multi <- data.frame(mod_attrib_alt$result)

# full_join() leaves NA on whichever side a channel is missing from. Treat a
# missing count as 0 so a channel present in only one table still contributes
# its conversions instead of turning into NA and being dropped from the total.
d_split <- full_join(d_multi, df_uniq_paths, by = c('channel_name' = 'channel')) %>%
  mutate(result = coalesce(total_conversions, 0) + coalesce(conversions, 0))

# sanity check: the two totals printed here can be compared
sum(d_all$total_conversions)
sum(d_split$result, na.rm = TRUE)
# Extract the first and last channel of every path.
#   '>.*' removes the first '>' and everything after it  -> first touch
#   '.*>' removes everything up to the last '>' (greedy) -> last touch
# Without the '*' quantifier only two characters would be removed, leaving the
# rest of the path in place. trimws() strips the blanks that surrounded the
# ' > ' separator.
df_hm <- df_all_paths %>%
  mutate(channel_name_ft = trimws(sub('>.*', '', path)),
         channel_name_lt = trimws(sub('.*>', '', path)))
tail(df_hm, 50)
# first-touch conversions
# Sum of the conversion flags per first-touch channel.
df_ft <- df_hm %>%
  group_by(channel_name_ft) %>%
  summarise(FtouchConv = sum(conversion)) %>%
  ungroup()
df_ft
# last-touch conversions
# Sum of the conversion flags per last-touch channel.
df_lt <- df_hm %>%
  group_by(channel_name_lt) %>%
  summarise(LtouchConv = sum(conversion)) %>%
  ungroup()
df_lt
# Heuristic (first-/last-touch) table assembled by hand; the package-based
# alternative is left disabled:
#h_mod2 <- heuristic_models(df_raw, var_path = 'path', var_conv = 'conv')
# merge() defaults to an inner join, so only channels present in both the
# last-touch and the first-touch tables are kept.
h_mod2 <- merge(
  df_lt, df_ft,
  by.x = 'channel_name_lt',
  by.y = 'channel_name_ft'
)
h_mod2
# merging all models
# Join the heuristic counts with the Markov attribution results on channel name.
all_models <- merge(h_mod2, mod_attrib$result,
                    by.x = 'channel_name_lt', by.y = 'channel_name')

# Rename by column name rather than by position, so the labels cannot end up
# on the wrong columns if the merged layout ever changes.
names(all_models)[names(all_models) == 'channel_name_lt'] <- 'channel_name'
names(all_models)[names(all_models) == 'total_conversions'] <- 'attribModConv'
all_models

# NOTE(review): the question above reports numeric labels in the transition
# matrix. Presumably the installed ChannelAttribution version emits channel ids
# there that index into mod_attrib$result$channel_name -- confirm against the
# package documentation before mapping them back to names.
mod_attrib