Thanks!
(I'd like to apologise in advance for the shipping to Australia.)
Yay! I barely made it onto the list, but hey, I'd rather be lucky than good any day of the week! Now I have a chance to complete the collection... This is my work computer proudly displaying some of my favourite R packages (loot from RStudio::conf 2017):
You'll have to ship to Argentina, though!
I'm obviously not @hadley, but I wanted an excuse to try parsing some JSON:
library(httr)
suppressPackageStartupMessages(library(tidyverse))

# Fetch the topic JSON to get the full list of post IDs in the thread
url <- "https://forum.posit.co/t/1230.json"
req <- httr::GET(url)
stop_for_status(req)
con <- httr::content(req)

# Request every post by ID (the basic topic JSON only returns the first 20 posts)
url2 <- paste0(
  "https://forum.posit.co/t/1230/posts.json?",
  paste0("post_ids%5B%5D=", con$post_stream$stream, collapse = "&")
)
req2 <- httr::GET(url2)
stop_for_status(req2)
con2 <- httr::content(req2)

# Keep only posts by Bill and pull out the @-mentioned usernames
winners <- con2$post_stream$posts %>%
  keep(~ .$username == "Bill") %>%
  map_chr("cooked") %>%
  stringr::str_match_all("href=\\\"/u/(\\w+)\\\"") %>%
  map(~ .[, 2]) %>%
  flatten_chr()
winners
#> [1] "mara" "alistaire" "daattali" "emilyriederer"
#> [5] "eric_bickel" "nick" "jessemaegan" "raybuhr"
#> [9] "billr" "mmuurr" "hadley" "apreshill"
#> [13] "pavopax" "mfherman" "rensa" "tomtec"
#> [17] "economicurtis" "pssguy" "cdr6934" "rpodcast"
#> [21] "andrea" "timpe" "cderv" "pirategrunt"
#> [25] "mungojam" "rkahne" "davergp"
Filtering the subsequent candidate list is straightforward. It depends on @bill not @-mentioning any non-winners in this thread. There's only one false positive at the moment, and @hadley can't win anyway!
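As a hedged sketch of that filtering step (the not_eligible vector below is purely illustrative on my part, not scraped from anywhere):

# Drop known false positives from the scraped mention list above.
# not_eligible is illustrative; here it's just the one known false positive.
not_eligible <- c("hadley")
candidates <- setdiff(winners, not_eligible)
candidates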
2017-10-20 Edit: Fixed the code to pull in more than 20 posts.
The code for the awards was updated by @EconomiCurtis. If you've got any improvements let us know!
library(httr)
library(dplyr)
library(purrr)

# Build the URL for one page of the weekly directory, ordered by likes received
url_users <- function(page = 0, domain = "https://forum.posit.co") {
  paste0(
    domain,
    "/directory_items.json?",
    "&period=weekly&order=likes_received",
    "&page=", page
  )
}

url <- url_users()
req <- httr::GET(url)
stop_for_status(req)
con <- httr::content(req)

users <- tibble()
user_cnt <- 0
page <- 0

# Page through the directory until an empty page comes back
while (length(con$directory_items) > 0) {
  users_raw <- con$directory_items
  users <- bind_rows(
    users,
    tibble(
      id = users_raw %>% map_int(c("user", "id")),
      name = users_raw %>% map_chr(c("user", "username")),
      likes = users_raw %>% map_int("likes_received"),
      title = users_raw %>% map_chr(c("user", "title"), .default = NA),
      time_read = users_raw %>% map_chr("time_read", .default = NA),
      likes_given = users_raw %>% map_int("likes_given", .default = NA),
      topics_entered = users_raw %>% map_int("topics_entered", .default = NA),
      post_count = users_raw %>% map_int("post_count", .default = NA),
      days_visited = users_raw %>% map_int("days_visited", .default = NA)
    )
  )
  page <- page + 1
  req <- httr::GET(url_users(page))
  stop_for_status(req)
  con <- httr::content(req)
  user_cnt <- nrow(users)
  print(paste("Page:", page, "- users:", user_cnt))
}

# Keep one row per user and stamp the retrieval time
users <- users %>%
  distinct(name, .keep_all = TRUE) %>%
  mutate(DateTime = Sys.time())

# Previous winners are excluded from the draw
winners <- c(
  "mara", "alistaire", "daattali", "emilyriederer", "eric_bickel",
  "nick", "jessemaegan", "raybuhr", "billr", "mmuurr",
  "apreshill", "pavopax", "mfherman", "rensa", "tomtec"
)

# Draw five winners, weighted by likes, excluding anyone with a title and past winners
users %>%
  filter(is.na(title), !name %in% winners) %>%
  select(-title) %>%
  sample_n(5, weight = likes) %>%
  arrange(desc(likes))
Not an improvement on the process for choosing winners, but a way to satisfy some curiosity about how likely it is to win each week. I did this with bootstrap sampling, dividing the number of times a particular username wins by the number of samples.
For example, I just ran this with my username and 10,000 bootstrap samples, and if the contest were run today it looks like I'd have roughly a 35% chance of winning! Let's hope my odds are that good come Friday!!!
library(httr)
library(dplyr)
library(purrr)

# Input username & number of bootstrap samples
username <- "dlsweet"
n <- 10000

# Initialize counter
w <- 0

url_users <- function(page = 0, domain = "https://forum.posit.co") {
  paste0(domain, "/directory_items.json?",
         "&period=weekly&order=likes_received", "&page=", page)
}

url <- url_users()
req <- httr::GET(url)
stop_for_status(req)
con <- httr::content(req)
users <- tibble()
user_cnt <- 0
page <- 0

while (length(con$directory_items) > 0) {
  users_raw <- con$directory_items
  users <- bind_rows(
    users,
    tibble(
      id = users_raw %>% map_int(c("user", "id")),
      name = users_raw %>% map_chr(c("user", "username")),
      likes = users_raw %>% map_int("likes_received"),
      title = users_raw %>% map_chr(c("user", "title"), .default = NA),
      time_read = users_raw %>% map_chr("time_read", .default = NA),
      likes_given = users_raw %>% map_int("likes_given", .default = NA),
      topics_entered = users_raw %>% map_int("topics_entered", .default = NA),
      post_count = users_raw %>% map_int("post_count", .default = NA),
      days_visited = users_raw %>% map_int("days_visited", .default = NA)
    )
  )
  page <- page + 1
  req <- httr::GET(url_users(page))
  stop_for_status(req)
  con <- httr::content(req)
  user_cnt <- nrow(users)
  print(paste("Page:", page, "- users:", user_cnt))
}

users <- users %>%
  distinct(name, .keep_all = TRUE) %>%
  mutate(DateTime = Sys.time())

winners <- c(
  "mara", "alistaire", "daattali", "emilyriederer", "eric_bickel",
  "nick", "jessemaegan", "raybuhr", "billr", "mmuurr",
  "apreshill", "pavopax", "mfherman", "rensa", "tomtec"
)

for (i in 1:n) {
  winners2 <- users %>%
    filter(is.na(title), !name %in% winners) %>%
    select(-title) %>%
    sample_n(5, weight = likes) %>%
    arrange(desc(likes))
  # If you win, the count of wins goes up by 1
  if (username %in% winners2$name) {
    w <- w + 1
  }
}

# Divide the number of wins by the number of bootstrap samples
# for a probability of winning
w / n
It took me 22 samples to get my own name into the top 5.
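Out of curiosity, here's a hedged little helper for that kind of check, assuming the users and winners objects from the code above are still lying around (draws_until_pick and its 1000-draw cap are my own invention):

# Count how many weighted draws it takes before a given username lands in the
# top 5. Relies on the users and winners objects defined earlier.
draws_until_pick <- function(username, max_draws = 1000) {
  eligible <- users %>% filter(is.na(title), !name %in% winners)
  for (i in seq_len(max_draws)) {
    picked <- sample_n(eligible, 5, weight = likes)
    if (username %in% picked$name) return(i)
  }
  NA_integer_  # never drawn within max_draws attempts
}
draws_until_pick("your_username")  # replace with your own username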
Plenty more fun to be had here. Let's snag all of the user info.
pacman::p_load(tidyverse, httr)

# Turn one page of directory_items into a tibble of user info
buildTibble <- function(users_raw) {
  tibble(
    id = users_raw %>% map_chr('id'),
    # id = users_raw %>% map_chr(c("user", 'id')),
    username = users_raw %>% map_chr(c("user", "username")),
    name = users_raw %>% map_chr(c("user", "name"), .default = NA),
    likes = users_raw %>% map_int("likes_received"),
    likes_given = users_raw %>% map_int("likes_given"),
    topics_entered = users_raw %>% map_int("topics_entered"),
    topic_count = users_raw %>% map_int('topic_count'),
    posts_read = users_raw %>% map_int('posts_read'),
    days_visited = users_raw %>% map_int('days_visited'),
    title = users_raw %>% map_chr(c("user", "title"), .default = NA),
    avatar_template = users_raw %>% map_chr(c('user', 'avatar_template'))
  )
}

# let's figure out the number of pages to parse.
base_url <- 'https://forum.posit.co'
page_path <- '/directory_items.json?period=weekly&order=posts_read&page=1'
url <- paste0(base_url, page_path)
req <- httr::GET(url)
stop_for_status(req)
con <- httr::content(req)
pages <- seq(0, con$total_rows_directory_items %/% 50)
# first page is zero, caught me starting at 1 on the first go round.

users <- map(pages, function(i) {
  Sys.sleep(3) # I'll pretend like I'm playing nice with the servers, at least in public ;)
  cat(i, '\n')
  base_url <- 'https://forum.posit.co'
  page_path <- paste0('/directory_items.json?period=weekly&order=posts_read&page=', i)
  url <- paste0(base_url, page_path)
  req <- httr::GET(url)
  stop_for_status(req)
  con <- httr::content(req)
  users_raw <- con$directory_items
  buildTibble(users_raw)
})

users <- plyr::ldply(users) %>%
  tbl_df()
Let's give a big round of applause to folks dropping in eight days a week!
users %>% filter(days_visited == 8) %>% pull(username) %>% sort()
[1] "Abram" "alistaire" "cderv" "cdr6934" "ConnorKirk" "dlsweet" "dylanjm" "EconomiCurtis" "edgararuiz" "emilyriederer"
[11] "Frank" "greg" "hoelk" "jessemaegan" "martin.R" "mfherman" "nick" "paul" "rensa" "taraas"
[21] "Tazinho" "terence" "thoughtfulnz" "veegpap"
How active are RStudio employees in the community?
users %>% filter(grepl('rstud', tolower(title))) %>% mutate(id = as.numeric(id)) %>% arrange(id)
# A tibble: 34 x 11
id username name likes likes_given topics_entered topic_count posts_read days_visited title
<dbl> <chr> <chr> <int> <int> <int> <int> <int> <int> <chr>
1 2 hadley Hadley 20 4 29 0 201 6 RStudio Employee
2 3 jennybryan Jenny Bryan 21 3 28 1 203 5 RStudio Employee
3 4 jimhester Jim Hester 5 3 20 0 196 3 RStudio Employee
4 6 Bill Bill Carney 11 3 18 0 115 4 RStudio Employee
5 7 RogerO Roger Oberg 0 0 0 0 0 0 RStudio Employee
How active am I in the community?
users %>% filter(grepl('jimmy', tolower(name)))
# A tibble: 1 x 11
id username name likes likes_given topics_entered topic_count posts_read days_visited title avatar_template
<chr> <chr> <chr> <int> <int> <int> <int> <int> <int> <chr> <chr>
1 985 jimmy Jimmy Glenn 0 0 0 0 0 0 <NA> /user_avatar/community.rstudio.com/jimmy/{size}/688_1.png
Curses, looks like I need to sign in on mobile!
Well, time to quit procrastinating and get back to work.
Cheers!
Jimmy
We're excited to announce the prize winners for the fourth week. The award is an RStudio t-shirt and all of the hex and RStudio stickers we can find. The winners for the week of October 13th are:
Name | likes |
---|---|
pssguy | 15 |
cdr6934 | 5 |
rpodcast | 5 |
Andrea | 4 |
timpe | 2 |
Congrats new winners! Just got my package and I'm really digging the master of the tidyverse sticker. Didn't know it was a thing until now.
Thanks!!!
This is awesome! The swag will be put to good use
Complete reprex
library(httr)
library(magrittr)

# http://memecaptain.com/gend_images/new?src=tWljmg
image_id <- "tWljmg"

body <- list(
  src_image_id = image_id,
  private = 'false',
  captions_attributes = list(
    list(
      text = "WHAT????",
      top_left_x_pct = 0.05,
      top_left_y_pct = 0,
      width_pct = 0.9,
      height_pct = 0.20
    ),
    list(
      text = "MORE R HEXSTICKERS!",
      top_left_x_pct = 0.05,
      top_left_y_pct = 0.75,
      width_pct = 0.9,
      height_pct = 0.25
    )
  )
)

res <- httr::POST("https://memecaptain.com/api/v3/gend_images.json", body = body, encode = "json")
parsed <- jsonlite::fromJSON(content(res, 'text'), simplifyVector = FALSE)

# grab the status url and parse that response for the finished image's url
new_img <- httr::GET(parsed$status_url) %>%
  content('text') %>%
  jsonlite::fromJSON(simplifyVector = FALSE) %>%
  .[["url"]]
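Not part of the original reprex, but to actually look at the result you could do something like this (assuming the render succeeded and new_img holds the image URL):

# Open the finished meme in the default browser
browseURL(new_img)
# ...or save a local copy (the file name and extension here are just a guess)
# download.file(new_img, "meme.jpg", mode = "wb")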
Why thank you! I look forward to displaying some RStudio swag around the office.
Week #5 winners. The award is an RStudio t-shirt and all of the hex and RStudio stickers we can find. The winners for the week of October 20th are:
Name | likes |
---|---|
cderv | 10 |
PirateGrunt | 4 |
mungojam | 3 |
rkahne | 3 |
DaveRGP | 2 |
Congrats to the new winners! I'm still trying to decide between my work and home laptops for my stickers, personally...
Also, since this thread has collected a lot of Discourse-scraping code, I'll point out that I updated my earlier code that scrapes the winners from this post. It turns out that the basic topic JSON response only includes the first 20 posts, so you have to make a second request with the post IDs to get all of the posts.
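For anyone reproducing that, here's a quick hedged sketch of the check (topic 1230 is this thread; the two lengths just demonstrate the cap):

library(httr)
# The topic JSON embeds only the first batch of posts, but post_stream$stream
# lists every post ID in the thread; feed those IDs to posts.json as above.
topic <- content(GET("https://forum.posit.co/t/1230.json"))
length(topic$post_stream$posts)   # capped at roughly 20
length(topic$post_stream$stream)  # all post IDs in the thread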
Awesome! Thanks so much! My birthday is next week, so I'll count this as my first gift
My birthday was last week, so thanks for remembering guys!
(Incidentally, this place is already building into a great community!)