I have a problem creating ride_length in capstone project

It looks like all_trips_v2 is only the first six rows of your data. Here is the structure() code that reproduces the first six rows of your data, taken from earlier in this thread, and a summary function that reproduces the output that you got, at least in the started_at and ended_at columns. Do you still have the full data set stored in a data frame?

# Rebuild the first six rows of the trip data as a tibble-classed data frame.
# started_at and ended_at start out as character strings here; they are
# converted to date-times further down.
all_trips <- structure(list(ride_id = c("21742443", "21742444", "21742445",
                                        "21742446", "21742447", "21742448"), 
                            started_at = c("1/1/2019 0:04", "1/1/2019 0:08", "1/1/2019 0:13", 
                                           "1/1/2019 0:13", "1/1/2019 0:14","1/1/2019 0:15"), 
                            ended_at = c("1/1/2019 0:11", "1/1/2019 0:15","1/1/2019 0:27", "1/1/2019 0:43", 
                                         "1/1/2019 0:20", "1/1/2019 0:19"), 
                            rideable_type = c("2167", "4386", "1524", "252", "1170", "2437"), 
                            start_station_id = c(199, 44, 15, 123, 173, 98), 
                            start_station_name = c("Wabash Ave & Grand Ave","State St & Randolph St", 
                                                   "Racine Ave & 18th St", "California Ave & Milwaukee Ave",
                                                   "Mies van der Rohe Way & Chicago Ave", "LaSalle St & Washington St"), 
                            end_station_id = c(84, 624, 644, 176, 35, 49), 
                            end_station_name = c("Milwaukee Ave & Grand Ave","Dearborn St & Van Buren St ()", 
                                                 "Western Ave & Fillmore St ()","Clark St & Elm St", "Streeter Dr & Grand Ave", 
                                                 "Dearborn St & Monroe St"), 
                            member_casual = c("member", "member", "member", "member","member", "member"), 
                            # -719143 days from the Date origin prints as 0001-01-20 in the
                            # summary below, which is why day is "20" and year is "1".
                            date = structure(c(-719143, -719143, -719143,-719143, -719143, -719143), class = "Date"), 
                            month = c("01","01", "01", "01", "01", "01"), 
                            day = c("20", "20", "20", "20","20", "20"), 
                            year = c("1", "1", "1", "1", "1", "1"), 
                            day_of_week = c("Saturday","Saturday", "Saturday", "Saturday", "Saturday", "Saturday")), 
                       # Compact form for six automatic (integer) row names.
                       row.names = c(NA,-6L), 
                       class = c("tbl_df", "tbl", "data.frame"))

# Parse the "month/day/year hour:minute" strings into POSIXct date-times so
# that differences between ended_at and started_at can be computed.
all_trips$started_at <- as.POSIXct(all_trips$started_at, format = "%m/%d/%Y %H:%M") 
all_trips$ended_at <- as.POSIXct(all_trips$ended_at, format = "%m/%d/%Y %H:%M") 

# Per-column overview; the date-time columns now report Min/Median/Max times.
summary(all_trips)
#>    ride_id            started_at                     ended_at                  
#>  Length:6           Min.   :2019-01-01 00:04:00   Min.   :2019-01-01 00:11:00  
#>  Class :character   1st Qu.:2019-01-01 00:09:15   1st Qu.:2019-01-01 00:16:00  
#>  Mode  :character   Median :2019-01-01 00:13:00   Median :2019-01-01 00:19:30  
#>                     Mean   :2019-01-01 00:11:10   Mean   :2019-01-01 00:22:30  
#>                     3rd Qu.:2019-01-01 00:13:45   3rd Qu.:2019-01-01 00:25:15  
#>                     Max.   :2019-01-01 00:15:00   Max.   :2019-01-01 00:43:00  
#>  rideable_type      start_station_id start_station_name end_station_id  
#>  Length:6           Min.   : 15.0    Length:6           Min.   : 35.00  
#>  Class :character   1st Qu.: 57.5    Class :character   1st Qu.: 57.75  
#>  Mode  :character   Median :110.5    Mode  :character   Median :130.00  
#>                     Mean   :108.7                       Mean   :268.67  
#>                     3rd Qu.:160.5                       3rd Qu.:512.00  
#>                     Max.   :199.0                       Max.   :644.00  
#>  end_station_name   member_casual           date               month          
#>  Length:6           Length:6           Min.   :0001-01-20   Length:6          
#>  Class :character   Class :character   1st Qu.:0001-01-20   Class :character  
#>  Mode  :character   Mode  :character   Median :0001-01-20   Mode  :character  
#>                                        Mean   :0001-01-20                     
#>                                        3rd Qu.:0001-01-20                     
#>                                        Max.   :0001-01-20                     
#>      day                year           day_of_week       
#>  Length:6           Length:6           Length:6          
#>  Class :character   Class :character   Class :character  
#>  Mode  :character   Mode  :character   Mode  :character  
#>                                                          
#>                                                          
#> 

Created on 2024-03-03 with reprex v2.0.2

Yes, I have, but I need to redo it to see the whole data frame.

I have the full data frame stored in my previous work, but I created it again and tried the new code you told me to use:

all_trips <- structure(list(ride_id = c("21742443", "21742444", "21742445",
                                        "21742446", "21742447", "21742448"), 
                            started_at = c("1/1/2019 0:04", "1/1/2019 0:08", "1/1/2019 0:13", 
                                           "1/1/2019 0:13", "1/1/2019 0:14","1/1/2019 0:15"), 
                            ended_at = c("1/1/2019 0:11", "1/1/2019 0:15","1/1/2019 0:27", "1/1/2019 0:43", 
                                         "1/1/2019 0:20", "1/1/2019 0:19"), 
                            rideable_type = c("2167", "4386", "1524", "252", "1170", "2437"), 
                            start_station_id = c(199, 44, 15, 123, 173, 98), 
                            start_station_name = c("Wabash Ave & Grand Ave","State St & Randolph St", 
                                                   "Racine Ave & 18th St", "California Ave & Milwaukee Ave",
                                                   "Mies van der Rohe Way & Chicago Ave", "LaSalle St & Washington St"), 
                            end_station_id = c(84, 624, 644, 176, 35, 49), 
                            end_station_name = c("Milwaukee Ave & Grand Ave","Dearborn St & Van Buren St ()", 
                                                 "Western Ave & Fillmore St ()","Clark St & Elm St", "Streeter Dr & Grand Ave", 
                                                 "Dearborn St & Monroe St"), 
                            member_casual = c("member", "member", "member", "member","member", "member"), 
                            date = structure(c(-719143, -719143, -719143,-719143, -719143, -719143), class = "Date"), 
                            month = c("01","01", "01", "01", "01", "01"), 
                            day = c("20", "20", "20", "20","20", "20"), 
                            year = c("1", "1", "1", "1", "1", "1"), 
                            day_of_week = c("Saturday","Saturday", "Saturday", "Saturday", "Saturday", "Saturday")), 
                       row.names = c(NA,-6L), 
                       class = c("tbl_df", "tbl", "data.frame"))

all_trips$started_at <- as.POSIXct(all_trips$started_at, format = "%m/%d/%Y %H:%M") 
all_trips$ended_at <- as.POSIXct(all_trips$ended_at, format = "%m/%d/%Y %H:%M") 

summary(all_trips)

Then I had some issues when I continued the process:

ll_trips <- structure(list(ride_id = c("21742443", "21742444", "21742445",
+                                        "21742446", "21742447", "21742448"), 
+                            started_at = c("1/1/2019 0:04", "1/1/2019 0:08", "1/1/2019 0:13", 
+                                           "1/1/2019 0:13", "1/1/2019 0:14","1/1/2019 0:15"), 
+                            ended_at = c("1/1/2019 0:11", "1/1/2019 0:15","1/1/2019 0:27", "1/1/2019 0:43", 
+                                         "1/1/2019 0:20", "1/1/2019 0:19"), 
+                            rideable_type = c("2167", "4386", "1524", "252", "1170", "2437"), 
+                            start_station_id = c(199, 44, 15, 123, 173, 98), 
+                            start_station_name = c("Wabash Ave & Grand Ave","State St & Randolph St", 
+                                                   "Racine Ave & 18th St", "California Ave & Milwaukee Ave",
+                                                   "Mies van der Rohe Way & Chicago Ave", "LaSalle St & Washington St"), 
+                            end_station_id = c(84, 624, 644, 176, 35, 49), 
+                            end_station_name = c("Milwaukee Ave & Grand Ave","Dearborn St & Van Buren St ()", 
+                                                 "Western Ave & Fillmore St ()","Clark St & Elm St", "Streeter Dr & Grand Ave", 
+                                                 "Dearborn St & Monroe St"), 
+                            member_casual = c("member", "member", "member", "member","member", "member"), 
+                            date = structure(c(-719143, -719143, -719143,-719143, -719143, -719143), class = "Date"), 
+                            month = c("01","01", "01", "01", "01", "01"), 
+                            day = c("20", "20", "20", "20","20", "20"), 
+                            year = c("1", "1", "1", "1", "1", "1"), 
+                            day_of_week = c("Saturday","Saturday", "Saturday", "Saturday", "Saturday", "Saturday")), 
+                       row.names = c(NA,-6L), 
+                       class = c("tbl_df", "tbl", "data.frame"))
> all_trips$started_at <- as.POSIXct(all_trips$started_at, format = "%m/%d/%Y %H:%M") 
> all_trips$ended_at <- as.POSIXct(all_trips$ended_at, format = "%m/%d/%Y %H:%M") 
> summary(all_trips)
   ride_id            started_at                        ended_at                     
 Length:791956      Min.   :2019-01-01 00:04:00.00   Min.   :2019-01-01 00:11:00.00  
 Class :character   1st Qu.:2019-01-23 05:26:00.00   1st Qu.:2019-01-23 05:49:00.00  
 Mode  :character   Median :2019-02-25 07:52:00.00   Median :2019-02-25 08:03:00.00  
                    Mean   :2019-02-19 21:42:45.92   Mean   :2019-02-19 21:59:42.47  
                    3rd Qu.:2019-03-17 16:52:00.00   3rd Qu.:2019-03-17 17:16:00.00  
                    Max.   :2019-03-31 23:53:00.00   Max.   :2019-06-17 16:04:00.00  
                    NA's   :426887                   NA's   :426887                  
 rideable_type      start_station_id start_station_name end_station_id  end_station_name  
 Length:791956      Min.   :  2.0    Length:791956      Min.   :  2.0   Length:791956     
 Class :character   1st Qu.: 77.0    Class :character   1st Qu.: 77.0   Class :character  
 Mode  :character   Median :174.0    Mode  :character   Median :174.0   Mode  :character  
                    Mean   :204.4                       Mean   :204.4                     
                    3rd Qu.:291.0                       3rd Qu.:291.0                     
                    Max.   :675.0                       Max.   :675.0                     
                                                        NA's   :1                         
 member_casual     
 Length:791956     
 Class :character  
 Mode  :character  
                   
                   
                   
                   
> View(ll_trips)
> View(all_trips)
> all_trips$ride_length <- difftime(all_trips$ended_at,all_trips$started_at)
> str(all_trips)
tibble [791,956 × 10] (S3: tbl_df/tbl/data.frame)
 $ ride_id           : chr [1:791956] "21742443" "21742444" "21742445" "21742446" ...
 $ started_at        : POSIXct[1:791956], format: "2019-01-01 00:04:00" "2019-01-01 00:08:00" "2019-01-01 00:13:00" ...
 $ ended_at          : POSIXct[1:791956], format: "2019-01-01 00:11:00" "2019-01-01 00:15:00" "2019-01-01 00:27:00" ...
 $ rideable_type     : chr [1:791956] "2167" "4386" "1524" "252" ...
 $ start_station_id  : num [1:791956] 199 44 15 123 173 98 98 211 150 268 ...
 $ start_station_name: chr [1:791956] "Wabash Ave & Grand Ave" "State St & Randolph St" "Racine Ave & 18th St" "California Ave & Milwaukee Ave" ...
 $ end_station_id    : num [1:791956] 84 624 644 176 35 49 49 142 148 141 ...
 $ end_station_name  : chr [1:791956] "Milwaukee Ave & Grand Ave" "Dearborn St & Van Buren St (*)" "Western Ave & Fillmore St (*)" "Clark St & Elm St" ...
 $ member_casual     : chr [1:791956] "member" "member" "member" "member" ...
 $ ride_length       : 'difftime' num [1:791956] 7 7 14 30 ...
  ..- attr(*, "units")= chr "mins"
> is.factor(all_trips$ride_length)
[1] FALSE
> all_trips$ride_length <- as.numeric(as.character(all_trips$ride_length))
> is.numeric(all_trips$ride_length)
[1] TRUE
> all_trips_v2 <- all_trips[!(all_trips$start_station_name == "HQ QR" | all_trips$ride_length<0),]
> View(all_trips_v2)
> mean(all_trips_v2$ride_length)
[1] NA
> median(all_trips_v2$ride_length) 
[1] NA
> max(all_trips_v2$ride_length) 
[1] NA
> min(all_trips_v2$ride_length) 
[1] NA
> summary(all_trips_v2$ride_length)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
     1.0      5.0      9.0     16.9     14.0 177200.0   423120 
> aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = mean)
  all_trips_v2$member_casual all_trips_v2$ride_length
1                     casual                 61.94288
2                     member                 13.89392
> aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = median)
  all_trips_v2$member_casual all_trips_v2$ride_length
1                     casual                       23
2                     member                        8
> aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = max)
  all_trips_v2$member_casual all_trips_v2$ride_length
1                     casual                   177200
2                     member                   101607
> aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = min)
  all_trips_v2$member_casual all_trips_v2$ride_length
1                     casual                        1
2                     member                        1
> aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)
Error in model.frame.default(formula = all_trips_v2$ride_length ~ all_trips_v2$member_casual +  : 
  invalid type (NULL) for variable 'all_trips_v2$day_of_week'
In addition: Warning message:
Unknown or uninitialised column: `day_of_week`. 

I do not recommend that you use the code from my last post. That was just an example that illustrated why you were getting the unexpected plot in your earlier post.

The reason you are getting an error in the last line of your most recent code is that all_trips_v2 does not have a column named day_of_week. You can see this in the result of the str() function you run on all_trips a few lines before making all_trips_v2. You need to make the day_of_week column.

> str(all_trips)
tibble [791,956 × 10] (S3: tbl_df/tbl/data.frame)
 $ ride_id           : chr [1:791956] "21742443" "21742444" "21742445" "21742446" ...
 $ started_at        : POSIXct[1:791956], format: "2019-01-01 00:04:00" "2019-01-01 00:08:00" "2019-01-01 00:13:00" ...
 $ ended_at          : POSIXct[1:791956], format: "2019-01-01 00:11:00" "2019-01-01 00:15:00" "2019-01-01 00:27:00" ...
 $ rideable_type     : chr [1:791956] "2167" "4386" "1524" "252" ...
 $ start_station_id  : num [1:791956] 199 44 15 123 173 98 98 211 150 268 ...
 $ start_station_name: chr [1:791956] "Wabash Ave & Grand Ave" "State St & Randolph St" "Racine Ave & 18th St" "California Ave & Milwaukee Ave" ...
 $ end_station_id    : num [1:791956] 84 624 644 176 35 49 49 142 148 141 ...
 $ end_station_name  : chr [1:791956] "Milwaukee Ave & Grand Ave" "Dearborn St & Van Buren St (*)" "Western Ave & Fillmore St (*)" "Clark St & Elm St" ...
 $ member_casual     : chr [1:791956] "member" "member" "member" "member" ...
 $ ride_length       : 'difftime' num [1:791956] 7 7 14 30 ...
  ..- attr(*, "units")= chr "mins"

The very large values you are getting for the maximum value of ride_length suggest that some of the data may be bad. 177200 minutes is about 120 days.

So what should we do? Should we create all_trips with 6 or more columns?

I think you have the pieces of code you need to calculate the statistics.

  1. Read in the data for all_trips
  2. Change started_at and ended_at into date-time values
  3. Append all of the columns you want
  4. Filter out the rows you want to make all_trips_v2
  5. Calculate the summary statistics.

Yes, I am doing it now. I am not creating only 6 rows or columns but 25 columns and rows; I think that is sufficient for the ggplot to appear.

Hi Sir, good morning. I created a code chunk here for this:
'''
all_trips <- structure(list(ride_id = c("21742443", "21742444", "21742445", "21742446", "21742447", "21742448", "21742449”, “21742450”, “21742451”, “21742452”, “21742453”, “21742454”, ”21742455”, “21742456”, “21742457”, “21742458”, “21742459”, “21742460”, “21742461”, “21742463”, “21742464”, “21742465”, “21742466”, “21742467”, “21742468” ),
started_at = c("1/1/2019 0:04", "1/1/2019 0:08", "1/1/2019 0:13", "1/1/2019 0:13", "1/1/2019 0:14","1/1/2019 0:15", “1/1/2019 0:16”, “1/1/2019 0:18”, “1/1/2019 0:18”, “1/1/2019 0:19”, “1/1/2019 0:20”, “1/1/2019 0:21”, “1/1/2019 0:23”, “1/1/2019 0:23”, “1/1/2019 0:23”, “1/1/2019 0:24”, “1/1/2019 0:24”, “1/1/2019 0:24”, “1/1/2019 0:25”, “1/1/2019 0:29”, “1/1/2019 0:29”, “1/1/2019 0:29”, “1/1/2019 0:29”, “1/1/2019 0:30”, “1/1/2019 0:33” ),
ended_at = c("1/1/2019 0:11", "1/1/2019 0:15","1/1/2019 0:27", "1/1/2019 0:43", "1/1/2019 0:20", "1/1/2019 0:19", “1/1/2019 0:19”, “1/1/2019 0:20”, “1/1/2019 0:47”, “1/1/2019 0:24”, “1/1/2019 0:35”, “1/1/2019 0:32”, “1/1/2019 0:33”, “1/1/2019 0:33”, “1/1/2019 0:39”, “1/1/2019 0:39”, “1/1/2019 0:31”, “1/1/2019 0:47”, “1/1/2019 0:27”, “1/1/2019 1:08”, “1/1/2019 0:45”, “1/1/2019 1:07”, “1/1/2019 0:49”, “1/1/2019 0:38”, “1/1/2019 0:50” ),
rideable_type = c("2167", "4386", "1524", "252", "1170", "2437", “2708”, “2796”, “6205”, “3939”, “6243”, “6300”, “3029”, “84”, “5019”, “5526”, “3373”, “5777”, “3940”, “3914”, “140”, “3355”, “5026”, “1998”, “2630” ),
start_station_id = c(199, 44, 15, 123, 173, 98,
98, 211, 150, 268, 299, 204, 90, 90, 289, 289, 152, 268, 35, 35, 47, 35, 85, 289, 13 ),
start_station_name = c("Wabash Ave & Grand Ave","State St & Randolph St", "Racine Ave & 18th St", "California Ave & Milwaukee Ave", "Mies van der Rohe Way & Chicago Ave", "LaSalle St & Washington St", “LaSalle St & Washington St”, “St. Clair St & Erie St”, “Fort Dearborn Dr & 31st St”, “Lake Shore Dr & North Blvd”, “Halsted St & Roscoe St”, “Prairie Ave & Garfield Blvd”, “Millennium Park”, “Millennium Park”, “Wells St & Concord Ln”, “Wells St & Concord Ln”, “Lincoln Ave & Diversey Pkwy”, “Lake Shore Dr & North Blvd”, “Streeter Dr & Grand Ave”, “Streeter Dr & Grand Ave”, “State St & Kinzie St”, “Streeter Dr & Grand Ave”, “Michigan Ave & Oak St”, “Wells St & Concord Ln”, “Wilton Ave & Diversey Pkwy” ),
end_station_id = c(84, 624, 644, 176, 35, 49, 49, 142, 148, 141, 295, 420, 255, 255, 324, 324, 166, 319, 35, 39, 111, 39, 329, 313, 61 ),
end_station_name = c("Milwaukee Ave & Grand Ave","Dearborn St & Van Buren St ()",
"Western Ave & Fillmore St ()","Clark St & Elm St", "Streeter Dr & Grand Ave",
"Dearborn St & Monroe St", “Dearborn St & Monroe St”, “McClurg Ct & Erie St”, “State St & 33rd St”, “Clark St & Lincoln Ave”, “Broadway & Argyle St”, “Ellis Ave & 55th St”, “Indiana Ave & Roosevelt Rd”, “Indiana Ave & Roosevelt Rd”, “Stockton Dr & Wrightwood Ave”, “Stockton Dr & Wrightwood Ave”, “Ashland Ave & Wrightwood Ave”, “Greenview Ave & Diversey Pkwy”, “Streeter Dr & Grand Ave”, “Wabash Ave & Adams St”, “Sedgwick St & Huron St”, “Wabash Ave & Adams St”, “Lake Shore Dr & Diversey Pkwy”, “Lakeview Ave & Fullerton Pkwy”, “Wood St & Milwaukee Ave” ),
member_casual = c("member", "member", "member", "member","member", "member","member","member","member", "member","member","member","member","member","member","member","member","member","member","member","member","member","member","member","member",),
date = structure(c(-719143, -719143, -719143,-719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, ), class = "Date"),
month = c("01","01", "01", "01", "01", "01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01",),
day = c("20", "20", "20", "20","20", "20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20",),
year = c("1", "1", "1", "1", "1", "1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1",),
day_of_week = c("Saturday","Saturday", "Saturday", "Saturday", "Saturday", "Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday",)),
row.names = c(NA,-25L),
class = c("tbl_df", "tbl", "data.frame"))
'''
The code is correct, but I have an issue at the end of my code. The error says:
'''
class = c("tbl_df", "tbl", "data.frame"))
Error: unexpected ')' in " class = c("tbl_df", "tbl", "data.frame"))"

'''
That is what the error says. Can you help a little, Sir? I think I can finish this now. Thank you.

I fixed a couple of things in your code.

  1. All of the columns from member_casual on have an extra comma between the last quotation mark and the closing parenthesis, like this ,"member",). It should be ,"member")
  2. In several columns the double quotes become curly quotes. For example, "21742449”, “21742450”. Notice the difference between " and ” or “.

I also see that your date column has the value -719143. That is January 20, 0001. That is why your day column has the value "20" and your year column has the value "1". You need to change how you are making the date column.

# Rebuild a 25-row sample of the trip data as a tibble-classed data frame.
# Every column must have exactly 25 elements to match row.names = c(NA, -25L);
# structure() does not check this, so a short column would silently produce a
# malformed data frame.
# NOTE: the date column still holds -719143 (0001-01-20), so day/year/
# day_of_week are placeholders until the date column is rebuilt from started_at.
all_trips <- structure(list(ride_id = c("21742443", "21742444", "21742445", "21742446", "21742447", "21742448", "21742449", "21742450", "21742451", "21742452", "21742453", "21742454", "21742455", "21742456", "21742457", "21742458", "21742459", "21742460", "21742461", "21742463", "21742464", "21742465", "21742466", "21742467", "21742468" ),
started_at = c("1/1/2019 0:04", "1/1/2019 0:08", "1/1/2019 0:13", "1/1/2019 0:13", "1/1/2019 0:14","1/1/2019 0:15", "1/1/2019 0:16", "1/1/2019 0:18", "1/1/2019 0:18", "1/1/2019 0:19", "1/1/2019 0:20", "1/1/2019 0:21", "1/1/2019 0:23", "1/1/2019 0:23", "1/1/2019 0:23", "1/1/2019 0:24", "1/1/2019 0:24", "1/1/2019 0:24", "1/1/2019 0:25", "1/1/2019 0:29", "1/1/2019 0:29", "1/1/2019 0:29", "1/1/2019 0:29", "1/1/2019 0:30", "1/1/2019 0:33" ),
ended_at = c("1/1/2019 0:11", "1/1/2019 0:15","1/1/2019 0:27", "1/1/2019 0:43", "1/1/2019 0:20", "1/1/2019 0:19", "1/1/2019 0:19", "1/1/2019 0:20", "1/1/2019 0:47", "1/1/2019 0:24", "1/1/2019 0:35", "1/1/2019 0:32", "1/1/2019 0:33", "1/1/2019 0:33", "1/1/2019 0:39", "1/1/2019 0:39", "1/1/2019 0:31", "1/1/2019 0:47", "1/1/2019 0:27", "1/1/2019 1:08", "1/1/2019 0:45", "1/1/2019 1:07", "1/1/2019 0:49", "1/1/2019 0:38", "1/1/2019 0:50" ),
rideable_type = c("2167", "4386", "1524", "252", "1170", "2437", "2708", "2796", "6205", "3939", "6243", "6300", "3029", "84", "5019", "5526", "3373", "5777", "3940", "3914", "140", "3355", "5026", "1998", "2630" ),
start_station_id = c(199, 44, 15, 123, 173, 98,
98, 211, 150, 268, 299, 204, 90, 90, 289, 289, 152, 268, 35, 35, 47, 35, 85, 289, 13 ),
start_station_name = c("Wabash Ave & Grand Ave","State St & Randolph St", "Racine Ave & 18th St", "California Ave & Milwaukee Ave", "Mies van der Rohe Way & Chicago Ave", "LaSalle St & Washington St", "LaSalle St & Washington St", "St. Clair St & Erie St", "Fort Dearborn Dr & 31st St", "Lake Shore Dr & North Blvd", "Halsted St & Roscoe St", "Prairie Ave & Garfield Blvd", "Millennium Park", "Millennium Park", "Wells St & Concord Ln", "Wells St & Concord Ln", "Lincoln Ave & Diversey Pkwy", "Lake Shore Dr & North Blvd", "Streeter Dr & Grand Ave", "Streeter Dr & Grand Ave", "State St & Kinzie St", "Streeter Dr & Grand Ave", "Michigan Ave & Oak St", "Wells St & Concord Ln", "Wilton Ave & Diversey Pkwy" ),
end_station_id = c(84, 624, 644, 176, 35, 49, 49, 142, 148, 141, 295, 420, 255, 255, 324, 324, 166, 319, 35, 39, 111, 39, 329, 313, 61 ),
end_station_name = c("Milwaukee Ave & Grand Ave","Dearborn St & Van Buren St ()",
"Western Ave & Fillmore St ()","Clark St & Elm St", "Streeter Dr & Grand Ave",
"Dearborn St & Monroe St", "Dearborn St & Monroe St", "McClurg Ct & Erie St", "State St & 33rd St", "Clark St & Lincoln Ave", "Broadway & Argyle St", "Ellis Ave & 55th St", "Indiana Ave & Roosevelt Rd", "Indiana Ave & Roosevelt Rd", "Stockton Dr & Wrightwood Ave", "Stockton Dr & Wrightwood Ave", "Ashland Ave & Wrightwood Ave", "Greenview Ave & Diversey Pkwy", "Streeter Dr & Grand Ave", "Wabash Ave & Adams St", "Sedgwick St & Huron St", "Wabash Ave & Adams St", "Lake Shore Dr & Diversey Pkwy", "Lakeview Ave & Fullerton Pkwy", "Wood St & Milwaukee Ave" ),
member_casual = c("member", "member", "member", "member","member", "member","member","member","member", "member","member","member","member","member","member","member","member","member","member","member","member","member","member","member","member"),
date = structure(c(-719143, -719143, -719143,-719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143, -719143 ), class = "Date"),
month = c("01","01", "01", "01", "01", "01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01","01"),
day = c("20", "20", "20", "20","20", "20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20","20"),
# One value per row; generated with rep() because the hand-typed vector was
# one element short of the 25 rows declared below.
year = rep("1", 25L),
day_of_week = c("Saturday","Saturday", "Saturday", "Saturday", "Saturday", "Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday","Saturday")),
row.names = c(NA,-25L),
class = c("tbl_df", "tbl", "data.frame"))
1 Like

It works, Sir. My problem now is the ggplot again: we edited all the code, but the result is the same and I get a blank plot.

This is the code I wrote, the same ggplot as earlier:
'''
# Count rides and compute the average ride length for each rider type and
# weekday, then draw a side-by-side (dodged) bar chart of the ride counts.
all_trips_v2 %>%
  # Derive a labelled weekday from the ride start time.
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(number_of_rides = n(),
            average_duration = mean(ride_length)) %>%
  arrange(member_casual, weekday) %>%
  ggplot(aes(x = weekday, y = number_of_rides, fill = member_casual)) +
  geom_col(position = "dodge")
    summarise() has grouped output by 'member_casual'. You can override using the .groups
    argument.
    '''
    I think the problem now is the plot itself.

I know the problem. I think the problem is the day of the week: it is only Tuesday in my data frame, so the plot is only listing Tuesday rides.

So I made a new, shorter one. Here it is:
'''
all_trips <- structure(list(ride_id = c("21742460", " 21761461", "21749409"),
started_at = c("1/1/2019 0:24"," 1/4/2019 17:24", "1/3/2019 6:36" ),
ended_at = c("1/1/2019 0:47, " 1/4/2019 17:35", "1/3/2019 6:40" ),
rideable_type = c("5777", " 5881", "3545"),
start_station_id = c(268, 317, 164),
start_station_name = c("Lake Shore Dr & North Blvd"," Wood St & Taylor St", "Franklin St & Lake St" ),
end_station_id = c(319, 208, 52),
end_station_name = c("Greenview Ave & Diversey Pkwy", " Ashland Ave & 21st St", "Michigan Ave & Lake St"),
member_casual = c("casual", "member", "member"),
date = structure(c(-719143, -719143, -719143 ), class = "Date"),
month = c("01","01", "01"),
day = c("20", "20", "20 ),
year = c("1", "1", "1" ),
day_of_week = c("Tuesday", "Wednesday","Thursday"),
row.names = c(NA,-3L),
class = c("tbl_df", "tbl", "data.frame"))
'''
It has the same problem as my first code:
'''
class = c("tbl_df", "tbl", "data.frame"))
Error: unexpected ')' in "class = c("tbl_df", "tbl", "data.frame"))"
'''

I shorten the code so there is 3 columns only and easy to make ggplots

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.