Join dataframes

Hi, guys, I have two datasets, One with a nominal wage of Teachers and another data with nominal wages in the country and a transformation to real wage. I would like to create a column with real age for Teachers using the datasets of real wages next to my nominal wage for Teachers.
For example Data_Frame <- data.frame (

Nominal_wage = c(260, 320, 320,302, 300, 260,260,300,320,350,350,350),
Real_wage = c(269, 331, 331, 330,310, 269,269,310,331,362,362,362)
)

Print the data frame

Data_Frame
Data_Frame2 <- data.frame (

Teacher_Nonimal_wage = c(302, 260, 300,302, 300, 260)

)
Data_Frame2
So in my data frame be like

Data_Frame3 <- data.frame (

Teacher_Nonimal_wage = c(302, 260, 300,302, 300, 260),
Teacher_Real_wage = c(310, 269, 310, 330,310, 269)

)
Data_Frame3

This works but it has the weakness that the real-Wage column in Data-Frame3 will bee NA if the Teacher_Nominal_Wage is not listed in Data_Frame.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
Data_Frame <- data.frame (
  
  Nominal_wage = c(260, 320, 320,302, 300, 260,260,300,320,350,350,350),
  Real_wage = c(269, 331, 331, 330,310, 269,269,310,331,362,362,362)
)
Data_Frame
#>    Nominal_wage Real_wage
#> 1           260       269
#> 2           320       331
#> 3           320       331
#> 4           302       330
#> 5           300       310
#> 6           260       269
#> 7           260       269
#> 8           300       310
#> 9           320       331
#> 10          350       362
#> 11          350       362
#> 12          350       362
Data_Frame <- distinct(Data_Frame)
Data_Frame
#>   Nominal_wage Real_wage
#> 1          260       269
#> 2          320       331
#> 3          302       330
#> 4          300       310
#> 5          350       362
Data_Frame2 <- data.frame (
  
  Teacher_Nonimal_wage = c(302, 260, 300,302, 300, 260)
  
)
Data_Frame3 <- left_join(Data_Frame2, Data_Frame, 
                         by = c("Teacher_Nonimal_wage" = "Nominal_wage"))
Data_Frame3
#>   Teacher_Nonimal_wage Real_wage
#> 1                  302       330
#> 2                  260       269
#> 3                  300       310
#> 4                  302       330
#> 5                  300       310
#> 6                  260       269

Created on 2022-06-22 by the reprex package (v2.0.1)

Hi there,

You can use the dplyr package for this.

library(dplyr)

Data_Frame <- data.frame (
  Nominal_wage = c(260, 320, 320,302, 300, 260,260,300,320,350,350,350),
  Real_wage = c(269, 331, 331, 330,310, 269,269,310,331,362,362,362)
)

Data_Frame2 <- data.frame (
  Teacher_Nonimal_wage = c(302, 260, 300,302, 300, 260)
)

Data_Frame2 %>% distinct() %>% left_join(
  Data_Frame, by = c("Teacher_Nonimal_wage" =  "Nominal_wage")
)
#>   Teacher_Nonimal_wage Real_wage
#> 1                  302       330
#> 2                  260       269
#> 3                  260       269
#> 4                  260       269
#> 5                  300       310
#> 6                  300       310

Created on 2022-06-22 by the reprex package (v2.0.1)

Not though that your data is not unique, so you will have multiple different Real_wage for the same Teacher_Nonimal_wage.

PJ

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.