Dear alexgold,
Thank you for looking into this.
Below is the entire .Rmd file.
It runs fine if I simply step through it chunk by chunk. It produces the histograms both via {python plot_histograms} and via {r r_ggpplot_histograms} chunks.
It also runs fine if I comment out the {python plot_histograms} chunk and then knit the .Rmd file. In this case it produces the histograms via {r r_ggpplot_histograms} chunk.
But if I try to knit the .Rmd file without commenting out the {python plot_histograms} chunk, then it gets stuck on running that chunk and eventually pops up an error message about Rterm application (failed to start).
Studiosa
---
title: "Chapter 2 ML A Geron book"
output:
  html_document:
    df_print: paged
editor_options:
  chunk_output_type: inline
---
Load and histogram housing data chapt 2
Load the reticulate library after installing it (`install.packages("reticulate")`).
{r setup, include=TRUE}
# Chunk defaults: echo the code and keep knitting when a chunk errors.
knitr::opts_chunk$set(echo = TRUE, error = TRUE)
library(knitr)
library(reticulate)
# getwd()
# Record and print the time at which this run started.
temp_start_systime <- Sys.time()
temp_start_systime
# install.packages("reprex")
# reprex::reprex() is an interactive helper for building forum examples; it is
# disabled here so that it does not run every time the document is knit.
# reprex::reprex()
# (forum editor placeholder, not R code) type or paste code here
# Select the conda environment named "r-reticulate" for the Python chunks.
use_condaenv("r-reticulate")
cat(" about to do py_discover_config()")
# Print the Python configuration that reticulate discovers.
py_discover_config()
Python chunks can be used in Rmd files (https://rstudio.github.io/reticulate/articles/r_markdown.html)
# Standard-library and third-party modules used by the Python chunks.
import os
print("print (done import os) do os.getcwd()")
print(os.getcwd() )
import tarfile
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` (needed for urlretrieve) has been loaded.
import urllib.request
import pandas as pd
print("done import_os_tarfile_urllib_packages")
import numpy as np
# NOTE(review): if knitting stalls at plt.show(), selecting a non-interactive
# backend (matplotlib.use("Agg")) before this import may help -- confirm on
# the affected machine.
import matplotlib.pyplot as plt
# Remote location of the housing archive and the local directory it goes into.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_URL = f"{DOWNLOAD_ROOT}datasets/housing/housing.tgz"
HOUSING_PATH = os.path.join("datasets", "housing")
# Show each setting followed by its type (URL first, then the local path).
for setting in (HOUSING_URL, HOUSING_PATH):
    print(setting)
    print(type(setting))
# exist_ok=True is used on purpose: with exist_ok=False a second run failed
# with FileExistsError ([WinError 183]) because 'datasets\\housing' was
# already there. An existing directory and the files in it are left untouched.
os.makedirs(HOUSING_PATH, exist_ok=True)
print("\n os.getcwd() \n")
# print(os.getcwd())
# Download the archive to <HOUSING_PATH>/housing.tgz.
tgz_path = os.path.join(HOUSING_PATH, "housing.tgz")
# print( "\n tgz_path\n",tgz_path)
urllib.request.urlretrieve(HOUSING_URL, tgz_path)
Unpack data via python
# Extract the downloaded archive into HOUSING_PATH. The context manager closes
# the archive even if extraction raises.
# NOTE(review): the archive is fetched from a remote URL; on Python versions
# that support extraction filters, consider extractall(..., filter="data").
with tarfile.open(tgz_path) as housing_tgz:
    housing_tgz.extractall(path=HOUSING_PATH)
Load data via python
# Read the extracted CSV into a pandas DataFrame.
csv_path = os.path.join(HOUSING_PATH, "housing.csv")
print("csv_path", csv_path)
housing = pd.read_csv(csv_path)
# First rows, dimensions, and per-column summary of the table.
housing.head(6)
housing.shape
housing.info()
# Frequency of each ocean_proximity value.
print("\n housing[ ocean_proximity ].value_counts() \n")
housing[ "ocean_proximity" ].value_counts()
# Descriptive statistics: whole table, then two individual columns.
print("\n housing.describe() \n")
housing.describe()
print("\n housing[longitude].describe() \n")
housing["longitude"].describe()
print("\n housing[ocean_proximity].describe() \n")
housing["ocean_proximity"].describe()
# Try to do histograms in python
# save_fig() is not defined anywhere in this file (presumably a helper from
# the book's notebook -- confirm), so calling it raises NameError. Each figure
# is written with plt.savefig() instead, before plt.show() is called.
housing.hist(bins=50, figsize=(20,15))
plt.savefig("attribute_histogram_plots_housing.png")
plt.show()
housing.hist(bins=50, figsize=(10,15), color="red")
plt.savefig("attribute_histogram_plots_housing_red.png")
plt.show()
housing.hist(bins=50, figsize=(20,10), color="green")
plt.savefig("attribute_histogram_plots_housing_green.png")
plt.show()
# Runs fine when run one chunk at a time, but R gets stuck when the .Rmd is
# knit, so this chunk may have to be commented out for now.
Use ggplot to plot histograms
# Faceted plots of every numeric column, following
# https://www.r-bloggers.com/quick-plot-of-all-variables/
library(purrr)
library(tidyr)
library(ggplot2)
# Bring the `housing` object from the Python session into R and inspect it.
housing_r <- py$housing
str(housing_r)
# Histograms: keep the numeric columns, reshape them to long key/value form
# (pivot_longer() replaces the superseded gather()), and draw one facet per
# column.
housing_r %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~ key, scales = "free", ncol = 2) +
  geom_histogram(color = "green", fill = "blue")
# Density curves for the same numeric columns.
housing_r %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~ key, scales = "free", nrow = 2) +
  geom_density(color = "red")