Hi,
I am trying to scrape some data off a website that I am writing a university project on, I am trying to export the data to an xlsx file and either can't find the file or it is not writing to/creating the said file.
It is meant to look at each page on this website and collect data on each car, then move on to the next page.
import requests
import os
from bs4 import BeautifulSoup as BS
import xlsxwriter
page_number=0
page_no=str(page_number)
workbook.write.xlsx(df,"C:\Users\lukem\Desktop\datacollection", col_names = TRUE)
workbook = xlsxwriter.Workbook('data_collection')
worksheet = workbook.add_worksheet()
n = 1
car_entry=
agent = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
while page_number < 5:
n=n
page_number = page_number+1
page=requests.get(URL + page_no, headers=agent)
print(page_number)
soup =BS(page.text,'html.parser')
car_elements = soup.find_all('div', class_='product-card-content__car-info')
for element in car_elements:
m = 0
age_element = soup.find('div', class_='product-card-pricing__price')
age_t = age_element.text
worksheet.write(n,m,age_t)
m=m+1
name_element = soup.find('h3', class_='product-card-details__title')
name_t = name_element.text
worksheet.write(n,m,name_t)
m=m+1
key_spec_elements = soup.find('ul', class_='listing-key-specs')
spec_list = key_spec_elements.text.split()
b=1
while b < len(spec_list):
worksheet.write(n,m,spec_list[b])
b = b+1
car_entry.append([n,((spec_list,age_element,name_element))])
print(car_entry)
n = n + 1
n=n
workbook.close()
#car_entry= [n, (spec_list,age_el6ement,name_element)