这是一个脚本,用于读取图书标题列表(BookTitles.txt),在Goodreads网站上搜索每个标题并取第一个搜索结果,然后把对应的URL逐行写入csv文件(GoodReadsBooksNew.csv)。
我得到的错误如下:
@iii:~$ python /home/iii/AudioBookReviews/WebScraping/GoodreadsScraper.py
Traceback (most recent call last):
  File "/home/iii/AudioBookReviews/WebScraping/GoodreadsScraper.py", line 72, in <module>
    create_csv_file()
  File "/home/iii/AudioBookReviews/WebScraping/GoodreadsScraper.py", line 29, in create_csv_file
    with open('/home/iii/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv', 'w+', encoding='utf-8') as csv_file:
TypeError: 'encoding' is an invalid keyword argument for this function
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
from pyvirtualdisplay import Display
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common import keys
import csv
import time
import json
class Book:
    """A searched book title paired with the Goodreads URL found for it.

    Iterating over an instance yields ``title`` and then ``url``, so
    ``list(book)`` is directly usable as a CSV row.
    """

    def __init__(self, title: str, url: str) -> None:
        self.title = title
        self.url = url

    def __iter__(self):
        """Yield the fields in CSV column order: title, then URL."""
        return iter([self.title, self.url])

    def __repr__(self) -> str:
        return f"{type(self).__name__}(title={self.title!r}, url={self.url!r})"
# Goodreads home page; init_selenium() opens it first when the browser starts.
url = 'https://www.goodreads.com/'
def create_csv_file(path='/home/iii/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv'):
    """Create (or truncate) the output CSV and write its header row.

    Requires Python 3: the ``encoding``/``newline`` keywords do not exist on
    Python 2's built-in ``open``.

    Args:
        path: Destination CSV file. Defaults to the scraper's output file.
    """
    header = ['Title', 'URL']
    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate '\n' end up with blank rows between records.
    # Plain 'w' is enough: the file is only written here, never read back.
    with open(path, 'w', encoding='utf-8', newline='') as csv_file:
        csv.writer(csv_file, delimiter=',').writerow(header)
def read_from_txt_file(path='/home/iii/AudioBookReviews/WebScraping/BookTitles.txt'):
    """Read the book titles, one per line, from a UTF-8 text file.

    Args:
        path: Text file holding one title per line. Defaults to the
            scraper's titles file.

    Returns:
        A list with one string per line, trailing newlines removed. Blank
        lines are kept as empty strings.
    """
    # The context manager closes the handle; the original left it open.
    with open(path, encoding='utf-8') as txt_file:
        return [line.rstrip('\n') for line in txt_file]
def init_selenium():
    """Start Chrome through chromedriver and open Goodreads.

    Binds the started browser to the module-level name ``driver``, which the
    other functions in this script use. Loads the page stored in the
    module-level ``url``, waits five seconds, then opens the search page.
    """
    global driver

    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # NOTE(review): the original also built a second Options object carrying
    # '--headless' but never passed it to the driver, so that flag never took
    # effect. The dead object is removed here; add '--headless' to
    # chrome_options if a headless browser was actually intended.

    driver = webdriver.Chrome(
        "/home/iii/AudioBookReviews/WebScraping/chromedriver",
        chrome_options=chrome_options,
    )
    driver.get(url)
    # Presumably gives the landing page time to settle before the first
    # search -- TODO confirm whether this fixed delay is still needed.
    time.sleep(5)
    driver.get('https://www.goodreads.com/search?q=')
def search_for_title(title):
    """Submit *title* to the Goodreads search form in the shared browser.

    Leaves the module-level ``driver`` on the results page. As a progress
    aid, the link of the first result row is printed when it can be located.

    Args:
        title: Book title to type into the search box.
    """
    # Imported locally so this function does not depend on a new file-level
    # import.
    from selenium.common.exceptions import NoSuchElementException

    driver.get('https://www.goodreads.com/search?q=')
    search_field = driver.find_element_by_name('q')
    search_field.clear()
    search_field.send_keys(title)
    # Pressing RETURN submits the search form.
    search_field.send_keys(keys.Keys.RETURN)

    # This absolute XPath breaks when a search has no results or the page
    # layout shifts. It only feeds the debug print, so a miss must not abort
    # the whole run.
    try:
        first_hit = driver.find_element_by_xpath(
            '/html/body/div[2]/div[3]/div[1]/div[2]/div[2]/table/tbody/tr[1]/td[2]/a')
    except NoSuchElementException:
        return
    print(first_hit.get_attribute('href'))
def scrape_url():
    """Return the link of the first book on the current results page.

    Reads the page currently loaded in the module-level ``driver``.

    Returns:
        The ``href`` of the first element matching ``a.bookTitle``, or the
        string ``"N/A"`` when Selenium cannot locate or read that element.
    """
    # Imported locally so this function does not depend on a new file-level
    # import.
    from selenium.common.exceptions import WebDriverException

    # Only Selenium failures fall back to "N/A". A bare ``except`` would also
    # hide KeyboardInterrupt and programming errors such as NameError.
    try:
        return driver.find_element_by_css_selector('a.bookTitle').get_attribute('href')
    except WebDriverException:
        return "N/A"
def write_into_csv_file(vendor, path='/home/iii/AudioBookReviews/WebScraping/GoodReadsBooksNew.csv'):
    """Append one record to the output CSV.

    Requires Python 3: the ``encoding``/``newline`` keywords do not exist on
    Python 2's built-in ``open``.

    Args:
        vendor: Any iterable of field values; it is expanded with ``list()``
            into a single CSV row.
        path: CSV file to append to. Defaults to the scraper's output file.
    """
    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate '\n' end up with blank rows between records.
    with open(path, 'a', encoding='utf-8', newline='') as csv_file:
        csv.writer(csv_file, delimiter=',').writerow(list(vendor))
# Script body: reset the CSV, load the titles, start the browser, then look up
# each title and append its first search result to the CSV.
create_csv_file()
titles = read_from_txt_file()
init_selenium()
try:
    for title in titles:
        # Blank lines in the titles file would only trigger an empty search.
        if not title.strip():
            continue
        search_for_title(title)
        # Named book_url so the module-level `url` (the Goodreads home page)
        # is not overwritten by per-book results.
        book_url = scrape_url()
        book = Book(title, book_url)
        write_into_csv_file(book)
finally:
    # Always shut the browser down, even when a lookup fails midway, so no
    # Chrome/chromedriver process is left running.
    driver.quit()
# --- end of the question's script; the answer below was posted 2019-11-24 06:28:17 ---
我认为您正在使用python 2.7版本。
open函数在python 2.7中具有以下签名
open(name[, mode[, buffering]])
另一方面,python 3+有以下签名
open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
也就是说,encoding参数只存在于python 3的open函数中,因此请改用python3来运行该脚本。
来源:https://stackoverflow.com/questions/59015018
复制相似问题