我试图抓取一个表格,但 BeautifulSoup 只返回 60 个元素,而这个表格实际上要大得多。我尝试先滚动到页面底部(以便加载整个表格),然后再解析 HTML,但不起作用。
# Question script: scrape the Blue Nile round-cut diamond grid with Selenium
# and parse the rendered HTML with BeautifulSoup.
#
# NOTE: page_source is read immediately after the scroll, with no wait for the
# additional rows to load. This is the behaviour the question asks about, so it
# is intentionally left as the asker wrote it.

# Use a distinct name so the selenium `webdriver` module is not rebound.
driver = webdriver.Chrome()
driver.get('https://www.bluenile.com/diamonds/round-cut')

# Block (up to 10 s) until the results container is visible.
WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "#diamond-result"))
)

# Jump to the bottom of the page to trigger loading of further rows.
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
data = BeautifulSoup(driver.page_source, 'html5lib')

# One list per column of the results grid.
shape = []
price = []
carat = []
cut = []
color = []
clarity = []
date = []

# All cells are looked up inside the grid body only.
table = data.find('div', {'class': 'grid-body'})

data_shape = table.find_all('span', {'class': 'single-cell'})
for items in data_shape:
    shape.append(items.getText())

data_price = table.find_all('div', {'class': 'row-cell price'})
for items in data_price:
    price.append(items.getText())

data_carat = table.find_all('div', {'class': 'row-cell carat'})
for items in data_carat:
    carat.append(items.getText())

data_cut = table.find_all('div', {'class': 'row-cell cut'})
for items in data_cut:
    cut.append(items.getText())

data_color = table.find_all('div', {'class': 'row-cell color'})
for items in data_color:
    color.append(items.getText())

data_clarity = table.find_all('div', {'class': 'row-cell clarity'})
for items in data_clarity:
    clarity.append(items.getText())

data_date = table.find_all('div', {'class': 'row-cell date'})
for items in data_date:
    date.append(items.getText())

print(str(shape))
print(str(price))

发布于 2020-01-23 18:18:02
你的脚本本身没有问题。不过,当你把页面滚动到底部时,表格数据需要一些时间才能加载出来。你需要在滚动之后添加一些延迟(例如 time.sleep(5)),才能获取到更多记录。加上这个延迟之后,我获得了 510 条记录。
# Answer script: same scrape as the question, but pauses after scrolling so
# the additional rows have time to load before the HTML is captured.


def _cell_texts(container, tag, css_class):
    """Return the text of every `tag` under `container` whose class attribute is `css_class`."""
    return [cell.getText() for cell in container.find_all(tag, {'class': css_class})]


# Use a distinct name so the selenium `webdriver` module is not rebound.
driver = webdriver.Chrome()
try:
    driver.get('https://www.bluenile.com/diamonds/round-cut')

    # Block (up to 10 s) until the results container is visible.
    WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "#diamond-result"))
    )

    # Jump to the bottom of the page to trigger loading of further rows.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Added some delay: give the page time to load the extra rows before
    # the HTML snapshot is taken.
    time.sleep(5)
    page_html = driver.page_source
finally:
    # Always close the browser, even if the wait above times out.
    driver.quit()

data = BeautifulSoup(page_html, 'html5lib')

# All cells are looked up inside the grid body only.
table = data.find('div', {'class': 'grid-body'})

# One list per column of the results grid.
shape = _cell_texts(table, 'span', 'single-cell')
price = _cell_texts(table, 'div', 'row-cell price')
carat = _cell_texts(table, 'div', 'row-cell carat')
cut = _cell_texts(table, 'div', 'row-cell cut')
color = _cell_texts(table, 'div', 'row-cell color')
clarity = _cell_texts(table, 'div', 'row-cell clarity')
date = _cell_texts(table, 'div', 'row-cell date')

print(len(shape))
print(len(price))

https://stackoverflow.com/questions/59875156
复制相似问题